1 //=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
11 //===----------------------------------------------------------------------===//
// Single-operand i32 ComplexPatterns, named after the tile element sizes
// 8..128 plus one for ZT. Each one names the C++ selector ImmToReg<Reg, N>,
// instantiated with the register shown (ZAB0, ZAH0, ...) and an integer N.
// NOTE(review): N (0/1/3/7/15) presumably is the highest valid tile number for
// that element size -- confirm against the ImmToReg implementation.
13 def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAB0, 0>", []>;
14 def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAH0, 1>", []>;
15 def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAS0, 3>", []>;
16 def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAD0, 7>", []>;
17 def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAQ0, 15>", []>;
18 def imm_to_zt : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZT0, 0>", []>;
// Two-operand i32 ComplexPatterns for a tile-slice index, one per element
// size. The patterns in this file bind the two operands as a base index
// register and an immediate offset. Selection is done by the C++ template
// SelectSMETileSlice<A, B>.
// NOTE(review): A (15/7/3/1/0) presumably is the largest accepted offset and
// B (always 1 here) a scale/step -- confirm against SelectSMETileSlice.
20 def tileslice8 : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>;
21 def tileslice16 : ComplexPattern<i32 , 2, "SelectSMETileSlice<7, 1>", []>;
22 def tileslice32 : ComplexPattern<i32 , 2, "SelectSMETileSlice<3, 1>", []>;
23 def tileslice64 : ComplexPattern<i32 , 2, "SelectSMETileSlice<1, 1>", []>;
24 def tileslice128 : ComplexPattern<i32 , 2, "SelectSMETileSlice<0, 1>", []>; // nop
// Range variants of the tile-slice ComplexPatterns. They use the same
// SelectSMETileSlice<A, B> selector, but with B = 2 (the "...s2" defs) or
// B = 4 (the "...s4" defs).
// NOTE(review): presumably A is the largest first-slice offset and B the
// number of consecutive slices covered -- confirm against SelectSMETileSlice.
26 def tileslicerange3s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<14, 2>", []>;
27 def tileslicerange2s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<6, 2>", []>;
28 def tileslicerange1s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<2, 2>", []>;
29 def tileslicerange0s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 2>", []>;
31 def tileslicerange2s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<12, 4>", []>;
32 def tileslicerange1s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<4, 4>", []>;
33 def tileslicerange0s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 4>", []>;
// Pointer-typed (iPTR) addressing-mode ComplexPattern producing two operands,
// selected by SelectAddrModeIndexedSVE<0,15> and marked SDNPWantRoot.
// NOTE(review): <0,15> presumably is the accepted min/max index range --
// confirm against SelectAddrModeIndexedSVE.
35 def am_sme_indexed_b4 :ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>", [], [SDNPWantRoot]>;
// Type profile shared by the ZA load/store nodes: no results and three
// operands (integer, pointer, integer). The spill pattern later in this file
// binds them as slice index, base address and immediate.
37 def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>;
// SelectionDAG nodes for AArch64ISD::SME_ZA_LDR / SME_ZA_STR. Both are chained
// and have side effects; the load node is flagged may-load and the store node
// may-store.
38 def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore,
39 [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;
40 def AArch64SMEStr : SDNode<"AArch64ISD::SME_ZA_STR", SDTZALoadStore,
41 [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
43 //===----------------------------------------------------------------------===//
45 //===----------------------------------------------------------------------===//
47 def getSMEPseudoMap : InstrMapping {
48 let FilterClass = "SMEPseudo2Instr";
49 let RowFields = ["PseudoName"];
50 let ColFields = ["IsInstr"];
52 let ValueCols = [["1"]];
55 class SMEPseudo2Instr<string name, bit instr> {
56 string PseudoName = name;
60 class sme_outer_product_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
61 : Pseudo<(outs), (ins i32imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
62 zpr_ty:$zn, zpr_ty:$zm), []>,
64 // Translated to the actual instructions in AArch64ISelLowering.cpp
65 let SMEMatrixType = za_flag;
66 let usesCustomInserter = 1;
// Pseudo for a ZA-array operation with a multi-vector operand ($Zn) and a
// single-vector operand ($Zm), addressed by index register $Rv plus immediate
// $imm3. It has no outputs, passes 0 as the `instr` bit of SMEPseudo2Instr
// under `name`, records the ZA matrix type in SMEMatrixType and requests a
// custom inserter.
69 class sme2_za_array_2op_multi_single_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
70 ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
71 : SMEPseudo2Instr<name, 0>,
72 Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), []> {
73 let SMEMatrixType = za_flag;
74 let usesCustomInserter = 1;
// Pseudo for a ZA-array operation where both $Zn and $Zm are multi-vector
// operands of the same register-operand type. Addressed by $Rv plus $imm3; no
// outputs; passes 0 as the `instr` bit of SMEPseudo2Instr; uses a custom
// inserter.
77 class sme2_za_array_2op_multi_multi_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
78 SMEMatrixTypeEnum za_flag>
79 : SMEPseudo2Instr<name, 0>,
80 Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), []> {
81 let SMEMatrixType = za_flag;
82 let usesCustomInserter = 1;
// Pseudo for the indexed form: a multi-vector $Zn, a single vector $Zm and an
// extra immediate $i of type imm_ty. Addressed by $Rv plus $imm3; no outputs;
// passes 0 as the `instr` bit of SMEPseudo2Instr; uses a custom inserter.
85 class sme2_za_array_2op_multi_index_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
86 ZPRRegOp zpr_ty, Operand imm_ty, SMEMatrixTypeEnum za_flag>
87 : SMEPseudo2Instr<name, 0>,
88 Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, imm_ty:$i), []> {
89 let SMEMatrixType = za_flag;
90 let usesCustomInserter = 1;
// Pseudo taking a multi-vector source $Zn, addressed by index register $Rs
// (MatrixIndexGPR32Op8_11) plus immediate $imm. No outputs; passes 0 as the
// `instr` bit of SMEPseudo2Instr; uses a custom inserter.
93 class sme2_move_to_za_pseudo<string name, Operand imm_ty, RegisterOperand multi_vector_ty, SMEMatrixTypeEnum za_flag>
94 : SMEPseudo2Instr<name, 0>,
95 Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> {
96 let SMEMatrixType = za_flag;
97 let usesCustomInserter = 1;
// Tile variant of the move pseudo: adds a leading tile immediate ($tile) and
// takes its index register from MatrixIndexGPR32Op12_15 rather than Op8_11.
// No outputs; passes 0 as the `instr` bit of SMEPseudo2Instr; uses a custom
// inserter.
100 class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand multi_vector_ty, SMEMatrixTypeEnum za_flag>
101 : SMEPseudo2Instr<name, 0>,
102 Pseudo<(outs), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> {
103 let SMEMatrixType = za_flag;
104 let usesCustomInserter = 1;
107 //===----------------------------------------------------------------------===//
108 // SME pattern match helpers.
109 //===----------------------------------------------------------------------===//
// Selects `intrinsic(slice, Zn, Zm)` to the pseudo named `name`_PSEUDO.
// `tileslice` splits the i32 slice operand into $base and $offset, which are
// forwarded ahead of the two vector operands.
111 class SME2_ZA_TwoOp_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
112 ValueType vt, ComplexPattern tileslice>
113 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm),
114 (!cast<Instruction>(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm)>;
117 class SME2_ZA_TwoOp_VG2_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
118 ValueType vt, ComplexPattern tileslice>
119 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm),
120 (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
122 class SME2_ZA_TwoOp_VG4_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
123 ValueType vt, ComplexPattern tileslice>
124 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
125 vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm),
126 (!cast<Instruction>(name # _PSEUDO) $base, $offset,
127 (REG_SEQUENCE ZPR4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
// Selects `intrinsic(slice, Zn1, Zn2, Zm1, Zm2)` to `name`_PSEUDO. Each pair
// of vectors is packed into a ZPR2Mul2 REG_SEQUENCE (zsub0, zsub1) so the
// pseudo receives two multi-vector operands after $base and $offset.
130 class SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
131 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2),
132 (!cast<Instruction>(name # _PSEUDO) $base, $offset,
133 (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
134 (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>;
// Four-vector counterpart of the VG2 multi/multi pattern: the Zn1..Zn4 and
// Zm1..Zm4 groups are each packed into a ZPR4Mul4 REG_SEQUENCE
// (zsub0..zsub3) and passed to `name`_PSEUDO after $base and $offset.
136 class SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
137 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
138 vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4),
139 (!cast<Instruction>(name # _PSEUDO) $base, $offset,
140 (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
141 (REG_SEQUENCE ZPR4Mul4, vt:$Zm1, zsub0, vt:$Zm2, zsub1, vt:$Zm3, zsub2, vt:$Zm4, zsub3))>;
// Selects the indexed intrinsic `intrinsic(slice, Zn, Zm, i)` to
// `name`_PSEUDO, forwarding the i32 immediate $i as the last operand.
143 class SME2_ZA_TwoOp_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
144 Operand imm_ty, ComplexPattern tileslice>
145 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm, (i32 imm_ty:$i)),
146 (!cast<Instruction>(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm, (i32 imm_ty:$i))>;
// Indexed pattern for two-vector groups: Zn1/Zn2 are packed into a ZPR2Mul2
// REG_SEQUENCE, followed by the single vector $Zm and the immediate $i.
149 class SME2_ZA_TwoOp_VG2_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
150 Operand imm_ty, ComplexPattern tileslice>
151 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)),
152 (!cast<Instruction>(name # _PSEUDO) $base, $offset,
153 (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), zpr_ty:$Zm, imm_ty:$i)>;
// Indexed pattern for four-vector groups: Zn1..Zn4 are packed into a ZPR4Mul4
// REG_SEQUENCE, followed by the single vector $Zm and the immediate $i.
155 class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
156 Operand imm_ty, ComplexPattern tileslice>
157 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
158 vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)),
159 (!cast<Instruction>(name # _PSEUDO) $base, $offset,
160 (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
161 zpr_ty:$Zm, imm_ty:$i)>;
// Selects a value-returning intrinsic over two input vectors plus an i32
// immediate directly to the instruction `name` (no _PSEUDO suffix). The two
// inputs are packed into a ZPR2Mul2 REG_SEQUENCE.
163 class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
164 : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))),
165 (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>;
167 class SME2_Sat_Shift_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
168 : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4, (i32 imm_ty:$i))),
169 (!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3),
// Selects a value-returning intrinsic over four input vectors directly to the
// instruction `name`. The inputs are packed into a ZPR4Mul4 REG_SEQUENCE.
172 class SME2_Cvt_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
173 : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)),
174 (!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>;
// Selects `intrinsic(slice, Zn1, Zn2)` to `name`_PSEUDO: the slice is split
// into $base/$offset and the two vectors are packed as a ZPR2Mul2 sequence.
176 class SME2_ZA_VG1x2_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice>
177 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2),
178 (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
// Selects `intrinsic(slice, Zn1..Zn4)` to `name`_PSEUDO: the slice is split
// into $base/$offset and the four vectors are packed as a ZPR4Mul4 sequence.
180 class SME2_ZA_VG1x4_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice>
181 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
182 (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
// Tile form of the two-vector pattern: the intrinsic carries a leading tile
// immediate and the slice base comes from MatrixIndexGPR32Op12_15. $tile,
// $base and $offset are forwarded ahead of the ZPR2Mul2 sequence.
184 class SME2_Tile_VG2_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice>
185 : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2),
186 (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
// Tile form of the four-vector pattern: the intrinsic carries a leading tile
// immediate and the slice base comes from MatrixIndexGPR32Op12_15. $tile,
// $base and $offset are forwarded ahead of the ZPR4Mul4 sequence.
188 class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice>
189 : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
190 (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
192 //===----------------------------------------------------------------------===//
193 // SME pattern match helpers.
194 //===----------------------------------------------------------------------===//
// Selects `intrinsic(tile, Pn, Pm, Zn, Zm)` (tile immediate, two predicates of
// type pg_ty, two vectors of type vt) to `name`_PSEUDO with the operands in
// the same order.
196 class SME_ZA_Tile_TwoPred_TwoVec_Pat<string name, SDPatternOperator intrinsic, Operand imm_ty, ValueType pg_ty, ValueType vt>
197 : Pat<(intrinsic imm_ty:$tile, (pg_ty PPR3bAny:$Pn), (pg_ty PPR3bAny:$Pm), vt:$Zn, vt:$Zm),
198 (!cast<Instruction>(name # _PSEUDO) $tile, $Pn, $Pm, $Zn, $Zm)>;
201 //===----------------------------------------------------------------------===//
202 // SME smstart/smstop
203 //===----------------------------------------------------------------------===//
205 // SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or
208 // MSR SVCRSM, #<imm1>
209 // MSR SVCRZA, #<imm1>
210 // MSR SVCRSMZA, #<imm1>
212 // It's tricky to use the existing pstate operand defined in
213 // AArch64SystemOperands.td since it only encodes 5 bits including op1;op2,
214 // when these fields are also encoded in CRm[3:1].
215 def MSRpstatesvcrImm1
216 : PstateWriteSimple<(ins svcr_op:$pstatefield, timm0_1:$imm), "msr",
217 "\t$pstatefield, $imm">,
221 let Inst{18-16} = 0b011; // op1
222 let Inst{11-9} = pstatefield;
224 let Inst{7-5} = 0b011; // op2
225 let hasPostISelHook = 1;
// Assembly aliases for MSRpstatesvcrImm1. The first operand is the pstate
// field (0b001 for the "sm" form, 0b010 for the "za" form, 0b011 when neither
// is named) and the second is the immediate: 1 for smstart, 0 for smstop.
228 def : InstAlias<"smstart", (MSRpstatesvcrImm1 0b011, 0b1)>;
229 def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>;
230 def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>;
232 def : InstAlias<"smstop", (MSRpstatesvcrImm1 0b011, 0b0)>;
233 def : InstAlias<"smstop sm", (MSRpstatesvcrImm1 0b001, 0b0)>;
234 def : InstAlias<"smstop za", (MSRpstatesvcrImm1 0b010, 0b0)>;
237 //===----------------------------------------------------------------------===//
238 // SME Outer Products
239 //===----------------------------------------------------------------------===//
241 class sme_fp_outer_product_inst<bit S, bits<2> sz, bits<2> op, MatrixTileOperand za_ty,
242 ZPRRegOp zpr_ty, string mnemonic>
243 : I<(outs za_ty:$ZAda),
244 (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
245 mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
252 let Inst{31-25} = 0b1000000;
253 let Inst{24} = op{1};
255 let Inst{22-21} = sz;
256 let Inst{20-16} = Zm;
257 let Inst{15-13} = Pm;
258 let Inst{12-10} = Pn;
263 let Constraints = "$ZAda = $_ZAda";
266 multiclass sme_outer_product_fp32<bit S, bits<2> sz, ZPRRegOp zpr_ty, string mnemonic, SDPatternOperator op> {
267 def NAME : sme_fp_outer_product_inst<S, sz, 0b00, TileOp32, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1> {
269 let Inst{1-0} = ZAda;
273 def NAME # _PSEUDO : sme_outer_product_pseudo<zpr_ty, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
275 def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv4i1, nxv4f32>;
278 multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
279 def NAME : sme_fp_outer_product_inst<S, 0b10, 0b00, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr<NAME, 1> {
281 let Inst{2-0} = ZAda;
284 def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;
286 def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv2i1, nxv2f64>;
289 multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s, bits<2> op, ZPRRegOp zpr_ty>{
290 def NAME : sme_fp_outer_product_inst<s, {0,bf}, op, TileOp16, zpr_ty, mnemonic> {
292 let Inst{2-1} = 0b00;
297 class sme_int_outer_product_inst<bits<3> opc, bit sz, bit sme2,
298 MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
300 : I<(outs za_ty:$ZAda),
301 (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
302 mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
309 let Inst{31-25} = 0b1010000;
310 let Inst{24} = opc{2}; // u0
313 let Inst{21} = opc{1}; // u1
314 let Inst{20-16} = Zm;
315 let Inst{15-13} = Pm;
316 let Inst{12-10} = Pn;
318 let Inst{4} = opc{0}; //S;
321 let Constraints = "$ZAda = $_ZAda";
324 multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
325 SDPatternOperator op> {
326 def NAME : sme_int_outer_product_inst<opc, 0b0, 0b0, TileOp32,
327 ZPR8, mnemonic>, SMEPseudo2Instr<NAME, 1> {
329 let Inst{1-0} = ZAda;
333 def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
335 def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv16i1, nxv16i8>;
338 multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
339 SDPatternOperator op> {
340 def NAME : sme_int_outer_product_inst<opc, 0b1, 0b0, TileOp64,
341 ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> {
343 let Inst{2-0} = ZAda;
346 def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;
348 def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv8i1, nxv8i16>;
351 class sme_outer_product_widening_inst<bits<3> opc, ZPRRegOp zpr_ty, string mnemonic>
352 : I<(outs TileOp32:$ZAda),
353 (ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
354 mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
362 let Inst{31-25} = 0b1000000;
363 let Inst{24} = !if(opc{2}, 0, 1);
364 let Inst{23-22} = 0b10;
365 let Inst{21} = opc{1};
366 let Inst{20-16} = Zm;
367 let Inst{15-13} = Pm;
368 let Inst{12-10} = Pn;
370 let Inst{4} = opc{0};
371 let Inst{3} = opc{2};
373 let Inst{1-0} = ZAda;
375 let Constraints = "$ZAda = $_ZAda";
378 multiclass sme_bf16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
379 def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1>;
381 def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
383 def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8bf16>;
386 multiclass sme_f16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
387 def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1>;
389 def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
391 def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8f16>;
394 //===----------------------------------------------------------------------===//
395 // SME Add Vector to Tile
396 //===----------------------------------------------------------------------===//
398 class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
399 ZPRRegOp zpr_ty, string mnemonic>
400 : I<(outs tile_ty:$ZAda),
401 (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
402 mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
407 let Inst{31-23} = 0b110000001;
409 let Inst{21-17} = 0b01000;
411 let Inst{15-13} = Pm;
412 let Inst{12-10} = Pn;
414 let Inst{4-3} = 0b00;
416 let Constraints = "$ZAda = $_ZAda";
419 class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
421 (ins i32imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
423 // Translated to the actual instructions in AArch64ISelLowering.cpp
424 let SMEMatrixType = za_flag;
425 let usesCustomInserter = 1;
428 multiclass sme_add_vector_to_tile_u32<bit V, string mnemonic, SDPatternOperator op> {
429 def NAME : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic>, SMEPseudo2Instr<NAME, 1> {
432 let Inst{1-0} = ZAda;
435 def _PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
437 def : Pat<(op timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
438 (nxv4i32 ZPR32:$zn)),
439 (!cast<Instruction>(NAME # _PSEUDO_S) timm32_0_3:$tile, $pn, $pm, $zn)>;
442 multiclass sme_add_vector_to_tile_u64<bit V, string mnemonic, SDPatternOperator op> {
443 def NAME : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr<NAME, 1> {
445 let Inst{2-0} = ZAda;
448 def _PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;
450 let Predicates = [HasSMEI16I64] in {
451 def : Pat<(op timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
452 (nxv2i64 ZPR64:$zn)),
453 (!cast<Instruction>(NAME # _PSEUDO_D) timm32_0_7:$tile, $pn, $pm, $zn)>;
457 //===----------------------------------------------------------------------===//
458 // SME Contiguous Loads
459 //===----------------------------------------------------------------------===//
461 class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
462 string mnemonic, string argstr>
463 : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
468 let Inst{31-25} = 0b1110000;
470 let Inst{23-22} = msz;
472 let Inst{20-16} = Rm;
474 let Inst{14-13} = Rv;
475 let Inst{12-10} = Pg;
482 class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
483 MatrixTileVectorOperand tile_ty, bit is_col,
484 Operand imm_ty, RegisterOperand gpr_ty>
485 : sme_mem_ld_ss_base<
486 Q, is_col, msz, (outs tile_ty:$ZAt),
487 (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
489 mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
// Assembly aliases for one scalar+scalar tile load/store instruction `inst`.
// Three spellings are accepted: the tile slice without braces and with an
// explicit $Rm; the braced form with $Rm omitted (XZR substituted); and the
// brace-less form with $Rm omitted. `pg_suffix` is appended after $Pg (the
// load aliases in this file pass "/z").
// NOTE(review): the trailing 0/1 is the InstAlias emit argument, so presumably
// only the braced [$Rn] alias is used when printing -- confirm.
491 multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
492 MatrixTileVectorOperand tile_ty,
493 Operand imm_ty, RegisterOperand gpr_ty,
494 string pg_suffix=""> {
495 def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
496 (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
497 // Default XZR offset aliases
498 def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
499 (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
500 def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
501 (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
// Instantiates sme_mem_ss_aliases_base once per element size. For each of the
// b/h/w/d/q mnemonic suffixes it looks up the instruction `inst` # _B/_H/_S/
// _D/_Q, picks the V or H tile-vector operand according to is_col, and pairs
// it with the matching slice-index operand and shifted GPR64 offset operand.
504 multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
505 string pg_suffix=""> {
506 defm : sme_mem_ss_aliases_base<mnemonic # "b", !cast<Instruction>(inst # _B),
507 !if(is_col, TileVectorOpV8, TileVectorOpH8),
508 sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
509 defm : sme_mem_ss_aliases_base<mnemonic # "h", !cast<Instruction>(inst # _H),
510 !if(is_col, TileVectorOpV16, TileVectorOpH16),
511 sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
512 defm : sme_mem_ss_aliases_base<mnemonic # "w", !cast<Instruction>(inst # _S),
513 !if(is_col, TileVectorOpV32, TileVectorOpH32),
514 sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
515 defm : sme_mem_ss_aliases_base<mnemonic # "d", !cast<Instruction>(inst # _D),
516 !if(is_col, TileVectorOpV64, TileVectorOpH64),
517 sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
518 defm : sme_mem_ss_aliases_base<mnemonic # "q", !cast<Instruction>(inst # _Q),
519 !if(is_col, TileVectorOpV128, TileVectorOpH128),
520 sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
// Load aliases: the shared alias set with mnemonic prefix "ld1" and "/z"
// appended to the governing predicate.
523 multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
524 defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
527 multiclass sme_mem_ld_ss_patterns<Instruction Inst, SDPatternOperator Load,
528 Operand tile_ty, Operand offset_ty,
530 ComplexPattern tileslice> {
532 def : Pat<(Load PPR3bAny:$pg, GPR64sp:$base, tile_ty:$tile,
533 (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
534 (Inst tile_ty:$tile, $idx, $imm, $pg, $base, XZR)>;
536 // reg + reg, tileslice
537 let AddedComplexity = 1 in {
538 def : Pat<(Load PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
539 tile_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
541 (Inst tile_ty:$tile, $idx, $imm, $pg, $base, $offset)>;
545 class sme_load_pseudo
546 : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx,
547 i32imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>,
549 // Translated to the actual instructions in AArch64ISelLowering.cpp
550 let usesCustomInserter = 1;
554 multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
555 def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
556 !if(is_col, TileVectorOpV8, TileVectorOpH8),
557 is_col, sme_elm_idx0_15, GPR64shifted8> {
561 def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
562 !if(is_col, TileVectorOpV16, TileVectorOpH16),
563 is_col, sme_elm_idx0_7, GPR64shifted16> {
569 def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
570 !if(is_col, TileVectorOpV32, TileVectorOpH32),
571 is_col, sme_elm_idx0_3, GPR64shifted32> {
577 def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
578 !if(is_col, TileVectorOpV64, TileVectorOpH64),
579 is_col, sme_elm_idx0_1, GPR64shifted64> {
585 def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
586 !if(is_col, TileVectorOpV128, TileVectorOpH128),
587 is_col, sme_elm_idx0_0, GPR64shifted128> {
592 defm : sme_mem_ld_ss_aliases<NAME, is_col>;
594 // Pseudo instructions for lowering intrinsics, using immediates instead of
596 def _PSEUDO_B : sme_load_pseudo;
597 def _PSEUDO_H : sme_load_pseudo;
598 def _PSEUDO_S : sme_load_pseudo;
599 def _PSEUDO_D : sme_load_pseudo;
600 def _PSEUDO_Q : sme_load_pseudo;
602 defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
603 !if(is_col, int_aarch64_sme_ld1b_vert,
604 int_aarch64_sme_ld1b_horiz),
605 sme_elm_idx0_0, timm32_0_15, am_sve_regreg_lsl0,
607 defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
608 !if(is_col, int_aarch64_sme_ld1h_vert,
609 int_aarch64_sme_ld1h_horiz),
610 timm32_0_1, timm32_0_7, am_sve_regreg_lsl1,
612 defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
613 !if(is_col, int_aarch64_sme_ld1w_vert,
614 int_aarch64_sme_ld1w_horiz),
615 timm32_0_3, timm32_0_3, am_sve_regreg_lsl2,
617 defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
618 !if(is_col, int_aarch64_sme_ld1d_vert,
619 int_aarch64_sme_ld1d_horiz),
620 timm32_0_7, timm32_0_1, am_sve_regreg_lsl3,
622 defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
623 !if(is_col, int_aarch64_sme_ld1q_vert,
624 int_aarch64_sme_ld1q_horiz),
625 timm32_0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
// Top-level load multiclass: expands sme_mem_ld_v_ss twice, as _H with
// is_col = 0 and as _V with is_col = 1.
629 multiclass sme_mem_ld_ss<string mnemonic> {
630 defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
631 defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
634 //===----------------------------------------------------------------------===//
635 // SME Contiguous Stores
636 //===----------------------------------------------------------------------===//
638 class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
639 string mnemonic, string argstr>
640 : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
645 let Inst{31-25} = 0b1110000;
647 let Inst{23-22} = msz;
649 let Inst{20-16} = Rm;
651 let Inst{14-13} = Rv;
652 let Inst{12-10} = Pg;
657 let hasSideEffects = 1;
660 class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
661 MatrixTileVectorOperand tile_ty, bit is_col,
662 Operand imm_ty, RegisterOperand gpr_ty>
663 : sme_mem_st_ss_base<
665 (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
666 GPR64sp:$Rn, gpr_ty:$Rm),
667 mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
// Store aliases: the shared alias set with mnemonic prefix "st1" and the
// default (empty) predicate suffix.
669 multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
670 defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
673 multiclass sme_mem_st_ss_patterns<Instruction Inst, SDPatternOperator Store,
675 ComplexPattern imm2tile,
677 ComplexPattern tileslice> {
679 def : Pat<(Store PPR3bAny:$pg, GPR64sp:$base, (imm2tile untyped:$tile),
680 (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
681 (Inst $tile, $idx, $imm, $pg, $base, XZR)>;
683 // reg + reg, tileslice
684 let AddedComplexity = 1 in {
685 def : Pat<(Store PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
686 (imm2tile untyped:$tile),
687 (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
688 (Inst $tile, $idx, $imm, $pg, $base, $offset)>;
692 multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
693 def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
694 !if(is_col, TileVectorOpV8, TileVectorOpH8),
695 is_col, sme_elm_idx0_15, GPR64shifted8> {
699 def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
700 !if(is_col, TileVectorOpV16, TileVectorOpH16),
701 is_col, sme_elm_idx0_7, GPR64shifted16> {
707 def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
708 !if(is_col, TileVectorOpV32, TileVectorOpH32),
709 is_col, sme_elm_idx0_3, GPR64shifted32> {
715 def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
716 !if(is_col, TileVectorOpV64, TileVectorOpH64),
717 is_col, sme_elm_idx0_1, GPR64shifted64> {
723 def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
724 !if(is_col, TileVectorOpV128, TileVectorOpH128),
725 is_col, sme_elm_idx0_0, GPR64shifted128> {
730 defm : sme_mem_st_ss_aliases<NAME, is_col>;
732 defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _B),
733 !if(is_col, int_aarch64_sme_st1b_vert,
734 int_aarch64_sme_st1b_horiz),
735 timm32_0_15, imm_to_tile8, am_sve_regreg_lsl0,
737 defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _H),
738 !if(is_col, int_aarch64_sme_st1h_vert,
739 int_aarch64_sme_st1h_horiz),
740 timm32_0_7, imm_to_tile16, am_sve_regreg_lsl1,
742 defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _S),
743 !if(is_col, int_aarch64_sme_st1w_vert,
744 int_aarch64_sme_st1w_horiz),
745 timm32_0_3, imm_to_tile32, am_sve_regreg_lsl2,
747 defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _D),
748 !if(is_col, int_aarch64_sme_st1d_vert,
749 int_aarch64_sme_st1d_horiz),
750 timm32_0_1, imm_to_tile64, am_sve_regreg_lsl3,
752 defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _Q),
753 !if(is_col, int_aarch64_sme_st1q_vert,
754 int_aarch64_sme_st1q_horiz),
755 sme_elm_idx0_0, imm_to_tile128,
756 am_sve_regreg_lsl4, tileslice128>;
// Top-level store multiclass: expands sme_mem_st_v_ss twice, as _H with
// is_col = 0 and as _V with is_col = 1.
759 multiclass sme_mem_st_ss<string mnemonic> {
760 defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
761 defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
764 //===----------------------------------------------------------------------===//
765 // SME Save and Restore Array
766 //===----------------------------------------------------------------------===//
768 class sme_spill_fill_base<bit isStore, dag outs, dag ins, string opcodestr>
769 : I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "",
775 let Inst{31-22} = 0b1110000100;
776 let Inst{21} = isStore;
777 let Inst{20-15} = 0b000000;
778 let Inst{14-13} = Rv;
779 let Inst{12-10} = 0b000;
782 let Inst{3-0} = imm4;
786 class sme_spill_inst<string opcodestr>
787 : sme_spill_fill_base<0b1, (outs),
788 (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
789 sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
793 class sme_fill_inst<string opcodestr>
794 : sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt),
795 (ins MatrixIndexGPR32Op12_15:$Rv,
796 sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
799 multiclass sme_spill<string opcodestr> {
800 def NAME : sme_spill_inst<opcodestr>;
801 def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
802 (!cast<Instruction>(NAME) MatrixOp:$ZAt,
803 MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
805 def : Pat<(AArch64SMEStr (i32 MatrixIndexGPR32Op12_15:$slice), (i64 GPR64sp:$base), (i32 sme_elm_idx0_15:$imm)),
806 (!cast<Instruction>(NAME) ZA, MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base, imm32_0_15:$imm)>;
809 multiclass sme_fill<string opcodestr> {
810 def NAME : sme_fill_inst<opcodestr>;
811 def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
812 (!cast<Instruction>(NAME) MatrixOp:$ZAt,
813 MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
816 (ins MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm4,
819 // Translated to actual instruction in AArch64ISelLowering.cpp
820 let usesCustomInserter = 1;
823 def : Pat<(AArch64SMELdr MatrixIndexGPR32Op12_15:$slice, GPR64sp:$base, sme_elm_idx0_15:$imm),
824 (!cast<Instruction>(NAME # _PSEUDO) MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base)>;
827 //===----------------------------------------------------------------------===//
829 //===----------------------------------------------------------------------===//
831 class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
832 string mnemonic, string argstr>
833 : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
837 let Inst{31-24} = 0b11000000;
838 let Inst{23-22} = sz;
839 let Inst{21-17} = 0b00000;
842 let Inst{14-13} = Rv;
843 let Inst{12-10} = Pg;
848 class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty,
849 bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
851 : sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd),
852 (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
853 mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">{
855 let Constraints = "$ZAd = $_ZAd";
// Adds a "mov" assembly alias for a vector-to-tile instruction `inst`, using
// the same operand order: tile slice, index register, immediate, governing
// predicate, source vector. The alias is declared with emit argument 1.
859 multiclass sme_vector_to_tile_aliases<Instruction inst,
860 MatrixTileVectorOperand tile_ty,
861 ZPRRegOp zpr_ty, Operand imm_ty> {
862 def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
863 (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
// Selection pattern for the vector-to-tile intrinsics: matches `op` applied to
// a tile immediate, a slice expression decomposed by the `tileslice`
// ComplexPattern, a predicate of type ppr_vt and a vector of type zpr_vt, and
// emits `inst` with (tile, idx, imm, pg, zn).
866 multiclass sme_vector_to_tile_patterns<Instruction inst, ValueType zpr_vt,
867 ValueType ppr_vt, Operand imm_ty,
869 SDPatternOperator op,
870 ComplexPattern tileslice> {
871 def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
873 (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
874 (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>;
// Pseudo instruction for inserting a vector into a tile. It has no outputs and
// takes the tile and the offset as plain i32 immediates, together with the
// index register, predicate and source vector. za_flag is recorded in
// SMEMatrixType; usesCustomInserter requests expansion by a custom inserter.
877 class sme_mova_insert_pseudo<SMEMatrixTypeEnum za_flag>
878 : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx,
879 i32imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>,
881 // Translated to the actual instructions in AArch64ISelLowering.cpp
882 let SMEMatrixType = za_flag;
883 let usesCustomInserter = 1;
// Instantiates every element-size variant of the vector-to-tile instruction
// for one direction. is_col chooses between the vertical and horizontal
// intrinsics below and selects the TileVectorOpV* operand classes when set.
// Produces: _B/_H/_S/_D/_Q instructions, matching _PSEUDO_* pseudos, "mov"
// aliases, and intrinsic selection patterns that target the pseudos.
886 multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
// Real instructions. Each is registered with SMEPseudo2Instr<..., 1>; the
// pseudos further down use the same key with 0.
887 def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8,
889 is_col, sme_elm_idx0_15, ZPR8, mnemonic>,
890 SMEPseudo2Instr<NAME # _B, 1> {
894 def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16,
896 is_col, sme_elm_idx0_7, ZPR16, mnemonic>,
897 SMEPseudo2Instr<NAME # _H, 1> {
903 def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32,
905 is_col, sme_elm_idx0_3, ZPR32, mnemonic>,
906 SMEPseudo2Instr<NAME # _S, 1> {
912 def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64,
914 is_col, sme_elm_idx0_1, ZPR64, mnemonic>,
915 SMEPseudo2Instr<NAME # _D, 1> {
// The 128-bit form is the only one instantiated with Q = 1; it shares
// sz = 0b11 with _D.
921 def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128,
923 is_col, sme_elm_idx0_0, ZPR128, mnemonic>,
924 SMEPseudo2Instr<NAME # _Q, 1> {
930 // Pseudo instructions for lowering intrinsics, using immediates instead of
932 def _PSEUDO_B : sme_mova_insert_pseudo<SMEMatrixTileB>, SMEPseudo2Instr<NAME # _B, 0>;
933 def _PSEUDO_H : sme_mova_insert_pseudo<SMEMatrixTileH>, SMEPseudo2Instr<NAME # _H, 0>;
934 def _PSEUDO_S : sme_mova_insert_pseudo<SMEMatrixTileS>, SMEPseudo2Instr<NAME # _S, 0>;
935 def _PSEUDO_D : sme_mova_insert_pseudo<SMEMatrixTileD>, SMEPseudo2Instr<NAME # _D, 0>;
936 def _PSEUDO_Q : sme_mova_insert_pseudo<SMEMatrixTileQ>, SMEPseudo2Instr<NAME # _Q, 0>;
// "mov" aliases, one per element size, on the real instructions.
938 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
939 !if(is_col, TileVectorOpV8,
941 ZPR8, sme_elm_idx0_15>;
942 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
943 !if(is_col, TileVectorOpV16,
945 ZPR16, sme_elm_idx0_7>;
946 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
947 !if(is_col, TileVectorOpV32,
949 ZPR32, sme_elm_idx0_3>;
950 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
951 !if(is_col, TileVectorOpV64,
953 ZPR64, sme_elm_idx0_1>;
954 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q),
955 !if(is_col, TileVectorOpV128,
957 ZPR128, sme_elm_idx0_0>;
// Intrinsic used for the 8/16/32/64-bit element patterns.
959 defvar op = !if(is_col, int_aarch64_sme_write_vert,
960 int_aarch64_sme_write_horiz);
// In each pattern the first sme_elm_idx operand is the tile immediate type and
// the second is the slice-offset immediate type; their ranges move in opposite
// directions as the element size grows.
962 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
963 nxv16i8, nxv16i1, sme_elm_idx0_0, sme_elm_idx0_15,
965 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
966 nxv8i16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
968 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
969 nxv8f16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
971 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
972 nxv8bf16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
974 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
975 nxv4i32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3,
977 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
978 nxv4f32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3,
980 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
981 nxv2i64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1,
983 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
984 nxv2f64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1,
// Intrinsic used for the 128-bit patterns. Every vector type below maps to the
// same _PSEUDO_Q with tileslice128.
987 defvar opq = !if(is_col, int_aarch64_sme_writeq_vert,
988 int_aarch64_sme_writeq_horiz);
990 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
991 nxv16i8, nxv16i1, sme_elm_idx0_15,
992 sme_elm_idx0_0, opq, tileslice128>;
993 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
994 nxv8i16, nxv8i1, sme_elm_idx0_15,
995 sme_elm_idx0_0, opq, tileslice128>;
996 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
997 nxv8f16, nxv8i1, sme_elm_idx0_15,
998 sme_elm_idx0_0, opq, tileslice128>;
999 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1000 nxv8bf16, nxv8i1, sme_elm_idx0_15,
1001 sme_elm_idx0_0, opq, tileslice128>;
1002 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1003 nxv4i32, nxv4i1, sme_elm_idx0_15,
1004 sme_elm_idx0_0, opq, tileslice128>;
1005 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1006 nxv4f32, nxv4i1, sme_elm_idx0_15,
1007 sme_elm_idx0_0, opq, tileslice128>;
1008 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1009 nxv2i64, nxv2i1, sme_elm_idx0_15,
1010 sme_elm_idx0_0, opq, tileslice128>;
1011 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1012 nxv2f64, nxv2i1, sme_elm_idx0_15,
1013 sme_elm_idx0_0, opq, tileslice128>;
// Top-level entry point: instantiates sme_vector_v_to_tile twice, as _H with
// is_col = 0 and as _V with is_col = 1.
1016 multiclass sme_vector_to_tile<string mnemonic> {
1017 defm _H : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>;
1018 defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>;
// Base encoding class for the tile-to-vector instructions. The visible lines
// fix Inst{31-24} and Inst{21-17} (0b00001 here) and place sz, Rv and Pg in
// the encoding. Q and V are template parameters; the lines that encode them
// are not visible in this excerpt.
1021 class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
1022 string mnemonic, string argstr>
1023 : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
1027 let Inst{31-24} = 0b11000000;
1028 let Inst{23-22} = sz;
1029 let Inst{21-17} = 0b00001;
1032 let Inst{14-13} = Rv;
1033 let Inst{12-10} = Pg;
// Concrete tile-to-vector instruction form. is_col is forwarded as the V
// argument of sme_tile_to_vector_base. Operands are the destination vector
// ($_Zd, tied to the output $Zd), predicate $Pg (printed as "$Pg/m"), source
// tile $ZAn, slice index register $Rv and immediate $imm.
1038 class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
1039 MatrixTileVectorOperand tile_ty,
1040 bit is_col, Operand imm_ty, string mnemonic>
1041 : sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd),
1042 (ins zpr_ty:$_Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
1043 mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]"> {
// The destination vector is also an input, so the output is tied to it.
1045 let Constraints = "$Zd = $_Zd";
// Adds a "mov" assembler alias for `inst`, using the same operand list and
// operand syntax as the tile-to-vector instruction it aliases.
1048 multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
1049 MatrixTileVectorOperand tile_ty,
1051 def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]",
1052 (inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>;
// Selection patterns for the tile-to-vector intrinsics. The tile operand is
// matched through the `imm2tile` ComplexPattern. Two patterns are emitted:
//  * a bare index register, selected with an immediate offset of 0;
//  * at AddedComplexity = 1, an index expression split by `tileslice` into
//    (idx, imm), so the offset goes into the instruction's immediate.
1055 multiclass sme_tile_to_vector_patterns<Instruction inst, ValueType zpr_vt,
1056 ValueType ppr_vt, Operand offset_ty,
1057 ComplexPattern imm2tile,
1058 ComplexPattern tileslice,
1059 SDPatternOperator op> {
1060 def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
1061 (imm2tile untyped:$tile), MatrixIndexGPR32Op12_15:$idx)),
1062 (inst $passthru, $pg, $tile, $idx, 0)>;
1063 let AddedComplexity = 1 in {
1064 def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
1065 (imm2tile untyped:$tile),
1066 (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
1068 (inst $passthru, $pg, $tile, $idx, $imm)>;
// Instantiates every element-size variant of the tile-to-vector instruction
// for one direction. is_col selects TileVectorOpV* over TileVectorOpH* operand
// classes and the vertical over the horizontal read intrinsics.
// Produces: _B/_H/_S/_D/_Q instructions, "mov" aliases, and intrinsic
// selection patterns that target the real instructions directly.
1072 multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
// Per-size encodings: the visible lines show how the tile number (ZAn) and the
// slice immediate occupy the low Inst bits for each element size.
1073 def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8,
1075 is_col, sme_elm_idx0_15, mnemonic> {
1077 let Inst{8-5} = imm;
1079 def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16,
1081 is_col, sme_elm_idx0_7, mnemonic> {
1085 let Inst{7-5} = imm;
1087 def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32,
1089 is_col, sme_elm_idx0_3, mnemonic> {
1092 let Inst{8-7} = ZAn;
1093 let Inst{6-5} = imm;
1095 def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64,
1097 is_col, sme_elm_idx0_1, mnemonic> {
1100 let Inst{8-6} = ZAn;
// The 128-bit form is the only one instantiated with Q = 1; ZAn takes all of
// Inst{8-5}.
1103 def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128,
1105 is_col, sme_elm_idx0_0, mnemonic> {
1107 let Inst{8-5} = ZAn;
// "mov" aliases, one per element size.
1110 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
1111 !if(is_col, TileVectorOpV8,
1112 TileVectorOpH8), sme_elm_idx0_15>;
1113 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
1114 !if(is_col, TileVectorOpV16,
1115 TileVectorOpH16), sme_elm_idx0_7>;
1116 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
1117 !if(is_col, TileVectorOpV32,
1118 TileVectorOpH32), sme_elm_idx0_3>;
1119 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
1120 !if(is_col, TileVectorOpV64,
1121 TileVectorOpH64), sme_elm_idx0_1>;
1122 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
1123 !if(is_col, TileVectorOpV128,
1124 TileVectorOpH128), sme_elm_idx0_0>;
// Intrinsic used for the 8/16/32/64-bit element patterns.
1126 defvar op = !if(is_col, int_aarch64_sme_read_vert,
1127 int_aarch64_sme_read_horiz);
// One pattern set per (vector type, predicate type); the imm_to_tileN and
// tilesliceN ComplexPatterns are chosen to match the element size.
1129 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B),
1130 nxv16i8, nxv16i1, sme_elm_idx0_15,
1131 imm_to_tile8, tileslice8, op>;
1132 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
1133 nxv8i16, nxv8i1, sme_elm_idx0_7,
1134 imm_to_tile16, tileslice16, op>;
1135 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
1136 nxv8f16, nxv8i1, sme_elm_idx0_7,
1137 imm_to_tile16, tileslice16, op>;
1138 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
1139 nxv8bf16, nxv8i1, sme_elm_idx0_7,
1140 imm_to_tile16, tileslice16, op>;
1141 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
1142 nxv4i32, nxv4i1, sme_elm_idx0_3,
1143 imm_to_tile32, tileslice32, op>;
1144 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
1145 nxv4f32, nxv4i1, sme_elm_idx0_3,
1146 imm_to_tile32, tileslice32, op>;
1147 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
1148 nxv2i64, nxv2i1, sme_elm_idx0_1,
1149 imm_to_tile64, tileslice64, op>;
1150 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
1151 nxv2f64, nxv2i1, sme_elm_idx0_1,
1152 imm_to_tile64, tileslice64, op>;
// Intrinsic used for the 128-bit patterns. Every vector type below maps to the
// same _Q instruction with imm_to_tile128 and tileslice128.
1154 defvar opq = !if(is_col, int_aarch64_sme_readq_vert,
1155 int_aarch64_sme_readq_horiz);
1157 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1158 nxv16i8, nxv16i1, sme_elm_idx0_0,
1159 imm_to_tile128, tileslice128, opq>;
1160 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1161 nxv8i16, nxv8i1, sme_elm_idx0_0,
1162 imm_to_tile128, tileslice128, opq>;
1163 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1164 nxv8f16, nxv8i1, sme_elm_idx0_0,
1165 imm_to_tile128, tileslice128, opq>;
1166 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1167 nxv8bf16, nxv8i1, sme_elm_idx0_0,
1168 imm_to_tile128, tileslice128, opq>;
1169 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1170 nxv4i32, nxv4i1, sme_elm_idx0_0,
1171 imm_to_tile128, tileslice128, opq>;
1172 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1173 nxv4f32, nxv4i1, sme_elm_idx0_0,
1174 imm_to_tile128, tileslice128, opq>;
1175 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1176 nxv2i64, nxv2i1, sme_elm_idx0_0,
1177 imm_to_tile128, tileslice128, opq>;
1178 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1179 nxv2f64, nxv2i1, sme_elm_idx0_0,
1180 imm_to_tile128, tileslice128, opq>;
// Top-level entry point: instantiates sme_tile_to_vector_v twice, as _H with
// is_col = 0 and as _V with is_col = 1.
1183 multiclass sme_tile_to_vector<string mnemonic> {
1184 defm _H : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b0>;
1185 defm _V : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b1>;
1188 //===----------------------------------------------------------------------===//
1190 //===----------------------------------------------------------------------===//
// Instruction class taking a single MatrixTileList operand. The operand value
// is placed in Inst{7-0}; Inst{31-8} is a fixed opcode.
1192 // NOTE: This definition isn't really correct because there are outputs, i.e.
1193 // the tile registers being zeroed. We fix this up in a custom inserter that
1194 // marks the appropriate registers as being implicitly defined.
1195 class sme_zero_inst<string mnemonic>
1196 : I<(outs), (ins MatrixTileList:$imm),
1197 mnemonic, "\t$imm", "", []>, Sched<[]> {
1199 let Inst{31-8} = 0b110000000000100000000000;
1200 let Inst{7-0} = imm;
// sme_zero: defines the sme_zero_inst instruction, a set of "zero" assembler
// aliases that map named tile lists to fixed 8-bit mask values, a
// custom-inserted pseudo taking the tile list as an i32 immediate, and a
// pattern selecting int_aarch64_sme_zero to that pseudo.
1203 multiclass sme_zero<string mnemonic> {
1204 def NAME : sme_zero_inst<mnemonic>;
// Alias table: each literal is the mask passed as the instruction's operand.
// The masks for multi-tile lists are the bitwise OR of the single-tile .s
// masks (e.g. za0.s,za1.s = 0b00010001 | 0b00100010).
1206 def : InstAlias<"zero\t\\{za\\}", (!cast<Instruction>(NAME) 0b11111111), 1>;
1207 def : InstAlias<"zero\t\\{za0.h\\}", (!cast<Instruction>(NAME) 0b01010101), 1>;
1208 def : InstAlias<"zero\t\\{za1.h\\}", (!cast<Instruction>(NAME) 0b10101010), 1>;
1209 def : InstAlias<"zero\t\\{za0.s\\}", (!cast<Instruction>(NAME) 0b00010001), 1>;
1210 def : InstAlias<"zero\t\\{za1.s\\}", (!cast<Instruction>(NAME) 0b00100010), 1>;
1211 def : InstAlias<"zero\t\\{za2.s\\}", (!cast<Instruction>(NAME) 0b01000100), 1>;
1212 def : InstAlias<"zero\t\\{za3.s\\}", (!cast<Instruction>(NAME) 0b10001000), 1>;
1213 def : InstAlias<"zero\t\\{za0.s,za1.s\\}", (!cast<Instruction>(NAME) 0b00110011), 1>;
1214 def : InstAlias<"zero\t\\{za0.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10011001), 1>;
1215 def : InstAlias<"zero\t\\{za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01100110), 1>;
1216 def : InstAlias<"zero\t\\{za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11001100), 1>;
1217 def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01110111), 1>;
1218 def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10111011), 1>;
1219 def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
1220 def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;
1222 def NAME # _PSEUDO : Pseudo<(outs), (ins i32imm:$tilelist), []>,
1224 // Translated to the actual instructions in AArch64ISelLowering.cpp
1225 let usesCustomInserter = 1;
1228 def : Pat<(int_aarch64_sme_zero timm32_0_255:$imm),
1229 (!cast<Instruction>(NAME # _PSEUDO) timm32_0_255:$imm)>;
1232 //===----------------------------------------------------------------------===//
1233 // SVE2 Instructions
1234 //===----------------------------------------------------------------------===//
// Predicated unary instruction on ZPR128 operands. The output $Zd is tied to
// the input $_Zd, and the instruction is marked DestructiveUnary with the
// element size taken from ZPR128. The size field Inst{23-22} is fixed to 0b00.
1236 class sve2_int_perm_revd<string asm>
1237 : I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
1238 asm, "\t$Zd, $Pg/m, $Zn", "", []>,
1243 let Inst{31-24} = 0b00000101;
1244 let Inst{23-22} = 0b00; // size
1245 let Inst{21-13} = 0b101110100;
1246 let Inst{12-10} = Pg;
1250 let Constraints = "$Zd = $_Zd";
1251 let DestructiveInstType = DestructiveUnary;
1252 let ElementSize = ZPR128.ElementSize;
// Defines the single instruction and maps `op` onto it for every listed
// integer and floating-point scalable vector type via SVE_1_Op_Passthru_Pat.
// All patterns target the same instruction.
1255 multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
1256 def NAME : sve2_int_perm_revd<asm>;
1258 def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME)>;
1259 def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME)>;
1260 def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME)>;
1261 def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME)>;
1263 def : SVE_1_Op_Passthru_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME)>;
1264 def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME)>;
1265 def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
1266 def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME)>;
// Three-operand instruction whose output $Zd is tied to the input $_Zd, with
// two further vector sources $Zn and $Zm. sz selects the element size field
// Inst{23-22}. U is a template parameter whose encoding line is not visible in
// this excerpt. Marked DestructiveOther.
1270 class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
1271 : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
1272 asm, "\t$Zd, $Zn, $Zm", "", []>,
1277 let Inst{31-24} = 0b01000100;
1278 let Inst{23-22} = sz;
1280 let Inst{20-16} = Zm;
1281 let Inst{15-11} = 0b11000;
1286 let Constraints = "$Zd = $_Zd";
1287 let DestructiveInstType = DestructiveOther;
1288 let ElementSize = zpr_ty.ElementSize;
// Instantiates the _B/_H/_S/_D variants of sve2_clamp (sz = 0b00..0b11) and
// maps `op` on the matching integer vector type to each via SVE_3_Op_Pat.
1291 multiclass sve2_clamp<string asm, bit U, SDPatternOperator op> {
1292 def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
1293 def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
1294 def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
1295 def _D : sve2_clamp<asm, 0b11, U, ZPR64>;
1297 def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
1298 def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
1299 def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
1300 def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
// Predicate instruction with operands $Pn, $Pm, an index register $Rv and an
// immediate $imm, printed as "$Pd, $Pn, $Pm[$Rv, $imm]". The visible lines
// place Rv in Inst{17-16} and Pn in Inst{13-10}. The immediate is encoded by
// the per-size instantiations rather than here.
1303 class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
1304 : I<(outs PPRorPNRAny:$Pd), (ins PPRorPNRAny:$Pn, ppr_ty:$Pm,
1305 MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
1306 asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
1312 let Inst{31-24} = 0b00100101;
1314 let Inst{17-16} = Rv;
1315 let Inst{15-14} = 0b01;
1316 let Inst{13-10} = Pn;
// Instantiates the _B/_H/_S/_D variants and their selection patterns.
// The immediate narrows as the element size grows (idx0_15 down to idx0_1),
// and the Inst bits it no longer needs are filled with fixed values.
1323 multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
1324 def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
1326 let Inst{23-22} = imm{3-2};
1327 let Inst{20-19} = imm{1-0};
1330 def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
1332 let Inst{23-22} = imm{2-1};
1333 let Inst{20} = imm{0};
1334 let Inst{19-18} = 0b10;
1336 def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
1338 let Inst{23-22} = imm{1-0};
1339 let Inst{20-18} = 0b100;
1341 def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
1345 let Inst{20-18} = 0b000;
// Baseline patterns: a bare index register selects the instruction with an
// immediate of 0. The result and $Pn are always nxv16i1; only $Pm's type
// varies with the element size.
1348 def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
1349 MatrixIndexGPR32Op12_15:$idx)),
1350 (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
1351 def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
1352 MatrixIndexGPR32Op12_15:$idx)),
1353 (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
1354 def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
1355 MatrixIndexGPR32Op12_15:$idx)),
1356 (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
1357 def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
1358 MatrixIndexGPR32Op12_15:$idx)),
1359 (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;
// Higher-priority patterns: when the tilesliceN ComplexPattern can split the
// index into (register, immediate), pass the immediate to the instruction.
1361 let AddedComplexity = 1 in {
1362 def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
1363 (i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))),
1364 (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;
1365 def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
1366 (i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))),
1367 (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;
1368 def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
1369 (i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))),
1370 (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;
1371 def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
1372 (i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))),
1373 (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
1377 //===----------------------------------------------------------------------===//
1378 // SME2 Instructions
1379 //===----------------------------------------------------------------------===//
1381 //===----------------------------------------------------------------------===//
1382 // SME2 single-multi ternary int/fp, two/four registers
// Instruction class shared by the two- and four-register "multi, single"
// forms: $Zn is a multi-vector operand and $Zm a single vector. The 7-bit `op`
// is scattered across the encoding: op{6} -> Inst{22}, op{5} -> Inst{20},
// op{4-2} -> Inst{12-10}, op{1-0} -> Inst{4-3}. op{5} also selects the "vgx4"
// (set) or "vgx2" (clear) suffix in the assembly string. The ZA operand is
// tied between input and output.
1384 class sme2_dot_mla_add_sub_array_vg24_single<bits<7> op,
1385 MatrixOperand matrix_ty,
1386 RegisterOperand multi_vector_ty,
1389 : I<(outs matrix_ty:$ZAd),
1390 (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
1391 sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm),
1392 mnemonic,"\t$ZAd[$Rv, $imm3, " # !if(op{5}, "vgx4", "vgx2") # "], $Zn, $Zm",
1393 "", []> , Sched<[]> {
1398 let Inst{31-23} = 0b110000010;
1399 let Inst{22} = op{6}; //sz
1401 let Inst{20} = op{5}; //vgx4
1402 let Inst{19-16} = Zm;
1404 let Inst{14-13} = Rv;
1405 let Inst{12-10} = op{4-2};
1407 let Inst{4-3} = op{1-0};
1408 let Inst{2-0} = imm3;
1409 let Constraints = "$ZAd = $_ZAd";
// Defines the instruction (registered with SMEPseudo2Instr<NAME, 1>) and an
// assembler alias that omits the vgx2/vgx4 suffix. The alias's final argument
// is 0 (NOTE(review): presumably the emit priority, making it parse-only;
// confirm against InstAlias in Target.td).
1412 multiclass sme2_dot_mla_add_sub_array_vg24_single<string mnemonic, bits<7> op,
1413 MatrixOperand matrix_ty,
1414 RegisterOperand multi_vector_ty,
1416 def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
1418 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
1419 (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
// Two-register variant with intrinsic support: defines the instruction, an
// alias without the vg suffix, a _PSEUDO built from
// sme2_za_array_2op_multi_single_pseudo, and a
// SME2_ZA_TwoOp_VG2_Multi_Single_Pat that maps `intrinsic` (vector type vty)
// using tileslice16.
1422 multiclass sme2_dot_mla_add_sub_array_vg2_single<string mnemonic, bits<7> op,
1423 MatrixOperand matrix_ty,
1424 RegisterOperand multi_vector_ty,
1425 ZPRRegOp zpr_ty, ValueType vty, SDPatternOperator intrinsic>{
1426 def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
1428 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
1429 (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
1431 def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, zpr_ty, SMEMatrixArray>;
1433 def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, vty, tileslice16>;
// Four-register variant with intrinsic support: defines the instruction, an
// alias without the vg suffix, a _PSEUDO built from
// sme2_za_array_2op_multi_single_pseudo, and a
// SME2_ZA_TwoOp_VG4_Multi_Single_Pat that maps `intrinsic` (vector type vty)
// using tileslice16.
1436 multiclass sme2_dot_mla_add_sub_array_vg4_single<string mnemonic, bits<7> op,
1437 MatrixOperand matrix_ty,
1438 RegisterOperand multi_vector_ty,
1439 ZPRRegOp zpr_ty, ValueType vty, SDPatternOperator intrinsic>{
1440 def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
1442 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
1443 (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
1445 def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, zpr_ty, SMEMatrixArray>;
1447 def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, vty, tileslice16>;
1450 //===----------------------------------------------------------------------===//
1451 // SME2 multiple vectors ternary INT/FP two and four registers
// Two-register "multi, multi" instruction class: both $Zn and $Zm are
// multi-vector operands and the assembly string always carries "vgx2".
// `op` is split as op{6} -> Inst{22}, op{5-3} -> Inst{12-10},
// op{2-0} -> Inst{5-3}. Zm occupies Inst{20-17}. The ZA operand is tied
// between input and output.
1452 class sme2_dot_mla_add_sub_array_vg2_multi<bits<7> op,
1453 MatrixOperand matrix_ty,
1454 RegisterOperand multi_vector_ty,
1456 : I<(outs matrix_ty:$ZAd),
1457 (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
1458 sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm),
1459 mnemonic, "\t$ZAd[$Rv, $imm3, vgx2], $Zn, $Zm",
1465 let Inst{31-23} = 0b110000011;
1466 let Inst{22} = op{6}; //sz
1468 let Inst{20-17} = Zm;
1469 let Inst{16-15} = 0b00;
1470 let Inst{14-13} = Rv;
1471 let Inst{12-10} = op{5-3};
1473 let Inst{5-3} = op{2-0};
1474 let Inst{2-0} = imm3;
1475 let Constraints = "$ZAd = $_ZAd";
// Defines the two-register multi/multi instruction, its _PSEUDO (from
// sme2_za_array_2op_multi_multi_pseudo), a SME2_ZA_TwoOp_VG2_Multi_Multi_Pat
// for `intrinsic` using tileslice16, and an alias without the vgx2 suffix.
// Note that `zpr_ty` is a ValueType in this multiclass.
1478 multiclass sme2_dot_mla_add_sub_array_vg2_multi<string mnemonic, bits<7> op,
1479 MatrixOperand matrix_ty,
1480 RegisterOperand multi_vector_ty, ValueType zpr_ty,
1481 SDPatternOperator intrinsic> {
1482 def NAME : sme2_dot_mla_add_sub_array_vg2_multi<op, matrix_ty, multi_vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
1484 def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, SMEMatrixArray>;
1486 def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, tileslice16>;
1488 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
1489 (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
// Four-register "multi, multi" instruction class: both $Zn and $Zm are
// multi-vector operands and the assembly string always carries "vgx4".
// `op` is split as op{6} -> Inst{22}, op{5-3} -> Inst{12-10},
// op{2-0} -> Inst{5-3}. Zm occupies the three bits Inst{20-18}, followed by
// the fixed field Inst{17-15} = 0b010. The ZA operand is tied between input
// and output.
1492 class sme2_dot_mla_add_sub_array_vg4_multi<bits<7> op,
1493 MatrixOperand matrix_ty,
1494 RegisterOperand multi_vector_ty,
1496 : I<(outs matrix_ty:$ZAd),
1497 (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
1498 sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm),
1499 mnemonic, "\t$ZAd[$Rv, $imm3, vgx4], $Zn, $Zm",
1505 let Inst{31-23} = 0b110000011;
1506 let Inst{22} = op{6}; //sz
1508 let Inst{20-18} = Zm;
1509 let Inst{17-15} = 0b010;
1510 let Inst{14-13} = Rv;
1511 let Inst{12-10} = op{5-3};
1514 let Inst{5-3} = op{2-0};
1515 let Inst{2-0} = imm3;
1516 let Constraints = "$ZAd = $_ZAd";
// Defines the four-register multi/multi instruction, its _PSEUDO (from
// sme2_za_array_2op_multi_multi_pseudo), a SME2_ZA_TwoOp_VG4_Multi_Multi_Pat
// for `intrinsic` using tileslice16, and an alias without the vgx4 suffix.
// Note that `zpr_ty` is a ValueType in this multiclass.
1519 multiclass sme2_dot_mla_add_sub_array_vg4_multi<string mnemonic, bits<7> op,
1520 MatrixOperand matrix_ty,
1521 RegisterOperand multi_vector_ty,
1522 ValueType zpr_ty, SDPatternOperator intrinsic>{
1523 def NAME : sme2_dot_mla_add_sub_array_vg4_multi<op, matrix_ty, multi_vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
1525 def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, SMEMatrixArray>;
1527 def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, tileslice16>;
1529 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
1530 (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
1533 //===----------------------------------------------------------------------===//
1534 // SME2 multiple vectors binary two or four registers
// Instruction class with a tied ZA operand ($ZAdn) and one vector-group
// source $Zm. vg4 selects the "vgx4" or "vgx2" suffix in the assembly string.
// The 3-bit `op` is split as op{2} -> Inst{18} and op{1-0} -> Inst{4-3}.
// The lines encoding sz, vg4 and Zm are not visible in this excerpt.
1536 class sme2_multivec_accum_add_sub<string mnemonic, bit sz, bit vg4, bits<3> op,
1537 MatrixOperand matrix_ty,
1538 RegisterOperand vector_ty>
1539 : I<(outs matrix_ty:$ZAdn),
1540 (ins matrix_ty:$_ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm),
1541 mnemonic, "\t$ZAdn[$Rv, $imm3, " # !if(vg4, "vgx4", "vgx2") # "], $Zm",
1542 "", []>, Sched<[]> {
1545 let Inst{31-23} = 0b110000011;
1547 let Inst{21-19} = 0b100;
1548 let Inst{18} = op{2};
1552 let Inst{14-13} = Rv;
1553 let Inst{12-10} = 0b111;
1555 let Inst{4-3} = op{1-0};
1556 let Inst{2-0} = imm3;
1558 let Constraints = "$ZAdn = $_ZAdn";
// Two-register specialisation: instantiates sme2_multivec_accum_add_sub with
// vg4 = 0. The body of this class is not visible in this excerpt.
1561 class sme2_multivec_accum_add_sub_vg2<string mnemonic, bit sz, bits<3> op,
1562 MatrixOperand matrix_ty,
1563 RegisterOperand vector_ty>
1564 : sme2_multivec_accum_add_sub<mnemonic, sz, 0b0, op, matrix_ty, vector_ty> {
// Defines the two-register instruction, an alias without the vgx2 suffix, a
// _PSEUDO (sme2_move_to_za_pseudo) and a SME2_ZA_VG1x2_Multi_Pat for
// `intrinsic`. The 4-bit `op` is split here: op{3} is passed as sz and op{2-0}
// as the class's 3-bit op.
1570 multiclass sme2_multivec_accum_add_sub_vg2<string mnemonic, bits<4> op,
1571 MatrixOperand matrix_ty,
1572 RegisterOperand vector_ty,
1574 SDPatternOperator intrinsic> {
1575 def NAME : sme2_multivec_accum_add_sub_vg2<mnemonic, op{3}, op{2-0}, matrix_ty, vector_ty>,
1576 SMEPseudo2Instr<NAME, 1>;
1577 def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
1578 (!cast<Instruction>(NAME) matrix_ty:$ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 0>;
1580 def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty, SMEMatrixArray>;
1581 def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7, tileslice16>;
// Four-register specialisation: instantiates sme2_multivec_accum_add_sub with
// vg4 = 1. The body of this class is not visible in this excerpt.
1584 class sme2_multivec_accum_add_sub_vg4<string mnemonic, bit sz, bits<3> op,
1585 MatrixOperand matrix_ty,
1586 RegisterOperand vector_ty>
1587 : sme2_multivec_accum_add_sub<mnemonic, sz, 0b1, op, matrix_ty, vector_ty> {
// Defines the four-register instruction, an alias without the vgx4 suffix, a
// _PSEUDO (sme2_move_to_za_pseudo) and a SME2_ZA_VG1x4_Multi_Pat for
// `intrinsic`. The 4-bit `op` is split here: op{3} is passed as sz and op{2-0}
// as the class's 3-bit op.
1593 multiclass sme2_multivec_accum_add_sub_vg4<string mnemonic, bits<4> op,
1594 MatrixOperand matrix_ty,
1595 RegisterOperand vector_ty,
1597 SDPatternOperator intrinsic> {
1598 def NAME : sme2_multivec_accum_add_sub_vg4<mnemonic, op{3}, op{2-0}, matrix_ty, vector_ty>,
1599 SMEPseudo2Instr<NAME, 1>;
1600 def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
1601 (!cast<Instruction>(NAME) matrix_ty:$ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 0>;
1603 def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty, SMEMatrixArray>;
1604 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7, tileslice16>;
1607 //===----------------------------------------------------------------------===//
1608 // SME2 Multi-vector - Multiple and Single SVE Destructive
1609 // Two and Four registers
// Destructive two-register form with a single-vector second source: the
// multi-vector $Zdn is both input and output (tied), and the assembly string
// prints it twice ("$Zdn, $_Zdn, $Zm"). `op` is split as
// op{6-1} -> Inst{10-5} and op{0} -> Inst{0}; Zdn occupies Inst{4-1}.
1611 class sme2_sve_destructive_vector_vg2_single<bits<2> sz, bits<7> op,
1612 RegisterOperand vector_ty,
1615 : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm),
1616 mnemonic, "\t$Zdn, $_Zdn, $Zm",
1617 "", []>, Sched<[]> {
1620 let Inst{31-24} = 0b11000001;
1621 let Inst{23-22} = sz;
1622 let Inst{21-20} = 0b10;
1623 let Inst{19-16} = Zm;
1624 let Inst{15-11} = 0b10100;
1625 let Inst{10-5} = op{6-1};
1626 let Inst{4-1} = Zdn;
1627 let Inst{0} = op{0};
1629 let Constraints = "$Zdn = $_Zdn";
// Floating-point set: instantiates only the _H/_S/_D sizes (sz 0b01..0b11);
// there is no _B variant.
1632 multiclass sme2_fp_sve_destructive_vector_vg2_single<string mnemonic, bits<7> op> {
1633 def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>;
1634 def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>;
1635 def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>;
// Integer set: instantiates all four sizes _B/_H/_S/_D (sz 0b00..0b11).
1638 multiclass sme2_int_sve_destructive_vector_vg2_single<string mnemonic, bits<7> op> {
1639 def _B : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_b_mul_r, ZPR4b8, mnemonic>;
1640 def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>;
1641 def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>;
1642 def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>;
1645 // SME2.1 fmax/fmin instructions.
// Defines a single _H variant on ZZ_h_mul_r with sz = 0b00, which differs from
// the sz = 0b01 used by the _H variants of the fp/int multiclasses.
1646 multiclass sme2p1_bf_max_min_vector_vg2_single<string mnemonic, bits<7>op> {
1647 def _H : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_h_mul_r,
// Destructive four-register form with a single-vector second source: the
// multi-vector $Zdn is both input and output (tied), and the assembly string
// prints it twice. `op` is split as op{6-1} -> Inst{10-5} and
// op{0} -> Inst{0}; Zdn occupies the three bits Inst{4-2}.
1651 class sme2_sve_destructive_vector_vg4_single<bits<2> sz, bits<7> op,
1652 RegisterOperand vector_ty,
1655 : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm),
1656 mnemonic, "\t$Zdn, $_Zdn, $Zm",
1657 "", []>, Sched<[]> {
1660 let Inst{31-24} = 0b11000001;
1661 let Inst{23-22} = sz;
1662 let Inst{21-20} = 0b10;
1663 let Inst{19-16} = Zm;
1664 let Inst{15-11} = 0b10101;
1665 let Inst{10-5} = op{6-1};
1666 let Inst{4-2} = Zdn;
1668 let Inst{0} = op{0};
1670 let Constraints = "$Zdn = $_Zdn";
// Floating-point set: instantiates only the _H/_S/_D sizes (sz 0b01..0b11);
// there is no _B variant.
1673 multiclass sme2_fp_sve_destructive_vector_vg4_single<string mnemonic, bits<7> op> {
1674 def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
1675 def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>;
1676 def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>;
// Integer set: instantiates all four sizes _B/_H/_S/_D (sz 0b00..0b11).
1679 multiclass sme2_int_sve_destructive_vector_vg4_single<string mnemonic, bits<7> op> {
1680 def _B : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_b_mul_r, ZPR4b8, mnemonic>;
1681 def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
1682 def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>;
1683 def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>;
1686 // SME2.1 fmax/fmin instructions.
// Defines a single _H variant on ZZZZ_h_mul_r with sz = 0b00, which differs
// from the sz = 0b01 used by the _H variants of the fp/int multiclasses.
1687 multiclass sme2p1_bf_max_min_vector_vg4_single<string mnemonic, bits<7>op> {
1688 def _H : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_h_mul_r,
// Destructive two-register form where the second source $Zm is also a
// multi-vector operand of the same class as $Zdn. $Zdn is tied between input
// and output. Zm occupies Inst{20-17} and Zdn Inst{4-1}; `op` is split as
// op{6-1} -> Inst{10-5} and op{0} -> Inst{0}.
1692 class sme2_sve_destructive_vector_vg2_multi<bits<2> sz, bits<7> op,
1693 RegisterOperand vector_ty,
1695 : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm),
1696 mnemonic, "\t$Zdn, $_Zdn, $Zm",
1697 "", []>, Sched<[]> {
1700 let Inst{31-24} = 0b11000001;
1701 let Inst{23-22} = sz;
1703 let Inst{20-17} = Zm;
1704 let Inst{16-11} = 0b010110;
1705 let Inst{10-5} = op{6-1};
1706 let Inst{4-1} = Zdn;
1707 let Inst{0} = op{0};
1709 let Constraints = "$Zdn = $_Zdn";
// Floating-point set: instantiates only the _H/_S/_D sizes (sz 0b01..0b11);
// there is no _B variant.
1712 multiclass sme2_fp_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> {
1713 def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r, mnemonic>;
1714 def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r, mnemonic>;
1715 def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r, mnemonic>;
// Integer set: instantiates all four sizes _B/_H/_S/_D (sz 0b00..0b11).
1718 multiclass sme2_int_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> {
1719 def _B : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_b_mul_r, mnemonic>;
1720 def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r, mnemonic>;
1721 def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r, mnemonic>;
1722 def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r, mnemonic>;
1725 // SME2.1 fmax/fmin instructions.
// Defines a single _H variant on ZZ_h_mul_r with sz = 0b00, which differs from
// the sz = 0b01 used by the _H variants of the fp/int multiclasses.
1726 multiclass sme2p1_bf_max_min_vector_vg2_multi<string mnemonic, bits<7>op> {
1727 def _H : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_h_mul_r,
// Destructive four-register form where the second source $Zm is also a
// multi-vector operand of the same class as $Zdn. $Zdn is tied between input
// and output. Zm occupies Inst{20-18} and Zdn Inst{4-2}; `op` is split as
// op{6-1} -> Inst{10-5} and op{0} -> Inst{0}.
1731 class sme2_sve_destructive_vector_vg4_multi<bits<2> sz, bits<7> op,
1732 RegisterOperand vector_ty,
1734 : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm),
1735 mnemonic, "\t$Zdn, $_Zdn, $Zm",
1736 "", []>, Sched<[]> {
1739 let Inst{31-24} = 0b11000001;
1740 let Inst{23-22} = sz;
1742 let Inst{20-18} = Zm;
1743 let Inst{17-11} = 0b0010111;
1744 let Inst{10-5} = op{6-1};
1745 let Inst{4-2} = Zdn;
1747 let Inst{0} = op{0};
1749 let Constraints = "$Zdn = $_Zdn";
// Instantiates sme2_sve_destructive_vector_vg4_multi for the _H, _S and _D
// suffixes (sz = 0b01, 0b10, 0b11). There is no _B record here.
1752 multiclass sme2_fp_sve_destructive_vector_vg4_multi<string mnemonic, bits<7> op> {
1753 def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r, mnemonic>;
1754 def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r, mnemonic>;
1755 def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r, mnemonic>;
// Instantiates sme2_sve_destructive_vector_vg4_multi for all four suffixes
// _B, _H, _S and _D (sz = 0b00, 0b01, 0b10, 0b11).
1758 multiclass sme2_int_sve_destructive_vector_vg4_multi<string mnemonic, bits<7> op> {
1759 def _B : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_b_mul_r, mnemonic>;
1760 def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r, mnemonic>;
1761 def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r, mnemonic>;
1762 def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r, mnemonic>;
1765 // SME2.1 fmax/fmin instructions.
// Defines a single _H record of sme2_sve_destructive_vector_vg4_multi with
// sz = 0b00 (the fp/int multiclasses use 0b01 for _H) on ZZZZ_h_mul_r.
1766 multiclass sme2p1_bf_max_min_vector_vg4_multi<string mnemonic, bits<7>op> {
1767 def _H : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_h_mul_r,
1771 //===----------------------------------------------------------------------===//
1772 // SME2 Multi-vector - Index/Single/Multi Array Vectors FMA sources
// Common base for the indexed "long" multiply-accumulate formats that
// accumulate into a MatrixOp32 array ($ZAda, tied to $_ZAda).
// Inputs: slice register $Rv, slice offset $imm (index_ty), source $Zn
// (multi_vector_ty), multiplier $Zm (ZPR4b16) and its element index $i3.
// vg_acronym, when non-empty, is printed after $imm inside the brackets and
// also sets Inst{20} to 1 (otherwise 0).
//   op0 -> Inst{23-22}, Zm -> Inst{19-16}, Rv -> Inst{14-13}
1774 class sme2_mla_long_array_index_base<bits<2> op0, bits<2> op, Operand index_ty,
1775 RegisterOperand multi_vector_ty,
1776 string mnemonic, string vg_acronym="">
1777 : I<(outs MatrixOp32:$ZAda),
1778 (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm, multi_vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3),
1779 mnemonic, "\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm$i3",
1780 "", []>, Sched<[]> {
1783 let Inst{31-24} = 0b11000001;
1784 let Inst{23-22} = op0;
1786 let Inst{20} = !if(!eq(vg_acronym, ""), 0, 1);
1787 let Inst{19-16} = Zm;
1788 let Inst{14-13} = Rv;
// The accumulator array is read and written.
1792 let Constraints = "$ZAda = $_ZAda";
// Single-vector indexed form. Defines:
//   _HtoS        - the instruction (uimm3s2range offset, ZPR16 source) with
//                  i3 split as Inst{15} = i3{2}, Inst{11-10} = i3{1-0} and
//                  the offset in Inst{2-0};
//   _HtoS_PSEUDO - the matching pseudo record;
//   a selection pattern for `intrinsic` using tileslicerange3s2.
1795 multiclass sme2_mla_long_array_index<string mnemonic, bits<2> op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
1796 def _HtoS : sme2_mla_long_array_index_base<op0, op, uimm3s2range, ZPR16,
1797 mnemonic>, SMEPseudo2Instr<NAME # _HtoS, 1> {
1801 let Inst{15} = i3{2};
1802 let Inst{11-10} = i3{1-0};
1804 let Inst{2-0} = imm;
1807 def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm3s2range, ZPR16, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
1809 def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange3s2>;
// Two-register (ZZ_h_mul_r source) specialisation of
// sme2_mla_long_array_index_base with a uimm2s2range offset.
// The 3-bit index is split: i3{2-1} -> Inst{11-10}, i3{0} -> Inst{2};
// the slice offset occupies Inst{1-0}.
1812 class sme2_mla_long_array_vg2_index<string mnemonic, bits<2> op0, bits<2> op>
1813 : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZ_h_mul_r,
1819 let Inst{11-10} = i3{2-1};
1822 let Inst{2} = i3{0};
1823 let Inst{1-0} = imm;
// Two-register indexed form with op0 fixed to 0b10. Defines the _HtoS
// instruction, its _HtoS_PSEUDO, a selection pattern for `intrinsic`
// (element type zpr_ty, tileslicerange2s2), and an InstAlias whose assembly
// string has no vg suffix inside the brackets. The trailing 0 on the alias
// is InstAlias's Emit argument.
1826 multiclass sme2_fp_mla_long_array_vg2_index<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
1827 def _HtoS : sme2_mla_long_array_vg2_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;
1829 def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
1831 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange2s2>;
1833 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
1834 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
// Two-register indexed form with op0 fixed to 0b11 and the pattern's element
// type fixed to nxv8i16. Note the records here use the _S suffix (not _HtoS
// as in the fp sibling). Defines the instruction, its pseudo, the selection
// pattern and an InstAlias without a vg suffix in the brackets.
1837 multiclass sme2_int_mla_long_array_vg2_index<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
1838 def _S : sme2_mla_long_array_vg2_index<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _S, 1>;
1840 def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
1842 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s2>;
1844 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
1845 (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
// Four-register (ZZZZ_h_mul_r source) specialisation of
// sme2_mla_long_array_index_base with a uimm2s2range offset.
// i3{2-1} -> Inst{11-10}, i3{0} -> Inst{2}; Inst{6-5} is fixed to 0b00 and
// the slice offset occupies Inst{1-0}.
1848 class sme2_mla_long_array_vg4_index<string mnemonic, bits<2> op0, bits<2> op>
1849 : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZZZ_h_mul_r,
1855 let Inst{11-10} = i3{2-1};
1857 let Inst{6-5} = 0b00;
1858 let Inst{2} = i3{0};
1859 let Inst{1-0} = imm;
// Four-register indexed form with op0 fixed to 0b10. Defines the _HtoS
// instruction, its _HtoS_PSEUDO, a selection pattern for `intrinsic`
// (element type zpr_ty, tileslicerange2s2), and an InstAlias whose assembly
// string has no vg suffix inside the brackets.
1862 multiclass sme2_fp_mla_long_array_vg4_index<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
1863 def _HtoS : sme2_mla_long_array_vg4_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;
1865 def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
1867 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange2s2>;
1869 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
1870 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
// Four-register indexed form with op0 fixed to 0b11 and the pattern's element
// type fixed to nxv8i16. Defines the _HtoS instruction, its pseudo, the
// selection pattern and an InstAlias without a vg suffix in the brackets.
1873 multiclass sme2_int_mla_long_array_vg4_index<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
1874 def _HtoS : sme2_mla_long_array_vg4_index<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;
1876 def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
1878 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s2>;
1880 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
1881 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
// Common base for the non-indexed "long" multiply-accumulate formats.
// The accumulator $ZAda (matrix_ty) is tied to $_ZAda; inputs are the slice
// register $Rv, slice offset $imm (index_ty) and two vector operands $Zn
// (first_vector_ty) and $Zm (second_vector_ty).
// vg_acronym, when non-empty, is printed after $imm inside the brackets;
// Inst{10} is 1 when it is empty and 0 otherwise.
//   op0 -> Inst{23-22}, Rv -> Inst{14-13}, Inst{12-11} fixed to 0b01
1884 class sme2_mla_long_array<bits<2>op0, bits<2> op,
1885 MatrixOperand matrix_ty,
1887 RegisterOperand first_vector_ty,
1888 RegisterOperand second_vector_ty,
1889 string mnemonic, string vg_acronym="">
1890 : I<(outs matrix_ty:$ZAda),
1891 (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv,
1892 index_ty:$imm, first_vector_ty:$Zn, second_vector_ty:$Zm),
1893 mnemonic,"\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm",
1894 "", []> , Sched<[]> {
1896 let Inst{31-24} = 0b11000001;
1897 let Inst{23-22} = op0;
1900 let Inst{14-13} = Rv;
1901 let Inst{12-11} = 0b01;
1902 let Inst{10} = !if(!eq(vg_acronym, ""), 1, 0);
// The accumulator array is read and written.
1905 let Constraints = "$ZAda = $_ZAda";
// Single-vector form accumulating into MatrixOp32. Defines:
//   _HtoS        - the instruction (uimm3s2range offset, ZPR16 and ZPR4b16
//                  vector operands) with Zm in Inst{19-16} and the offset in
//                  Inst{2-0};
//   _HtoS_PSEUDO - the matching pseudo record;
//   a selection pattern for `intrinsic` using tileslicerange3s2.
1908 multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
1909 def _HtoS : sme2_mla_long_array<op0, op, MatrixOp32, uimm3s2range, ZPR16, ZPR4b16,
1910 mnemonic> , SMEPseudo2Instr<NAME # _HtoS, 1>{
1915 let Inst{19-16} = Zm;
1917 let Inst{2-0} = imm;
1920 def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm3s2range, ZPR16, ZPR4b16, SMEMatrixArray>;
1922 def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, tileslicerange3s2>;
// Single-vector form accumulating into MatrixOp16 with byte vector operands
// (ZPR8, ZPR4b8); op0 and op are both fixed to 0b00.
// Zm -> Inst{19-16}, slice offset -> Inst{2-0}.
1925 class sme2_mla_long_array_single_16b<string mnemonic>
1926 : sme2_mla_long_array<0b00, 0b00, MatrixOp16, uimm3s2range, ZPR8, ZPR4b8, mnemonic> {
1931 let Inst{19-16} = Zm;
1933 let Inst{2-0} = imm;
// Shared class for the "multi-vector by single vector" forms used by both the
// vgx2 and vgx4 multiclasses. Forwards to sme2_mla_long_array with a
// uimm2s2range offset and the given vg_acronym.
// Zm -> Inst{19-16}, slice offset -> Inst{1-0}.
1936 class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
1937 MatrixOperand matrix_ty, RegisterOperand multi_vector_ty,
1938 ZPRRegOp zpr_ty, string mnemonic, string vg_acronym>
1939 : sme2_mla_long_array<op0, op, matrix_ty, uimm2s2range, multi_vector_ty, zpr_ty,
1940 mnemonic, vg_acronym> {
1945 let Inst{19-16} = Zm;
1948 let Inst{1-0} = imm;
// vgx2 "multi by single" form with caller-chosen matrix and vector types.
// The 3-bit op is split into the class's op (op{2-1}) and o2 (op{0})
// arguments; op0 is 0b00 and vg4 is 0. Defines the instruction (NAME), its
// _PSEUDO, a selection pattern for `intrinsic`, and an InstAlias whose
// assembly string omits the "vgx2" suffix.
1951 multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
1952 RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
1953 ValueType zpr_ty, SDPatternOperator intrinsic> {
1954 def NAME : sme2_mla_long_array_vg24_single<0b00, 0b0, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
1955 vector_ty, mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1>;
1957 def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty,
1958 vector_ty, SMEMatrixArray>;
1960 def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm2s2range, vector_ty, zpr_ty,
1963 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
1964 (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
1965 uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>;
// vgx2 "multi by single" form fixed to MatrixOp32 with ZZ_h / ZPR4b16
// operands; op0 is 0b01, vg4 is 0 and o2 is 0. Defines the _HtoS instruction,
// its pseudo, a selection pattern (element type nxv8i16, tileslicerange2s2)
// and an InstAlias whose assembly string omits the "vgx2" suffix.
1968 multiclass sme2_int_mla_long_array_vg2_single<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
1969 def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b0, op, 0b0, MatrixOp32, ZZ_h, ZPR4b16, mnemonic,
1970 "vgx2">, SMEPseudo2Instr<NAME # _HtoS, 1>;
1972 def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h, ZPR4b16, SMEMatrixArray>;
1974 def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, tileslicerange2s2>;
1976 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
1977 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>;
// vgx4 "multi by single" form with caller-chosen matrix and vector types.
// The 3-bit op is split into the class's op (op{2-1}) and o2 (op{0})
// arguments; op0 is 0b00 and vg4 is 1. Defines the instruction (NAME), its
// _PSEUDO, a selection pattern for `intrinsic`, and an InstAlias whose
// assembly string omits the "vgx4" suffix.
1980 multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
1981 RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
1982 ValueType zpr_ty, SDPatternOperator intrinsic> {
1983 def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
1984 vector_ty, mnemonic, "vgx4">,
1985 SMEPseudo2Instr<NAME, 1>;
1987 def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty, vector_ty,
1990 def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm2s2range, vector_ty, zpr_ty,
1993 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
1994 (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
1995 uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>;
// vgx4 "multi by single" form fixed to MatrixOp32 with ZZZZ_h / ZPR4b16
// operands; op0 is 0b01, vg4 is 1 and o2 is 0. Defines the _HtoS instruction,
// its pseudo, a selection pattern (element type nxv8i16, tileslicerange2s2)
// and an InstAlias whose assembly string omits the "vgx4" suffix.
1998 multiclass sme2_int_mla_long_array_vg4_single<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
1999 def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b1, op, 0b0, MatrixOp32, ZZZZ_h, ZPR4b16, mnemonic,
2000 "vgx4">, SMEPseudo2Instr<NAME # _HtoS, 1>;
2002 def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h, ZPR4b16, SMEMatrixArray>;
2004 def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, tileslicerange2s2>;
2006 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
2007 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>;
// "Multi by multi" two-register form: both vector operands use
// multi_vector_ty. Only the low two bits of the 3-bit op are forwarded to
// sme2_mla_long_array; op{2} is placed in Inst{5}.
// Zm -> Inst{20-17}, slice offset -> Inst{1-0}.
2010 class sme2_mla_long_array_vg2_multi<string mnemonic, bits<2> op0, bits<3> op,
2011 MatrixOperand matrix_ty, RegisterOperand multi_vector_ty>
2012 : sme2_mla_long_array<op0, op{1-0}, matrix_ty, uimm2s2range, multi_vector_ty, multi_vector_ty,
2017 let Inst{20-17} = Zm;
2020 let Inst{5} = op{2}; // fp8
2022 let Inst{1-0} = imm;
// Two-register "multi by multi" form with op0 fixed to 0b10 and
// caller-chosen matrix and vector types. Defines the instruction (NAME), its
// _PSEUDO, a selection pattern for `intrinsic` (tileslicerange2s2), and an
// InstAlias whose assembly string has no vg suffix inside the brackets.
2025 multiclass sme2_fp_mla_long_array_vg2_multi<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
2026 RegisterOperand multi_vector_ty,
2027 ValueType zpr_ty, SDPatternOperator intrinsic> {
2029 def NAME : sme2_mla_long_array_vg2_multi<mnemonic, 0b10, op, matrix_ty, multi_vector_ty>,
2030 SMEPseudo2Instr<NAME, 1>;
2032 def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm2s2range, multi_vector_ty, SMEMatrixArray>;
2034 def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, uimm2s2range, zpr_ty, tileslicerange2s2>;
2036 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
2037 (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
2038 uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
// Two-register "multi by multi" form fixed to MatrixOp32 / ZZ_h_mul_r with
// op0 = 0b11. The 2-bit op is widened to 3 bits with a leading 0, so the
// class's op{2} (Inst{5}) is always 0 here. Defines the _HtoS instruction,
// its pseudo, a selection pattern (nxv8i16) and an InstAlias; the alias
// names its offset operand $imm2 rather than $imm.
2041 multiclass sme2_int_mla_long_array_vg2_multi<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
2042 def _HtoS : sme2_mla_long_array_vg2_multi<mnemonic, 0b11, {0b0, op}, MatrixOp32, ZZ_h_mul_r>,
2043 SMEPseudo2Instr<NAME # _HtoS, 1>;
2045 def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h_mul_r, SMEMatrixArray>;
2047 def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME # _HtoS, intrinsic, uimm2s2range, nxv8i16, tileslicerange2s2>;
2049 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm",
2050 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>;
// "Multi by multi" four-register form: both vector operands use
// multi_vector_ty. Only the low two bits of the 3-bit op are forwarded to
// sme2_mla_long_array; op{2} is placed in Inst{5}.
// Zm -> Inst{20-18}, slice offset -> Inst{1-0}.
2053 class sme2_mla_long_array_vg4_multi<string mnemonic, bits<2> op0, bits<3> op,
2054 MatrixOperand matrix_ty,
2055 RegisterOperand multi_vector_ty>
2056 : sme2_mla_long_array<op0, op{1-0}, matrix_ty, uimm2s2range, multi_vector_ty, multi_vector_ty,
2061 let Inst{20-18} = Zm;
2066 let Inst{5} = op{2}; //fp8
2068 let Inst{1-0} = imm;
// Four-register "multi by multi" form with op0 fixed to 0b10 and
// caller-chosen matrix and vector types. Defines the instruction (NAME), its
// _PSEUDO, a selection pattern for `intrinsic` (tileslicerange2s2), and an
// InstAlias whose assembly string has no vg suffix inside the brackets.
2071 multiclass sme2_fp_mla_long_array_vg4_multi<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
2072 RegisterOperand multi_vector_ty, ValueType zpr_ty,
2073 SDPatternOperator intrinsic> {
2074 def NAME : sme2_mla_long_array_vg4_multi<mnemonic, 0b10, op, matrix_ty, multi_vector_ty>,
2075 SMEPseudo2Instr<NAME, 1>;
2077 def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm2s2range, multi_vector_ty, SMEMatrixArray>;
2079 def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, uimm2s2range, zpr_ty, tileslicerange2s2>;
2081 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
2082 (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
2083 uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
// Four-register "multi by multi" form fixed to MatrixOp32 / ZZZZ_h_mul_r with
// op0 = 0b11. The 2-bit op is widened to 3 bits with a leading 0, so the
// class's op{2} (Inst{5}) is always 0 here. Defines the _HtoS instruction,
// its pseudo, a selection pattern (nxv8i16) and an InstAlias; the alias
// names its offset operand $imm2 rather than $imm.
2086 multiclass sme2_int_mla_long_array_vg4_multi<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
2087 def _HtoS : sme2_mla_long_array_vg4_multi<mnemonic, 0b11, {0b0, op}, MatrixOp32, ZZZZ_h_mul_r>,
2088 SMEPseudo2Instr<NAME # _HtoS, 1>;
2090 def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, SMEMatrixArray>;
2092 def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME # _HtoS, intrinsic, uimm2s2range, nxv8i16, tileslicerange2s2>;
2094 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm",
2095 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>;
2098 //===----------------------------------------------------------------------===//
// Unary encoding class: one output $Zd (first_ty) and one input $Zn
// (second_ty), printed as "$Zd, $Zn".
//   sz -> Inst{23-22}
//   op -> split: op{4-1} in Inst{19-16}, op{0} in Inst{5}
2099 class sme2_frint_cvt_vg2_multi<bits<2>sz, bits<5>op, RegisterOperand first_ty,
2100 RegisterOperand second_ty, string mnemonic>
2101 : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
2102 mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
2105 let Inst{31-24} = 0b11000001;
2106 let Inst{23-22} = sz;
2107 let Inst{21-20} = 0b10;
2108 let Inst{19-16} = op{4-1};
2109 let Inst{15-10} = 0b111000;
2111 let Inst{5} = op{0};
2116 // SME2 multi-vec FP to int convert two registers
2117 // SME2 multi-vec int to FP two registers
// Defines one record (NAME) of sme2_frint_cvt_vg2_multi with sz = 0b00 and
// ZZ_s_mul_r for both destination and source.
2118 multiclass sme2_fp_cvt_vg2_multi<string mnemonic, bits<5> op> {
2119 def NAME : sme2_frint_cvt_vg2_multi<0b00, op, ZZ_s_mul_r, ZZ_s_mul_r, mnemonic>;
2122 // SME2 multi-vec FRINT two registers
// Defines one _S record of sme2_frint_cvt_vg2_multi with sz = 0b10 and
// ZZ_s_mul_r for both destination and source.
2123 multiclass sme2_frint_vector_vg2_multi<string mnemonic, bits<5> op> {
2124 def _S : sme2_frint_cvt_vg2_multi<0b10, op, ZZ_s_mul_r, ZZ_s_mul_r, mnemonic>;
// Unary encoding class shared by the four-register FRINT, ZIP and convert
// multiclasses: one output $Zd (first_ty) and one input $Zn (second_ty).
//   sz -> Inst{23-22}
//   op -> split three ways: op{6-3} in Inst{19-16}, op{2-1} in Inst{6-5},
//         op{0} in Inst{1}
2127 class sme2_frint_zip_cvt_vg4_multi<bits<2>sz, bits<7>op, RegisterOperand first_ty,
2128 RegisterOperand second_ty, string mnemonic>
2129 : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
2130 mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
2133 let Inst{31-24} = 0b11000001;
2134 let Inst{23-22} = sz;
2135 let Inst{21-20} = 0b11;
2136 let Inst{19-16} = op{6-3};
2137 let Inst{15-10} = 0b111000;
2139 let Inst{6-5} = op{2-1};
2141 let Inst{1} = op{0};
2145 // SME2 multi-vec FP to int convert four registers
2146 // SME2 multi-vec int to FP four registers
// Defines one record (NAME) of sme2_frint_zip_cvt_vg4_multi with sz = 0b00
// and ZZZZ_s_mul_r for both destination and source.
2147 multiclass sme2_fp_cvt_vg4_multi<string mnemonic, bits<7> op> {
2148 def NAME : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>;
2151 // SME2 multi-vec ZIP four registers (byte, halfword, word and doubleword elements)
// Instantiates sme2_frint_zip_cvt_vg4_multi for the _B, _H, _S and _D
// suffixes (sz = 0b00, 0b01, 0b10, 0b11); source and destination use the same
// four-register operand class in each record.
2152 multiclass sme2_zip_vector_vg4<string mnemonic, bits<7> op> {
2153 def _B : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_b_mul_r, ZZZZ_b_mul_r,
2155 def _H : sme2_frint_zip_cvt_vg4_multi<0b01, op, ZZZZ_h_mul_r, ZZZZ_h_mul_r,
2157 def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r,
2159 def _D : sme2_frint_zip_cvt_vg4_multi<0b11, op, ZZZZ_d_mul_r, ZZZZ_d_mul_r,
2163 // SME2 multi-vec quadwords ZIP four registers
// Defines one record (NAME) of sme2_frint_zip_cvt_vg4_multi on the quadword
// operand class ZZZZ_q_mul_r, with sz = 0b00.
2164 multiclass sme2_zip_vector_vg4_Q<string mnemonic, bits<7> op> {
2165 def NAME: sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_q_mul_r, ZZZZ_q_mul_r,
2169 // SME2 multi-vec FRINT four registers
// Defines one _S record of sme2_frint_zip_cvt_vg4_multi with sz = 0b10 and
// ZZZZ_s_mul_r for both destination and source.
2170 multiclass sme2_frint_vector_vg4_multi<string mnemonic, bits<7> op> {
2171 def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r,
// Unary encoding class for the two-register down-convert formats: one output
// $Zd (first_ty) and one input $Zn (second_ty).
//   op -> split three ways: op{4} in Inst{22}, op{3-1} in Inst{18-16},
//         op{0} in Inst{5}
2175 class sme2_cvt_vg2_single<string mnemonic, bits<5> op,
2176 RegisterOperand first_ty, RegisterOperand second_ty>
2177 : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
2178 mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
2181 let Inst{31-23} = 0b110000010;
2182 let Inst{22} = op{4};
2183 let Inst{21-19} = 0b100;
2184 let Inst{18-16} = op{3-1};
2185 let Inst{15-10} = 0b111000;
2187 let Inst{5} = op{0};
2191 // SME2 multi-vec FP down convert two registers
2192 // SME2 multi-vec int down convert two registers
// Multiclass sharing its name with the class it instantiates. Defines the
// instruction (NAME) with a ZPR16 destination and ZZ_s_mul_r source, plus an
// SVE2p1_Cvt_VG2_Pat selection pattern mapping `intrinsic` from in_vt to
// out_vt.
2193 multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt,
2194 ValueType in_vt, SDPatternOperator intrinsic> {
2195 def NAME : sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>;
2196 def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
2199 // SME2 multi-vec FP8 down convert two registers
// Defines one record (NAME) with a ZPR8 destination and ZZ_h_mul_r source.
// The 5-bit opcode is built as {op, 0b1000}: the `op` bit becomes the top
// bit (encoded in Inst{22} by the class) and the low four bits are 0b1000.
2200 multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op> {
2201 def NAME : sme2_cvt_vg2_single<mnemonic, {op, 0b1000}, ZPR8, ZZ_h_mul_r>;
// Unary encoding class shared by the two-register unpack and up-convert
// multiclasses: one output $Zd (first_ty) and one input $Zn (second_ty).
//   sz -> Inst{23-22}
//   op -> Inst{18-16}
2204 class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,
2205 RegisterOperand second_ty, string mnemonic>
2206 : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
2207 mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
2210 let Inst{31-24} = 0b11000001;
2211 let Inst{23-22} = sz;
2212 let Inst{21-19} = 0b100;
2213 let Inst{18-16} = op;
2214 let Inst{15-10} = 0b111000;
2220 // SME2 multi-vec unpack two registers
// Instantiates sme2_cvt_unpk_vector_vg2 with op = 0b101 for _H, _S and _D
// (sz = 0b01, 0b10, 0b11). Each record pairs a two-register destination with
// a single source register of the next narrower class (ZPR8, ZPR16, ZPR32).
2221 multiclass sme2_unpk_vector_vg2<string mnemonic, bit u> {
2222 def _H : sme2_cvt_unpk_vector_vg2<0b01, 0b101, u, ZZ_h_mul_r, ZPR8, mnemonic>;
2223 def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b101, u, ZZ_s_mul_r, ZPR16, mnemonic>;
2224 def _D : sme2_cvt_unpk_vector_vg2<0b11, 0b101, u, ZZ_d_mul_r, ZPR32, mnemonic>;
2227 // SME2.1 multi-vec convert two registers
// Defines one _S record of sme2_cvt_unpk_vector_vg2 with sz = 0b10 and
// op = 0b000; `l` is passed in the class's `u` position. Destination is
// ZZ_s_mul_r, source is ZPR16.
2228 multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
2229 def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16, mnemonic>;
2232 // SME2 multi-vec FP8 up convert two registers
// Defines one record of sme2_cvt_unpk_vector_vg2 with op = 0b110; `opc` is
// passed as sz and `L` in the class's `u` position. Destination is
// ZZ_h_mul_r, source is ZPR8.
// NOTE(review): the record is declared as `_NAME`, i.e. a literal "_NAME"
// suffix is appended to the defm name rather than using NAME itself. Confirm
// this is intended before renaming; the generated record names may be
// referenced from other files.
2233 multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc, bit L> {
2234 def _NAME : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>;
// Unary encoding class for the four-register down-convert formats: one output
// $Zd (first_ty) and one input $Zn (second_ty).
//   op  -> split: op{2} in Inst{22}, op{1-0} in Inst{6-5}
//   op2 -> Inst{19-16}
2238 class sme2_cvt_vg4_single<bit sz, bits<3> op, bits<4>op2, RegisterOperand first_ty,
2239 RegisterOperand second_ty, string mnemonic>
2240 : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
2241 mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
2244 let Inst{31-24} = 0b11000001;
2246 let Inst{22} = op{2};
2247 let Inst{21-20} = 0b11;
2248 let Inst{19-16} = op2;
2249 let Inst{15-10} = 0b111000;
2251 let Inst{6-5} = op{1-0};
2255 // SME2 multi-vec int down convert four registers
// Defines two records with op2 = 0b0011:
//   _StoB - sz = 0, ZZZZ_s_mul_r source to ZPR8 destination;
//   _DtoH - sz = 1, ZZZZ_d_mul_r source to ZPR16 destination;
// plus one SME2_Cvt_VG4_Pat per record binding the same `intrinsic`
// (nxv4i32 -> nxv16i8 and nxv2i64 -> nxv8i16).
2256 multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
2257 def _StoB : sme2_cvt_vg4_single<0, op, 0b0011, ZPR8, ZZZZ_s_mul_r, mnemonic>;
2258 def _DtoH : sme2_cvt_vg4_single<1, op, 0b0011, ZPR16, ZZZZ_d_mul_r, mnemonic>;
2260 def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>;
2261 def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>;
2264 // SME2 multi-vec FP8 down convert four registers
// Defines one record of sme2_cvt_vg4_single with sz = 0, op2 = 0b0100 and a
// 3-bit op of {0b00, N} (N is the low bit). Destination is ZPR8, source is
// ZZZZ_s_mul_r.
// NOTE(review): the record is declared as `_NAME`, i.e. a literal "_NAME"
// suffix is appended to the defm name. Confirm this is intended before
// renaming; the generated record names may be referenced from other files.
2265 multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N> {
2266 def _NAME : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic>;
// Unary encoding class for the four-register unpack format: one output $Zd
// (first_ty) and one input $Zn (second_ty).
//   sz -> Inst{23-22}; Inst{21-10} is a fixed opcode pattern.
2269 class sme2_unpk_vector_vg4<bits<2>sz, bit u, RegisterOperand first_ty,
2270 RegisterOperand second_ty, string mnemonic>
2271 : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
2272 mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
2275 let Inst{31-24} = 0b11000001;
2276 let Inst{23-22} = sz;
2277 let Inst{21-10} = 0b110101111000;
2285 // SME2 multi-vec UNPK four registers
// Multiclass sharing its name with the class it instantiates. Defines _H, _S
// and _D (sz = 0b01, 0b10, 0b11); each record pairs a four-register
// destination with a two-register source of the next narrower class.
2286 multiclass sme2_unpk_vector_vg4<string mnemonic, bit u> {
2287 def _H : sme2_unpk_vector_vg4<0b01, u, ZZZZ_h_mul_r, ZZ_b_mul_r, mnemonic>;
2288 def _S : sme2_unpk_vector_vg4<0b10, u, ZZZZ_s_mul_r, ZZ_h_mul_r, mnemonic>;
2289 def _D : sme2_unpk_vector_vg4<0b11, u, ZZZZ_d_mul_r, ZZ_s_mul_r, mnemonic>;
2292 //===----------------------------------------------------------------------===//
2293 // SME2 multi-vec CLAMP registers
// Common base for the two- and four-register CLAMP formats. The result $Zd
// (multi_vector_ty) is tied to the input $_Zd; $Zn and $Zm are single vectors
// of vector_ty. Only $Zd, $Zn and $Zm appear in the assembly string.
//   sz  -> Inst{23-22}
//   Zm  -> Inst{20-16}
//   op1 -> Inst{12-10}
2295 class sme2_clamp_vector_vg24_multi<bits<2> sz, bits<3> op1, bit u,
2296 RegisterOperand multi_vector_ty,
2297 ZPRRegOp vector_ty, string mnemonic>
2298 : I<(outs multi_vector_ty:$Zd),
2299 (ins multi_vector_ty:$_Zd, vector_ty:$Zn, vector_ty:$Zm),
2300 mnemonic, "\t$Zd, $Zn, $Zm",
2304 let Inst{31-24} = 0b11000001;
2305 let Inst{23-22} = sz;
2307 let Inst{20-16} = Zm;
2308 let Inst{15-13} = 0b110;
2309 let Inst{12-10} = op1;
// The destination register group is also an input.
2313 let Constraints = "$Zd = $_Zd";
// Two-register CLAMP class; derives from sme2_clamp_vector_vg24_multi and
// forwards its template parameters to it.
2316 class sme2_clamp_vector_vg2_multi<bits<2> sz, bits<3> op1, bit u,
2317 RegisterOperand multi_vector_ty,
2318 ZPRRegOp vector_ty, string mnemonic>
2319 : sme2_clamp_vector_vg24_multi<sz, op1, u, multi_vector_ty, vector_ty,
// Instantiates sme2_clamp_vector_vg2_multi with op1 = 0b000 and u = 0 for
// _H, _S and _D (sz = 0b01, 0b10, 0b11).
2325 multiclass sme2_fp_clamp_vector_vg2_multi<string mnemonic>{
2326 def _H : sme2_clamp_vector_vg2_multi<0b01, 0b000, 0b0, ZZ_h_mul_r, ZPR16, mnemonic>;
2327 def _S : sme2_clamp_vector_vg2_multi<0b10, 0b000, 0b0, ZZ_s_mul_r, ZPR32, mnemonic>;
2328 def _D : sme2_clamp_vector_vg2_multi<0b11, 0b000, 0b0, ZZ_d_mul_r, ZPR64, mnemonic>;
// Instantiates sme2_clamp_vector_vg2_multi with op1 = 0b001 and the caller's
// `u` bit for _B, _H, _S and _D (sz = 0b00, 0b01, 0b10, 0b11).
2331 multiclass sme2_int_clamp_vector_vg2_multi<string mnemonic, bit u>{
2332 def _B : sme2_clamp_vector_vg2_multi<0b00, 0b001, u, ZZ_b_mul_r, ZPR8, mnemonic>;
2333 def _H : sme2_clamp_vector_vg2_multi<0b01, 0b001, u, ZZ_h_mul_r, ZPR16, mnemonic>;
2334 def _S : sme2_clamp_vector_vg2_multi<0b10, 0b001, u, ZZ_s_mul_r, ZPR32, mnemonic>;
2335 def _D : sme2_clamp_vector_vg2_multi<0b11, 0b001, u, ZZ_d_mul_r, ZPR64, mnemonic>;
2338 // SME2.1 multi-vec FCLAMP two registers
// Defines a single _H record of sme2_clamp_vector_vg2_multi with sz = 0b00
// (the fp multiclass uses 0b01 for _H), op1 = 0b000 and u = 0, on
// ZZ_h_mul_r / ZPR16.
2339 multiclass sme2p1_bfclamp_vector_vg2_multi<string mnemonic> {
2340 def _H : sme2_clamp_vector_vg2_multi<0b00, 0b000, 0b0, ZZ_h_mul_r, ZPR16,
// Four-register CLAMP class; derives from sme2_clamp_vector_vg24_multi and
// forwards its template parameters to it.
2344 class sme2_clamp_vector_vg4_multi<bits<2> sz, bits<3> op1, bit u,
2345 RegisterOperand multi_vector_ty,
2346 ZPRRegOp vector_ty, string mnemonic>
2347 : sme2_clamp_vector_vg24_multi<sz, op1, u, multi_vector_ty, vector_ty,
// Instantiates sme2_clamp_vector_vg4_multi with op1 = 0b010 and u = 0 for
// _H, _S and _D (sz = 0b01, 0b10, 0b11).
2354 multiclass sme2_fp_clamp_vector_vg4_multi<string mnemonic>{
2355 def _H : sme2_clamp_vector_vg4_multi<0b01, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16, mnemonic>;
2356 def _S : sme2_clamp_vector_vg4_multi<0b10, 0b010, 0b0, ZZZZ_s_mul_r, ZPR32, mnemonic>;
2357 def _D : sme2_clamp_vector_vg4_multi<0b11, 0b010, 0b0, ZZZZ_d_mul_r, ZPR64, mnemonic>;
// Instantiates sme2_clamp_vector_vg4_multi with op1 = 0b011 and the caller's
// `u` bit for _B, _H, _S and _D (sz = 0b00, 0b01, 0b10, 0b11).
2360 multiclass sme2_int_clamp_vector_vg4_multi<string mnemonic, bit u>{
2361 def _B : sme2_clamp_vector_vg4_multi<0b00, 0b011, u, ZZZZ_b_mul_r, ZPR8, mnemonic>;
2362 def _H : sme2_clamp_vector_vg4_multi<0b01, 0b011, u, ZZZZ_h_mul_r, ZPR16, mnemonic>;
2363 def _S : sme2_clamp_vector_vg4_multi<0b10, 0b011, u, ZZZZ_s_mul_r, ZPR32, mnemonic>;
2364 def _D : sme2_clamp_vector_vg4_multi<0b11, 0b011, u, ZZZZ_d_mul_r, ZPR64, mnemonic>;
2367 // SME2.1 multi-vec FCLAMP four registers
// Defines a single _H record of sme2_clamp_vector_vg4_multi with sz = 0b00
// (the fp multiclass uses 0b01 for _H), op1 = 0b010 and u = 0, on
// ZZZZ_h_mul_r / ZPR16.
2368 multiclass sme2p1_bfclamp_vector_vg4_multi<string mnemonic> {
2369 def _H : sme2_clamp_vector_vg4_multi<0b00, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16,
2373 // SME2 multi-vec ZIP two registers
// Encoding class producing a register group $Zd (multi_vector_ty) from two
// single vectors $Zn and $Zm (vector_ty). Unlike the CLAMP classes there is
// no tied input.
//   sz -> Inst{23-22}
//   Zm -> Inst{20-16}
2374 class sme2_zip_vector_vg2<bits<2> sz, bit q, bit u,
2375 RegisterOperand multi_vector_ty,
2376 ZPRRegOp vector_ty, string mnemonic>
2377 : I<(outs multi_vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm),
2378 mnemonic, "\t$Zd, $Zn, $Zm",
2383 let Inst{31-24} = 0b11000001;
2384 let Inst{23-22} = sz;
2386 let Inst{20-16} = Zm;
2387 let Inst{15-11} = 0b11010;
// Multiclass sharing its name with the class it instantiates. `op` is passed
// in the class's `u` position. _B, _H, _S and _D use q = 0 with sz = 0b00,
// 0b01, 0b10, 0b11; _Q reuses sz = 0b00 and is distinguished by q = 1.
2394 multiclass sme2_zip_vector_vg2<string mnemonic, bit op> {
2395 def _B : sme2_zip_vector_vg2<0b00, 0b0, op, ZZ_b_mul_r, ZPR8, mnemonic>;
2396 def _H : sme2_zip_vector_vg2<0b01, 0b0, op, ZZ_h_mul_r, ZPR16, mnemonic>;
2397 def _S : sme2_zip_vector_vg2<0b10, 0b0, op, ZZ_s_mul_r, ZPR32, mnemonic>;
2398 def _D : sme2_zip_vector_vg2<0b11, 0b0, op, ZZ_d_mul_r, ZPR64, mnemonic>;
2399 def _Q : sme2_zip_vector_vg2<0b00, 0b1, op, ZZ_q_mul_r, ZPR128, mnemonic>;
2402 //===----------------------------------------------------------------------===//
2403 // SME2 Dot Products and MLA
// Base class for indexed two-register (vgx2) operations that accumulate into
// a matrix array: $ZAda (matrix_ty) is tied to $_ZAda; inputs are the slice
// register $Rv, slice offset $imm3, source group $Zn, multiplier $Zm and its
// element index $i (index_ty).
//   sz   -> Inst{23-22}
//   Zm   -> Inst{19-16}
//   Rv   -> Inst{14-13}
//   op   -> split: op{5-3} in Inst{12-10}, op{2-0} in Inst{5-3}
//   imm3 -> Inst{2-0}
// Users pass `?` for some op bits and then override the corresponding Inst
// bits with pieces of the index $i.
2404 class sme2_multi_vec_array_vg2_index<bits<2> sz, bits<6> op, MatrixOperand matrix_ty,
2405 RegisterOperand multi_vector_ty,
2406 ZPRRegOp vector_ty, Operand index_ty,
2408 : I<(outs matrix_ty:$ZAda),
2409 (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2410 multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i),
2411 mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i",
2412 "", []>, Sched<[]> {
2417 let Inst{31-24} = 0b11000001;
2418 let Inst{23-22} = sz;
2419 let Inst{21-20} = 0b01;
2420 let Inst{19-16} = Zm;
2422 let Inst{14-13} = Rv;
2423 let Inst{12-10} = op{5-3};
2425 let Inst{5-3} = op{2-0};
2426 let Inst{2-0} = imm3;
// The accumulator array is read and written.
2428 let Constraints = "$ZAda = $_ZAda";
2431 // SME2 multi-vec ternary indexed two registers 32-bit
// MatrixOp32 instantiation of sme2_multi_vec_array_vg2_index with a
// VectorIndexS32b_timm index. The 6-bit op is {op{3}, ?, ?, op{2-0}}: the
// two unset bits correspond to Inst{11-10}, which is then set to the index
// $i. Also defines the _PSEUDO record, a selection pattern for `intrinsic`
// (tileslice16) and an InstAlias whose assembly string omits "vgx2".
2432 multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<4> op,
2433 RegisterOperand multi_vector_ty,
2434 ZPRRegOp vector_ty, ValueType vt,
2435 SDPatternOperator intrinsic> {
2436 def NAME : sme2_multi_vec_array_vg2_index<sz, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty, vector_ty,
2437 VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr<NAME, 1> {
2439 let Inst{11-10} = i;
2441 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>;
2443 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>;
2445 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2446 (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2447 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i), 0>;
2450 // SME2.1 multi-vec ternary indexed two registers 16-bit
2451 // SME2 multi-vec indexed FP8 two-way dot product to FP16 two registers
// MatrixOp16 instantiation of sme2_multi_vec_array_vg2_index with a
// VectorIndexH index. The 6-bit op is {op{2}, ?, ?, op{1-0}, ?}: the two
// unset middle bits correspond to Inst{11-10}, which is set to i{2-1}; the
// unset low bit corresponds to Inst{3}. Also defines an InstAlias whose
// assembly string omits "vgx2". No pseudo or selection pattern is defined in
// the visible lines.
2452 multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3> op,
2453 RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> {
2454 def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16,
2455 multi_vector_ty, zpr_ty,
2456 VectorIndexH, mnemonic> {
2458 let Inst{11-10} = i{2-1};
2462 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2463 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2464 multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>;
2467 // SME2 multi-vec indexed FP8 two-way vertical dot product to single precision
// Derives from the vg2 index class (sz = 0b11, MatrixOp32, ZZ_b_mul_r source,
// ZPR4b8 multiplier, VectorIndexS index) but overrides AsmString so the
// assembly shows "vgx4". The 6-bit op is {0b01, ?, 0b0, T, ?}; the first
// unset bit corresponds to Inst{10}, which is set to i{1}.
2469 class sme2_fp8_multi_vec_array_vg4_index<string mnemonic, bit T>
2470 : sme2_multi_vec_array_vg2_index<0b11, {0b01,?,0b0, T,?}, MatrixOp32,
2471 ZZ_b_mul_r, ZPR4b8, VectorIndexS, mnemonic> {
2474 let Inst{10} = i{1};
2476 let AsmString = !strconcat(mnemonic, "{\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i}");
2479 // SME2 multi-vec ternary indexed two registers 64-bit
// Stand-alone encoding class for the indexed two-register (vgx2) form that
// accumulates into MatrixOp64, with a VectorIndexD32b_timm index $i1.
// $ZAda is tied to $_ZAda.
//   Inst{31-20} fixed, Zm -> Inst{19-16}, Rv -> Inst{14-13},
//   Inst{12-11} fixed to 0b00, imm3 -> Inst{2-0}
2481 class sme2_multi_vec_array_vg2_index_64b<bits<2> op,
2482 RegisterOperand multi_vector_ty,
2485 : I<(outs MatrixOp64:$ZAda),
2486 (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2487 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1),
2488 mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i1",
2489 "", []>, Sched<[]> {
2495 let Inst{31-20} = 0b110000011101;
2496 let Inst{19-16} = Zm;
2498 let Inst{14-13} = Rv;
2499 let Inst{12-11} = 0b00;
2504 let Inst{2-0} = imm3;
// The accumulator array is read and written.
2506 let Constraints = "$ZAda = $_ZAda";
// Multiclass sharing its name with the class it instantiates. Defines the
// instruction (NAME), its _PSEUDO, a selection pattern for `intrinsic`
// (element type vt, tileslice16) and an InstAlias whose assembly string
// omits "vgx2".
2509 multiclass sme2_multi_vec_array_vg2_index_64b<string mnemonic, bits<2> op,
2510 RegisterOperand multi_vector_ty,
2511 ZPRRegOp vector_ty, ValueType vt,
2512 SDPatternOperator intrinsic> {
2513 def NAME : sme2_multi_vec_array_vg2_index_64b<op, multi_vector_ty, vector_ty,
2514 mnemonic>, SMEPseudo2Instr<NAME, 1>;
2516 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexD32b_timm, SMEMatrixArray>;
2518 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexD32b_timm, tileslice16>;
2520 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i1",
2521 (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2522 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>;
// Base class for indexed four-register (vgx4) operations that accumulate into
// a matrix array: $ZAda (matrix_ty) is tied to $_ZAda; inputs are $Rv, $imm3,
// source group $Zn, multiplier $Zm and its element index $i (index_ty).
//   Zm   -> Inst{19-16}
//   Rv   -> Inst{14-13}
//   op   -> split: op{6-4} in Inst{12-10}, op{3-0} in Inst{6-3}
//   imm3 -> Inst{2-0}
// Users pass `?` for some op bits and then override the corresponding Inst
// bits with pieces of the index $i.
2525 class sme2_multi_vec_array_vg4_index<bit sz, bits<7> op, MatrixOperand matrix_ty,
2526 RegisterOperand multi_vector_ty,
2527 ZPRRegOp vector_ty, Operand index_ty,
2529 : I<(outs matrix_ty:$ZAda),
2530 (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2531 multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i),
2532 mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i",
2533 "", []>, Sched<[]> {
2538 let Inst{31-23} = 0b110000010;
2540 let Inst{21-20} = 0b01;
2541 let Inst{19-16} = Zm;
2543 let Inst{14-13} = Rv;
2544 let Inst{12-10} = op{6-4};
2546 let Inst{6-3} = op{3-0};
2547 let Inst{2-0} = imm3;
// The accumulator array is read and written.
2549 let Constraints = "$ZAda = $_ZAda";
2552 // SME2 multi-vec ternary indexed four registers 32-bit
// MatrixOp32 instantiation of sme2_multi_vec_array_vg4_index (sz = 1) with a
// VectorIndexS32b_timm index. The 7-bit op is {op{3}, ?, ?, 0b0, op{2-0}}:
// the two unset bits correspond to Inst{11-10}, which is then set to the
// index $i. Also defines the _PSEUDO record, a selection pattern for
// `intrinsic` (tileslice16) and an InstAlias whose assembly string omits
// "vgx4".
2553 multiclass sme2_multi_vec_array_vg4_index_32b<string mnemonic, bits<4> op,
2554 RegisterOperand multi_vector_ty,
2555 ZPRRegOp vector_ty, ValueType vt,
2556 SDPatternOperator intrinsic> {
2557 def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,0b0, op{2-0}}, MatrixOp32, multi_vector_ty,
2558 vector_ty, VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr<NAME, 1> {
2560 let Inst{11-10} = i;
2563 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>;
2565 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>;
2567 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2568 (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2569 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i), 0>;
2572 // SME2.1 multi-vec ternary indexed four registers 16-bit
// MatrixOp16 instantiation of sme2_multi_vec_array_vg4_index (sz = 0) with a
// VectorIndexH index. The 7-bit op is {0b1, ?, ?, op, ?}: the two unset
// middle bits correspond to Inst{11-10}, which is set to i{2-1}; the unset
// low bit corresponds to Inst{3}. Also defines an InstAlias whose assembly
// string omits "vgx4". No pseudo or selection pattern is defined in the
// visible lines.
2573 multiclass sme2p1_multi_vec_array_vg4_index_16b<string mnemonic, bits<3> op,
2574 RegisterOperand multi_vector_ty,
2576 def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16,
2577 multi_vector_ty, zpr_ty,
2578 VectorIndexH, mnemonic>{
2580 let Inst{11-10} = i{2-1};
2584 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2585 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
2586 sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>;
2589 // SME2 multi-vec ternary indexed four registers 64-bit
// Stand-alone encoding class for the indexed four-register (vgx4) form that
// accumulates into MatrixOp64, with a VectorIndexD32b_timm index $i1.
// $ZAda is tied to $_ZAda.
//   Inst{31-20} fixed, Zm -> Inst{19-16}, Rv -> Inst{14-13}
//   op -> split: op{2} in Inst{11}, op{1-0} in Inst{4-3}
//   Inst{6-5} fixed to 0b00, imm3 -> Inst{2-0}
2590 class sme2_multi_vec_array_vg4_index_64b<bits<3> op,
2591 RegisterOperand multi_vector_ty,
2594 : I<(outs MatrixOp64:$ZAda),
2595 (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2596 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1),
2597 mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i1",
2598 "", []>, Sched<[]> {
2604 let Inst{31-20} = 0b110000011101;
2605 let Inst{19-16} = Zm;
2607 let Inst{14-13} = Rv;
2609 let Inst{11} = op{2};
2612 let Inst{6-5} = 0b00;
2613 let Inst{4-3} = op{1-0};
2614 let Inst{2-0} = imm3;
// The accumulator array is read and written.
2616 let Constraints = "$ZAda = $_ZAda";
// Wraps the 64-bit vg4 index class: instruction record, _PSEUDO counterpart,
// selection pattern for `intrinsic`, and an alias (last InstAlias argument 0)
// whose syntax drops the "vgx4" qualifier.
2619 multiclass sme2_multi_vec_array_vg4_index_64b<string mnemonic, bits<3> op,
2620 RegisterOperand multi_vector_ty,
2621 ZPRRegOp vector_ty, ValueType vty,
2622 SDPatternOperator intrinsic> {
2623 def NAME : sme2_multi_vec_array_vg4_index_64b<op, multi_vector_ty, vector_ty,
2624 mnemonic>, SMEPseudo2Instr<NAME, 1>;
2626 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexD32b_timm, SMEMatrixArray>;
2628 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vty, VectorIndexD32b_timm, tileslice16>;
2630 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i1",
2631 (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
2632 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>;
2635 // FMLAL (multiple and indexed vector, FP8 to FP16)
// Shared format for the two- and four-register forms: `vg4` selects whether
// the assembly string prints "vgx4" or "vgx2". The result $ZAda is tied to
// the input $_ZAda.
2636 class sme2_multi_vec_array_vg24_index_16b<bits<2> sz, bit vg4, bits<3> op,
2637 RegisterOperand multi_vector_ty, string mnemonic>
2638 : I<(outs MatrixOp16:$ZAda),
2639 (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
2640 multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
2641 mnemonic, "\t$ZAda[$Rv, $imm2, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
2642 "", []>, Sched<[]> {
2647 let Inst{31-24} = 0b11000001;
2648 let Inst{23-22} = sz;
2649 let Inst{21-20} = 0b01;
2650 let Inst{19-16} = Zm;
2652 let Inst{14-13} = Rv;
// op{2} at bit 12, op{1-0} at bits 5-4.
2653 let Inst{12} = op{2};
// The lane index is split: i{3-2} at bits 11-10, i{1-0} at bits 3-2.
2654 let Inst{11-10} = i{3-2};
2655 let Inst{5-4} = op{1-0};
2656 let Inst{3-2} = i{1-0};
2657 let Inst{1-0} = imm2;
2659 let Constraints = "$ZAda = $_ZAda";
// Two-register form: instantiates the vg24 class with vg4 = 0b0 and
// ZZ_b_mul_r sources, plus an alias (last InstAlias argument 0) without the
// vector-group qualifier.
2662 multiclass sme2_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3>op> {
2663 def NAME : sme2_multi_vec_array_vg24_index_16b<sz, 0b0, op, ZZ_b_mul_r, mnemonic> {
2667 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
2668 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
2669 uimm2s2range:$imm2, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
// Four-register form: instantiates the vg24 class with vg4 = 0b1 and
// ZZZZ_b_mul_r sources, plus an alias (last InstAlias argument 0) without the
// vector-group qualifier.
2672 multiclass sme2_multi_vec_array_vg4_index_16b<string mnemonic, bits<2>sz, bits<3>op> {
2673 def NAME: sme2_multi_vec_array_vg24_index_16b<sz, 0b1, op, ZZZZ_b_mul_r, mnemonic> {
2678 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
2679 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
2680 uimm2s2range:$imm2, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
2683 //===----------------------------------------------------------------------===//
2684 // SME2 multi-vec indexed long long MLA one source 16-bit
// Single ZPR8 source, ZPR4b8 indexed source, MatrixOp16 accumulator tied
// in/out through Constraints.
2685 class sme2_mla_ll_array_index_16b<string mnemonic, bits<2> sz,bits<2> op>
2686 : I<(outs MatrixOp16:$ZAda),
2687 (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm3s2range:$imm3, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
2688 mnemonic, "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
2689 "", []>, Sched<[]> {
2695 let Inst{31-24} = 0b11000001;
2696 let Inst{23-22} = sz;
2697 let Inst{21-20} = 0b00;
2698 let Inst{19-16} = Zm;
// Lane index bits are scattered: i{3} at bit 15, i{2-1} at bits 11-10.
2699 let Inst{15} = i{3};
2700 let Inst{14-13} = Rv;
// op{1} at bit 12, op{0} at bit 4.
2701 let Inst{12} = op{1};
2702 let Inst{11-10} = i{2-1};
2704 let Inst{4} = op{0};
2706 let Inst{2-0} = imm3;
2708 let Constraints = "$ZAda = $_ZAda";
2711 // SME2 multi-vec indexed long long MLA one source 32-bit
// Single ZPR8 source, ZPR4b8 indexed source, MatrixOp32 accumulator tied
// in/out through Constraints.
2712 class sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op>
2713 : I<(outs MatrixOp32:$ZAda),
2714 (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
2715 mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
2716 "", []>, Sched<[]> {
2722 let Inst{31-24} = 0b11000001;
2723 let Inst{23-22} = sz;
2724 let Inst{21-20} = 0b00;
2725 let Inst{19-16} = Zm;
// Lane index: i{3} at bit 15, i{2-0} at bits 12-10.
2726 let Inst{15} = i{3};
2727 let Inst{14-13} = Rv;
2728 let Inst{12-10} = i{2-0};
2731 let Inst{1-0} = imm2;
2733 let Constraints = "$ZAda = $_ZAda";
// Wraps the 32-bit class: instruction record, _PSEUDO counterpart, and the
// selection pattern for `intrinsic` on nxv16i8 operands.
2736 multiclass sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op, SDPatternOperator intrinsic> {
2737 def NAME : sme2_mla_ll_array_index_32b<mnemonic, sz, op>, SMEPseudo2Instr<NAME, 1>;
2739 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR8, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
2741 def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange2s4>;
2744 // SME2 multi-vec indexed long long MLA one source 64-bit
// Single ZPR16 source, ZPR4b16 indexed source, MatrixOp64 accumulator tied
// in/out through Constraints.
2746 class sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op>
2747 : I<(outs MatrixOp64:$ZAda),
2748 (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR16:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i),
2749 mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
2750 "", []>, Sched<[]> {
2756 let Inst{31-20} = 0b110000011000;
2757 let Inst{19-16} = Zm;
// Lane index: i{2} at bit 15, i{1-0} at bits 11-10.
2758 let Inst{15} = i{2};
2759 let Inst{14-13} = Rv;
2761 let Inst{11-10} = i{1-0};
2765 let Inst{1-0} = imm2;
2767 let Constraints = "$ZAda = $_ZAda";
// Wraps the 64-bit class: instruction record, _PSEUDO counterpart, and the
// selection pattern for `intrinsic` on nxv8i16 operands.
2770 multiclass sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
2771 def NAME : sme2_mla_ll_array_index_64b<mnemonic, op>, SMEPseudo2Instr<NAME, 1>;
2773 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR16, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
2775 def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s4>;
// Shared 32-bit indexed format for the two- and four-register forms: `vg4`
// selects whether the assembly string prints "vgx4" or "vgx2". The result
// $ZAda is tied to the input $_ZAda.
2778 class sme2_mla_ll_array_vg24_index_32b<bits<2> sz, bit vg4, bits<3> op,
2779 RegisterOperand vector_ty,
2781 : I<(outs MatrixOp32:$ZAda),
2782 (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
2783 vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
2784 mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
2785 "", []>, Sched<[]> {
2790 let Inst{31-24} = 0b11000001;
2791 let Inst{23-22} = sz;
2792 let Inst{21-20} = 0b01;
2793 let Inst{19-16} = Zm;
2795 let Inst{14-13} = Rv;
// Lane index is split: i{3-2} at bits 11-10, i{1-0} at bits 2-1.
2797 let Inst{11-10} = i{3-2};
2799 let Inst{2-1} = i{1-0};
2802 let Constraints = "$ZAda = $_ZAda";
2805 // SME2 multi-vec indexed long long MLA two sources 32-bit
// Instantiates the vg24 class with vg4 = 0b0 and ZZ_b_mul_r sources, then adds
// the _PSEUDO counterpart, the selection pattern for `intrinsic`, and an
// alias (last InstAlias argument 0) without the vector-group qualifier.
2807 multiclass sme2_mla_ll_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<3> op, SDPatternOperator intrinsic> {
2808 def NAME: sme2_mla_ll_array_vg24_index_32b<sz, 0b0, op, ZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
2813 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
2815 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange1s4>;
2817 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
2818 (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>;
2821 // SME2 multi-vec indexed long long MLA four sources 32-bit
// Takes a 4-bit op: op{2-0} is forwarded to the vg24 class (with vg4 = 0b1)
// and op{3} is placed at encoding bit 6 here. Also adds the _PSEUDO
// counterpart, the selection pattern, and an alias (last InstAlias argument 0).
2823 multiclass sme2_mla_ll_array_vg4_index_32b<string mnemonic, bits<2> sz, bits<4> op, SDPatternOperator intrinsic> {
2824 def NAME: sme2_mla_ll_array_vg24_index_32b<sz, 0b1, op{2-0}, ZZZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
2827 let Inst{6} = op{3};
2830 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
2832 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange1s4>;
2834 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
2835 (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>;
// Shared 64-bit indexed format for the two- and four-register forms: `vg4`
// selects whether the assembly string prints "vgx4" or "vgx2". The result
// $ZAda is tied to the input $_ZAda.
2837 class sme2_mla_ll_array_vg24_index_64b<bit vg4, bits<2> op,
2838 RegisterOperand vector_ty,
2840 : I<(outs MatrixOp64:$ZAda),
2841 (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
2842 vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i),
2843 mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
2844 "", []>, Sched<[]> {
2849 let Inst{31-20} = 0b110000011001;
2850 let Inst{19-16} = Zm;
2852 let Inst{14-13} = Rv;
2853 let Inst{12-11} = 0b00;
// Lane index is split: i{2} at bit 10, i{1-0} at bits 2-1.
2854 let Inst{10} = i{2};
2857 let Inst{2-1} = i{1-0};
2860 let Constraints = "$ZAda = $_ZAda";
2863 // SME2 multi-vec indexed long long MLA two sources 64-bit
// Instantiates the vg24 class with vg4 = 0b0 and ZZ_h_mul_r sources, then adds
// the _PSEUDO counterpart, the selection pattern for `intrinsic`, and an
// alias (last InstAlias argument 0) without the vector-group qualifier.
2865 multiclass sme2_mla_ll_array_vg2_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
2866 def NAME: sme2_mla_ll_array_vg24_index_64b<0b0, op, ZZ_h_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
2871 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
2873 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange1s4>;
2875 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
2876 (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 0>;
2879 // SME2 multi-vec indexed long long MLA four sources 64-bit
// Instantiates the vg24 class with vg4 = 0b1 and ZZZZ_h_mul_r sources, then
// adds the _PSEUDO counterpart, the selection pattern for `intrinsic`, and an
// alias (last InstAlias argument 0) without the vector-group qualifier.
2881 multiclass sme2_mla_ll_array_vg4_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
2882 def NAME: sme2_mla_ll_array_vg24_index_64b<0b1, op, ZZZZ_h_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
2888 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
2890 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange1s4>;
2892 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
2893 (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 0>;
2897 // SME2 multiple and single vector long long FMA one source
// Non-indexed form: the accumulator type is a parameter (matrix_ty) and is
// tied in/out through Constraints. The 5-bit op is spread over bit 22,
// bit 20 and bits 4-2.
2899 class sme2_mla_ll_array_single<string mnemonic, bits<5> op,
2900 MatrixOperand matrix_ty, ZPRRegOp vector_ty,
2902 : I<(outs matrix_ty:$ZAda),
2903 (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm,
2904 vector_ty:$Zn, zpr_ty:$Zm),
2905 mnemonic, "\t$ZAda[$Rv, $imm], $Zn, $Zm",
2906 "", []>, Sched<[]> {
2911 let Inst{31-23} = 0b110000010;
2912 let Inst{22} = op{4}; //sz
2914 let Inst{20} = op{3}; //fp8
2915 let Inst{19-16} = Zm;
2917 let Inst{14-13} = Rv;
2918 let Inst{12-10} = 0b001;
2920 let Inst{4-2} = op{2-0};
2921 let Inst{1-0} = imm;
2923 let Constraints = "$ZAda = $_ZAda";
// Wraps the single-vector class: instruction record, NAME # _PSEUDO
// counterpart, and the selection pattern for `intrinsic` on value type `vt`.
2926 multiclass sme2_mla_ll_array_single<string mnemonic, bits<5> op,
2927 MatrixOperand matrix_ty, ZPRRegOp vector_ty,
2928 ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> {
2929 def NAME : sme2_mla_ll_array_single<mnemonic, op, matrix_ty, vector_ty, zpr_ty>, SMEPseudo2Instr<NAME, 1>;
2931 def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s4range, vector_ty, zpr_ty, SMEMatrixArray>;
2933 def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME, intrinsic, uimm2s4range, zpr_ty, vt, tileslicerange2s4>;
// Shared multi-and-single format for the two- and four-register forms.
// op{4} is both encoded at bit 20 and used to pick "vgx4" vs "vgx2" in the
// assembly string; op{5} goes to bit 22 and op{3-0} to bits 4-1. The result
// $ZAda is tied to the input $_ZAda.
2936 class sme2_mla_ll_array_vg24_single<bits<6> op, MatrixOperand matrix_ty,
2937 RegisterOperand vector_ty, ZPRRegOp zpr_ty,
2939 : I<(outs matrix_ty:$ZAda),
2940 (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
2941 vector_ty:$Zn, zpr_ty:$Zm),
2942 mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{4}, "vgx4", "vgx2") # "], $Zn, $Zm",
2943 "", []>, Sched<[]> {
2948 let Inst{31-23} = 0b110000010;
2949 let Inst{22} = op{5}; //sz
2951 let Inst{20} = op{4}; //vg4
2952 let Inst{19-16} = Zm;
2954 let Inst{14-13} = Rv;
2955 let Inst{12-10} = 0b000;
2957 let Inst{4-1} = op{3-0};
2960 let Constraints = "$ZAda = $_ZAda";
2963 // SME2 single-multi long long MLA two and four sources
// Common wrapper used by the vg2/vg4 multiclasses: instruction record,
// NAME # _PSEUDO counterpart, and an alias (last InstAlias argument 0)
// without the vector-group qualifier. No selection pattern is added here.
2965 multiclass sme2_mla_ll_array_vg24_single<string mnemonic, bits<6> op,
2966 MatrixOperand matrix_ty,
2967 RegisterOperand multi_vector_ty,
2969 def NAME: sme2_mla_ll_array_vg24_single<op, matrix_ty, multi_vector_ty,
2970 zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
2972 def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm1s4range, multi_vector_ty, zpr_ty, SMEMatrixArray>;
2974 def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm], $Zn, $Zm",
2975 (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
// Two-register form: widens the 5-bit op to the 6-bit op of the vg24 wrapper
// by appending a 0 as the lowest bit, then adds the VG2 selection pattern.
2978 multiclass sme2_mla_ll_array_vg2_single<string mnemonic, bits<5> op,
2979 MatrixOperand matrix_ty,
2980 RegisterOperand multi_vector_ty,
2981 ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> {
2983 defm NAME: sme2_mla_ll_array_vg24_single<mnemonic, {op, 0b0}, matrix_ty, multi_vector_ty, zpr_ty>;
2985 def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm1s4range, zpr_ty, vt, tileslicerange1s4>;
// Four-register form: widens the 5-bit op to the 6-bit op of the vg24 wrapper
// by appending a 0 as the lowest bit, then adds the VG4 selection pattern.
// NOTE(review): the vg24 class derives "vgx4" from bit 4 of the widened op,
// i.e. op{3} of the value passed here -- callers presumably set it; confirm.
2988 multiclass sme2_mla_ll_array_vg4_single<string mnemonic, bits<5> op,
2989 MatrixOperand matrix_ty,
2990 RegisterOperand multi_vector_ty,
2991 ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> {
2992 defm NAME: sme2_mla_ll_array_vg24_single<mnemonic, {op, 0b0}, matrix_ty, multi_vector_ty, zpr_ty>;
2994 def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm1s4range, zpr_ty, vt, tileslicerange1s4>;
2997 // SME2 multiple vectors long long MLA two sources
// Both sources use the same multi-vector operand type; the assembly string
// always prints "vgx2". The result $ZAda is tied to the input $_ZAda.
2999 class sme2_mla_ll_array_vg2_multi<bits<5> op, MatrixOperand matrix_ty,
3000 RegisterOperand vector_ty,string mnemonic>
3001 : I<(outs matrix_ty:$ZAda),
3002 (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
3003 vector_ty:$Zn, vector_ty:$Zm),
3004 mnemonic, "\t$ZAda[$Rv, $imm, vgx2], $Zn, $Zm",
3005 "", []>, Sched<[]> {
3010 let Inst{31-23} = 0b110000011;
3011 let Inst{22} = op{4}; // sz
// Zm occupies four bits (20-17) in this two-register form.
3013 let Inst{20-17} = Zm;
3014 let Inst{16-15} = 0b00;
3015 let Inst{14-13} = Rv;
3016 let Inst{12-10} = 0b000;
3018 let Inst{5-2} = op{3-0};
3022 let Constraints = "$ZAda = $_ZAda";
// Wraps the vg2 multi class: instruction record, _PSEUDO counterpart,
// selection pattern for `intrinsic`, and an alias (last InstAlias argument 0)
// whose syntax drops the "vgx2" qualifier.
3025 multiclass sme2_mla_ll_array_vg2_multi<string mnemonic, bits<5> op,
3026 MatrixOperand matrix_ty,
3027 RegisterOperand vector_ty,
3028 ValueType vt, SDPatternOperator intrinsic> {
3029 def NAME : sme2_mla_ll_array_vg2_multi<op, matrix_ty, vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
3031 def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm1s4range, vector_ty, SMEMatrixArray>;
3033 def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, uimm1s4range, vt, tileslicerange1s4>;
3035 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
3036 (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>;
3039 // SME2 multiple vectors long long MLA four sources
// Both sources use the same multi-vector operand type; the assembly string
// always prints "vgx4". The result $ZAda is tied to the input $_ZAda.
3041 class sme2_mla_ll_array_vg4_multi<bits<5> op,MatrixOperand matrix_ty,
3042 RegisterOperand vector_ty,
3044 : I<(outs matrix_ty:$ZAda),
3045 (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
3046 vector_ty:$Zn, vector_ty:$Zm),
3047 mnemonic, "\t$ZAda[$Rv, $imm, vgx4], $Zn, $Zm",
3048 "", []>, Sched<[]> {
3053 let Inst{31-23} = 0b110000011;
3054 let Inst{22} = op{4}; // sz
// Zm occupies three bits (20-18) in this four-register form; bits 17-15 are
// the fixed pattern 0b010.
3056 let Inst{20-18} = Zm;
3057 let Inst{17-15} = 0b010;
3058 let Inst{14-13} = Rv;
3059 let Inst{12-10} = 0b000;
3062 let Inst{5-2} = op{3-0};
3066 let Constraints = "$ZAda = $_ZAda";
// Wraps the vg4 multi class: instruction record, _PSEUDO counterpart,
// selection pattern for `intrinsic`, and an alias (last InstAlias argument 0)
// whose syntax drops the "vgx4" qualifier.
3069 multiclass sme2_mla_ll_array_vg4_multi<string mnemonic, bits<5> op,
3070 MatrixOperand matrix_ty,
3071 RegisterOperand vector_ty,
3072 ValueType vt, SDPatternOperator intrinsic> {
3073 def NAME : sme2_mla_ll_array_vg4_multi<op, matrix_ty, vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
3075 def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm1s4range, vector_ty, SMEMatrixArray>;
3077 def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, uimm1s4range, vt, tileslicerange1s4>;
3079 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
3080 (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>;
3083 //===----------------------------------------------------------------------===//
3084 // SME2 Outer Product and Accumulate
// Integer outer product into a TileOp32 tile from ZPR16 sources: instruction
// record (tile number at bits 1-0), a _PSEUDO counterpart, and the selection
// pattern for `intrinsic` using nxv8i1 / nxv8i16 types.
3086 multiclass sme2_int_mopx_tile<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
3087 def NAME : sme_int_outer_product_inst<op, 0b0, 0b1, TileOp32, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> {
3089 let Inst{1-0} = ZAda;
3093 def _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
3095 def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv8i1, nxv8i16>;
// Outer product variant built on sme_outer_product_widening_inst with ZPR32
// sources: instruction record, _PSEUDO counterpart, and the selection pattern
// for `intrinsic` using nxv4i1 / nxv4i32 types.
3098 multiclass sme2_int_bmopx_tile<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
3099 def NAME : sme_outer_product_widening_inst<op, ZPR32, mnemonic>, SMEPseudo2Instr<NAME, 1>;
3101 def _PSEUDO : sme_outer_product_pseudo<ZPR32, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
3103 def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv4i1, nxv4i32>;
3106 //===----------------------------------------------------------------------===//
3107 // SME2 Zero Lookup Table.
// No inputs; the ZTR register is the only output and is printed inside
// braces. All of bits 31-4 are fixed, `opc` fills bits 3-0.
3108 class sme2_zero_zt<string mnemonic, bits<4> opc>
3109 : I<(outs ZTR:$ZT), (ins ),
3110 mnemonic, "\t\\{ $ZT \\}",
3111 "", []>, Sched<[]> {
3112 let Inst{31-4} = 0b1100000001001000000000000000;
3113 let Inst{3-0} = opc;
// Wraps the zero-ZT class: the instruction record, a custom-inserter pseudo
// that takes ZTR as an input, and a pattern routing
// int_aarch64_sme_zero_zt to NAME # _PSEUDO.
3116 multiclass sme2_zero_zt<string mnemonic, bits<4> opc> {
3117 def NAME : sme2_zero_zt<mnemonic, opc>;
3119 : Pseudo<(outs), (ins ZTR:$ZT), []>, Sched<[]> {
3120 // Translated to actual instruction in AArch64ISelLowering.cpp
3121 let usesCustomInserter = 1;
// The intrinsic's immediate operand is converted to the register through
// imm_to_zt before being handed to the pseudo.
3123 def : Pat<(int_aarch64_sme_zero_zt (imm_to_zt untyped:$zt)),
3124 (!cast<Instruction>(NAME # _PSEUDO) $zt)>;
3127 //===----------------------------------------------------------------------===//
3128 // SME2 lookup table load/store
// opc{7} selects the direction. When set, the record has no outputs, takes
// ZTR:$ZTt as an input and is marked mayStore; otherwise ZTR:$ZTt is the
// output and the record is marked mayLoad.
3129 class sme2_spill_fill_vector<string mnemonic, bits<8> opc>
3130 : I<!if(opc{7}, (outs ), (outs ZTR:$ZTt)),
3131 !if(opc{7}, (ins ZTR:$ZTt, GPR64sp:$Rn), (ins GPR64sp:$Rn)),
3132 mnemonic, "\t$ZTt, [$Rn]",
3133 "", []>, Sched<[]> {
3135 let Inst{31-22} = 0b1110000100;
// opc is split: opc{7-2} at bits 21-16, opc{1-0} at bits 1-0.
3136 let Inst{21-16} = opc{7-2};
3137 let Inst{15-10} = 0b100000;
3139 let Inst{4-2} = 0b000;
3140 let Inst{1-0} = opc{1-0};
3142 let mayLoad = !not(opc{7});
3143 let mayStore = opc{7};
// Wraps the load/store class: the instruction record, a custom-inserter
// pseudo taking ZTR and a base pointer as inputs, and a pattern routing `op`
// to NAME # _PSEUDO.
3147 multiclass sme2_spill_fill_vector<string mnemonic, bits<8> opc, SDPatternOperator op> {
3148 def NAME : sme2_spill_fill_vector<mnemonic, opc>;
3150 : Pseudo<(outs), (ins ZTR:$ZTt, GPR64sp:$base), []>, Sched<[]> {
3151 // Translated to actual instruction in AArch64ISelLowering.cpp
3152 let usesCustomInserter = 1;
// The node's immediate operand is converted to the register through
// imm_to_zt before being handed to the pseudo.
3154 def : Pat<(op (imm_to_zt untyped:$tile), GPR64sp:$base),
3155 (!cast<Instruction>(NAME # _PSEUDO) $tile, $base)>;
3158 //===----------------------------------------------------------------------===//
3159 // SME2 move to/from lookup table
// ZT -> scalar direction: GPR64:$Rt is the output; ZTR and the uimm3s8
// offset are inputs. imm3 sits at bits 14-12 and opc at bits 11-5.
3160 class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc>
3161 : I<(outs GPR64:$Rt), (ins ZTR:$ZTt, uimm3s8:$imm3),
3162 mnemonic, "\t$Rt, $ZTt[$imm3]",
3163 "", []>, Sched<[]> {
3166 let Inst{31-15} = 0b11000000010011000;
3167 let Inst{14-12} = imm3;
3168 let Inst{11-5} = opc;
// Scalar -> ZT direction: ZTR:$ZTt is the output; the uimm3s8 offset and
// GPR64:$Rt are inputs. Same field layout as the ZT -> scalar class, with a
// different fixed pattern in bits 31-15.
3172 class sme2_movt_scalar_to_zt<string mnemonic, bits<7> opc>
3173 : I<(outs ZTR:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt),
3174 mnemonic, "\t$ZTt[$imm3], $Rt",
3175 "", []>, Sched<[]> {
3178 let Inst{31-15} = 0b11000000010011100;
3179 let Inst{14-12} = imm3;
3180 let Inst{11-5} = opc;
3184 // SME2 move vector to lookup table
// ZTR:$ZTt is the output; inputs are a 0-3 offset (printed with "mul vl")
// and a Z register. off2 sits at bits 13-12 and opc at bits 11-5.
3185 class sme2_movt_zt_to_zt<string mnemonic, bits<7> opc>
3186 : I<(outs ZTR:$ZTt), (ins sme_elm_idx0_3:$off2, ZPRAny:$Zt),
3187 mnemonic, "\t$ZTt[$off2, mul vl], $Zt",
3188 "", []>, Sched<[]> {
3191 let Inst{31-14} = 0b110000000100111100;
3192 let Inst{13-12} = off2;
3193 let Inst{11-5} = opc;
// Adds, next to the instruction record, an alias (last InstAlias argument 1)
// for the offset-less syntax "$ZTt, $Zt"; the alias fixes the offset
// operand to 0.
3197 multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc> {
3198 def NAME : sme2_movt_zt_to_zt<mnemonic, opc>;
3199 def : InstAlias<mnemonic # "\t$ZTt, $Zt",
3200 (!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;
3203 //===----------------------------------------------------------------------===//
3204 // SME2 lookup table expand one register
// Base format shared by the single-register LUTI2/LUTI4 classes. Inputs are
// ZTR, a Z register and an index of type index_ty; the destination type is a
// parameter. opc is split: opc{6-2} at bits 18-14, opc{1-0} at bits 11-10.
3205 class sme2_luti_vector_index<bits<2> sz, bits<7> opc, RegisterOperand vector_ty,
3206 AsmVectorIndexOpnd index_ty, string mnemonic>
3207 : I<(outs vector_ty:$Zd),
3208 (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
3209 mnemonic, "\t$Zd, $ZTt, $Zn$i",
3210 "", []>, Sched<[]> {
3213 let Inst{31-19} = 0b1100000011001;
3214 let Inst{18-14} = opc{6-2};
3215 let Inst{13-12} = sz;
3216 let Inst{11-10} = opc{1-0};
// LUTI2, one register: the four unset ('?') opc bits passed to the base
// class are filled by the index at encoding bits 17-14.
3221 class sme2_luti2_vector_index<bits<2> sz, RegisterOperand vector_ty,
3223 : sme2_luti_vector_index<sz, {1,?,?,?,?,0,0}, vector_ty, VectorIndexB32b_timm, mnemonic> {
3225 let Inst{17-14} = i;
// Defines the _B/_H/_S instruction records (sz = 0b00/0b01/0b10) and one
// selection pattern per result type. Result type picks the record:
// nxv16i8 -> _B; nxv8i16, nxv8f16, nxv8bf16 -> _H; nxv4i32, nxv4f32 -> _S.
// The source vector is nxv16i8 in every pattern.
3228 multiclass sme2_luti2_vector_index<string mnemonic, SDPatternOperator intrinsic> {
3229 def _B : sme2_luti2_vector_index<0b00, ZPR8, mnemonic>;
3230 def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>;
3231 def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>;
3233 def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
3234 (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
3235 def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
3236 (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
3237 def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
3238 (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
3239 def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
3240 (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
3241 def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
3242 (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
3243 def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))),
3244 (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexB32b_timm:$imm))>;
// LUTI4, one register: the three unset ('?') opc bits passed to the base
// class are filled by the index at encoding bits 16-14.
3247 class sme2_luti4_vector_index<bits<2> sz, RegisterOperand vector_ty,
3249 : sme2_luti_vector_index<sz, {0,1,?,?,?,0,0}, vector_ty, VectorIndexH32b_timm, mnemonic> {
3251 let Inst{16-14} = i;
// Defines the _B/_H/_S instruction records (sz = 0b00/0b01/0b10) and one
// selection pattern per result type. Result type picks the record:
// nxv16i8 -> _B; nxv8i16, nxv8f16, nxv8bf16 -> _H; nxv4i32, nxv4f32 -> _S.
// The source vector is nxv16i8 in every pattern.
3254 multiclass sme2_luti4_vector_index<string mnemonic, SDPatternOperator intrinsic> {
3255 def _B : sme2_luti4_vector_index<0b00, ZPR8, mnemonic>;
3256 def _H : sme2_luti4_vector_index<0b01, ZPR16, mnemonic>;
3257 def _S : sme2_luti4_vector_index<0b10, ZPR32, mnemonic>;
3259 def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
3260 (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
3261 def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
3262 (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
3263 def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
3264 (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
3265 def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
3266 (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
3267 def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
3268 (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
3269 def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))),
3270 (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn, (i32 VectorIndexH32b_timm:$imm))>;
3273 // SME2 lookup table expand two contiguous registers
// Base format shared by the two-register LUTI2/LUTI4 classes. opc is split:
// opc{5-2} at bits 18-15, opc{1-0} at bits 11-10.
3274 class sme2_luti_vector_vg2_index<bits<2> sz, bits<6> opc, RegisterOperand vector_ty,
3275 AsmVectorIndexOpnd index_ty, string mnemonic>
3276 : I<(outs vector_ty:$Zd),
3277 (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
3278 mnemonic, "\t$Zd, $ZTt, $Zn$i",
3279 "", []>, Sched<[]> {
3282 let Inst{31-19} = 0b1100000010001;
3283 let Inst{18-15} = opc{5-2};
3285 let Inst{13-12} = sz;
3286 let Inst{11-10} = opc{1-0};
// LUTI2, two registers: the three unset ('?') opc bits passed to the base
// class are filled by the index at encoding bits 17-15.
3292 class sme2_luti2_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
3294 : sme2_luti_vector_vg2_index<sz, {1,?,?,?,0,0}, vector_ty, VectorIndexH, mnemonic> {
3296 let Inst{17-15} = i;
// Defines the _B/_H/_S records (sz = 0b00/0b01/0b10) over the two-register
// ZZ_*_mul_r destination operands. No selection patterns are added here.
3299 multiclass sme2_luti2_vector_vg2_index<string mnemonic> {
3300 def _B : sme2_luti2_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>;
3301 def _H : sme2_luti2_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>;
3302 def _S : sme2_luti2_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>;
// LUTI4, two registers: the two unset ('?') opc bits passed to the base
// class are filled by the index at encoding bits 16-15.
3305 class sme2_luti4_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
3307 : sme2_luti_vector_vg2_index<sz, {0,1,?,?,0,0}, vector_ty, VectorIndexS, mnemonic> {
3309 let Inst{16-15} = i;
// Defines the _B/_H/_S records (sz = 0b00/0b01/0b10) over the two-register
// ZZ_*_mul_r destination operands. No selection patterns are added here.
3312 multiclass sme2_luti4_vector_vg2_index<string mnemonic> {
3313 def _B : sme2_luti4_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>;
3314 def _H : sme2_luti4_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>;
3315 def _S : sme2_luti4_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>;
3318 // SME2 lookup table expand four contiguous registers
// Base format shared by the four-register LUTI2/LUTI4 classes. opc is split:
// opc{4-2} at bits 18-16, opc{1-0} at bits 11-10; bits 1-0 are fixed to 0.
3319 class sme2_luti_vector_vg4_index<bits<2> sz, bits<5>opc, RegisterOperand vector_ty,
3320 AsmVectorIndexOpnd index_ty, string mnemonic>
3321 : I<(outs vector_ty:$Zd),
3322 (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
3323 mnemonic, "\t$Zd, $ZTt, $Zn$i",
3324 "", []>, Sched<[]> {
3327 let Inst{31-19} = 0b1100000010001;
3328 let Inst{18-16} = opc{4-2};
3329 let Inst{15-14} = 0b10;
3330 let Inst{13-12} = sz;
3331 let Inst{11-10} = opc{1-0};
3334 let Inst{1-0} = 0b00;
// LUTI2, four registers: the two unset ('?') opc bits passed to the base
// class are filled by the index at encoding bits 17-16.
3337 class sme2_luti2_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
3339 : sme2_luti_vector_vg4_index<sz, {1,?,?,0,0}, vector_ty, VectorIndexS, mnemonic> {
3341 let Inst{17-16} = i;
// Defines the _B/_H/_S records (sz = 0b00/0b01/0b10) over the four-register
// ZZZZ_*_mul_r destination operands. No selection patterns are added here.
3344 multiclass sme2_luti2_vector_vg4_index<string mnemonic> {
3345 def _B : sme2_luti2_vector_vg4_index<0b00, ZZZZ_b_mul_r, mnemonic>;
3346 def _H : sme2_luti2_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>;
3347 def _S : sme2_luti2_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
// LUTI4, four registers: uses a VectorIndexD index and leaves a single
// unset ('?') opc bit in the value passed to the base class.
3350 class sme2_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
3352 : sme2_luti_vector_vg4_index<sz, {0,1,?,0,0}, vector_ty, VectorIndexD, mnemonic> {
// Defines only the _H and _S records (sz = 0b01/0b10); unlike the other LUTI
// multiclasses here, there is no _B record.
3357 multiclass sme2_luti4_vector_vg4_index<string mnemonic> {
3358 def _H : sme2_luti4_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>;
3359 def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
3362 //===----------------------------------------------------------------------===//
// Base format for moving a two-register vector group into a tile slice.
// The tile operand is both output ($ZAd) and tied input ($_ZAd); the slice is
// addressed by $Rs (MatrixIndexGPR32Op12_15) plus an immediate of type
// index_ty. Bits 12-10 are fixed to 0b000 in this two-register form.
3364 class sme2_mova_vec_to_tile_vg2_multi_base<bits<2> sz, bit v,
3365 RegisterOperand tile_ty,
3367 RegisterOperand vector_ty,
3369 : I<(outs tile_ty:$ZAd),
3370 (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm, vector_ty:$Zn),
3371 mnemonic, "\t$ZAd[$Rs, $imm], $Zn",
3372 "", []>, Sched<[]> {
3375 let Inst{31-24} = 0b11000000;
3376 let Inst{23-22} = sz;
3377 let Inst{21-16} = 0b000100;
3379 let Inst{14-13} = Rs;
3380 let Inst{12-10} = 0b000;
3382 let Inst{5-3} = 0b000;
3384 let Constraints = "$ZAd = $_ZAd";
// Emits one InstAlias for `inst` with the operand types supplied by the
// caller. `prefer` is passed through as the last InstAlias argument. When
// vg_acronym is non-empty it is printed after the immediate, inside the
// brackets, preceded by ", "; when empty nothing is added.
3387 multiclass sme2_mova_vec_to_tile_or_array_aliases<int prefer, Instruction inst,
3388 RegisterOperand tile_or_array_ty,
3389 RegisterOperand rv_ty,
3391 RegisterOperand vector_ty,
3393 string vg_acronym=""> {
3394 def : InstAlias<mnemonic # "\t$ZAd[$Rs, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn",
3395 (inst tile_or_array_ty:$ZAd, rv_ty:$Rs, index_ty:$imm, vector_ty:$Zn), prefer>;
3399 // SME2 move vector to tile, two registers
// For one direction (`v`), defines the _B/_H/_S/_D instruction records, their
// pseudos, the selection patterns for `intrinsic`, and three groups of
// aliases. As the element size grows the immediate range shrinks
// (uimm3s2range -> uimm0s2range) and the pseudo's tile index operand widens
// (sme_elm_idx0_0 -> sme_elm_idx0_7).
3400 multiclass sme2_mova_vec_to_tile_vg2_multi_base<bit v, string mnemonic, SDPatternOperator intrinsic> {
// Byte form: the low three bits hold the slice immediate.
3402 def _B : sme2_mova_vec_to_tile_vg2_multi_base<0b00, v,
3403 !if(v, TileVectorOpV8,
3405 uimm3s2range, ZZ_b_mul_r,
3406 mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
3408 let Inst{2-0} = imm;
// Halfword form: the low two bits hold the slice immediate.
3411 def _H : sme2_mova_vec_to_tile_vg2_multi_base<0b01, v,
3412 !if(v, TileVectorOpV16,
3414 uimm2s2range, ZZ_h_mul_r,
3415 mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
3419 let Inst{1-0} = imm;
// Word form: bits 2-1 hold the tile number.
3422 def _S : sme2_mova_vec_to_tile_vg2_multi_base<0b10, v,
3423 !if(v, TileVectorOpV32,
3425 uimm1s2range, ZZ_s_mul_r,
3426 mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
3429 let Inst{2-1} = ZAd;
// Doubleword form: all of bits 2-0 hold the tile number.
3433 def _D : sme2_mova_vec_to_tile_vg2_multi_base<0b11, v,
3434 !if(v, TileVectorOpV64,
3436 uimm0s2range, ZZ_d_mul_r,
3437 mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
3439 let Inst{2-0} = ZAd;
// One pseudo per element size.
3442 def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo<NAME # _B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
3443 def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo<NAME # _H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
3444 def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo<NAME # _S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
3445 def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo<NAME # _D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
// Selection patterns: integer and floating-point vector types of the same
// element width share a record (_H also covers nxv8f16 and nxv8bf16).
3447 def : SME2_Tile_VG2_Multi_Pat<NAME # _B, intrinsic, sme_elm_idx0_0, nxv16i8, uimm3s2range, tileslicerange3s2>;
3448 def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8i16, uimm2s2range, tileslicerange2s2>;
3449 def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8f16, uimm2s2range, tileslicerange2s2>;
3450 def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8bf16, uimm2s2range, tileslicerange2s2>;
3451 def : SME2_Tile_VG2_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4i32, uimm1s2range, tileslicerange1s2>;
3452 def : SME2_Tile_VG2_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4f32, uimm1s2range, tileslicerange1s2>;
3453 def : SME2_Tile_VG2_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2i64, uimm0s2range, tileslicerange0s2>;
3454 def : SME2_Tile_VG2_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2f64, uimm0s2range, tileslicerange0s2>;
// First alias group: prefer = 1.
3456 defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _B),
3457 !if(v, TileVectorOpV8,
3459 MatrixIndexGPR32Op12_15,
3460 uimm3s2range, ZZ_b_mul_r,
3462 defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _H),
3463 !if(v, TileVectorOpV16,
3465 MatrixIndexGPR32Op12_15,
3466 uimm2s2range, ZZ_h_mul_r,
3468 defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _S),
3469 !if(v, TileVectorOpV32,
3471 MatrixIndexGPR32Op12_15,
3472 uimm1s2range, ZZ_s_mul_r,
3474 defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _D),
3475 !if(v, TileVectorOpV64,
3477 MatrixIndexGPR32Op12_15,
3478 uimm0s2range, ZZ_d_mul_r,
// Second alias group: prefer = 0.
3481 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _B),
3482 !if(v, TileVectorOpV8,
3484 MatrixIndexGPR32Op12_15,
3485 uimm3s2range, ZZ_b_mul_r,
3487 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _H),
3488 !if(v, TileVectorOpV16,
3490 MatrixIndexGPR32Op12_15,
3491 uimm2s2range, ZZ_h_mul_r,
3493 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _S),
3494 !if(v, TileVectorOpV32,
3496 MatrixIndexGPR32Op12_15,
3497 uimm1s2range, ZZ_s_mul_r,
3499 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _D),
3500 !if(v, TileVectorOpV64,
3502 MatrixIndexGPR32Op12_15,
3503 uimm0s2range, ZZ_d_mul_r,
// Third alias group: prefer = 0.
3506 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _B),
3507 !if(v, TileVectorOpV8,
3509 MatrixIndexGPR32Op12_15,
3510 uimm3s2range, ZZ_b_mul_r,
3512 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _H),
3513 !if(v, TileVectorOpV16,
3515 MatrixIndexGPR32Op12_15,
3516 uimm2s2range, ZZ_h_mul_r,
3518 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _S),
3519 !if(v, TileVectorOpV32,
3521 MatrixIndexGPR32Op12_15,
3522 uimm1s2range, ZZ_s_mul_r,
3524 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _D),
3525 !if(v, TileVectorOpV64,
3527 MatrixIndexGPR32Op12_15,
3528 uimm0s2range, ZZ_d_mul_r,
// Instantiates the two-register base multiclass twice: _H with v = 0b0 and
// int_h, _V with v = 0b1 and int_v.
3532 multiclass sme2_mova_vec_to_tile_vg2_multi<string mnemonic,
3533 SDPatternOperator int_h, SDPatternOperator int_v>{
3534 defm _H : sme2_mova_vec_to_tile_vg2_multi_base<0b0, mnemonic, int_h>;
3535 defm _V : sme2_mova_vec_to_tile_vg2_multi_base<0b1, mnemonic, int_v>;
// Encoding class for the vg4 vector-to-tile move variants.
// The tile operand tile_ty:$ZAd is the only output and is tied to the
// tile_ty:$_ZAd input via Constraints; the slice is addressed by
// MatrixIndexGPR32Op12_15:$Rs together with index_ty:$imm.
// The assignments below fix Inst{31-24}, Inst{21-16}, Inst{12-10} and
// Inst{6-3} to constants and encode sz in Inst{23-22} and Rs in Inst{14-13}.
// The lowest bits are filled in per element size by the multiclass of the
// same name.
3538 class sme2_mova_vec_to_tile_vg4_multi_base<bits<2> sz, bit v, bits<3> op,
3539 RegisterOperand tile_ty,
3541 RegisterOperand vector_ty,
3543 : I<(outs tile_ty:$ZAd),
3544 (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm,
3547 "\t$ZAd[$Rs, $imm], $Zn",
3548 "", []>, Sched<[]> {
3551 let Inst{31-24} = 0b11000000;
3552 let Inst{23-22} = sz;
3553 let Inst{21-16} = 0b000100;
3555 let Inst{14-13} = Rs;
3556 let Inst{12-10} = 0b001;
3558 let Inst{6-3} = 0b0000;
// Tie the written tile to the incoming tile operand.
3560 let Constraints = "$ZAd = $_ZAd";
3563 // SME2 move vector to tile, four registers
// Builds the _B, _H, _S and _D variants of the vg4 vector-to-tile move
// (sz = 0b00, 0b01, 0b10, 0b11), each paired with SMEPseudo2Instr. It then
// adds the matching _PSEUDO defs, SME2_Tile_VG4_Multi_Pat selection patterns
// bound to `intrinsic`, and two groups of assembler aliases.
// `v` chooses the tile operand class through !if(v, TileVectorOpV<n>, ...).
3564 multiclass sme2_mova_vec_to_tile_vg4_multi_base<bit v, string mnemonic, SDPatternOperator intrinsic> {
// Real instructions, one per element size. The low Inst bits carry the slice
// immediate and/or the tile number, depending on the element size.
3566 def _B : sme2_mova_vec_to_tile_vg4_multi_base<0b00, v, {0,?,?},
3567 !if(v, TileVectorOpV8,
3569 uimm2s4range, ZZZZ_b_mul_r,
3570 mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
3572 let Inst{1-0} = imm;
3575 def _H : sme2_mova_vec_to_tile_vg4_multi_base<0b01, v, {0,?,?},
3576 !if(v, TileVectorOpV16,
3578 uimm1s4range, ZZZZ_h_mul_r,
3579 mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
3586 def _S : sme2_mova_vec_to_tile_vg4_multi_base<0b10, v, {0,?,?},
3587 !if(v, TileVectorOpV32,
3589 uimm0s4range, ZZZZ_s_mul_r,
3590 mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
3592 let Inst{1-0} = ZAd;
3595 def _D : sme2_mova_vec_to_tile_vg4_multi_base<0b11, v, {?,?,?},
3596 !if(v, TileVectorOpV64,
3598 uimm0s4range, ZZZZ_d_mul_r,
3599 mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
3601 let Inst{2-0} = ZAd;
// Pseudo instructions, one per element size, each taking a tile-index operand
// (sme_elm_idx0_N) and an SMEMatrixTile<size> argument.
3604 def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo<NAME # _B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
3605 def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo<NAME # _H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
3606 def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo<NAME # _S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
3607 def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo<NAME # _D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
// Selection patterns: one per scalable vector type handled by each variant.
3609 def : SME2_Tile_VG4_Multi_Pat<NAME # _B, intrinsic, sme_elm_idx0_0, nxv16i8, uimm2s4range, tileslicerange2s4>;
3610 def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8i16, uimm1s4range, tileslicerange1s4>;
3611 def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8f16, uimm1s4range, tileslicerange1s4>;
3612 def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8bf16, uimm1s4range, tileslicerange1s4>;
3613 def : SME2_Tile_VG4_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4i32, uimm0s4range, tileslicerange0s4>;
3614 def : SME2_Tile_VG4_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4f32, uimm0s4range, tileslicerange0s4>;
3615 def : SME2_Tile_VG4_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2i64, uimm0s4range, tileslicerange0s4>;
3616 def : SME2_Tile_VG4_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2f64, uimm0s4range, tileslicerange0s4>;
// First alias group: instantiated with first argument 1.
// NOTE(review): the meaning of that argument is set by
// sme2_mova_vec_to_tile_or_array_aliases, defined elsewhere - presumably the
// InstAlias emit priority; confirm.
3618 defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _B),
3619 !if(v, TileVectorOpV8,
3621 MatrixIndexGPR32Op12_15,
3622 uimm2s4range, ZZZZ_b_mul_r,
3624 defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _H),
3625 !if(v, TileVectorOpV16,
3627 MatrixIndexGPR32Op12_15,
3628 uimm1s4range, ZZZZ_h_mul_r,
3630 defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _S),
3631 !if(v, TileVectorOpV32,
3633 MatrixIndexGPR32Op12_15,
3634 uimm0s4range, ZZZZ_s_mul_r,
3636 defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _D),
3637 !if(v, TileVectorOpV64,
3639 MatrixIndexGPR32Op12_15,
3640 uimm0s4range, ZZZZ_d_mul_r,
// Second alias group: same operands, instantiated with first argument 0.
3643 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _B),
3644 !if(v, TileVectorOpV8,
3646 MatrixIndexGPR32Op12_15,
3647 uimm2s4range, ZZZZ_b_mul_r,
3649 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _H),
3650 !if(v, TileVectorOpV16,
3652 MatrixIndexGPR32Op12_15,
3653 uimm1s4range, ZZZZ_h_mul_r,
3655 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _S),
3656 !if(v, TileVectorOpV32,
3658 MatrixIndexGPR32Op12_15,
3659 uimm0s4range, ZZZZ_s_mul_r,
3661 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _D),
3662 !if(v, TileVectorOpV64,
3664 MatrixIndexGPR32Op12_15,
3665 uimm0s4range, ZZZZ_d_mul_r,
// Wrapper multiclass: instantiates the vg4 vector-to-tile base multiclass
// twice, as _H (v = 0b0, patterns bound to int_h) and as _V (v = 0b1,
// patterns bound to int_v).
3670 multiclass sme2_mova_vec_to_tile_vg4_multi<string mnemonic,
3671 SDPatternOperator int_h, SDPatternOperator int_v>{
3672 defm _H : sme2_mova_vec_to_tile_vg4_multi_base<0b0, mnemonic, int_h>;
3673 defm _V : sme2_mova_vec_to_tile_vg4_multi_base<0b1, mnemonic, int_v>;
3676 // SME Move into Array
// Encoding class shared by the two- and four-register vector-to-array moves.
// array_ty:$ZAd is the output, tied to the array_ty:$_ZAd input; the slice is
// addressed by MatrixIndexGPR32Op8_11:$Rs plus a sme_elm_idx0_7 immediate.
// vg_acronym (default "") is spliced into the assembly string after $imm.
// Layout set below: Inst{31-15} fixed, Rs in Inst{14-13}, Inst{12-11} = 0b01,
// op in Inst{10-6}, Inst{5-3} = 0b000, imm in Inst{2-0}.
3677 class sme2_mova_vec_to_array_vg24_multi< bits<5> op, RegisterOperand array_ty,
3678 RegisterOperand vector_ty,
3680 string vg_acronym="">
3681 : I<(outs array_ty:$ZAd),
3682 (ins array_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm,
3684 mnemonic, "\t$ZAd[$Rs, $imm, " # vg_acronym # "], $Zn",
3685 "", []>, Sched<[]> {
3688 let Inst{31-15} = 0b11000000000001000;
3689 let Inst{14-13} = Rs;
3690 let Inst{12-11} = 0b01;
3691 let Inst{10-6} = op;
3692 let Inst{5-3} = 0b000;
3693 let Inst{2-0} = imm;
// Tie the written array operand to the incoming one.
3695 let Constraints = "$ZAd = $_ZAd";
3698 // MOVA (vector to array, two registers)
// Two-register vector-to-array move. Defines the real instruction
// (op = {0,?,?,?,?}, MatrixOp64 array, ZZ_d_mul_r vectors, "vgx2" acronym)
// paired with SMEPseudo2Instr, its _PSEUDO counterpart, SME2_ZA_VG1x2_Multi_Pat
// patterns for eight scalable vector types, and assembler aliases over the
// ZZ_b/h/s/d_mul_r register-list operand classes.
// Every alias is instantiated with first argument 0 except the last one
// (ZZ_d_mul_r), which uses 1.
// NOTE(review): the meaning of that argument is set by
// sme2_mova_vec_to_tile_or_array_aliases, defined elsewhere - confirm.
3699 multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic, SDPatternOperator intrinsic> {
3700 def NAME : sme2_mova_vec_to_array_vg24_multi<{0,?,?,?,?}, MatrixOp64,
3701 ZZ_d_mul_r, mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1> {
3706 def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;
// Selection patterns: all vector types map onto the single NAME instruction.
3708 def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv16i8, sme_elm_idx0_7, tileslice16>;
3709 def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8i16, sme_elm_idx0_7, tileslice16>;
3710 def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8f16, sme_elm_idx0_7, tileslice16>;
3711 def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
3712 def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4i32, sme_elm_idx0_7, tileslice16>;
3713 def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4f32, sme_elm_idx0_7, tileslice16>;
3714 def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
3715 def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
// Assembler aliases; all resolve to the same NAME instruction.
3717 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3719 MatrixIndexGPR32Op8_11,
3720 sme_elm_idx0_7, ZZ_b_mul_r,
3722 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3724 MatrixIndexGPR32Op8_11,
3725 sme_elm_idx0_7, ZZ_h_mul_r,
3727 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3729 MatrixIndexGPR32Op8_11,
3730 sme_elm_idx0_7, ZZ_s_mul_r,
3732 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3734 MatrixIndexGPR32Op8_11,
3735 sme_elm_idx0_7, ZZ_d_mul_r,
3738 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3740 MatrixIndexGPR32Op8_11,
3741 sme_elm_idx0_7, ZZ_b_mul_r,
3743 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3745 MatrixIndexGPR32Op8_11,
3746 sme_elm_idx0_7, ZZ_h_mul_r,
3748 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3750 MatrixIndexGPR32Op8_11,
3751 sme_elm_idx0_7, ZZ_s_mul_r,
3754 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3756 MatrixIndexGPR32Op8_11,
3757 sme_elm_idx0_7, ZZ_b_mul_r,
3759 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3761 MatrixIndexGPR32Op8_11,
3762 sme_elm_idx0_7, ZZ_h_mul_r,
3764 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3766 MatrixIndexGPR32Op8_11,
3767 sme_elm_idx0_7, ZZ_s_mul_r,
3769 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3771 MatrixIndexGPR32Op8_11,
3772 sme_elm_idx0_7, ZZ_d_mul_r,
3775 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3777 MatrixIndexGPR32Op8_11,
3778 sme_elm_idx0_7, ZZ_b_mul_r,
3780 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3782 MatrixIndexGPR32Op8_11,
3783 sme_elm_idx0_7, ZZ_h_mul_r,
3785 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3787 MatrixIndexGPR32Op8_11,
3788 sme_elm_idx0_7, ZZ_s_mul_r,
// The only alias in this multiclass instantiated with first argument 1.
3790 defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME),
3792 MatrixIndexGPR32Op8_11,
3793 sme_elm_idx0_7, ZZ_d_mul_r,
3797 // MOVA (vector to array, four registers)
// Four-register vector-to-array move. Defines the real instruction
// (op = {1,?,?,?,0}, MatrixOp64 array, ZZZZ_d_mul_r vectors, "vgx4" acronym)
// paired with SMEPseudo2Instr, its _PSEUDO counterpart, SME2_ZA_VG1x4_Multi_Pat
// patterns for eight scalable vector types, and assembler aliases over the
// ZZZZ_b/h/s/d_mul_r register-list operand classes.
// Every alias is instantiated with first argument 0 except the last one
// (ZZZZ_d_mul_r), which uses 1.
// NOTE(review): the meaning of that argument is set by
// sme2_mova_vec_to_tile_or_array_aliases, defined elsewhere - confirm.
3798 multiclass sme2_mova_vec_to_array_vg4_multi<string mnemonic, SDPatternOperator intrinsic> {
3799 def NAME : sme2_mova_vec_to_array_vg24_multi<{1,?,?,?,0}, MatrixOp64,
3800 ZZZZ_d_mul_r, mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1> {
3805 def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;
// Selection patterns: all vector types map onto the single NAME instruction.
3807 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv16i8, sme_elm_idx0_7, tileslice16>;
3808 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8i16, sme_elm_idx0_7, tileslice16>;
3809 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8f16, sme_elm_idx0_7, tileslice16>;
3810 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
3811 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4i32, sme_elm_idx0_7, tileslice16>;
3812 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4f32, sme_elm_idx0_7, tileslice16>;
3813 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
3814 def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
// Assembler aliases; all resolve to the same NAME instruction.
3816 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3818 MatrixIndexGPR32Op8_11,
3819 sme_elm_idx0_7, ZZZZ_b_mul_r,
3821 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3823 MatrixIndexGPR32Op8_11,
3824 sme_elm_idx0_7, ZZZZ_h_mul_r,
3826 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3828 MatrixIndexGPR32Op8_11,
3829 sme_elm_idx0_7, ZZZZ_s_mul_r,
3831 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3833 MatrixIndexGPR32Op8_11,
3834 sme_elm_idx0_7, ZZZZ_d_mul_r,
3837 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3839 MatrixIndexGPR32Op8_11,
3840 sme_elm_idx0_7, ZZZZ_b_mul_r,
3842 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3844 MatrixIndexGPR32Op8_11,
3845 sme_elm_idx0_7, ZZZZ_h_mul_r,
3847 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3849 MatrixIndexGPR32Op8_11,
3850 sme_elm_idx0_7, ZZZZ_s_mul_r,
3853 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3855 MatrixIndexGPR32Op8_11,
3856 sme_elm_idx0_7, ZZZZ_b_mul_r,
3858 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3860 MatrixIndexGPR32Op8_11,
3861 sme_elm_idx0_7, ZZZZ_h_mul_r,
3863 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3865 MatrixIndexGPR32Op8_11,
3866 sme_elm_idx0_7, ZZZZ_s_mul_r,
3868 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3870 MatrixIndexGPR32Op8_11,
3871 sme_elm_idx0_7, ZZZZ_d_mul_r,
3874 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3876 MatrixIndexGPR32Op8_11,
3877 sme_elm_idx0_7, ZZZZ_b_mul_r,
3879 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3881 MatrixIndexGPR32Op8_11,
3882 sme_elm_idx0_7, ZZZZ_h_mul_r,
3884 defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3886 MatrixIndexGPR32Op8_11,
3887 sme_elm_idx0_7, ZZZZ_s_mul_r,
// The only alias in this multiclass instantiated with first argument 1.
3889 defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME),
3891 MatrixIndexGPR32Op8_11,
3892 sme_elm_idx0_7, ZZZZ_d_mul_r,
// Encoding class for the vg2 tile-to-vector move variants.
// The output list depends on op{1}: when set, the tile is emitted as a second
// output ($_ZAn) and tied to the $ZAn input through Constraints; when clear,
// vector_ty:$Zd is the only output and no constraint is applied.
// Layout set below: Inst{31-24} and Inst{21-16} fixed, sz in Inst{23-22},
// Rs in Inst{14-13}, Inst{12-11} = 0b00, op in Inst{10-8}.
3897 class sme2_mova_tile_to_vec_vg2_multi_base<bits<2> sz, bit v, bits<3> op,
3898 RegisterOperand vector_ty,
3899 RegisterOperand tile_ty,
3902 : I<!if(op{1}, (outs vector_ty:$Zd, tile_ty:$_ZAn), (outs vector_ty:$Zd)),
3903 (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
3905 "\t$Zd, $ZAn[$Rs, $imm]",
3906 "", []>, Sched<[]> {
3909 let Inst{31-24} = 0b11000000;
3910 let Inst{23-22} = sz;
3911 let Inst{21-16} = 0b000110;
3913 let Inst{14-13} = Rs;
3914 let Inst{12-11} = 0b00;
3915 let Inst{10-8} = op;
// The tie is only needed when the tile is also written (op{1} set).
3919 let Constraints = !if(op{1}, "$ZAn = $_ZAn", "");
// Emits one InstAlias for `inst` with the assembly form
// "<mnemonic>\t$Zd, $ZAn[$Rs, $imm]"; when vg_acronym is non-empty it is
// appended inside the brackets as ", <vg_acronym>".
// `op` is forwarded as the final InstAlias argument.
// NOTE(review): that argument is presumably the alias emit priority - confirm
// against the InstAlias class definition.
3922 multiclass sme2_mova_tile_or_array_to_vec_aliases<int op, Instruction inst,
3923 RegisterOperand vector_ty,
3924 RegisterOperand tile_or_array_ty,
3925 RegisterOperand rv_ty,
3928 string vg_acronym=""> {
3929 def : InstAlias<mnemonic # "\t$Zd, $ZAn[$Rs, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "]",
3930 (inst vector_ty:$Zd, tile_or_array_ty:$ZAn, rv_ty:$Rs, index_ty:$imm), op>;
// Builds the _B, _H, _S and _D variants of the vg2 tile-to-vector move
// (sz = 0b00, 0b01, 0b10, 0b11) with opcode bits `opc`, then registers
// assembler aliases for each of them.
// `v` chooses the tile operand class through !if(v, TileVectorOpV<n>, ...).
// Bits Inst{7-5} hold the slice immediate and/or the tile number, depending
// on the element size.
3934 multiclass sme2_mova_tile_to_vec_vg2_multi_inst<bit v, bits<3> opc, string mnemonic> {
3936 def _B : sme2_mova_tile_to_vec_vg2_multi_base<0b00, v, opc, ZZ_b_mul_r,
3937 !if(v, TileVectorOpV8,
3939 uimm3s2range, mnemonic> {
3941 let Inst{7-5} = imm;
3944 def _H : sme2_mova_tile_to_vec_vg2_multi_base<0b01, v, opc, ZZ_h_mul_r,
3945 !if(v, TileVectorOpV16,
3947 uimm2s2range, mnemonic> {
3951 let Inst{6-5} = imm;
3954 def _S : sme2_mova_tile_to_vec_vg2_multi_base<0b10, v, opc, ZZ_s_mul_r,
3955 !if(v, TileVectorOpV32,
3957 uimm1s2range, mnemonic> {
3960 let Inst{7-6} = ZAn;
3964 def _D : sme2_mova_tile_to_vec_vg2_multi_base<0b11, v, opc, ZZ_d_mul_r,
3965 !if(v, TileVectorOpV64,
3967 uimm0s2range, mnemonic> {
3969 let Inst{7-5} = ZAn;
// Extra "mov" spelling, added only when the mnemonic is "mova"; these aliases
// are instantiated with op = 1.
3972 if !eq(mnemonic, "mova") then {
3973 defm : sme2_mova_tile_or_array_to_vec_aliases<1,!cast<Instruction>(NAME # _B),
3975 !if(v, TileVectorOpV8,
3977 MatrixIndexGPR32Op12_15,
3978 uimm3s2range, "mov">;
3979 defm : sme2_mova_tile_or_array_to_vec_aliases<1,!cast<Instruction>(NAME # _H),
3981 !if(v, TileVectorOpV16,
3983 MatrixIndexGPR32Op12_15,
3984 uimm2s2range, "mov">;
3985 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _S),
3987 !if(v, TileVectorOpV32,
3989 MatrixIndexGPR32Op12_15,
3990 uimm1s2range, "mov">;
3991 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _D),
3993 !if(v, TileVectorOpV64,
3995 MatrixIndexGPR32Op12_15,
3996 uimm0s2range, "mov">;
// Aliases under the instruction's own mnemonic, instantiated with op = 0.
3999 defm : sme2_mova_tile_or_array_to_vec_aliases<0,!cast<Instruction>(NAME # _B),
4001 !if(v, TileVectorOpV8,
4003 MatrixIndexGPR32Op12_15,
4004 uimm3s2range, mnemonic>;
4005 defm : sme2_mova_tile_or_array_to_vec_aliases<0,!cast<Instruction>(NAME # _H),
4007 !if(v, TileVectorOpV16,
4009 MatrixIndexGPR32Op12_15,
4010 uimm2s2range, mnemonic>;
4011 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _S),
4013 !if(v, TileVectorOpV32,
4015 MatrixIndexGPR32Op12_15,
4016 uimm1s2range, mnemonic>;
4017 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _D),
4019 !if(v, TileVectorOpV64,
4021 MatrixIndexGPR32Op12_15,
4022 uimm0s2range, mnemonic>;
4026 // SME2 move tile to vector, two registers
// Instantiates the vg2 tile-to-vector instructions with opc = 0b000, once as
// _H (v = 0b0) and once as _V (v = 0b1). With this opc, op{1} is clear, so the
// base class emits no tile output and no tie constraint.
4027 multiclass sme2_mova_tile_to_vec_vg2_multi<string mnemonic>{
4028 defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b000, mnemonic>;
4029 defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b000, mnemonic>;
4032 // SME2p1 move tile to vector and zero tile, two registers
// Instantiates the vg2 tile-to-vector instructions with opc = 0b010, once as
// _H (v = 0b0) and once as _V (v = 0b1). With this opc, op{1} is set, so the
// base class also lists the tile as an output tied to the input tile.
4033 multiclass sme2p1_movaz_tile_to_vec_vg2<string mnemonic>{
4034 defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>;
4035 defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>;
// Encoding class for the vg4 tile-to-vector move variants.
// The output list depends on op{4}: when set, the tile is emitted as a second
// output ($_ZAn) and tied to the $ZAn input through Constraints; when clear,
// vector_ty:$Zd is the only output and no constraint is applied.
// Layout set below: Inst{31-24} and Inst{21-16} fixed, sz in Inst{23-22},
// Rs in Inst{14-13}, Inst{12-11} = 0b00, op in Inst{10-5}, Inst{1-0} = 0b00.
4038 class sme2_mova_tile_to_vec_vg4_multi_base<bits<2> sz, bit v, bits<6> op,
4039 RegisterOperand vector_ty,
4040 RegisterOperand tile_ty,
4043 : I<!if(op{4}, (outs vector_ty:$Zd, tile_ty:$_ZAn), (outs vector_ty:$Zd)),
4044 (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
4046 "\t$Zd, $ZAn[$Rs, $imm]",
4047 "", []>, Sched<[]> {
4050 let Inst{31-24} = 0b11000000;
4051 let Inst{23-22} = sz;
4052 let Inst{21-16} = 0b000110;
4054 let Inst{14-13} = Rs;
4055 let Inst{12-11} = 0b00;
4056 let Inst{10-5} = op{5-0};
4058 let Inst{1-0} = 0b00;
// The tie is only needed when the tile is also written (op{4} set).
4060 let Constraints = !if(op{4}, "$ZAn = $_ZAn", "");
// Builds the _B, _H, _S and _D variants of the vg4 tile-to-vector move
// (sz = 0b00, 0b01, 0b10, 0b11), then registers assembler aliases for each.
// The 3-bit `opc` forms the top of the class's 6-bit op field; the unset (?)
// low bits are overridden per variant with the slice immediate and/or the
// tile number.
// `v` chooses the tile operand class through !if(v, TileVectorOpV<n>, ...).
4063 multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemonic> {
4065 def _B : sme2_mova_tile_to_vec_vg4_multi_base<0b00, v, {opc,0,?,?},
4067 !if(v, TileVectorOpV8,
4069 uimm2s4range, mnemonic> {
4071 let Inst{6-5} = imm;
4074 def _H : sme2_mova_tile_to_vec_vg4_multi_base<0b01, v, {opc,0,?,?},
4076 !if(v, TileVectorOpV16,
4078 uimm1s4range, mnemonic> {
4085 def _S : sme2_mova_tile_to_vec_vg4_multi_base<0b10, v, {opc,0,?,?},
4087 !if(v, TileVectorOpV32,
4089 uimm0s4range, mnemonic> {
4091 let Inst{6-5} = ZAn;
4094 def _D : sme2_mova_tile_to_vec_vg4_multi_base<0b11, v, {opc,?,?,?},
4096 !if(v, TileVectorOpV64,
4098 uimm0s4range, mnemonic> {
4100 let Inst{7-5} = ZAn;
// Extra "mov" spelling, added only when the mnemonic is "mova"; these aliases
// are instantiated with op = 1.
4103 if !eq(mnemonic, "mova") then {
4104 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _B),
4106 !if(v, TileVectorOpV8,
4108 MatrixIndexGPR32Op12_15,
4109 uimm2s4range, "mov">;
4110 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _H),
4112 !if(v, TileVectorOpV16,
4114 MatrixIndexGPR32Op12_15,
4115 uimm1s4range, "mov">;
4116 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _S),
4118 !if(v, TileVectorOpV32,
4120 MatrixIndexGPR32Op12_15,
4121 uimm0s4range, "mov">;
4122 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _D),
4124 !if(v, TileVectorOpV64,
4126 MatrixIndexGPR32Op12_15,
4127 uimm0s4range, "mov">;
// Aliases under the instruction's own mnemonic, instantiated with op = 0.
4130 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _B),
4132 !if(v, TileVectorOpV8,
4134 MatrixIndexGPR32Op12_15,
4135 uimm2s4range, mnemonic>;
4136 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _H),
4138 !if(v, TileVectorOpV16,
4140 MatrixIndexGPR32Op12_15,
4141 uimm1s4range, mnemonic>;
4142 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _S),
4144 !if(v, TileVectorOpV32,
4146 MatrixIndexGPR32Op12_15,
4147 uimm0s4range, mnemonic>;
4148 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _D),
4150 !if(v, TileVectorOpV64,
4152 MatrixIndexGPR32Op12_15,
4153 uimm0s4range, mnemonic>;
4157 // SME2 move tile to vector, four registers
// Instantiates the vg4 tile-to-vector instructions with opc = 0b100, once as
// _H (v = 0b0) and once as _V (v = 0b1).
4158 multiclass sme2_mova_tile_to_vec_vg4_multi<string mnemonic>{
4159 defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b100, mnemonic>;
4160 defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b100, mnemonic>;
4163 // SME2p1 move tile to vector and zero tile, four registers
// Instantiates the vg4 tile-to-vector instructions with opc = 0b110, once as
// _H (v = 0b0) and once as _V (v = 0b1). It differs from
// sme2_mova_tile_to_vec_vg4_multi only in the middle opc bit.
4164 multiclass sme2p1_movaz_tile_to_vec_vg4<string mnemonic>{
4165 defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>;
4166 defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>;
// Encoding class shared by the two- and four-register array-to-vector moves.
// The output list depends on op{2}: when set, the array is emitted as a second
// output ($_ZAn) and tied to the $ZAn input; otherwise vector_ty:$Zd is the
// only output. The slice is addressed by MatrixIndexGPR32Op8_11:$Rs plus a
// sme_elm_idx0_7 immediate, and vg_acronym is spliced into the assembly string.
// Layout set below: Inst{31-15} fixed, Rs in Inst{14-13}, Inst{12-11} = 0b01,
// op{3-1} in Inst{10-8}, imm in Inst{7-5}, op{0} in Inst{1}.
4170 class sme2_mova_array_to_vec_vg24_multi<bits<4>op, RegisterOperand vector_ty,
4171 RegisterOperand array_ty,
4172 string mnemonic, string vg_acronym>
4173 : I<!if(op{2}, (outs vector_ty:$Zd, array_ty:$_ZAn), (outs vector_ty:$Zd)),
4174 (ins array_ty:$ZAn, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm),
4176 "\t$Zd, $ZAn[$Rs, $imm, " # vg_acronym # "]",
4177 "", []>, Sched<[]> {
4180 let Inst{31-15} = 0b11000000000001100;
4181 let Inst{14-13} = Rs;
4182 let Inst{12-11} = 0b01;
4183 let Inst{10-8} = op{3-1};
4184 let Inst{7-5} = imm;
4185 let Inst{1} = op{0};
// The tie is only needed when the array is also written (op{2} set).
4187 let Constraints = !if(op{2}, "$ZAn = $_ZAn", "");
4190 // move array to vector, two registers.
// Two-register array-to-vector move. Defines the real instruction on
// ZZ_d_mul_r / MatrixOp64 with op = {opc,?}, then registers assembler aliases
// over the b/h/s/d operand classes:
//   - under `mnemonic` without an acronym (b, h, s, d),
//   - under `mnemonic` with "vgx2" (b, h, s),
//   - and, only when mnemonic is "mova", the same two sets under "mov".
// All aliases use op = 0 except the final "mov" ... "vgx2" alias on
// ZZ_d_mul_r / MatrixOp64, which uses op = 1.
4191 multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> {
4192 def NAME : sme2_mova_array_to_vec_vg24_multi<{opc,?}, ZZ_d_mul_r, MatrixOp64,
// Aliases under `mnemonic`, no vector-group acronym.
4198 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4199 ZZ_b_mul_r, MatrixOp8,
4200 MatrixIndexGPR32Op8_11,
4201 sme_elm_idx0_7, mnemonic>;
4202 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4203 ZZ_h_mul_r, MatrixOp16,
4204 MatrixIndexGPR32Op8_11,
4205 sme_elm_idx0_7, mnemonic>;
4206 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4207 ZZ_s_mul_r, MatrixOp32,
4208 MatrixIndexGPR32Op8_11,
4209 sme_elm_idx0_7, mnemonic>;
4210 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4211 ZZ_d_mul_r, MatrixOp64,
4212 MatrixIndexGPR32Op8_11,
4213 sme_elm_idx0_7, mnemonic>;
// Aliases under `mnemonic` with the "vgx2" acronym (b, h, s only).
4215 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4216 ZZ_b_mul_r, MatrixOp8,
4217 MatrixIndexGPR32Op8_11,
4218 sme_elm_idx0_7, mnemonic, "vgx2">;
4219 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4220 ZZ_h_mul_r, MatrixOp16,
4221 MatrixIndexGPR32Op8_11,
4222 sme_elm_idx0_7, mnemonic, "vgx2">;
4223 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4224 ZZ_s_mul_r, MatrixOp32,
4225 MatrixIndexGPR32Op8_11,
4226 sme_elm_idx0_7, mnemonic, "vgx2">;
// "mov" spellings, added only for the "mova" mnemonic.
4228 if !eq(mnemonic, "mova") then {
4229 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4230 ZZ_b_mul_r, MatrixOp8,
4231 MatrixIndexGPR32Op8_11,
4232 sme_elm_idx0_7, "mov">;
4233 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4234 ZZ_h_mul_r, MatrixOp16,
4235 MatrixIndexGPR32Op8_11,
4236 sme_elm_idx0_7, "mov">;
4237 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4238 ZZ_s_mul_r, MatrixOp32,
4239 MatrixIndexGPR32Op8_11,
4240 sme_elm_idx0_7, "mov">;
4241 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4242 ZZ_d_mul_r, MatrixOp64,
4243 MatrixIndexGPR32Op8_11,
4244 sme_elm_idx0_7, "mov">;
4246 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4247 ZZ_b_mul_r, MatrixOp8,
4248 MatrixIndexGPR32Op8_11,
4249 sme_elm_idx0_7, "mov", "vgx2">;
4250 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4251 ZZ_h_mul_r, MatrixOp16,
4252 MatrixIndexGPR32Op8_11,
4253 sme_elm_idx0_7, "mov", "vgx2">;
4254 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4255 ZZ_s_mul_r, MatrixOp32,
4256 MatrixIndexGPR32Op8_11,
4257 sme_elm_idx0_7, "mov", "vgx2">;
// The only alias in this multiclass instantiated with op = 1.
4258 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME),
4259 ZZ_d_mul_r, MatrixOp64,
4260 MatrixIndexGPR32Op8_11,
4261 sme_elm_idx0_7, "mov", "vgx2">;
4265 // move array to vector, four registers
// Four-register array-to-vector move. Defines the real instruction on
// ZZZZ_d_mul_r / MatrixOp64 with the full 4-bit `opc`, then registers
// assembler aliases over the b/h/s/d operand classes:
//   - under `mnemonic` without an acronym (b, h, s, d),
//   - under `mnemonic` with "vgx4" (b, h, s),
//   - and, only when mnemonic is "mova", the same two sets under "mov".
// All aliases use op = 0 except the final "mov" ... "vgx4" alias on
// ZZZZ_d_mul_r / MatrixOp64, which uses op = 1.
4266 multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> {
4267 def NAME : sme2_mova_array_to_vec_vg24_multi<opc, ZZZZ_d_mul_r, MatrixOp64,
// Aliases under `mnemonic`, no vector-group acronym.
4273 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4274 ZZZZ_b_mul_r, MatrixOp8,
4275 MatrixIndexGPR32Op8_11,
4276 sme_elm_idx0_7, mnemonic>;
4277 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4278 ZZZZ_h_mul_r, MatrixOp16,
4279 MatrixIndexGPR32Op8_11,
4280 sme_elm_idx0_7, mnemonic>;
4281 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4282 ZZZZ_s_mul_r, MatrixOp32,
4283 MatrixIndexGPR32Op8_11,
4284 sme_elm_idx0_7, mnemonic>;
4285 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4286 ZZZZ_d_mul_r, MatrixOp64,
4287 MatrixIndexGPR32Op8_11,
4288 sme_elm_idx0_7, mnemonic>;
// Aliases under `mnemonic` with the "vgx4" acronym (b, h, s only).
4290 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4291 ZZZZ_b_mul_r, MatrixOp8,
4292 MatrixIndexGPR32Op8_11,
4293 sme_elm_idx0_7, mnemonic, "vgx4">;
4294 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4295 ZZZZ_h_mul_r, MatrixOp16,
4296 MatrixIndexGPR32Op8_11,
4297 sme_elm_idx0_7, mnemonic, "vgx4">;
4298 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4299 ZZZZ_s_mul_r, MatrixOp32,
4300 MatrixIndexGPR32Op8_11,
4301 sme_elm_idx0_7, mnemonic, "vgx4">;
// "mov" spellings, added only for the "mova" mnemonic.
4303 if !eq(mnemonic, "mova") then {
4304 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4305 ZZZZ_b_mul_r, MatrixOp8,
4306 MatrixIndexGPR32Op8_11,
4307 sme_elm_idx0_7, "mov">;
4308 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4309 ZZZZ_h_mul_r, MatrixOp16,
4310 MatrixIndexGPR32Op8_11,
4311 sme_elm_idx0_7, "mov">;
4312 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4313 ZZZZ_s_mul_r, MatrixOp32,
4314 MatrixIndexGPR32Op8_11,
4315 sme_elm_idx0_7, "mov">;
4316 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4317 ZZZZ_d_mul_r, MatrixOp64,
4318 MatrixIndexGPR32Op8_11,
4319 sme_elm_idx0_7, "mov">;
4321 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4322 ZZZZ_b_mul_r, MatrixOp8,
4323 MatrixIndexGPR32Op8_11,
4324 sme_elm_idx0_7, "mov", "vgx4">;
4325 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4326 ZZZZ_h_mul_r, MatrixOp16,
4327 MatrixIndexGPR32Op8_11,
4328 sme_elm_idx0_7, "mov", "vgx4">;
4329 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4330 ZZZZ_s_mul_r, MatrixOp32,
4331 MatrixIndexGPR32Op8_11,
4332 sme_elm_idx0_7, "mov", "vgx4">;
// The only alias in this multiclass instantiated with op = 1.
4333 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME),
4334 ZZZZ_d_mul_r, MatrixOp64,
4335 MatrixIndexGPR32Op8_11,
4336 sme_elm_idx0_7, "mov", "vgx4">;
4340 //===----------------------------------------------------------------------===//
4341 // SME2 multi-vec saturating shift right narrow
// Encoding class for the two-vector saturating shift-right-narrow form:
// takes a ZZ_s_mul_r source and a tvecshiftR16 shift amount and writes a
// single ZPR16 destination.
// Layout set below: Inst{31-21} and Inst{15-10} fixed, imm4 in Inst{19-16}.
4342 class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
4343 : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
4344 mnemonic, "\t$Zd, $Zn, $imm4",
4345 "", []>, Sched<[]> {
4349 let Inst{31-21} = 0b11000001111;
4351 let Inst{19-16} = imm4;
4352 let Inst{15-10} = 0b110101;
// Defines the single _H instruction of the two-vector saturating shift and the
// SME2_Sat_Shift_VG2_Pat pattern that selects it for `intrinsic`
// (nxv8i16 result from nxv4i32 sources, tvecshiftR16 shift operand).
4358 multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> {
4359 def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>;
4361 def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
// Encoding class for the four-vector saturating shift-right-narrow forms.
// Destination register class, source register list and shift-immediate operand
// are all template parameters. The shift amount itself is not encoded here:
// instantiating defs place it in Inst{20-16} (see the commented line below).
// Layout set below: Inst{31-24} and Inst{15-11} fixed, sz in Inst{23-22},
// op{2} in Inst{10}, op{1-0} in Inst{6-5}.
4364 class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
4365 RegisterOperand vector_ty, Operand imm_ty,
4367 : I<(outs zpr_ty:$Zd), (ins vector_ty:$Zn, imm_ty:$imm),
4368 mnemonic, "\t$Zd, $Zn, $imm",
4369 "", []>, Sched<[]> {
4372 let Inst{31-24} = 0b11000001;
4373 let Inst{23-22} = sz;
4375 // Inst{20-16} = imm5;
4376 let Inst{15-11} = 0b11011;
4377 let Inst{10} = op{2};
4379 let Inst{6-5} = op{1-0};
// Defines the _B and _H four-vector saturating shift instructions and their
// SME2_Sat_Shift_VG4_Pat selection patterns for `intrinsic`.
//   _B: sz = {0,1}, ZPR8 result from ZZZZ_s_mul_r, tvecshiftR32 immediate
//       placed in Inst{20-16}.
//   _H: sz = {1,?}, ZPR16 result from ZZZZ_d_mul_r, tvecshiftR64 immediate
//       whose top bit imm{5} goes to Inst{22} (the unset low bit of sz) and
//       whose low five bits go to Inst{20-16}.
4383 multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
4384 def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32,
4387 let Inst{20-16} = imm;
4389 def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64,
4392 let Inst{22} = imm{5};
4393 let Inst{20-16} = imm{4-0};
4396 def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>;
4397 def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>;
4400 //===----------------------------------------------------------------------===//
4401 // SME2 Multi-vector - SVE Select
// Encoding class shared by the two- and four-vector select forms.
// Operands: a PNRAny_p8to15 predicate $PNg and two register lists $Zn and $Zm
// of the same vector_ty as the result $Zd.
// The 4-bit op is scattered across the word: op{3-2} in Inst{17-16}, op{1} in
// Inst{6}, op{0} in Inst{1}. Derived classes overlay their register fields on
// the op bits they leave unset.
4402 class sme2_sel_vector_vg24<bits<2> sz, bits<4> op, RegisterOperand vector_ty,
4404 : I<(outs vector_ty:$Zd),
4405 (ins PNRAny_p8to15:$PNg, vector_ty:$Zn, vector_ty:$Zm),
4406 mnemonic, "\t$Zd, $PNg, $Zn, $Zm",
4407 "", []>, Sched<[]> {
4409 let Inst{31-24} = 0b11000001;
4410 let Inst{23-22} = sz;
4412 let Inst{17-16} = op{3-2};
4413 let Inst{15-13} = 0b100;
4414 let Inst{12-10} = PNg;
4415 let Inst{6} = op{1};
4417 let Inst{1} = op{0};
// Two-vector select: specialises sme2_sel_vector_vg24 with op = {?,0,?,?} and
// encodes Zm in Inst{20-17}, which overrides the unset op{3} bit at Inst{17}.
4421 class sme2_sel_vector_vg2<bits<2> sz, RegisterOperand vector_ty,
4423 : sme2_sel_vector_vg24<sz, {?,0,?,?}, vector_ty, mnemonic> {
4427 let Inst{20-17} = Zm;
// Instantiates the two-vector select for each element size: _B, _H, _S, _D
// use sz = 0b00..0b11 with the matching ZZ_<size>_mul_r register list.
4432 multiclass sme2_sel_vector_vg2<string mnemonic>{
4433 def _B : sme2_sel_vector_vg2<0b00, ZZ_b_mul_r, mnemonic>;
4434 def _H : sme2_sel_vector_vg2<0b01, ZZ_h_mul_r, mnemonic>;
4435 def _S : sme2_sel_vector_vg2<0b10, ZZ_s_mul_r, mnemonic>;
4436 def _D : sme2_sel_vector_vg2<0b11, ZZ_d_mul_r, mnemonic>;
// Four-vector select: specialises sme2_sel_vector_vg24 with the fully
// specified op = 0b0100 and encodes Zm in Inst{20-18}.
4438 class sme2_sel_vector_vg4<bits<2> sz, RegisterOperand vector_ty,
4440 : sme2_sel_vector_vg24<sz, 0b0100, vector_ty, mnemonic> {
4444 let Inst{20-18} = Zm;
// Instantiates the four-vector select for each element size: _B, _H, _S, _D
// use sz = 0b00..0b11 with the matching ZZZZ_<size>_mul_r register list.
4448 multiclass sme2_sel_vector_vg4<string mnemonic> {
4449 def _B : sme2_sel_vector_vg4<0b00, ZZZZ_b_mul_r, mnemonic>;
4450 def _H : sme2_sel_vector_vg4<0b01, ZZZZ_h_mul_r, mnemonic>;
4451 def _S : sme2_sel_vector_vg4<0b10, ZZZZ_s_mul_r, mnemonic>;
4452 def _D : sme2_sel_vector_vg4<0b11, ZZZZ_d_mul_r, mnemonic>;
4455 //===----------------------------------------------------------------------===//
4456 // Non-contiguous Load and Store
// Encoding class for the two-vector load with scalar-plus-scalar addressing:
// "$Zt, $PNg/z, [$Rn, $Rm]" with a PNRAny_p8to15 predicate, GPR64sp base and a
// gpr_ty offset register.
// The destination list number Zt is split: Zt{3} in Inst{4} and Zt{2-0} in
// Inst{2-0}. Rm occupies Inst{20-16}, msz Inst{14-13}, PNg Inst{12-10}.
4458 class sme2_ld_vector_vg2_multi_scalar_scalar<bits<2> msz, bit n,
4459 RegisterOperand multi_vector_ty,
4460 RegisterOperand gpr_ty,
4462 : I<(outs multi_vector_ty:$Zt),
4463 (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
4464 mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]",
4465 "", []>, Sched<[]> {
4470 let Inst{31-21} = 0b10100001000;
4471 let Inst{20-16} = Rm;
4473 let Inst{14-13} = msz;
4474 let Inst{12-10} = PNg;
4476 let Inst{4} = Zt{3};
4478 let Inst{2-0} = Zt{2-0};
// Encoding class for the four-vector load with scalar-plus-scalar addressing:
// "$Zt, $PNg/z, [$Rn, $Rm]" with a PNRAny_p8to15 predicate, GPR64sp base and a
// gpr_ty offset register.
// Compared with the vg2 class, Zt is one bit narrower in the encoding:
// Zt{2} in Inst{4} and Zt{1-0} in Inst{1-0}.
4483 class sme2_ld_vector_vg4_multi_scalar_scalar<bits<2> msz, bit n,
4484 RegisterOperand multi_vector_ty,
4485 RegisterOperand gpr_ty,
4487 : I<(outs multi_vector_ty:$Zt),
4488 (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
4489 mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]",
4490 "", []>, Sched<[]> {
4495 let Inst{31-21} = 0b10100001000;
4496 let Inst{20-16} = Rm;
4498 let Inst{14-13} = msz;
4499 let Inst{12-10} = PNg;
4501 let Inst{4} = Zt{2};
4504 let Inst{1-0} = Zt{1-0};
// Encoding class shared by the two- and four-vector loads with
// scalar-plus-immediate addressing: "$Zt, $PNg/z, [$Rn, $imm4, mul vl]".
// The 2-bit op is split across the word (op{1} in Inst{15}, op{0} in Inst{2});
// imm4 occupies Inst{19-16}. The Zt field is placed by the instantiating
// multiclasses.
4509 class sme2_ld_vector_vg24_multi_scalar_immediate<bits<2> msz, bit n, bits<2> op,
4510 RegisterOperand multi_vector_ty,
4513 : I<(outs multi_vector_ty:$Zt),
4514 (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, index_ty:$imm4),
4515 mnemonic, "\t$Zt, $PNg/z, [$Rn, $imm4, mul vl]",
4516 "", []>, Sched<[]> {
4520 let Inst{31-20} = 0b101000010100;
4521 let Inst{19-16} = imm4;
4522 let Inst{15} = op{1};
4523 let Inst{14-13} = msz;
4524 let Inst{12-10} = PNg;
4527 let Inst{2} = op{0};
// Two-vector load, scalar-plus-immediate form. Instantiates the shared class
// with op = {0,?}; Zt{2-0} is written to Inst{2-0}, overriding the unset op{0}
// bit at Inst{2}, and Zt{3} goes to Inst{4}.
// Also adds an alias for the offset-less syntax "[$Rn]", which supplies an
// immediate of 0.
4532 multiclass sme2_ld_vector_vg2_multi_scalar_immediate<bits<2> msz, bit n,
4533 RegisterOperand multi_vector_ty,
4536 def NAME : sme2_ld_vector_vg24_multi_scalar_immediate<msz, n, {0,?},
4538 index_ty, mnemonic> {
4540 let Inst{4} = Zt{3};
4541 let Inst{2-0} = Zt{2-0};
4544 def : InstAlias<mnemonic # "\t$Zt, $PNg/z, [$Rn]",
4545 (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>;
// Four-vector load, scalar-plus-immediate form. Instantiates the shared class
// with op = 0b10; Zt{2} goes to Inst{4} and Zt{1-0} to Inst{1-0}.
// Also adds an alias for the offset-less syntax "[$Rn]", which supplies an
// immediate of 0.
4548 multiclass sme2_ld_vector_vg4_multi_scalar_immediate<bits<2> msz, bit n,
4549 RegisterOperand multi_vector_ty,
4552 def NAME : sme2_ld_vector_vg24_multi_scalar_immediate<msz, n, 0b10,
4554 index_ty, mnemonic> {
4556 let Inst{4} = Zt{2};
4557 let Inst{1-0} = Zt{1-0};
4560 def : InstAlias<mnemonic # "\t$Zt, $PNg/z, [$Rn]",
4561 (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>;
4564 //===----------------------------------------------------------------------===//
4565 // SME2 Non-Contiguous Store
// Encoding class for the two-vector store with scalar-plus-scalar addressing:
// "$Zt, $PNg, [$Rn, $Rm]". Here the register list $Zt is an input operand.
// Field placement mirrors the vg2 load class (Rm in Inst{20-16}, msz in
// Inst{14-13}, PNg in Inst{12-10}, Zt{3} in Inst{4}, Zt{2-0} in Inst{2-0});
// the fixed prefix Inst{31-21} differs from the load's in its lowest bit.
4566 class sme2_st_vector_vg2_multi_scalar_scalar<bits<2> msz, bit n,
4567 RegisterOperand multi_vector_ty,
4568 RegisterOperand gpr_ty,
4571 (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
4572 mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]",
4573 "", []>, Sched<[]> {
4578 let Inst{31-21} = 0b10100001001;
4579 let Inst{20-16} = Rm;
4581 let Inst{14-13} = msz;
4582 let Inst{12-10} = PNg;
4584 let Inst{4} = Zt{3};
4586 let Inst{2-0} = Zt{2-0};
// Encoding class for the four-vector store with scalar-plus-scalar addressing:
// "$Zt, $PNg, [$Rn, $Rm]". Here the register list $Zt is an input operand.
// Zt is encoded as Zt{2} in Inst{4} and Zt{1-0} in Inst{1-0}.
4591 class sme2_st_vector_vg4_multi_scalar_scalar<bits<2> msz, bit n,
4592 RegisterOperand multi_vector_ty,
4593 RegisterOperand gpr_ty,
4596 (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
4597 mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]",
4598 "", []>, Sched<[]> {
4603 let Inst{31-21} = 0b10100001001;
4604 let Inst{20-16} = Rm;
4606 let Inst{14-13} = msz;
4607 let Inst{12-10} = PNg;
4609 let Inst{4} = Zt{2};
4612 let Inst{1-0} = Zt{1-0};
// Encoding class shared by the two- and four-vector stores with
// scalar-plus-immediate addressing: "$Zt, $PNg, [$Rn, $imm4, mul vl]".
// The 2-bit op is split across the word (op{1} in Inst{15}, op{0} in Inst{2});
// imm4 occupies Inst{19-16}. The Zt field is placed by the instantiating
// multiclasses.
4617 class sme2_st_vector_vg24_multi_scalar_immediate<bits<2> msz, bit n, bits<2> op,
4618 RegisterOperand multi_vector_ty,
4622 (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, index_ty:$imm4),
4623 mnemonic, "\t$Zt, $PNg, [$Rn, $imm4, mul vl]",
4624 "", []>, Sched<[]> {
4628 let Inst{31-20} = 0b101000010110;
4629 let Inst{19-16} = imm4;
4630 let Inst{15} = op{1};
4631 let Inst{14-13} = msz;
4632 let Inst{12-10} = PNg;
4635 let Inst{2} = op{0};
// Multiclass for the "vg2" scalar+immediate store. It defines one instruction
// record (NAME) from sme2_st_vector_vg24_multi_scalar_immediate and an
// assembler alias for the address form without an explicit offset.
4641 multiclass sme2_st_vector_vg2_multi_scalar_immediate<bits<2> msz, bit n,
4642 RegisterOperand multi_vector_ty,
// op = {0,?}: op{1} is 0 and op{0} is left unset, because Inst{2} is
// overridden below by the low bits of Zt.
4645 def NAME: sme2_st_vector_vg24_multi_scalar_immediate<msz, n, {0,?},
4647 index_ty, mnemonic> {
// Zt{3} -> Inst{4}, Zt{2-0} -> Inst{2-0}.
4649 let Inst{4} = Zt{3};
4650 let Inst{2-0} = Zt{2-0};
// Alias for the "[$Rn]" spelling: 0 is supplied for the trailing $imm4
// operand of NAME.
4653 def : InstAlias<mnemonic # "\t$Zt, $PNg, [$Rn]",
4654 (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn,0), 1>;
// Multiclass for the "vg4" scalar+immediate store. It defines one instruction
// record (NAME) from sme2_st_vector_vg24_multi_scalar_immediate and an
// assembler alias for the address form without an explicit offset.
4657 multiclass sme2_st_vector_vg4_multi_scalar_immediate<bits<2> msz, bit n,
4658 RegisterOperand multi_vector_ty,
// op = 0b10: the base class puts op{1} = 1 in Inst{15} and op{0} = 0 in
// Inst{2}.
4661 def NAME : sme2_st_vector_vg24_multi_scalar_immediate<msz, n, 0b10,
4663 index_ty, mnemonic> {
// Zt{2} -> Inst{4}, Zt{1-0} -> Inst{1-0}.
4665 let Inst{4} = Zt{2};
4666 let Inst{1-0} = Zt{1-0};
// Alias for the "[$Rn]" spelling: 0 is supplied for the trailing $imm4
// operand of NAME.
4669 def : InstAlias<mnemonic # "\t$Zt, $PNg, [$Rn]",
4670 (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn,0), 1>;
4673 //===----------------------------------------------------------------------===//
4675 //===----------------------------------------------------------------------===//
4676 // SME zeroing move array to vector
// Base encoding class for the zeroing tile-to-vector move, written
// "$Zd, $ZAn[$Rs, $imm]". The tile appears both as an output ($ZAn) and as an
// input ($_ZAn); the two are tied by the Constraints string below.
4677 class sme2p1_movaz_tile_to_vec_base<bits<2> sz, bit q, bit v, ZPRRegOp vector_ty,
4678 RegisterOperand tile_ty, Operand index_ty,
4680 : I<(outs vector_ty:$Zd, tile_ty:$ZAn),
4681 (ins tile_ty:$_ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
4682 mnemonic, "\t$Zd, $ZAn[$Rs, $imm]",
// Sched<[]>: an empty scheduling list is attached to this class.
4683 "", []>, Sched<[]> {
// Fixed bits, with sz in Inst{23-22} and the slice index register Rs in
// Inst{14-13}.
4686 let Inst{31-24} = 0b11000000;
4687 let Inst{23-22} = sz;
4688 let Inst{21-17} = 0b00001;
4691 let Inst{14-13} = Rs;
4692 let Inst{12-9} = 0b0001;
// Tie the tile output to the tile input.
4694 let Constraints = "$ZAn = $_ZAn";
// Multiclass that instantiates the class of the same name once per element
// size (_B, _H, _S, _D, _Q). The v bit selects the tile operand through !if:
// the TileVectorOpV* operand when v is set, otherwise TileVectorOpH*.
// Each record places its ZAn and/or imm fields in Inst{8-5}; the visible
// placements are noted per record.
4697 multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
// _B: sz = 0b00, q = 0, index operand sme_elm_idx0_15; imm fills Inst{8-5}.
4698 def _B : sme2p1_movaz_tile_to_vec_base<0b00, 0b0, v, ZPR8,
4699 !if(v, TileVectorOpV8, TileVectorOpH8),
4700 sme_elm_idx0_15, mnemonic> {
4702 let Inst{8-5} = imm;
// _H: sz = 0b01, q = 0, index operand sme_elm_idx0_7; imm is in Inst{7-5}.
4705 def _H : sme2p1_movaz_tile_to_vec_base<0b01, 0b0, v, ZPR16,
4706 !if(v, TileVectorOpV16, TileVectorOpH16),
4707 sme_elm_idx0_7, mnemonic> {
4711 let Inst{7-5} = imm;
// _S: sz = 0b10, q = 0, index operand sme_elm_idx0_3; ZAn is in Inst{8-7}
// and imm in Inst{6-5}.
4714 def _S : sme2p1_movaz_tile_to_vec_base<0b10, 0b0, v, ZPR32,
4715 !if(v, TileVectorOpV32, TileVectorOpH32),
4716 sme_elm_idx0_3, mnemonic> {
4719 let Inst{8-7} = ZAn;
4720 let Inst{6-5} = imm;
// _D: sz = 0b11, q = 0, index operand sme_elm_idx0_1; ZAn is in Inst{8-6}.
4723 def _D : sme2p1_movaz_tile_to_vec_base<0b11, 0b0, v, ZPR64,
4724 !if(v, TileVectorOpV64, TileVectorOpH64),
4725 sme_elm_idx0_1, mnemonic> {
4728 let Inst{8-6} = ZAn;
// _Q: sz = 0b11, q = 1, index operand sme_elm_idx0_0; ZAn fills Inst{8-5}.
4732 def _Q : sme2p1_movaz_tile_to_vec_base<0b11, 0b1, v, ZPR128,
4733 !if(v, TileVectorOpV128, TileVectorOpH128),
4734 sme_elm_idx0_0, mnemonic> {
4736 let Inst{8-5} = ZAn;
// Top-level multiclass for the zeroing tile-to-vector move. It expands
// sme2p1_movaz_tile_to_vec_base twice: with v = 0 under the _H suffix and
// with v = 1 under the _V suffix.
4740 multiclass sme2p1_movaz_tile_to_vec<string mnemonic>{
4741 defm _H : sme2p1_movaz_tile_to_vec_base<0b0, mnemonic>;
4742 defm _V : sme2p1_movaz_tile_to_vec_base<0b1, mnemonic>;
4745 //===----------------------------------------------------------------------===//
4746 // SME2.1 multiple vectors zero array
// Encoding class for the SME2.1 "zero array" instructions, written
// "$ZAd[$Rv, $imm]" or "$ZAd[$Rv, $imm, <vg_acronym>]". The six-bit opc is
// split into Inst{17-15} (opc{5-3}) and Inst{2-0} (opc{2-0}).
// vg_acronym defaults to ""; when empty, no ", <vg_acronym>" suffix is added
// inside the brackets.
4748 class sme2p1_zero_matrix<bits<6> opc, Operand index_ty, string mnemonic,
4749 string vg_acronym="">
4750 : I<(outs MatrixOp64:$ZAd),
4751 (ins MatrixOp64:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm),
4752 mnemonic, "\t$ZAd[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "]",
// Sched<[]>: an empty scheduling list is attached to this class.
4753 "", []>, Sched<[]> {
// Fixed bits, with Rv in Inst{14-13} and Inst{12-3} all zero.
4755 let Inst{31-18} = 0b11000000000011;
4756 let Inst{17-15} = opc{5-3};
4757 let Inst{14-13} = Rv;
4758 let Inst{12-3} = 0b0000000000;
4759 let Inst{2-0} = opc{2-0};
// Tie the matrix output to the matrix input.
4760 let Constraints = "$ZAd = $_ZAd";
// Multiclass that instantiates the sme2p1_zero_matrix class for each operand
// form. The opc argument fixes its leading bits and leaves the trailing bits
// as '?'; where a "let Inst{...} = imm" line is shown, those bits are
// supplied by the immediate.
4763 multiclass sme2p1_zero_matrix<string mnemonic> {
// Three-bit immediate forms: imm fills Inst{2-0}.
4764 def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2"> {
4766 let Inst{2-0} = imm;
4768 def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic> {
4770 let Inst{2-0} = imm;
// Two-bit immediate forms: imm fills Inst{1-0}.
4772 def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2"> {
4774 let Inst{1-0} = imm;
4776 def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4"> {
4778 let Inst{1-0} = imm;
// Three-bit immediate form with the "vgx4" suffix.
4780 def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4"> {
4782 let Inst{2-0} = imm;
4784 def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic> {
4786 let Inst{1-0} = imm;
// Forms with a single unset opc bit (uimm1s4range index operand).
4788 def _VG2_4Z :sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2"> {
4792 def _VG4_4Z :sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4"> {
4798 //===----------------------------------------------------------------------===//
4799 // SME2.1 lookup table expand two non-contiguous registers
// Base encoding class for the SME2.1 lookup-table instructions with a "vg2"
// destination, written "$Zd, $ZTt, $Zn$i". Inputs are the table register
// $ZTt (ZTR), the vector $Zn (ZPRAny) and the vector index $i.
4801 class sme2p1_luti_vector_vg2_index<bits<4> op, bits<2> sz, RegisterOperand vector_ty,
4802 AsmVectorIndexOpnd index_ty,
4804 : I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
4805 mnemonic, "\t$Zd, $ZTt, $Zn$i",
// Sched<[]>: an empty scheduling list is attached to this class.
4806 "", []>, Sched<[]> {
// Fixed bits, then the four-bit op in Inst{18-15} and sz in Inst{13-12}.
4809 let Inst{31-19} = 0b1100000010011;
4810 let Inst{18-15} = op;
4812 let Inst{13-12} = sz;
4813 let Inst{11-10} = 0b00;
// Zd is split across the encoding: Zd{3} -> Inst{4}, Zd{2-0} -> Inst{2-0}.
4815 let Inst{4} = Zd{3};
4817 let Inst{2-0} = Zd{2-0};
// luti2 specialisation of sme2p1_luti_vector_vg2_index. It passes
// op = {1,?,?,?}: the top op bit is 1 and the three unset bits, which the base
// class maps to Inst{17-15}, are supplied by the index i.
4820 class sme2p1_luti2_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
4821 AsmVectorIndexOpnd index_ty,
4823 : sme2p1_luti_vector_vg2_index<{1,?,?,?}, sz, vector_ty, index_ty, mnemonic> {
4825 let Inst{17-15} = i;
// Multiclass defining the _B (sz = 0b00, ZZ_b_strided) and _H (sz = 0b01,
// ZZ_h_strided) records of the vg2 luti2 class; both use VectorIndexH as the
// index operand.
4828 multiclass sme2p1_luti2_vector_vg2_index<string mnemonic> {
4829 def _B : sme2p1_luti2_vector_vg2_index<0b00, ZZ_b_strided, VectorIndexH,
4831 def _H : sme2p1_luti2_vector_vg2_index<0b01, ZZ_h_strided, VectorIndexH,
// luti4 specialisation of sme2p1_luti_vector_vg2_index. It passes
// op = {0b01,?,?}: the top two op bits are 01 and the two unset bits, which
// the base class maps to Inst{16-15}, are supplied by the index i.
4835 class sme2p1_luti4_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
4836 AsmVectorIndexOpnd index_ty,
4838 : sme2p1_luti_vector_vg2_index<{0b01,?,?}, sz, vector_ty, index_ty, mnemonic> {
4840 let Inst{16-15} = i;
// Multiclass defining the _B (sz = 0b00, ZZ_b_strided) and _H (sz = 0b01,
// ZZ_h_strided) records of the vg2 luti4 class; both use VectorIndexS as the
// index operand.
4842 multiclass sme2p1_luti4_vector_vg2_index<string mnemonic> {
4843 def _B : sme2p1_luti4_vector_vg2_index<0b00, ZZ_b_strided, VectorIndexS,
4845 def _H : sme2p1_luti4_vector_vg2_index<0b01, ZZ_h_strided, VectorIndexS,
4849 // SME2.1 lookup table expand four non-contiguous registers
// Base encoding class for the SME2.1 lookup-table instructions with a "vg4"
// destination, written "$Zd, $ZTt, $Zn$i". Compared with the vg2 base class,
// op is three bits wide (Inst{18-16}) and Inst{15-14} is fixed to 0b10.
4850 class sme2p1_luti_vector_vg4_index<bits<3> op, bits<2> sz, RegisterOperand vector_ty,
4851 AsmVectorIndexOpnd index_ty,
4853 : I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
4854 mnemonic, "\t$Zd, $ZTt, $Zn$i",
// Sched<[]>: an empty scheduling list is attached to this class.
4855 "", []>, Sched<[]> {
4858 let Inst{31-19} = 0b1100000010011;
4859 let Inst{18-16} = op;
4860 let Inst{15-14} = 0b10;
4861 let Inst{13-12} = sz;
4862 let Inst{11-10} = 0b00;
// Only three bits of Zd are encoded: Zd{2} -> Inst{4}, Zd{1-0} -> Inst{1-0},
// with Inst{3-2} fixed to zero.
4864 let Inst{4} = Zd{2};
4865 let Inst{3-2} = 0b00;
4866 let Inst{1-0} = Zd{1-0};
// luti2 specialisation of sme2p1_luti_vector_vg4_index. It passes
// op = {1,?,?}: the top op bit is 1 and the two unset bits, which the base
// class maps to Inst{17-16}, are supplied by the index i.
4869 class sme2p1_luti2_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
4870 AsmVectorIndexOpnd index_ty,
4872 : sme2p1_luti_vector_vg4_index<{1,?,?}, sz, vector_ty, index_ty, mnemonic> {
4874 let Inst{17-16} = i;
// Multiclass defining the _B (sz = 0b00, ZZZZ_b_strided) and _H (sz = 0b01,
// ZZZZ_h_strided) records of the vg4 luti2 class; both use VectorIndexS as
// the index operand.
4877 multiclass sme2p1_luti2_vector_vg4_index<string mnemonic> {
4878 def _B : sme2p1_luti2_vector_vg4_index<0b00, ZZZZ_b_strided, VectorIndexS,
4880 def _H : sme2p1_luti2_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexS,
// luti4 specialisation of sme2p1_luti_vector_vg4_index. It passes
// op = {0b01,?}: the top two op bits are 01 and the lowest op bit (Inst{16}
// in the base class) is left unset here.
4884 class sme2p1_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
4885 AsmVectorIndexOpnd index_ty,
4887 : sme2p1_luti_vector_vg4_index<{0b01,?}, sz, vector_ty, index_ty, mnemonic> {
// Multiclass for the vg4 luti4 class. Only an _H record is defined
// (sz = 0b01, ZZZZ_h_strided destination, VectorIndexD index operand).
4892 multiclass sme2p1_luti4_vector_vg4_index<string mnemonic> {
4893 def _H: sme2p1_luti4_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexD, mnemonic>;
4896 // SME2 lookup table two source registers expand to four contiguous destination registers
// Encoding class for the luti4 form written "$Zd, $ZTt, $Zn" (no index
// operand): it reads the table register $ZTt and the ZZ_mul_r operand $Zn and
// writes the ZZZZ_b_mul_r operand $Zd.
4897 class sme2_luti4_vector_vg4<bits<2> sz, bits<2> op, string mnemonic>
4898 : I<(outs ZZZZ_b_mul_r:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
4899 mnemonic, "\t$Zd, $ZTt, $Zn",
// Sched<[]>: an empty scheduling list is attached to this class.
4900 "", []>, Sched<[]> {
// Fixed bits Inst{31-14}, then sz in Inst{13-12} and op in Inst{11-10}.
4903 let Inst{31-14} = 0b110000001000101100;
4904 let Inst{13-12} = sz;
4905 let Inst{11-10} = op;
// The two lowest encoding bits are fixed to zero.
4909 let Inst{1-0} = 0b00;
4912 // SME2 lookup table two source registers expand to four non-contiguous destination registers
4913 class sme2_luti4_vector_vg4_strided<bits<2> sz, bits<2> op, string mnemonic>
4914 : I<(outs ZZZZ_b_strided:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
4915 mnemonic, "\t$Zd, $ZTt, $Zn",
4916 "", []>, Sched<[]> {
4919 let Inst{31-14} = 0b110000001001101100;
4920 let Inst{13-12} = sz;
4921 let Inst{11-10} = op;
4924 let Inst{4} = Zd{2};
4925 let Inst{3-2} = 0b00;
4926 let Inst{1-0} = Zd{1-0};