//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
class AssemblerPredicateWithAll<dag cond, string name="">
    : AssemblerPredicate<(any_of FeatureAll, cond), name>;
def HasV8_0a         : Predicate<"Subtarget->hasV8_0aOps()">,
                       AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a         : Predicate<"Subtarget->hasV8_6aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a         : Predicate<"Subtarget->hasV8_7aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV8_8a         : Predicate<"Subtarget->hasV8_8aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
def HasV8_9a         : Predicate<"Subtarget->hasV8_9aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
def HasV9_0a         : Predicate<"Subtarget->hasV9_0aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a         : Predicate<"Subtarget->hasV9_1aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a         : Predicate<"Subtarget->hasV9_2aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a         : Predicate<"Subtarget->hasV9_3aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV9_4a         : Predicate<"Subtarget->hasV9_4aOps()">,
                       AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r         : Predicate<"Subtarget->hasV8_0rOps()">,
                       AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;
def HasEL2VMSA       : Predicate<"Subtarget->hasEL2VMSA()">,
                       AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;

def HasEL3           : Predicate<"Subtarget->hasEL3()">,
                       AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;

def HasVH            : Predicate<"Subtarget->hasVH()">,
                       AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;

def HasLOR           : Predicate<"Subtarget->hasLOR()">,
                       AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;

def HasPAuth         : Predicate<"Subtarget->hasPAuth()">,
                       AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;

def HasPAuthLR       : Predicate<"Subtarget->hasPAuthLR()">,
                       AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;

def HasJS            : Predicate<"Subtarget->hasJS()">,
                       AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;

def HasCCIDX         : Predicate<"Subtarget->hasCCIDX()">,
                       AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;

def HasComplxNum     : Predicate<"Subtarget->hasComplxNum()">,
                       AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;

def HasNV            : Predicate<"Subtarget->hasNV()">,
                       AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;

def HasMPAM          : Predicate<"Subtarget->hasMPAM()">,
                       AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;

def HasDIT           : Predicate<"Subtarget->hasDIT()">,
                       AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;

def HasTRACEV8_4     : Predicate<"Subtarget->hasTRACEV8_4()">,
                       AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;

def HasAM            : Predicate<"Subtarget->hasAM()">,
                       AssemblerPredicateWithAll<(all_of FeatureAM), "am">;

def HasSEL2          : Predicate<"Subtarget->hasSEL2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;

def HasTLB_RMI       : Predicate<"Subtarget->hasTLB_RMI()">,
                       AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;

def HasFlagM         : Predicate<"Subtarget->hasFlagM()">,
                       AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;

def HasRCPC_IMMO     : Predicate<"Subtarget->hasRCPC_IMMO()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;

def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                       AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON          : Predicate<"Subtarget->hasNEON()">,
                       AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4           : Predicate<"Subtarget->hasSM4()">,
                       AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3          : Predicate<"Subtarget->hasSHA3()">,
                       AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES           : Predicate<"Subtarget->hasAES()">,
                       AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
                       AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                       AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasCSSC          : Predicate<"Subtarget->hasCSSC()">,
                       AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
def HasNoCSSC        : Predicate<"!Subtarget->hasCSSC()">;
def HasLSE           : Predicate<"Subtarget->hasLSE()">,
                       AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE         : Predicate<"!Subtarget->hasLSE()">;
def HasRAS           : Predicate<"Subtarget->hasRAS()">,
                       AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM           : Predicate<"Subtarget->hasRDM()">,
                       AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
                       AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasNoFullFP16    : Predicate<"!Subtarget->hasFullFP16()">;
def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
                       AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE           : Predicate<"Subtarget->hasSPE()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
                       AssemblerPredicateWithAll<(all_of FeatureFuseAES),
                       "fuse-aes">;
def HasSVE           : Predicate<"Subtarget->hasSVE()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVE2          : Predicate<"Subtarget->hasSVE2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1        : Predicate<"Subtarget->hasSVE2p1()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
def HasSVE2AES       : Predicate<"Subtarget->hasSVE2AES()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4       : Predicate<"Subtarget->hasSVE2SM4()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3      : Predicate<"Subtarget->hasSVE2SHA3()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm   : Predicate<"Subtarget->hasSVE2BitPerm()">,
                       AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasB16B16        : Predicate<"Subtarget->hasB16B16()">,
                       AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">;
def HasSME           : Predicate<"Subtarget->hasSME()">,
                       AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSMEF64F64     : Predicate<"Subtarget->hasSMEF64F64()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
def HasSMEF16F16     : Predicate<"Subtarget->hasSMEF16F16()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
def HasSMEFA64       : Predicate<"Subtarget->hasSMEFA64()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
def HasSMEI16I64     : Predicate<"Subtarget->hasSMEI16I64()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
def HasSME2          : Predicate<"Subtarget->hasSME2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1        : Predicate<"Subtarget->hasSME2p1()">,
                       AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
def HasFPMR          : Predicate<"Subtarget->hasFPMR()">,
                       AssemblerPredicateWithAll<(all_of FeatureFPMR), "fpmr">;
def HasFP8           : Predicate<"Subtarget->hasFP8()">,
                       AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
def HasFAMINMAX      : Predicate<"Subtarget->hasFAMINMAX()">,
                       AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">;
def HasFP8FMA        : Predicate<"Subtarget->hasFP8FMA()">,
                       AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">;
def HasSSVE_FP8FMA   : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
                                 "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
                       AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA,
                                                 (all_of FeatureSVE2, FeatureFP8FMA)),
                                                 "ssve-fp8fma or (sve2 and fp8fma)">;
def HasFP8DOT2       : Predicate<"Subtarget->hasFP8DOT2()">,
                       AssemblerPredicateWithAll<(all_of FeatureFP8DOT2), "fp8dot2">;
def HasSSVE_FP8DOT2  : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
                                 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
                       AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2,
                                                 (all_of FeatureSVE2, FeatureFP8DOT2)),
                                                 "ssve-fp8dot2 or (sve2 and fp8dot2)">;
def HasFP8DOT4       : Predicate<"Subtarget->hasFP8DOT4()">,
                       AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">;
def HasSSVE_FP8DOT4  : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
                                 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
                       AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4,
                                                 (all_of FeatureSVE2, FeatureFP8DOT4)),
                                                 "ssve-fp8dot4 or (sve2 and fp8dot4)">;
def HasLUT           : Predicate<"Subtarget->hasLUT()">,
                       AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
def HasSME_LUTv2     : Predicate<"Subtarget->hasSME_LUTv2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
def HasSMEF8F16      : Predicate<"Subtarget->hasSMEF8F16()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
def HasSMEF8F32      : Predicate<"Subtarget->hasSMEF8F32()">,
                       AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasSVEorSME
    : Predicate<"Subtarget->hasSVEorSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                                "sve or sme">;
def HasSVE2orSME
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                                "sve2 or sme">;
def HasSVE2orSME2
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
                                "sve2 or sme2">;
def HasSVE2p1_or_HasSME
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
def HasSVE2p1_or_HasSME2
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">;
def HasSVE2p1_or_HasSME2p1
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;

def HasSMEF16F16orSMEF8F16
    : Predicate<"Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16()">,
      AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16),
                                "sme-f16f16 or sme-f8f16">;

// A subset of NEON instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasNEONorSME
    : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
                                "neon or sme">;
def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
                       AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264     : Predicate<"Subtarget->hasFRInt3264()">,
                       AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB            : Predicate<"Subtarget->hasSB()">,
                       AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes      : Predicate<"Subtarget->hasPredRes()">,
                       AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP          : Predicate<"Subtarget->hasCCDP()">,
                       AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI           : Predicate<"Subtarget->hasBTI()">,
                       AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE           : Predicate<"Subtarget->hasMTE()">,
                       AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME           : Predicate<"Subtarget->hasTME()">,
                       AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE           : Predicate<"Subtarget->hasETE()">,
                       AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE          : Predicate<"Subtarget->hasTRBE()">,
                       AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16          : Predicate<"Subtarget->hasBF16()">,
                       AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasNoBF16        : Predicate<"!Subtarget->hasBF16()">;
def HasMatMulInt8    : Predicate<"Subtarget->hasMatMulInt8()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32    : Predicate<"Subtarget->hasMatMulFP32()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64    : Predicate<"Subtarget->hasMatMulFP64()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS            : Predicate<"Subtarget->hasXS()">,
                       AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT          : Predicate<"Subtarget->hasWFxT()">,
                       AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64          : Predicate<"Subtarget->hasLS64()">,
                       AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE          : Predicate<"Subtarget->hasBRBE()">,
                       AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF       : Predicate<"Subtarget->hasSPE_EEF()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC           : Predicate<"Subtarget->hasHBC()">,
                       AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS          : Predicate<"Subtarget->hasMOPS()">,
                       AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB        : Predicate<"Subtarget->hasCLRBHB()">,
                       AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2      : Predicate<"Subtarget->hasSPECRES2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE           : Predicate<"Subtarget->hasITE()">,
                       AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE           : Predicate<"Subtarget->hasTHE()">,
                       AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3         : Predicate<"Subtarget->hasRCPC3()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128        : Predicate<"Subtarget->hasLSE128()">,
                       AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128          : Predicate<"Subtarget->hasD128()">,
                       AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def HasCHK           : Predicate<"Subtarget->hasCHK()">,
                       AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS           : Predicate<"Subtarget->hasGCS()">,
                       AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def HasCPA           : Predicate<"Subtarget->hasCPA()">,
                       AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

def UseNegativeImmediates
    : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                                             "NegativeImmediates">;

def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;

def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                  SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                       SDTCisInt<1>]>>;

//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//
// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                               [SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>,
                                                SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                              [SDTCisSameAs<0, 1>,
                                               SDTCisSameAs<0, 2>,
                                               SDTCisInt<0>,
                                               SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                                 [SDTCisSameAs<0, 2>,
                                                  SDTCisSameAs<0, 3>,
                                                  SDTCisInt<0>,
                                                  SDTCisVT<1, i32>,
                                                  SDTCisVT<4, i32>]>;

def SDT_AArch64Brcond : SDTypeProfile<0, 3,
                                      [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                       SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                          SDTCisVT<2, OtherVT>]>;

def SDT_AArch64CSel : SDTypeProfile<1, 4,
                                    [SDTCisSameAs<0, 1>,
                                     SDTCisSameAs<0, 2>,
                                     SDTCisInt<3>,
                                     SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
                                    [SDTCisFP<0>,
                                     SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                            SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                         SDTCisVec<2>, SDTCisSameAs<2,3>]>;
def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                                                   SDTCisSameAs<0,1>,
                                                   SDTCisSameAs<0,2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>,
                                             SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                   SDTCisPtrTy<1>]>;

def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
// Generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1
//
// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// number of operands (the variable)
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
                                              [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                        [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                         SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                         SDTCisSameAs<1, 4>]>;

def SDT_AArch64TBL : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;
// non-extending masked load fragment.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// Any/Zero extending masked load fragments.
def azext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def),[{
  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def azext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def azext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def azext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Sign extending masked load fragments.
def sext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def sext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def sext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def sext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;

// non-truncating masked store fragment.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// truncating masked store fragments.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
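
// Note: the temporal/non-temporal split in the fragments above is what steers
// instruction selection between the ordinary SVE contiguous load/store forms
// and their non-temporal (LDNT1/STNT1-style) counterparts; the fragments are
// consumed by the SVE pattern files.
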
multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
  // offsets = (signed)Index << sizeof(elt)
  def NAME#_signed_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (signed)Index
  def NAME#_signed_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && !MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index << sizeof(elt)
  def NAME#_unsigned_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index
  def NAME#_unsigned_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && !MGS->isIndexScaled();
  }]>;
}
defm nonext_masked_gather    : masked_gather_scatter<nonext_masked_gather>;
defm azext_masked_gather_i8  : masked_gather_scatter<azext_masked_gather_i8>;
defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
defm sext_masked_gather_i8   : masked_gather_scatter<sext_masked_gather_i8>;
defm sext_masked_gather_i16  : masked_gather_scatter<sext_masked_gather_i16>;
defm sext_masked_gather_i32  : masked_gather_scatter<sext_masked_gather_i32>;

defm nontrunc_masked_scatter  : masked_gather_scatter<nontrunc_masked_scatter>;
defm trunc_masked_scatter_i8  : masked_gather_scatter<trunc_masked_scatter_i8>;
defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;
// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
  }]>;

// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
  }]>;
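
// For example, top32Zero lets a 64-bit multiply whose operands are known to be
// zero-extended from 32 bits be selected as a 32x32->64 UMULL-style operation
// rather than a full 64-bit multiply.
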
// topbitsallzero - Return true if all bits except the lowest bit are known zero
def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
  }]>;
def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
  }]>;
def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr           : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                  SDCallSeqStart<[ SDTCisVT<0, i32>,
                                                   SDTCisVT<1, i32> ]>,
                                  [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
                                  SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                                 SDTCisVT<1, i32> ]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call          : SDNode<"AArch64ISD::CALL",
                                  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                   SDNPVariadic]>;

def AArch64call_bti      : SDNode<"AArch64ISD::CALL_BTI",
                                  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                   SDNPVariadic]>;

def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
                                 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                  SDNPVariadic]>;

def AArch64call_arm64ec_to_x64 : SDNode<"AArch64ISD::CALL_ARM64EC_TO_X64",
                                        SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                         SDNPVariadic]>;

def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                                  [SDNPHasChain]>;
def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                                  [SDNPHasChain]>;
def AArch64cbnz          : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                                  [SDNPHasChain]>;
def AArch64tbz           : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                                  [SDNPHasChain]>;
def AArch64tbnz          : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                                  [SDNPHasChain]>;
def AArch64csel          : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv         : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retglue       : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc           : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn>;
def AArch64sbc           : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
def AArch64add_flag      : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
                                  [SDNPCommutative]>;
def AArch64sub_flag      : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
def AArch64and_flag      : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
                                  [SDNPCommutative]>;
def AArch64adc_flag      : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag      : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;

def AArch64ccmp          : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
def AArch64ccmn          : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
def AArch64fccmp         : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

def AArch64fcmp          : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64strict_fcmp   : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
                                  [SDNPHasChain]>;
def AArch64strict_fcmpe  : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
                                  [SDNPHasChain]>;

def AArch64any_fcmp      : PatFrags<(ops node:$lhs, node:$rhs),
                                    [(AArch64strict_fcmp node:$lhs, node:$rhs),
                                     (AArch64fcmp node:$lhs, node:$rhs)]>;
def AArch64dup        : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8   : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16  : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32  : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64  : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;

def AArch64insr       : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;

def AArch64zip1       : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2       : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1       : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2       : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1       : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2       : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit  : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl   : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl   : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi       : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov       : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16      : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32      : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64      : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext        : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr      : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr      : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl       : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli     : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli     : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui    : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri     : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri     : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64vsli       : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri       : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;

def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                           (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
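
// CMTST sets a lane to all-ones when (LHS & RHS) is non-zero in that lane,
// which is exactly NOT(CMEQz(AND(LHS, RHS))) as written above.
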
def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64fcvtxn_n: SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>;
def AArch64fcvtxnsdr: PatFrags<(ops node:$Rn),
                               [(f32 (int_aarch64_sisd_fcvtxn (f64 node:$Rn))),
                                (f32 (AArch64fcvtxn_n (f64 node:$Rn)))]>;
def AArch64fcvtxnv: PatFrags<(ops node:$Rn),
                             [(int_aarch64_neon_fcvtxn node:$Rn),
                              (AArch64fcvtxn_n node:$Rn)]>;
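
// FCVTXN narrows f64 to f32 using round-to-odd; doing the first step of an
// f64->f16 conversion this way avoids the double-rounding error that two
// successive round-to-nearest conversions would introduce.
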
//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                         [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                             [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                     SDNPVariadic]>;

def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;

def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;

def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                           SDTCisSameAs<1, 2>]>;
def AArch64pmull : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull,
                          [SDNPCommutative]>;
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
                          [SDNPCommutative]>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
                          [SDNPCommutative]>;
def AArch64frecpe  : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps  : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;

def AArch64sdot    : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot    : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;

def AArch64saddv   : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv   : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv   : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv   : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv   : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv   : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
def AArch64uaddlv  : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;
def AArch64saddlv  : SDNode<"AArch64ISD::SADDLV", SDT_AArch64uaddlp>;

def AArch64uabd    : PatFrags<(ops node:$lhs, node:$rhs),
                              [(abdu node:$lhs, node:$rhs),
                               (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
def AArch64sabd    : PatFrags<(ops node:$lhs, node:$rhs),
                              [(abds node:$lhs, node:$rhs),
                               (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;

def AArch64addp_n   : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
def AArch64addp     : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64addp_n node:$Rn, node:$Rm),
                                (int_aarch64_neon_addp node:$Rn, node:$Rm)]>;
def AArch64uaddlp   : PatFrags<(ops node:$src),
                               [(AArch64uaddlp_n node:$src),
                                (int_aarch64_neon_uaddlp node:$src)]>;
def AArch64saddlp   : PatFrags<(ops node:$src),
                               [(AArch64saddlp_n node:$src),
                                (int_aarch64_neon_saddlp node:$src)]>;
def AArch64faddp    : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64addp_n node:$Rn, node:$Rm),
                                (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
def AArch64rshrn : PatFrags<(ops node:$LHS, node:$RHS),
                            [(trunc (AArch64roundingvlshr node:$LHS, node:$RHS)),
                             (int_aarch64_neon_rshrn node:$LHS, node:$RHS)]>;
def AArch64facge : PatFrags<(ops node:$Rn, node:$Rm),
                            [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)),
                             (int_aarch64_neon_facge node:$Rn, node:$Rm)]>;
def AArch64facgt : PatFrags<(ops node:$Rn, node:$Rm),
                            [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)),
                             (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>;

def AArch64fmaxnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmax node:$Rn),
                               (int_aarch64_neon_fmaxnmv node:$Rn)]>;
def AArch64fminnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmin node:$Rn),
                               (int_aarch64_neon_fminnmv node:$Rn)]>;
def AArch64fmaxv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fmaximum node:$Rn),
                             (int_aarch64_neon_fmaxv node:$Rn)]>;
def AArch64fminv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fminimum node:$Rn),
                             (int_aarch64_neon_fminv node:$Rn)]>;
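
// Note the pairing above: vecreduce_fmax/vecreduce_fmin (fmaxnum/fminnum
// semantics, quiet NaNs ignored) select FMAXNMV/FMINNMV, while
// vecreduce_fmaximum/vecreduce_fminimum (IEEE-754 maximum/minimum semantics,
// NaNs propagated) select FMAXV/FMINV.
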
def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg   : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg  : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g  : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def SDT_AArch64unpk : SDTypeProfile<1, 1, [
    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
]>;
def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;

def AArch64ldp    : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldnp   : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp    : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stilp  : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp   : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;

def AArch64probedalloca
    : SDNode<"AArch64ISD::PROBED_ALLOCA",
             SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
             [SDNPHasChain, SDNPMayStore]>;

def AArch64mrs : SDNode<"AArch64ISD::MRS",
                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
                        [SDNPHasChain, SDNPOutGlue]>;

def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>;
def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>;
def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
                                [(AArch64rshrnb node:$rs, node:$i),
                                 (int_aarch64_sve_rshrnb node:$rs, node:$i)]>;

def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1,
                             [SDTCisInt<0>, SDTCisVec<1>]>, []>;
// Match an add node, and also treat an 'or' node as an 'add' if the or'ed
// operands have no common bits.
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
                         [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
   if (N->getOpcode() == ISD::ADD)
     return true;
   return CurDAG->isADDLike(SDValue(N,0));
}]> {
  let GISelPredicateCode = [{
     // Only handle G_ADD for now. FIXME: build the capability to compute
     // whether operands of G_OR have common bits set or not.
     return MI.getOpcode() == TargetOpcode::G_ADD;
  }];
}
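
// For example, (or (shl x, 3), 7) has no set bits in common between its
// operands, so it computes the same value as (add (shl x, 3), 7) and can
// reuse the add-based selection patterns.
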
// Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
}]>;
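
// When both operands have more than 32 sign bits, the full 64x64->64 multiply
// produces the same result as a 32x32->64 SMULL of the truncated operands, so
// the cheaper widening multiply can be selected.
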
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
// We could compute these on a per-module basis but doing so requires accessing
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
  def ForCodeSize   : Predicate<"shouldOptForSize(MF)">;
  def NotForCodeSize   : Predicate<"!shouldOptForSize(MF)">;
  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;

  // Register restrictions for indirect tail-calls:
  // - If branch target enforcement is enabled, indirect calls must use x16 or
  //   x17, because these are the only registers which can target the BTI C
  //   instruction.
  // - If PAuthLR is enabled, x16 is used in the epilogue to hold the address
  //   of the signing instruction. This can't be changed because it is used by a
  //   HINT instruction which only accepts x16. We can't load anything from the
  //   stack after this because the authentication instruction checks that SP is
  //   the same as it was at function entry, so we can't have anything on the
  //   stack.

  // BTI on, PAuthLR off: x16 or x17
  def TailCallX16X17 : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
  // BTI on, PAuthLR on: x17 only
  def TailCallX17 : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
  // BTI off, PAuthLR on: Any non-callee-saved register except x16
  def TailCallNotX16 : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
  // BTI off, PAuthLR off: Any non-callee-saved register
  def TailCallAny : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;

  def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
  // optimizing. This allows us to selectively use patterns without impacting
  // SelectionDAG's behaviour.
  // FIXME: One day there will probably be a nicer way to check for this, but
  // today is not that day.
  def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
}
include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"
include "SMEInstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//
let hasSideEffects = 1, isCodeGenOnly = 1 in {
let Defs = [SP], Uses = [SP] in {
// We set Sched to empty list because we expect these instructions to simply get
// removed in most cases.
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
                              Sched<[]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
                            Sched<[]>;
}

let Defs = [SP, NZCV], Uses = [SP] in {
// Probed stack allocation of a constant size, used in function prologues when
// stack-clash protection is enabled.
def PROBED_STACKALLOC : Pseudo<(outs GPR64:$scratch),
                               (ins i64imm:$stacksize, i64imm:$fixed_offset,
                                i64imm:$scalable_offset),
                               []>,
                               Sched<[]>;

// Probed stack allocation of a variable size, used in function prologues when
// stack-clash protection is enabled.
def PROBED_STACKALLOC_VAR : Pseudo<(outs),
                               (ins GPR64sp:$target),
                               []>,
                               Sched<[]>;

// Probed stack allocations of a variable size, used for allocas of unknown size
// when stack-clash protection is enabled.
let usesCustomInserter = 1 in
def PROBED_STACKALLOC_DYN : Pseudo<(outs),
                               (ins GPR64common:$target),
                               [(AArch64probedalloca GPR64common:$target)]>,
                               Sched<[]>;

} // Defs = [SP, NZCV], Uses = [SP] in
} // hasSideEffects = 1, isCodeGenOnly = 1
let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                                    tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                                    tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                                    tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                                    tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                                    tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                                    texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
// or together with an adrp into MOVaddr above. For cases with TLS, it
// might appear without either of them, so allow lowering it into a plain
// ADD.
def ADDlowTLS
    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
                                                tglobaltlsaddr:$low))]>,
      Sched<[WriteAdr]>;

} // isReMaterializable, isCodeGenOnly

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;
// In general these get lowered into a sequence of three 4-byte instructions.
// 32-bit jump table destination is actually only 2 instructions since we can
// use the table itself as a PC-relative base. But optimization occurs after
// branch relaxation so be pessimistic.
let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
    isNotDuplicable = 1 in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                     Sched<[]>;
}

// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
// the choice of target register.
let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
            Sched<[]>;
let hasSideEffects = 1, isCodeGenOnly = 1 in {
  def SpeculationSafeValueX
      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
  def SpeculationSafeValueW
      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
}

// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
  // This gets lowered to a pair of 4-byte instructions.
  let Size = 8 in
  def SpeculationBarrierISBDSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
  // This gets lowered to a 4-byte instruction.
  let Size = 4 in
  def SpeculationBarrierSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
}
//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"dgh",  (HINT 0b110)>;
def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless BTI is enabled.
def : InstAlias<"bti",  (HINT 32), 0>;
def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;

// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op",  (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
let mayLoad = ?, mayStore = ? in {
def DMB   : CRmSystemI<barrier_op, 0b101, "dmb",
                       [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;

def DSB   : CRmSystemI<barrier_op, 0b100, "dsb",
                       [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;

def ISB   : CRmSystemI<barrier_op, 0b110, "isb",
                       [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;

def TSB   : CRmSystemI<barrier_op, 0b010, "tsb", []> {
  let CRm        = 0b0010;   // CSYNC
  let Predicates = [HasTRACEV8_4];
}

def DSBnXS  : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
  let CRm{1-0}   = 0b11;
  let Inst{9-8}  = 0b10;
  let Predicates = [HasXS];
}
}

let Predicates = [HasWFxT] in {
def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
}
// Branch Record Buffer two-word mnemonic instructions
class BRBEI<bits<3> op2, string keyword>
    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
  let Inst{31-8} = 0b110101010000100101110010;
  let Inst{7-5} = op2;
  let Predicates = [HasBRBE];
}
def BRB_IALL: BRBEI<0b100, "\tiall">;
def BRB_INJ:  BRBEI<0b101, "\tinj">;

// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
def : TokenAlias<"INJ", "inj">;
def : TokenAlias<"IALL", "iall">;
1220 // ARMv9.4-A Guarded Control Stack
1221 class GCSNoOp<bits<3> op2, string mnemonic>
1222 : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> {
1223 let Inst{20-8} = 0b0100001110111;
1224 let Inst{7-5} = op2;
1225 let Predicates = [HasGCS];
1227 def GCSPUSHX : GCSNoOp<0b100, "gcspushx">;
1228 def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">;
1229 def GCSPOPX : GCSNoOp<0b110, "gcspopx">;
1231 class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
1232 list<dag> pattern = []>
1233 : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> {
1234 let Inst{20-19} = 0b01;
1235 let Inst{18-16} = op1;
1236 let Inst{15-8} = 0b01110111;
1237 let Inst{7-5} = op2;
1238 let Predicates = [HasGCS];
1241 def GCSSS1 : GCSRtIn<0b011, 0b010, "gcsss1">;
1242 def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;
1244 class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
1245 list<dag> pattern = []>
1246 : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
1247 let Inst{20-19} = 0b01;
1248 let Inst{18-16} = op1;
1249 let Inst{15-8} = 0b01110111;
1250 let Inst{7-5} = op2;
1251 let Predicates = [HasGCS];
1252 }
1254 def GCSSS2 : GCSRtOut<0b011, 0b011, "gcsss2">;
1255 def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
1256 def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
1258 def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
1259 def GCSB_DSYNC : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>;
1261 def : TokenAlias<"DSYNC", "dsync">;
1263 let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
1264 def CHKFEAT : SystemNoOperands<0b000, "hint\t#40">;
1265 }
1266 def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
1267 def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;
1269 class GCSSt<string mnemonic, bits<3> op>
1270 : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, [$Rn]", "", []>, Sched<[]> {
1273 let Inst{31-15} = 0b11011001000111110;
1274 let Inst{14-12} = op;
1275 let Inst{11-10} = 0b11;
1278 let Predicates = [HasGCS];
1279 }
1280 def GCSSTR : GCSSt<"gcsstr", 0b000>;
1281 def GCSSTTR : GCSSt<"gcssttr", 0b001>;
1284 // ARMv8.2-A Dot Product
1285 let Predicates = [HasDotProd] in {
1286 defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
1287 defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
1288 defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
1289 defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
1290 }
1293 let Predicates = [HasNEON, HasBF16] in {
1294 defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">;
1295 defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
1296 def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
1297 def BFMLALB : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
1298 def BFMLALT : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1299 def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
1300 def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1301 def BFCVTN : SIMD_BFCVTN;
1302 def BFCVTN2 : SIMD_BFCVTN2;
1304 def : Pat<(v4bf16 (any_fpround (v4f32 V128:$Rn))),
1305 (EXTRACT_SUBREG (BFCVTN V128:$Rn), dsub)>;
1307 // Vector-scalar BFDOT:
1308 // The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
1309 // register (the instruction uses a single 32-bit lane from it), so the pattern
1310 // is a bit more complicated.
1311 def : Pat<(v2f32 (int_aarch64_neon_bfdot
1312 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
1313 (v4bf16 (bitconvert
1314 (v2i32 (AArch64duplane32
1315 (v4i32 (bitconvert
1316 (v8bf16 (insert_subvector undef,
1317 (v4bf16 V64:$Rm),
1318 (i64 0))))),
1319 VectorIndexS:$idx)))))),
1320 (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
1321 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
1322 VectorIndexS:$idx)>;
1323 }
1325 let Predicates = [HasNEONorSME, HasBF16] in {
1326 def BFCVT : BF16ToSinglePrecision<"bfcvt">;
1327 // Round FP32 to BF16.
1328 def : Pat<(bf16 (any_fpround (f32 FPR32:$Rn))), (BFCVT $Rn)>;
1329 }
1331 // ARMv8.6A AArch64 matrix multiplication
1332 let Predicates = [HasMatMulInt8] in {
1333 def SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
1334 def UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
1335 def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
1336 defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
1337 defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;
1339 // sudot lane has a pattern where usdot is expected (there is no sudot).
1340 // The second operand is used in the dup operation to repeat the indexed
1341 // element.
1342 class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
1343 string rhs_kind, RegisterOperand RegType,
1344 ValueType AccumType, ValueType InputType>
1345 : BaseSIMDThreeSameVectorIndexS<Q, 0, 0b00, 0b1111, "sudot", dst_kind,
1346 lhs_kind, rhs_kind, RegType, AccumType,
1347 InputType, null_frag> {
1348 let Pattern = [(set (AccumType RegType:$dst),
1349 (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
1350 (InputType (bitconvert (AccumType
1351 (AArch64duplane32 (v4i32 V128:$Rm),
1352 VectorIndexS:$idx)))),
1353 (InputType RegType:$Rn))))];
1354 }
1356 multiclass SIMDSUDOTIndex {
1357 def v8i8 : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
1358 def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
1359 }
1361 defm SUDOTlane : SIMDSUDOTIndex;
1363 } // HasMatMulInt8
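// Clarifying example: a source-level "sudot v0.2s, v1.8b, v2.4b[0]" arrives
// here as int_aarch64_neon_usdot with the multiplicand roles swapped, which is
// why BaseSIMDSUDOTIndex matches usdot taking the dup of the indexed lane as
// its first multiplicand and $Rn as its second.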
1365 // ARMv8.2-A FP16 Fused Multiply-Add Long
1366 let Predicates = [HasNEON, HasFP16FML] in {
1367 defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
1368 defm FMLSL : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
1369 defm FMLAL2 : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
1370 defm FMLSL2 : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
1371 defm FMLALlane : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
1372 defm FMLSLlane : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
1373 defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
1374 defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
1375 }
1377 // Armv8.2-A Crypto extensions
1378 let Predicates = [HasSHA3] in {
1379 def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">;
1380 def SHA512H2 : CryptoRRRTied<0b0, 0b01, "sha512h2">;
1381 def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
1382 def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
1383 def RAX1 : CryptoRRR_2D<0b0,0b11, "rax1">;
1384 def EOR3 : CryptoRRRR_16B<0b00, "eor3">;
1385 def BCAX : CryptoRRRR_16B<0b01, "bcax">;
1386 def XAR : CryptoRRRi6<"xar">;
1388 class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
1389 : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
1390 (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;
1392 def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
1393 (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1395 def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
1396 def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
1397 def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;
1399 def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
1400 def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
1401 def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
1402 def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
1404 class EOR3_pattern<ValueType VecTy>
1405 : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
1406 (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
1408 def : EOR3_pattern<v16i8>;
1409 def : EOR3_pattern<v8i16>;
1410 def : EOR3_pattern<v4i32>;
1411 def : EOR3_pattern<v2i64>;
1413 class BCAX_pattern<ValueType VecTy>
1414 : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
1415 (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
1417 def : BCAX_pattern<v16i8>;
1418 def : BCAX_pattern<v8i16>;
1419 def : BCAX_pattern<v4i32>;
1420 def : BCAX_pattern<v2i64>;
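// Restated for clarity: EOR3 is a three-way XOR ("eor3 v0.16b, v1.16b,
// v2.16b, v3.16b" computes v1 ^ v2 ^ v3) and BCAX is bit-clear-and-XOR
// (Vn ^ (Vm & ~Va)), which is exactly the shape of the generic xor/and/vnot
// DAGs matched above.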
1422 def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
1423 def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
1424 def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
1425 def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;
1427 def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
1428 def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
1429 def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
1430 def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;
1432 def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
1433 def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
1434 def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
1435 def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;
1437 def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
1438 (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1440 def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
1441 (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
1443 def : Pat<(xor (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))),
1444 (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1446 } // HasSHA3
1448 let Predicates = [HasSM4] in {
1449 def SM3TT1A : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
1450 def SM3TT1B : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
1451 def SM3TT2A : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
1452 def SM3TT2B : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
1453 def SM3SS1 : CryptoRRRR_4S<0b10, "sm3ss1">;
1454 def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
1455 def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
1456 def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
1457 def SM4E : CryptoRRTied_4S<0b0, 0b01, "sm4e">;
1459 def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
1460 (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;
1462 class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
1463 : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
1464 (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
1466 class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
1467 : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )),
1468 (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>;
1470 class SM4_pattern<Instruction INST, Intrinsic OpNode>
1471 : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
1472 (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
1474 def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>;
1475 def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>;
1477 def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>;
1478 def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>;
1479 def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>;
1480 def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>;
1482 def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>;
1483 def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>;
1484 } // HasSM4
1486 let Predicates = [HasRCPC] in {
1487 // v8.3 Release Consistent Processor Consistent support, optional in v8.2.
1488 def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>;
1489 def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>;
1490 def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>;
1491 def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>;
1492 }
1494 // v8.3a complex add and multiply-accumulate. No predicate here, that is done
1495 // inside the multiclass as the FP16 versions need different predicates.
1496 defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
1497 "fcmla", null_frag>;
1498 defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
1499 "fcadd", null_frag>;
1500 defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;
1502 let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
1503 def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
1504 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
1505 def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
1506 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
1507 def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
1508 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
1509 def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
1510 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
1511 }
1513 let Predicates = [HasComplxNum, HasNEON] in {
1514 def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
1515 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
1516 def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
1517 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
1518 foreach Ty = [v4f32, v2f64] in {
1519 def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
1520 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
1521 def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
1522 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
1523 }
1524 }
1526 multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> {
1527 def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1528 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
1529 def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1530 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
1531 def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1532 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
1533 def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1534 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
1535 }
1537 multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> {
1538 def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1539 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
1540 def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1541 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
1542 def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1543 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
1544 def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1545 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
1546 }
1549 let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
1550 defm : FCMLA_PATS<v4f16, V64>;
1551 defm : FCMLA_PATS<v8f16, V128>;
1553 defm : FCMLA_LANE_PATS<v4f16, V64,
1554 (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
1555 defm : FCMLA_LANE_PATS<v8f16, V128,
1556 (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
1557 }
1558 let Predicates = [HasComplxNum, HasNEON] in {
1559 defm : FCMLA_PATS<v2f32, V64>;
1560 defm : FCMLA_PATS<v4f32, V128>;
1561 defm : FCMLA_PATS<v2f64, V128>;
1563 defm : FCMLA_LANE_PATS<v4f32, V128,
1564 (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
1565 }
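// Note on the rotation operand (illustrative): the #0/#90/#180/#270 rotations
// map to immediate indices 0/1/2/3, so e.g. int_aarch64_neon_vcmla_rot90
// selects "fcmla vd, vn, vm, #90" through the index-1 patterns above.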
1567 // v8.3a Pointer Authentication
1568 // These instructions inhabit part of the hint space and so can be used for
1569 // armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
1570 // important for compatibility with other assemblers (e.g. GAS) when building
1571 // software compatible with both CPUs that do or don't implement PA.
1572 let Uses = [LR], Defs = [LR] in {
1573 def PACIAZ : SystemNoOperands<0b000, "hint\t#24">;
1574 def PACIBZ : SystemNoOperands<0b010, "hint\t#26">;
1575 let isAuthenticated = 1 in {
1576 def AUTIAZ : SystemNoOperands<0b100, "hint\t#28">;
1577 def AUTIBZ : SystemNoOperands<0b110, "hint\t#30">;
1578 }
1579 }
1580 let Uses = [LR, SP], Defs = [LR] in {
1581 def PACIASP : SystemNoOperands<0b001, "hint\t#25">;
1582 def PACIBSP : SystemNoOperands<0b011, "hint\t#27">;
1583 let isAuthenticated = 1 in {
1584 def AUTIASP : SystemNoOperands<0b101, "hint\t#29">;
1585 def AUTIBSP : SystemNoOperands<0b111, "hint\t#31">;
1586 }
1587 }
1588 let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
1589 def PACIA1716 : SystemNoOperands<0b000, "hint\t#8">;
1590 def PACIB1716 : SystemNoOperands<0b010, "hint\t#10">;
1591 let isAuthenticated = 1 in {
1592 def AUTIA1716 : SystemNoOperands<0b100, "hint\t#12">;
1593 def AUTIB1716 : SystemNoOperands<0b110, "hint\t#14">;
1594 }
1595 }
1597 let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
1598 def XPACLRI : SystemNoOperands<0b111, "hint\t#7">;
1599 }
1601 // In order to be able to write readable assembly, LLVM should accept assembly
1602 // inputs that use pointer authentication mnemonics, even with PA disabled.
1603 // However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1604 // should not emit these mnemonics unless PA is enabled.
1605 def : InstAlias<"paciaz", (PACIAZ), 0>;
1606 def : InstAlias<"pacibz", (PACIBZ), 0>;
1607 def : InstAlias<"autiaz", (AUTIAZ), 0>;
1608 def : InstAlias<"autibz", (AUTIBZ), 0>;
1609 def : InstAlias<"paciasp", (PACIASP), 0>;
1610 def : InstAlias<"pacibsp", (PACIBSP), 0>;
1611 def : InstAlias<"autiasp", (AUTIASP), 0>;
1612 def : InstAlias<"autibsp", (AUTIBSP), 0>;
1613 def : InstAlias<"pacia1716", (PACIA1716), 0>;
1614 def : InstAlias<"pacib1716", (PACIB1716), 0>;
1615 def : InstAlias<"autia1716", (AUTIA1716), 0>;
1616 def : InstAlias<"autib1716", (AUTIB1716), 0>;
1617 def : InstAlias<"xpaclri", (XPACLRI), 0>;
1621 let Uses = [LR, SP], Defs = [LR] in {
1622 // Insertion point of LR signing code.
1623 def PAUTH_PROLOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
1624 // Insertion point of LR authentication code.
1625 // The RET terminator of the containing machine basic block may be replaced
1626 // with a combined RETA(A|B) instruction when rewriting this Pseudo.
1627 def PAUTH_EPILOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
1628 }
1630 def PAUTH_BLEND : Pseudo<(outs GPR64:$disc),
1631 (ins GPR64:$addr_disc, i32imm:$int_disc), []>, Sched<[]>;
1633 // These pointer authentication instructions require armv8.3a
1634 let Predicates = [HasPAuth] in {
1636 // When PA is enabled, a better mnemonic should be emitted.
1637 def : InstAlias<"paciaz", (PACIAZ), 1>;
1638 def : InstAlias<"pacibz", (PACIBZ), 1>;
1639 def : InstAlias<"autiaz", (AUTIAZ), 1>;
1640 def : InstAlias<"autibz", (AUTIBZ), 1>;
1641 def : InstAlias<"paciasp", (PACIASP), 1>;
1642 def : InstAlias<"pacibsp", (PACIBSP), 1>;
1643 def : InstAlias<"autiasp", (AUTIASP), 1>;
1644 def : InstAlias<"autibsp", (AUTIBSP), 1>;
1645 def : InstAlias<"pacia1716", (PACIA1716), 1>;
1646 def : InstAlias<"pacib1716", (PACIB1716), 1>;
1647 def : InstAlias<"autia1716", (AUTIA1716), 1>;
1648 def : InstAlias<"autib1716", (AUTIB1716), 1>;
1649 def : InstAlias<"xpaclri", (XPACLRI), 1>;
1651 multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm,
1652 SDPatternOperator op> {
1653 def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia"), op>;
1654 def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib"), op>;
1655 def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da"), op>;
1656 def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db"), op>;
1657 def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza"), op>;
1658 def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza"), op>;
1659 def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb"), op>;
1660 def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>;
1661 }
1663 defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
1664 defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
1666 def XPACI : ClearAuth<0, "xpaci">;
1667 def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>;
1668 def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>;
1670 def XPACD : ClearAuth<1, "xpacd">;
1671 def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>;
1672 def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>;
1674 def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>;
1676 // Combined Instructions
1677 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
1678 def BRAA : AuthBranchTwoOperands<0, 0, "braa">;
1679 def BRAB : AuthBranchTwoOperands<0, 1, "brab">;
1680 }
1681 let isCall = 1, Defs = [LR], Uses = [SP] in {
1682 def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">;
1683 def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">;
1684 }
1686 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
1687 def BRAAZ : AuthOneOperand<0b000, 0, "braaz">;
1688 def BRABZ : AuthOneOperand<0b000, 1, "brabz">;
1689 }
1690 let isCall = 1, Defs = [LR], Uses = [SP] in {
1691 def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">;
1692 def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">;
1693 }
1695 let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1696 def RETAA : AuthReturn<0b010, 0, "retaa">;
1697 def RETAB : AuthReturn<0b010, 1, "retab">;
1698 def ERETAA : AuthReturn<0b100, 0, "eretaa">;
1699 def ERETAB : AuthReturn<0b100, 1, "eretab">;
1700 }
1702 defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>;
1703 defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>;
1704 } // HasPAuth
1707 // v9.5-A pointer authentication extensions
1709 // Always accept "pacm" as an alias for "hint #39", but don't emit it when
1710 // disassembling if we don't have the pauth-lr feature.
1711 let CRm = 0b0100 in {
1712 def PACM : SystemNoOperands<0b111, "hint\t#39">;
1713 }
1714 def : InstAlias<"pacm", (PACM), 0>;
1716 let Predicates = [HasPAuthLR] in {
1717 let Defs = [LR], Uses = [LR, SP] in {
1718 // opcode2, opcode, asm
1719 def PACIASPPC : SignAuthFixedRegs<0b00001, 0b101000, "paciasppc">;
1720 def PACIBSPPC : SignAuthFixedRegs<0b00001, 0b101001, "pacibsppc">;
1721 def PACNBIASPPC : SignAuthFixedRegs<0b00001, 0b100000, "pacnbiasppc">;
1722 def PACNBIBSPPC : SignAuthFixedRegs<0b00001, 0b100001, "pacnbibsppc">;
1724 def AUTIASPPCi : SignAuthPCRel<0b00, "autiasppc">;
1725 def AUTIBSPPCi : SignAuthPCRel<0b01, "autibsppc">;
1726 // opcode2, opcode, asm
1727 def AUTIASPPCr : SignAuthOneReg<0b00001, 0b100100, "autiasppc">;
1728 def AUTIBSPPCr : SignAuthOneReg<0b00001, 0b100101, "autibsppc">;
1729 // opcode2, opcode, asm
1730 def PACIA171615 : SignAuthFixedRegs<0b00001, 0b100010, "pacia171615">;
1731 def PACIB171615 : SignAuthFixedRegs<0b00001, 0b100011, "pacib171615">;
1732 def AUTIA171615 : SignAuthFixedRegs<0b00001, 0b101110, "autia171615">;
1733 def AUTIB171615 : SignAuthFixedRegs<0b00001, 0b101111, "autib171615">;
1734 }
1736 let Uses = [LR, SP], isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1738 def RETAASPPCi : SignAuthReturnPCRel<0b000, 0b11111, "retaasppc">;
1739 def RETABSPPCi : SignAuthReturnPCRel<0b001, 0b11111, "retabsppc">;
1741 def RETAASPPCr : SignAuthReturnReg<0b000010, "retaasppc">;
1742 def RETABSPPCr : SignAuthReturnReg<0b000011, "retabsppc">;
1743 }
1744 def : InstAlias<"pacm", (PACM), 1>;
1748 // v8.3a floating point conversion for javascript
1749 let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
1750 def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
1751 "fjcvtzs", "",
1752 [(set GPR32:$Rt,
1753 (int_aarch64_fjcvtzs FPR64:$Rn))]> {
1755 } // HasJS, HasFPARMv8
1757 // v8.4 Flag manipulation instructions
1758 let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
1759 def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
1760 let Inst{20-5} = 0b0000001000000000;
1761 }
1762 def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
1763 def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
1764 def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
1765 "{\t$Rn, $imm, $mask}">;
1768 // v8.5 flag manipulation instructions
1769 let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
1771 def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
1772 let Inst{18-16} = 0b000;
1773 let Inst{11-8} = 0b0000;
1774 let Unpredictable{11-8} = 0b1111;
1775 let Inst{7-5} = 0b001;
1776 }
1778 def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
1779 let Inst{18-16} = 0b000;
1780 let Inst{11-8} = 0b0000;
1781 let Unpredictable{11-8} = 0b1111;
1782 let Inst{7-5} = 0b010;
1783 }
1784 } // HasAltNZCV
1787 // Armv8.5-A speculation barrier
1788 def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
1789 let Inst{20-5} = 0b0001100110000111;
1790 let Unpredictable{11-8} = 0b1111;
1791 let Predicates = [HasSB];
1792 let hasSideEffects = 1;
1793 }
1795 def : InstAlias<"clrex", (CLREX 0xf)>;
1796 def : InstAlias<"isb", (ISB 0xf)>;
1797 def : InstAlias<"ssbb", (DSB 0)>;
1798 def : InstAlias<"pssbb", (DSB 4)>;
1799 def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>;
1803 def MSRpstateImm1 : MSRpstateImm0_1;
1804 def MSRpstateImm4 : MSRpstateImm0_15;
1806 def : Pat<(AArch64mrs imm:$id),
1807 (MRS imm:$id)>;
1809 // The thread pointer (on Linux, at least, where this has been implemented) is
1810 // some sort of system register.
1811 def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
1812 [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
1814 // This gets lowered into a 24-byte instruction sequence
1815 let Defs = [ X9, X16, X17, NZCV ], Size = 24 in {
1816 def KCFI_CHECK : Pseudo<
1817 (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>;
1818 }
1820 let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
1821 def HWASAN_CHECK_MEMACCESS : Pseudo<
1822 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
1823 [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
1824 Sched<[]>;
1825 }
1827 let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
1828 def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
1829 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
1830 [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
1831 Sched<[]>;
1832 }
1834 let Defs = [ X16, X17, LR, NZCV ] in {
1835 def HWASAN_CHECK_MEMACCESS_FIXEDSHADOW : Pseudo<
1836 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo, i64imm:$fixed_shadow),
1837 [(int_hwasan_check_memaccess_fixedshadow GPR64noip:$ptr, (i32 timm:$accessinfo), (i64 timm:$fixed_shadow))]>,
1838 Sched<[]>;
1839 }
1841 let Defs = [ X16, X17, LR, NZCV ] in {
1842 def HWASAN_CHECK_MEMACCESS_SHORTGRANULES_FIXEDSHADOW : Pseudo<
1843 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo, i64imm:$fixed_shadow),
1844 [(int_hwasan_check_memaccess_shortgranules_fixedshadow GPR64noip:$ptr, (i32 timm:$accessinfo), (i64 timm:$fixed_shadow))]>,
1845 Sched<[]>;
1846 }
1848 // The virtual cycle counter register is CNTVCT_EL0.
1849 def : Pat<(readcyclecounter), (MRS 0xdf02)>;
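// Worked example of the packed sysreg immediate (op0:op1:CRn:CRm:op2 fields
// per the Arm ARM): CNTVCT_EL0 is op0=3, op1=3, CRn=14, CRm=0, op2=2, i.e.
// (3<<14) | (3<<11) | (14<<7) | (0<<3) | 2 = 0xdf02, the constant used above.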
1851 // FPCR and FPSR registers.
1852 let Uses = [FPCR] in
1853 def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins),
1854 [(set GPR64:$dst, (int_aarch64_get_fpcr))]>,
1855 PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>,
1856 Sched<[WriteSys]>;
1857 let Defs = [FPCR] in
1858 def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val),
1859 [(int_aarch64_set_fpcr i64:$val)]>,
1860 PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>,
1861 Sched<[WriteSys]>;
1863 let Uses = [FPSR] in
1864 def MRS_FPSR : Pseudo<(outs GPR64:$dst), (ins),
1865 [(set GPR64:$dst, (int_aarch64_get_fpsr))]>,
1866 PseudoInstExpansion<(MRS GPR64:$dst, 0xda21)>,
1867 Sched<[WriteSys]>;
1868 let Defs = [FPSR] in
1869 def MSR_FPSR : Pseudo<(outs), (ins GPR64:$val),
1870 [(int_aarch64_set_fpsr i64:$val)]>,
1871 PseudoInstExpansion<(MSR 0xda21, GPR64:$val)>,
1872 Sched<[WriteSys]>;
1874 // Generic system instructions
1875 def SYSxt : SystemXtI<0, "sys">;
1876 def SYSLxt : SystemLXtI<1, "sysl">;
1878 def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
1879 (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
1880 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
1883 let Predicates = [HasTME] in {
1885 def TSTART : TMSystemI<0b0000, "tstart",
1886 [(set GPR64:$Rt, (int_aarch64_tstart))]>;
1888 def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;
1890 def TCANCEL : TMSystemException<0b011, "tcancel",
1891 [(int_aarch64_tcancel timm64_0_65535:$imm)]>;
1893 def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
1896 }
1897 } // HasTME
1899 //===----------------------------------------------------------------------===//
1900 // Move immediate instructions.
1901 //===----------------------------------------------------------------------===//
1903 defm MOVK : InsertImmediate<0b11, "movk">;
1904 defm MOVN : MoveImmediate<0b00, "movn">;
1906 let PostEncoderMethod = "fixMOVZ" in
1907 defm MOVZ : MoveImmediate<0b10, "movz">;
1909 // First group of aliases covers an implicit "lsl #0".
1910 def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>;
1911 def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>;
1912 def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
1913 def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
1914 def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
1915 def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
1917 // Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
1918 def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
1919 def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
1920 def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
1921 def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
1923 def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
1924 def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
1925 def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
1926 def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
1928 def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>;
1929 def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>;
1930 def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>;
1931 def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>;
1933 def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
1934 def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
1936 def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
1937 def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
1939 def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>;
1940 def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>;
1942 // Final group of aliases covers true "mov $Rd, $imm" cases.
1943 multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR,
1944 int width, int shift> {
1945 def _asmoperand : AsmOperandClass {
1946 let Name = basename # width # "_lsl" # shift # "MovAlias";
1947 let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
1948 # shift # ">";
1949 let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
1950 }
1952 def _movimm : Operand<i32> {
1953 let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
1954 }
1956 def : InstAlias<"mov $Rd, $imm",
1957 (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
1958 }
1960 defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
1961 defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;
1963 defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
1964 defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
1965 defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
1966 defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;
1968 defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
1969 defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;
1971 defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
1972 defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
1973 defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
1974 defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;
1976 let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
1977 isAsCheapAsAMove = 1 in {
1978 // FIXME: The following pseudo instructions are only needed because remat
1979 // cannot handle multiple instructions. When that changes, we can select
1980 // directly to the real instructions and get rid of these pseudos.
1982 def MOVi32imm
1983 : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
1984 [(set GPR32:$dst, imm:$src)]>,
1985 Sched<[WriteImm]>;
1986 def MOVi64imm
1987 : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
1988 [(set GPR64:$dst, imm:$src)]>,
1989 Sched<[WriteImm]>;
1990 } // isReMaterializable, isCodeGenOnly
1992 // If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
1993 // eventual expansion code fewer bits to worry about getting right. Marshalling
1994 // the types is a little tricky though:
1995 def i64imm_32bit : ImmLeaf<i64, [{
1996 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
1997 }]>;
1999 def s64imm_32bit : ImmLeaf<i64, [{
2000 int64_t Imm64 = static_cast<int64_t>(Imm);
2001 return Imm64 >= std::numeric_limits<int32_t>::min() &&
2002 Imm64 <= std::numeric_limits<int32_t>::max();
2003 }]>;
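// Illustrative boundary cases: 0x80000000 satisfies i64imm_32bit (upper 32
// bits clear) but not s64imm_32bit (greater than INT32_MAX), while -1
// satisfies s64imm_32bit but not i64imm_32bit (upper 32 bits set).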
2005 def trunc_imm : SDNodeXForm<imm, [{
2006 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
2007 }]>;
2009 def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
2010 GISDNodeXFormEquiv<trunc_imm>;
2012 let Predicates = [OptimizedGISelOrOtherSelector] in {
2013 // The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
2014 // copies.
2015 def : Pat<(i64 i64imm_32bit:$src),
2016 (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
2017 }
2019 // Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
2020 def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
2021 return CurDAG->getTargetConstant(
2022 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
2023 }]>;
2025 def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
2026 return CurDAG->getTargetConstant(
2027 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
2028 }]>;
2031 def : Pat<(f32 fpimm:$in),
2032 (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
2033 def : Pat<(f64 fpimm:$in),
2034 (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
2037 // Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
2038 // sequences.
2039 def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
2040 tglobaladdr:$g1, tglobaladdr:$g0),
2041 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
2042 tglobaladdr:$g1, 16),
2043 tglobaladdr:$g2, 32),
2044 tglobaladdr:$g3, 48)>;
2046 def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
2047 tblockaddress:$g1, tblockaddress:$g0),
2048 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
2049 tblockaddress:$g1, 16),
2050 tblockaddress:$g2, 32),
2051 tblockaddress:$g3, 48)>;
2053 def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
2054 tconstpool:$g1, tconstpool:$g0),
2055 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
2056 tconstpool:$g1, 16),
2057 tconstpool:$g2, 32),
2058 tconstpool:$g3, 48)>;
2060 def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
2061 tjumptable:$g1, tjumptable:$g0),
2062 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
2063 tjumptable:$g1, 16),
2064 tjumptable:$g2, 32),
2065 tjumptable:$g3, 48)>;
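// Summary note (added for clarity): each pattern above expands to the
// canonical large-code-model sequence, one movz for the g0 slice followed by
// three movk instructions inserting the g1, g2 and g3 slices at
// lsl #16/#32/#48.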
2068 //===----------------------------------------------------------------------===//
2069 // Arithmetic instructions.
2070 //===----------------------------------------------------------------------===//
2072 // Add/subtract with carry.
2073 defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
2074 defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;
2076 def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>;
2077 def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>;
2078 def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
2079 def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
2082 defm ADD : AddSub<0, "add", "sub", add>;
2083 defm SUB : AddSub<1, "sub", "add">;
2085 def : InstAlias<"mov $dst, $src",
2086 (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
2087 def : InstAlias<"mov $dst, $src",
2088 (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
2089 def : InstAlias<"mov $dst, $src",
2090 (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
2091 def : InstAlias<"mov $dst, $src",
2092 (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
2094 defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
2095 defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
2097 def copyFromSP: PatLeaf<(i64 GPR64:$src), [{
2098 return N->getOpcode() == ISD::CopyFromReg &&
2099 cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP;
2100 }]>;
2102 // Use SUBS instead of SUB to enable CSE between SUBS and SUB.
2103 def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
2104 (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
2105 def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
2106 (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
2107 def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
2108 (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
2109 def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
2110 (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
2111 def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
2112 (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
2113 def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
2114 (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
2115 let AddedComplexity = 1 in {
2116 def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
2117 (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
2118 def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
2119 (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
2120 def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)),
2121 (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>;
2122 }
2124 // Because of the immediate format for add/sub-imm instructions, the
2125 // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
2126 // These patterns capture that transformation.
2127 let AddedComplexity = 1 in {
2128 def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2129 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2130 def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2131 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2132 def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2133 (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2134 def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2135 (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2136 }
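// E.g. "add w0, w1, #-1" has no direct encoding (add/sub immediates are
// unsigned 12-bit, optionally shifted), so the patterns above select it as a
// sub with immediate 1 instead (illustrative).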
2138 // Because of the immediate format for add/sub-imm instructions, the
2139 // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
2140 // These patterns capture that transformation.
2141 let AddedComplexity = 1 in {
2142 def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2143 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2144 def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2145 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2146 def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2147 (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2148 def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2149 (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2150 }
2152 def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
2153 def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
2154 def : InstAlias<"neg $dst, $src$shift",
2155 (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
2156 def : InstAlias<"neg $dst, $src$shift",
2157 (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
2159 def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
2160 def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
2161 def : InstAlias<"negs $dst, $src$shift",
2162 (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
2163 def : InstAlias<"negs $dst, $src$shift",
2164 (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
2167 // Unsigned/Signed divide
2168 defm UDIV : Div<0, "udiv", udiv>;
2169 defm SDIV : Div<1, "sdiv", sdiv>;
2171 def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
2172 def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
2173 def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
2174 def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;
2177 defm ASRV : Shift<0b10, "asr", sra>;
2178 defm LSLV : Shift<0b00, "lsl", shl>;
2179 defm LSRV : Shift<0b01, "lsr", srl>;
2180 defm RORV : Shift<0b11, "ror", rotr>;
2182 def : ShiftAlias<"asrv", ASRVWr, GPR32>;
2183 def : ShiftAlias<"asrv", ASRVXr, GPR64>;
2184 def : ShiftAlias<"lslv", LSLVWr, GPR32>;
2185 def : ShiftAlias<"lslv", LSLVXr, GPR64>;
2186 def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
2187 def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
2188 def : ShiftAlias<"rorv", RORVWr, GPR32>;
2189 def : ShiftAlias<"rorv", RORVXr, GPR64>;
2192 let AddedComplexity = 5 in {
2193 defm MADD : MulAccum<0, "madd">;
2194 defm MSUB : MulAccum<1, "msub">;
2196 def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
2197 (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
2198 def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
2199 (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
2201 def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
2202 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
2203 def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
2204 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
2205 def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
2206 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
2207 def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
2208 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
2209 } // AddedComplexity = 5
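// There is no plain "mul" instruction: as the patterns above show,
// "mul w0, w1, w2" is MADD with the zero register as accumulator
// ("madd w0, w1, w2, wzr"), and "mneg" is the corresponding MSUB form.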
2211 let AddedComplexity = 5 in {
2212 def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
2213 def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
2214 def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
2215 def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
2217 def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
2218 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2219 def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
2220 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2221 def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
2222 (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2223 def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
2224 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2225 def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
2226 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2227 def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
2228 (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2230 def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
2231 (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2232 def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
2233 (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2235 def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
2236 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2237 def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
2238 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2239 def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
2240 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2241 (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2243 def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
2244 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2245 def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
2246 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2247 def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
2248 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2249 (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2251 def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
2252 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2253 def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
2254 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2255 def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
2257 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2258 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2260 def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
2261 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2262 def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
2263 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2264 def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
2265 (s64imm_32bit:$C)))),
2266 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2267 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2269 def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)),
2270 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2271 def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))),
2272 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2274 def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)),
2275 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2276 def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)),
2277 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2279 def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
2280 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2281 def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
2282 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2284 def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
2285 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2286 def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
2287 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2289 def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)),
2290 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2291 def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))),
2292 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2294 def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)),
2295 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2296 def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)),
2297 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2299 def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))),
2300 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2301 def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
2302 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2304 def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))),
2305 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2306 def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
2307 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2308 } // AddedComplexity = 5
2310 def : MulAccumWAlias<"mul", MADDWrrr>;
2311 def : MulAccumXAlias<"mul", MADDXrrr>;
2312 def : MulAccumWAlias<"mneg", MSUBWrrr>;
2313 def : MulAccumXAlias<"mneg", MSUBXrrr>;
2314 def : WideMulAccumAlias<"smull", SMADDLrrr>;
2315 def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
2316 def : WideMulAccumAlias<"umull", UMADDLrrr>;
2317 def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;
2320 def SMULHrr : MulHi<0b010, "smulh", mulhs>;
2321 def UMULHrr : MulHi<0b110, "umulh", mulhu>;
2324 def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
2325 def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
2326 def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
2327 def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;
2329 def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
2330 def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
2331 def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
2332 def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
2335 defm CAS : CompareAndSwap<0, 0, "">;
2336 defm CASA : CompareAndSwap<1, 0, "a">;
2337 defm CASL : CompareAndSwap<0, 1, "l">;
2338 defm CASAL : CompareAndSwap<1, 1, "al">;
2341 defm CASP : CompareAndSwapPair<0, 0, "">;
2342 defm CASPA : CompareAndSwapPair<1, 0, "a">;
2343 defm CASPL : CompareAndSwapPair<0, 1, "l">;
2344 defm CASPAL : CompareAndSwapPair<1, 1, "al">;
2347 defm SWP : Swap<0, 0, "">;
2348 defm SWPA : Swap<1, 0, "a">;
2349 defm SWPL : Swap<0, 1, "l">;
2350 defm SWPAL : Swap<1, 1, "al">;
2352 // v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register)
2353 defm LDADD : LDOPregister<0b000, "add", 0, 0, "">;
2354 defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">;
2355 defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">;
2356 defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;
2358 defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">;
2359 defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">;
2360 defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">;
2361 defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;
2363 defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">;
2364 defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">;
2365 defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">;
2366 defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;
2368 defm LDSET : LDOPregister<0b011, "set", 0, 0, "">;
2369 defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">;
2370 defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">;
2371 defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;
2373 defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">;
2374 defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">;
2375 defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">;
2376 defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;
2378 defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">;
2379 defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">;
2380 defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">;
2381 defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;
2383 defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">;
2384 defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">;
2385 defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">;
2386 defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;
2388 defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">;
2389 defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">;
2390 defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">;
2391 defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
2393 // v8.1 atomic ST<OP>(register): aliases of LD<OP>(register) with Rt=XZR.
2394 defm : STOPregister<"stadd","LDADD">; // STADDx
2395 defm : STOPregister<"stclr","LDCLR">; // STCLRx
2396 defm : STOPregister<"steor","LDEOR">; // STEORx
2397 defm : STOPregister<"stset","LDSET">; // STSETx
2398 defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx
2399 defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
2400 defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
2401 defm : STOPregister<"stumin","LDUMIN">;// STUMINx
2403 // v8.5 Memory Tagging Extension
2404 let Predicates = [HasMTE] in {
2406 def IRG : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg",
2407 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>;
2409 def GMI : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi",
2410 int_aarch64_gmi, GPR64sp>, Sched<[]> {
2411 let isNotDuplicable = 1;
2412 }
2413 def ADDG : AddSubG<0, "addg", null_frag>;
2414 def SUBG : AddSubG<1, "subg", null_frag>;
2416 def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
2418 def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;
2419 def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
2420 let Defs = [NZCV];
2421 }
2423 def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;
2425 def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;
2427 def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4),
2428 (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>;
2429 def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
2430 (LDG GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
2432 def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
2434 def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
2435 (outs GPR64:$Rt), (ins GPR64sp:$Rn)>;
2436 def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",
2437 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>;
2438 def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",
2439 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {
2442 }
2443 defm STG : MemTagStore<0b00, "stg">;
2444 defm STZG : MemTagStore<0b01, "stzg">;
2445 defm ST2G : MemTagStore<0b10, "st2g">;
2446 defm STZ2G : MemTagStore<0b11, "stz2g">;
2448 def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2449 (STGi $Rn, $Rm, $imm)>;
2450 def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2451 (STZGi $Rn, $Rm, $imm)>;
2452 def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2453 (ST2Gi $Rn, $Rm, $imm)>;
2454 def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2455 (STZ2Gi $Rn, $Rm, $imm)>;
2457 defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
2458 def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
2459 def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
2461 def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
2462 (STGi GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
2464 def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
2465 (STGPi $Rt, $Rt2, $Rn, $imm)>;
2467 def IRGstack
2468 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
2469 Sched<[]>;
2470 def TAGPstack
2471 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
2472 Sched<[]>;
2474 // Explicit SP in the first operand prevents ShrinkWrap optimization
2475 // from leaving this instruction out of the stack frame. When IRGstack
2476 // is transformed into IRG, this operand is replaced with the actual
2477 // register / expression for the tagged base pointer of the current function.
2478 def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
2480 // Large STG to be expanded into a loop. $sz is the size, $Rn is the start address.
2481 // $Rn_wback is one past the end of the range. $Rm is the loop counter.
2482 let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in {
2483 def STGloop_wback
2484 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
2485 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
2486 Sched<[WriteAdr, WriteST]>;
2488 def STZGloop_wback
2489 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
2490 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
2491 Sched<[WriteAdr, WriteST]>;
2493 // Variants of the above where $Rn2 is an independent register not tied to the input register $Rn.
2494 // Their purpose is to use a FrameIndex operand as $Rn (which of course cannot be written back).
2495 def STGloop
2496 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
2497 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
2498 Sched<[WriteAdr, WriteST]>;
2500 def STZGloop
2501 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
2502 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
2503 Sched<[WriteAdr, WriteST]>;
2504 }
2506 } // Predicates = [HasMTE]
2508 //===----------------------------------------------------------------------===//
2509 // Logical instructions.
2510 //===----------------------------------------------------------------------===//
2513 defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
2514 defm AND : LogicalImm<0b00, "and", and, "bic">;
2515 defm EOR : LogicalImm<0b10, "eor", xor, "eon">;
2516 defm ORR : LogicalImm<0b01, "orr", or, "orn">;
2518 // FIXME: these aliases *are* canonical sometimes (when movz can't be
2519 // used). Actually, it seems to be working right now, but putting logical_immXX
2520 // here is a bit dodgy on the AsmParser side too.
2521 def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
2522 logical_imm32:$imm), 0>;
2523 def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
2524 logical_imm64:$imm), 0>;
2528 defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
2529 defm BICS : LogicalRegS<0b11, 1, "bics",
2530 BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
2531 defm AND : LogicalReg<0b00, 0, "and", and>;
2532 defm BIC : LogicalReg<0b00, 1, "bic",
2533 BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>;
2534 defm EON : LogicalReg<0b10, 1, "eon",
2535 BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
2536 defm EOR : LogicalReg<0b10, 0, "eor", xor>;
2537 defm ORN : LogicalReg<0b01, 1, "orn",
2538 BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
2539 defm ORR : LogicalReg<0b01, 0, "orr", or>;
2541 def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
2542 def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;
2544 def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
2545 def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;
2547 def : InstAlias<"mvn $Wd, $Wm$sh",
2548 (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
2549 def : InstAlias<"mvn $Xd, $Xm$sh",
2550 (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;
2552 def : InstAlias<"tst $src1, $src2",
2553 (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
2554 def : InstAlias<"tst $src1, $src2",
2555 (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;
2557 def : InstAlias<"tst $src1, $src2",
2558 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
2559 def : InstAlias<"tst $src1, $src2",
2560 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;
2562 def : InstAlias<"tst $src1, $src2$sh",
2563 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
2564 def : InstAlias<"tst $src1, $src2$sh",
2565 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;
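// TST is simply ANDS with the result discarded into the zero register: e.g.
// "tst w0, #1" (illustrative operands) encodes as "ands wzr, w0, #1" and only
// updates NZCV.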
2568 def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
2569 def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
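// For example, (not w1) selects to ORN with the zero register as the first
// source, which the mvn aliases above print back as "mvn w0, w1".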
2572 //===----------------------------------------------------------------------===//
2573 // One operand data processing instructions.
2574 //===----------------------------------------------------------------------===//
2576 defm CLS : OneOperandData<0b000101, "cls">;
2577 defm CLZ : OneOperandData<0b000100, "clz", ctlz>;
2578 defm RBIT : OneOperandData<0b000000, "rbit", bitreverse>;
2580 def REV16Wr : OneWRegData<0b000001, "rev16",
2581 UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
2582 def REV16Xr : OneXRegData<0b000001, "rev16", null_frag>;
2584 def : Pat<(cttz GPR32:$Rn),
2585 (CLZWr (RBITWr GPR32:$Rn))>;
2586 def : Pat<(cttz GPR64:$Rn),
2587 (CLZXr (RBITXr GPR64:$Rn))>;
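// AArch64 has no count-trailing-zeros instruction, so the patterns above
// synthesize cttz by bit-reversing and counting leading zeros, e.g.
// (illustrative registers):
//   rbit w8, w1
//   clz  w0, w8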
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
                    (i32 1))),
          (CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
                    (i64 1))),
          (CLSXr GPR64:$Rn)>;
2594 def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
2595 def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;
// Unlike the other one-operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit; they actually use different
// opcode bits for the different sizes.
2600 def REVWr : OneWRegData<0b000010, "rev", bswap>;
2601 def REVXr : OneXRegData<0b000011, "rev", bswap>;
2602 def REV32Xr : OneXRegData<0b000010, "rev32",
2603 UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
2605 def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
// The bswap commutes with the rotr, so we want a pattern for both possible
// orders.
2609 def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
2610 def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
2612 // Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
2613 def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
2614 def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
2616 def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
2617 (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
2618 (REV16Xr GPR64:$Rn)>;
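// The OR-of-masked-shifts above is the canonical DAG form of swapping the two
// bytes within each 16-bit lane: the first operand moves the high bytes down,
// the second moves the low bytes up, which is exactly what REV16 computes.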
2620 //===----------------------------------------------------------------------===//
2621 // Bitfield immediate extraction instruction.
2622 //===----------------------------------------------------------------------===//
2623 let hasSideEffects = 0 in
2624 defm EXTR : ExtractImm<"extr">;
2625 def : InstAlias<"ror $dst, $src, $shift",
2626 (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
2627 def : InstAlias<"ror $dst, $src, $shift",
2628 (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
2630 def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
2631 (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
2632 def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
2633 (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
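// For example, (rotr w1, (i64 12)) selects to EXTR with both sources equal,
// i.e. "extr w0, w1, w1, #12", printed via the ror alias above as
// "ror w0, w1, #12".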
2635 //===----------------------------------------------------------------------===//
2636 // Other bitfield immediate instructions.
2637 //===----------------------------------------------------------------------===//
2638 let hasSideEffects = 0 in {
2639 defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">;
2640 defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
}
2644 def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
2645 uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
2649 def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
2650 uint64_t enc = 31 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
2654 // min(7, 31 - shift_amt)
2655 def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
2656 uint64_t enc = 31 - N->getZExtValue();
2657 enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
2661 // min(15, 31 - shift_amt)
2662 def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
2663 uint64_t enc = 31 - N->getZExtValue();
2664 enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
2668 def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
2669 uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
2673 def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
2674 uint64_t enc = 63 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
2678 // min(7, 63 - shift_amt)
2679 def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
2680 uint64_t enc = 63 - N->getZExtValue();
2681 enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
2685 // min(15, 63 - shift_amt)
2686 def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
2687 uint64_t enc = 63 - N->getZExtValue();
2688 enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
2692 // min(31, 63 - shift_amt)
2693 def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
2694 uint64_t enc = 63 - N->getZExtValue();
2695 enc = enc > 31 ? 31 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
2699 def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
2700 (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
2701 (i64 (i32shift_b imm0_31:$imm)))>;
2702 def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
2703 (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
2704 (i64 (i64shift_b imm0_63:$imm)))>;
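// Worked example of the encodings computed above, for a 32-bit left shift by
// 8: immr = (32 - 8) & 0x1f = 24 and imms = 31 - 8 = 23, so (shl w1, 8)
// becomes "ubfm w0, w1, #24, #23", the instruction behind the
// "lsl w0, w1, #8" alias.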
2706 let AddedComplexity = 10 in {
2707 def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
2708 (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
2709 def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
}
2713 def : InstAlias<"asr $dst, $src, $shift",
2714 (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
2715 def : InstAlias<"asr $dst, $src, $shift",
2716 (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
2717 def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
2718 def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
2719 def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
2720 def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
2721 def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
2723 def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
2724 (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
2725 def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
2726 (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
2728 def : InstAlias<"lsr $dst, $src, $shift",
2729 (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
2730 def : InstAlias<"lsr $dst, $src, $shift",
2731 (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
2732 def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
2733 def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
2734 def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
2735 def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
2736 def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
2738 //===----------------------------------------------------------------------===//
2739 // Conditional comparison instructions.
2740 //===----------------------------------------------------------------------===//
2741 defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
2742 defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
2744 //===----------------------------------------------------------------------===//
2745 // Conditional select instructions.
2746 //===----------------------------------------------------------------------===//
2747 defm CSEL : CondSelect<0, 0b00, "csel">;
2749 def inc : PatFrag<(ops node:$in), (add_and_or_is_add node:$in, 1)>;
2750 defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
2751 defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
2752 defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
2754 def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2755 (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2756 def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2757 (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2758 def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2759 (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2760 def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2761 (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2762 def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2763 (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2764 def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2765 (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2767 def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
2768 (CSINCWr WZR, WZR, (i32 imm:$cc))>;
2769 def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
2770 (CSINCXr XZR, XZR, (i32 imm:$cc))>;
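// For example, materializing (i32)(a < b) typically reaches here as
// csel(0, 1, ge), which the first pattern above turns into
// "csinc w0, wzr, wzr, ge" (0 if ge holds, otherwise wzr + 1 = 1); the cset
// alias below prints this as "cset w0, lt".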
2771 def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
2772 (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
2773 def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
2774 (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
2775 def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
2776 (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2777 def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
2778 (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2779 def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
2780 (CSINVWr WZR, WZR, (i32 imm:$cc))>;
2781 def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
2782 (CSINVXr XZR, XZR, (i32 imm:$cc))>;
2783 def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
2784 (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
2785 def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
2786 (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
2787 def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
2788 (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2789 def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
2790 (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2792 def : Pat<(add_and_or_is_add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
2793 (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
2794 def : Pat<(add_and_or_is_add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
2795 (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;
2797 def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
2798 (CSINCWr GPR32:$val, WZR, imm:$cc)>;
2799 def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
2800 (CSINCXr GPR64:$val, XZR, imm:$cc)>;
2801 def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
2802 (CSINCXr GPR64:$val, XZR, imm:$cc)>;
2804 def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
2805 (CSELWr WZR, GPR32:$val, imm:$cc)>;
2806 def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
2807 (CSELXr XZR, GPR64:$val, imm:$cc)>;
2808 def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
2809 (CSELXr XZR, GPR64:$val, imm:$cc)>;
// The aliased instruction uses the inverse of the condition code supplied to
// the alias. The parser already inverts the condition code for these aliases.
2814 def : InstAlias<"cset $dst, $cc",
2815 (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
2816 def : InstAlias<"cset $dst, $cc",
2817 (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
2819 def : InstAlias<"csetm $dst, $cc",
2820 (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
2821 def : InstAlias<"csetm $dst, $cc",
2822 (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
2824 def : InstAlias<"cinc $dst, $src, $cc",
2825 (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
2826 def : InstAlias<"cinc $dst, $src, $cc",
2827 (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
2829 def : InstAlias<"cinv $dst, $src, $cc",
2830 (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
2831 def : InstAlias<"cinv $dst, $src, $cc",
2832 (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
2834 def : InstAlias<"cneg $dst, $src, $cc",
2835 (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
2836 def : InstAlias<"cneg $dst, $src, $cc",
2837 (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
2839 //===----------------------------------------------------------------------===//
2840 // PC-relative instructions.
2841 //===----------------------------------------------------------------------===//
2842 let isReMaterializable = 1 in {
2843 let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
2844 def ADR : ADRI<0, "adr", adrlabel,
2845 [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
2846 } // hasSideEffects = 0
2848 def ADRP : ADRI<1, "adrp", adrplabel,
2849 [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
2850 } // isReMaterializable = 1
// Direct (ADR) and page (ADRP) addresses of constant pool entries, block
// addresses, external symbols and jump tables.
2853 def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
2854 def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
2855 def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
2856 def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
2857 def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
2858 def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
2859 def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
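// ADRP is normally paired with a low-12-bits addition (or a load/store
// offset) on the use side, e.g. taking the address of a global g
// (illustrative symbol and register):
//   adrp x0, g
//   add  x0, x0, :lo12:g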
2861 //===----------------------------------------------------------------------===//
2862 // Unconditional branch (register) instructions.
2863 //===----------------------------------------------------------------------===//
2865 let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
2866 def RET : BranchReg<0b0010, "ret", []>;
2867 def DRPS : SpecialReturn<0b0101, "drps">;
2868 def ERET : SpecialReturn<0b0100, "eret">;
2869 } // isReturn = 1, isTerminator = 1, isBarrier = 1
2871 // Default to the LR register.
2872 def : InstAlias<"ret", (RET LR)>;
2874 let isCall = 1, Defs = [LR], Uses = [SP] in {
2875 def BLR : BranchReg<0b0001, "blr", []>;
2876 def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
2877 Sched<[WriteBrReg]>,
2878 PseudoInstExpansion<(BLR GPR64:$Rn)>;
2879 def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
2880 Sched<[WriteBrReg]>;
2881 def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
2882 Sched<[WriteBrReg]>;
2883 let Uses = [X16, SP] in
2884 def BLR_X16 : Pseudo<(outs), (ins), [(AArch64call_arm64ec_to_x64 X16)]>,
2885 Sched<[WriteBrReg]>,
                 PseudoInstExpansion<(BLR X16)>;
} // isCall
def : Pat<(AArch64call GPR64:$Rn),
          (BLR GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
2892 def : Pat<(AArch64call GPR64noip:$Rn),
2893 (BLRNoIP GPR64noip:$Rn)>,
2894 Requires<[SLSBLRMitigation]>;
2896 def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
2897 (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
2898 Requires<[NoSLSBLRMitigation]>;
2900 def : Pat<(AArch64call_bti GPR64:$Rn),
2901 (BLR_BTI GPR64:$Rn)>,
2902 Requires<[NoSLSBLRMitigation]>;
2903 def : Pat<(AArch64call_bti GPR64noip:$Rn),
2904 (BLR_BTI GPR64noip:$Rn)>,
2905 Requires<[SLSBLRMitigation]>;
2907 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
2908 def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
2909 } // isBranch, isTerminator, isBarrier, isIndirectBranch
2911 // Create a separate pseudo-instruction for codegen to use so that we don't
2912 // flag lr as used in every function. It'll be restored before the RET by the
2913 // epilogue if it's legitimately used.
2914 def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
2915 Sched<[WriteBrReg]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}
2921 // This is a directive-like pseudo-instruction. The purpose is to insert an
2922 // R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
2923 // (which in the usual case is a BLR).
2924 let hasSideEffects = 1 in
2925 def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
  let AsmString = ".tlsdesccall $sym";
}
2929 // Pseudo instruction to tell the streamer to emit a 'B' character into the
2930 // augmentation string.
2931 def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
2933 // Pseudo instruction to tell the streamer to emit a 'G' character into the
2934 // augmentation string.
2935 def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}
// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects" be dropped?
// This gets lowered to an instruction sequence of 16 bytes.
2940 let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
2941 isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
2944 [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
2945 Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
2946 def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
2947 (TLSDESC_CALLSEQ texternalsym:$sym)>;
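// On ELF targets this callseq expands to the canonical TLS descriptor
// sequence, roughly (v is an illustrative thread-local variable):
//   adrp x0, :tlsdesc:v
//   ldr  x1, [x0, #:tlsdesc_lo12:v]
//   add  x0, x0, :tlsdesc_lo12:v
//   .tlsdesccall v
//   blr  x1
// i.e. four instructions (Size = 16), matching the four scheduling writes
// listed above.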
2949 //===----------------------------------------------------------------------===//
2950 // Conditional branch (immediate) instruction.
2951 //===----------------------------------------------------------------------===//
2952 def Bcc : BranchCond<0, "b">;
2954 // Armv8.8-A variant form which hints to the branch predictor that
2955 // this branch is very likely to go the same way nearly all the time
2956 // (even though it is not known at compile time _which_ way that is).
2957 def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;
2959 //===----------------------------------------------------------------------===//
2960 // Compare-and-branch instructions.
2961 //===----------------------------------------------------------------------===//
2962 defm CBZ : CmpBranch<0, "cbz", AArch64cbz>;
2963 defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;
2965 //===----------------------------------------------------------------------===//
2966 // Test-bit-and-branch instructions.
2967 //===----------------------------------------------------------------------===//
2968 defm TBZ : TestBranch<0, "tbz", AArch64tbz>;
2969 defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;
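// For example, "tbz x0, #63, label" (illustrative operands) branches to label
// when bit 63 of x0 is clear, i.e. a one-instruction test for a non-negative
// value.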
2971 //===----------------------------------------------------------------------===//
2972 // Unconditional branch (immediate) instructions.
2973 //===----------------------------------------------------------------------===//
2974 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
2975 def B : BranchImm<0, "b", [(br bb:$addr)]>;
2976 } // isBranch, isTerminator, isBarrier
2978 let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
2981 def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;
2983 //===----------------------------------------------------------------------===//
2984 // Exception generation instructions.
2985 //===----------------------------------------------------------------------===//
2987 def BRK : ExceptionGeneration<0b001, 0b00, "brk",
2988 [(int_aarch64_break timm32_0_65535:$imm)]>;
2990 def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
2991 def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
2992 def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
2993 def HLT : ExceptionGeneration<0b010, 0b00, "hlt">;
2994 def HVC : ExceptionGeneration<0b000, 0b10, "hvc">;
2995 def SMC : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
2996 def SVC : ExceptionGeneration<0b000, 0b01, "svc">;
2998 // DCPSn defaults to an immediate operand of zero if unspecified.
2999 def : InstAlias<"dcps1", (DCPS1 0)>;
3000 def : InstAlias<"dcps2", (DCPS2 0)>;
3001 def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;
3003 def UDF : UDFType<0, "udf">;
3005 //===----------------------------------------------------------------------===//
3006 // Load instructions.
3007 //===----------------------------------------------------------------------===//
3009 // Pair (indexed, offset)
3010 defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
3011 defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
3012 let Predicates = [HasFPARMv8] in {
3013 defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
3014 defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;
}
3018 defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;
3020 // Pair (pre-indexed)
3021 def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
3022 def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
3023 let Predicates = [HasFPARMv8] in {
3024 def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
3025 def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
}
3029 def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
3031 // Pair (post-indexed)
3032 def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
3033 def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
3034 let Predicates = [HasFPARMv8] in {
3035 def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
3036 def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
}
3040 def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
3043 // Pair (no allocate)
3044 defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
3045 defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
3046 let Predicates = [HasFPARMv8] in {
3047 defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
3048 defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
}
3052 def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
3053 (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;
3055 def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
3056 (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>;
3058 // (register offset)
3062 defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
3063 defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
3064 defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
3065 defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;
3068 let Predicates = [HasFPARMv8] in {
3069 defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", i8, load>;
3070 defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>;
3071 defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>;
3072 defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;
}
3076 // Load sign-extended half-word
3077 defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
3078 defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;
3080 // Load sign-extended byte
3081 defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
3082 defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;
3084 // Load sign-extended word
3085 defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
3088 defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
// Regular loads have no alignment requirement, so it is safe to directly map
// the vector loads with interesting addressing modes.
3093 // FIXME: We could do the same for bitconvert to floating point vectors.
3094 multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
3095 ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
3098 def : Pat<(VecTy (scalar_to_vector (ScalTy
3099 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
3100 (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;
3104 def : Pat<(VecTy (scalar_to_vector (ScalTy
3105 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
3106 (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}
3111 let AddedComplexity = 10 in {
3112 defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v8i8, LDRBroW, LDRBroX, bsub>;
3113 defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v16i8, LDRBroW, LDRBroX, bsub>;
3115 defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
3116 defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;
3118 defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>;
3119 defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>;
3121 defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>;
3122 defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>;
3124 defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>;
3125 defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>;
3127 defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>;
3129 defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>;
3132 def : Pat <(v1i64 (scalar_to_vector (i64
3133 (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
3134 ro_Wextend64:$extend))))),
3135 (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
3137 def : Pat <(v1i64 (scalar_to_vector (i64
3138 (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
3139 ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}
// Match all 64-bit-wide loads whose type is compatible with FPR64.
3144 multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
3145 Instruction LOADW, Instruction LOADX> {
3147 def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
3148 (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
3150 def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
3154 let AddedComplexity = 10 in {
3155 let Predicates = [IsLE] in {
  // On big-endian targets vector loads must be done with LD1 instead.
3157 defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
3158 defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
3159 defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>;
3160 defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
3161 defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}
3165 defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
3166 defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>;
// Match all 128-bit-wide loads whose type is compatible with FPR128.
3169 let Predicates = [IsLE] in {
  // On big-endian targets vector loads must be done with LD1 instead.
3171 defm : VecROLoadPat<ro128, v2i64, LDRQroW, LDRQroX>;
3172 defm : VecROLoadPat<ro128, v2f64, LDRQroW, LDRQroX>;
3173 defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>;
3174 defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>;
3175 defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>;
3176 defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>;
3177 defm : VecROLoadPat<ro128, v8bf16, LDRQroW, LDRQroX>;
3178 defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10
3183 multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
3184 Instruction INSTW, Instruction INSTX> {
3185 def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
3186 (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;
3190 def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
3191 (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}
3196 let AddedComplexity = 10 in {
3197 defm : ExtLoadTo64ROPat<ro8, zextloadi8, LDRBBroW, LDRBBroX>;
3198 defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
3199 defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW, LDRWroX>;
3201 // zextloadi1 -> zextloadi8
3202 defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
3204 // extload -> zextload
3205 defm : ExtLoadTo64ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>;
3206 defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
3207 defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;
3209 // extloadi1 -> zextloadi8
defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>;
}
3215 multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
3216 Instruction INSTW, Instruction INSTX> {
3217 def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
3218 (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
3220 def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
3225 let AddedComplexity = 10 in {
3226 // extload -> zextload
3227 defm : ExtLoadTo32ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>;
3228 defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
3229 defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;
3231 // zextloadi1 -> zextloadi8
defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
}
3236 // (unsigned immediate)
3238 defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
                  [(set GPR64z:$Rt,
                        (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
3241 defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
                  [(set GPR32z:$Rt,
                        (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
3244 let Predicates = [HasFPARMv8] in {
3245 defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
                  [(set FPR8Op:$Rt,
                        (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
3248 defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
3249 [(set (f16 FPR16Op:$Rt),
3250 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
3251 defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
3252 [(set (f32 FPR32Op:$Rt),
3253 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
3254 defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
3255 [(set (f64 FPR64Op:$Rt),
3256 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
3257 defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
3258 [(set (f128 FPR128Op:$Rt),
                        (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
}
3262 // bf16 load pattern
3263 def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
3264 (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
// Regular loads have no alignment requirement, so it is safe to directly map
// the vector loads with interesting addressing modes.
3269 // FIXME: We could do the same for bitconvert to floating point vectors.
3270 def : Pat <(v8i8 (scalar_to_vector (i32
3271 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
3272 (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
3273 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
3274 def : Pat <(v16i8 (scalar_to_vector (i32
3275 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
3276 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
3277 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
3278 def : Pat <(v4i16 (scalar_to_vector (i32
3279 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
3280 (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
3281 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
3282 def : Pat <(v8i16 (scalar_to_vector (i32
3283 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
3284 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
3285 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
3286 def : Pat <(v2i32 (scalar_to_vector (i32
3287 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
3288 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
3289 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
3290 def : Pat <(v4i32 (scalar_to_vector (i32
3291 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
3292 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
3293 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
3294 def : Pat <(v1i64 (scalar_to_vector (i64
3295 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
3296 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3297 def : Pat <(v2i64 (scalar_to_vector (i64
3298 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
3299 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
3300 (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
// Match all 64-bit-wide loads whose type is compatible with FPR64.
3303 let Predicates = [IsLE] in {
// On big-endian targets vector loads must be done with LD1 instead.
3305 def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3306 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3307 def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3308 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3309 def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3310 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3311 def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3312 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3313 def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3314 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3315 def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
3318 def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3319 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3320 def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3321 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
// Match all 128-bit-wide loads whose type is compatible with FPR128.
3324 let Predicates = [IsLE] in {
// On big-endian targets vector loads must be done with LD1 instead.
3326 def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3327 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3328 def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3329 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3330 def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3331 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3332 def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3333 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3334 def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3335 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3336 def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3337 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3338 def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3339 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3340 def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
3343 def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3344 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3346 defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                   [(set GPR32:$Rt,
                         (zextloadi16 (am_indexed16 GPR64sp:$Rn,
3349 uimm12s2:$offset)))]>;
3350 defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                   [(set GPR32:$Rt,
                         (zextloadi8 (am_indexed8 GPR64sp:$Rn,
3353 uimm12s1:$offset)))]>;
3355 def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3356 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
3357 def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
3358 (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
3360 // zextloadi1 -> zextloadi8
3361 def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3362 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
3363 def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3364 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
3366 // extload -> zextload
3367 def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
3368 (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
3369 def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3370 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
3371 def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3372 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
3373 def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
3374 (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
3375 def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
3376 (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
3377 def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3378 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
3379 def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3380 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
3382 // load sign-extended half-word
3383 defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
                     [(set GPR32:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
3386 uimm12s2:$offset)))]>;
3387 defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
                     [(set GPR64:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
3390 uimm12s2:$offset)))]>;
3392 // load sign-extended byte
3393 defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
                     [(set GPR32:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
3396 uimm12s1:$offset)))]>;
3397 defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
                     [(set GPR64:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
3400 uimm12s1:$offset)))]>;
3402 // load sign-extended word
3403 defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
                    [(set GPR64:$Rt,
                          (sextloadi32 (am_indexed32 GPR64sp:$Rn,
3406 uimm12s4:$offset)))]>;
3408 // load zero-extended word
3409 def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
3410 (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
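// A 32-bit load already zeroes bits [63:32] of the destination X register, so
// the i64 zero-extension above is free: SUBREG_TO_REG merely re-labels the
// 32-bit result as 64 bits and emits no instruction.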
3413 def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
3414 [(AArch64Prefetch timm:$Rt,
3415 (am_indexed64 GPR64sp:$Rn,
3416 uimm12s8:$offset))]>;
3418 def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
// (literal)
def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
3424 if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
3425 const DataLayout &DL = MF->getDataLayout();
3426 Align Align = G->getGlobal()->getPointerAlignment(DL);
    return Align >= 4 && G->getOffset() % 4 == 0;
  }
3429 if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
    return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
  return false;
}]>;
3434 def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
3435 [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
3436 def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
3437 [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
3438 let Predicates = [HasFPARMv8] in {
3439 def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
3440 [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
3441 def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
3442 [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
3443 def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
  [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
}
3447 // load sign-extended word
3448 def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
3449 [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;
3451 let AddedComplexity = 20 in {
3452 def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
          (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}
3457 def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
3458 // [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;
3461 // (unscaled immediate)
3462 defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
                   [(set GPR64z:$Rt,
                         (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
3465 defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
                   [(set GPR32z:$Rt,
                         (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
3468 let Predicates = [HasFPARMv8] in {
3469 defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
                   [(set FPR8Op:$Rt,
                         (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
3472 defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
3473 [(set (f16 FPR16Op:$Rt),
3474 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
3475 defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
3476 [(set (f32 FPR32Op:$Rt),
3477 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
3478 defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
3479 [(set (f64 FPR64Op:$Rt),
3480 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
3481 defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
3482 [(set (f128 FPR128Op:$Rt),
                         (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
}
defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
             [(set GPR32:$Rt,
                   (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
             [(set GPR32:$Rt,
                   (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
3495 // bf16 load pattern
3496 def : Pat <(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3497 (LDURHi GPR64sp:$Rn, simm9:$offset)>;
// Match all 64-bit-wide loads whose type is compatible with FPR64.
3500 let Predicates = [IsLE] in {
3501 def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3502 (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3503 def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3504 (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3505 def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3506 (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3507 def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3508 (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3509 def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
3512 def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3513 (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3514 def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3515 (LDURDi GPR64sp:$Rn, simm9:$offset)>;
// Match all 128-bit-wide loads whose type is compatible with FPR128.
3518 let Predicates = [IsLE] in {
3519 def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3520 (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3521 def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3522 (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3523 def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3524 (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3525 def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3526 (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3527 def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3528 (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3529 def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3530 (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3531 def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
          (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}
3536 def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3537 (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
3538 def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3539 (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
3540 def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3541 (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
3542 def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
3543 (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3544 def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3545 (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3546 def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3547 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3548 def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3549 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3551 def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3552 (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
3553 def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3554 (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
3555 def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3556 (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
3557 def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
3558 (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3559 def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3560 (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3561 def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3562 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3563 def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3564 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3568 // LDR mnemonics fall back to LDUR for negative or unaligned offsets.
// Define new assembler match classes, as we want to match these only when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
3574 class SImm9OffsetOperand<int Width> : AsmOperandClass {
3575 let Name = "SImm9OffsetFB" # Width;
3576 let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}
3580 def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
3581 def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
3582 def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
3583 def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
3584 def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;
3586 def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
3589 def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
3592 def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
3595 def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
3598 def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}
3602 def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3603 (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
3604 def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3605 (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
3606 let Predicates = [HasFPARMv8] in {
3607 def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3608 (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
3609 def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3610 (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
3611 def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3612 (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
3613 def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3614 (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
3615 def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
}
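// For example, "ldr x0, [sp, #-8]" cannot use the scaled uimm12 form because
// the offset is negative, so it matches the alias above and assembles to
// "ldur x0, [sp, #-8]".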
3625 // load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
             [(set GPR32:$Rt,
                   (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
             [(set GPR64:$Rt,
                   (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
3635 // load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
             [(set GPR32:$Rt,
                   (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
             [(set GPR64:$Rt,
                   (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
3645 // load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
             [(set GPR64:$Rt,
                   (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
// Zero- and sign-extending aliases from the generic LDR* mnemonics to LDUR*.
3652 def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
3653 (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
3654 def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
3655 (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
3656 def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
3657 (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
3658 def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
3659 (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
3660 def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
3661 (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
3662 def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
3663 (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
3664 def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
3665 (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
// An LDR will implicitly zero the rest of the vector register, so
// vector_insert(zeros, load, 0) can use a single load.
3669 multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
3670 ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
3671 ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm,
3672 SubRegIndex SubReg> {
3674 def : Pat <(vector_insert (VT immAllZerosV),
3675 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
3676 (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
3678 def : Pat <(vector_insert (VT immAllZerosV),
3679 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
3680 (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
3682 // Half-vector patterns
3683 def : Pat <(vector_insert (HVT immAllZerosV),
3684 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
3685 (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
3687 def : Pat <(vector_insert (HVT immAllZerosV),
3688 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
3689 (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
  // Scalable-vector patterns
  def : Pat <(vector_insert (SVT immAllZerosV),
3693 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
3694 (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
3696 def : Pat <(vector_insert (SVT immAllZerosV),
3697 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
    (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
}
3701 defm : LoadInsertZeroPatterns<extloadi8, v16i8, v8i8, nxv16i8, i32, LDRBui, LDURBi,
3702 am_indexed8, am_unscaled8, uimm12s1, bsub>;
3703 defm : LoadInsertZeroPatterns<extloadi16, v8i16, v4i16, nxv8i16, i32, LDRHui, LDURHi,
3704 am_indexed16, am_unscaled16, uimm12s2, hsub>;
3705 defm : LoadInsertZeroPatterns<load, v4i32, v2i32, nxv4i32, i32, LDRSui, LDURSi,
3706 am_indexed32, am_unscaled32, uimm12s4, ssub>;
3707 defm : LoadInsertZeroPatterns<load, v2i64, v1i64, nxv2i64, i64, LDRDui, LDURDi,
3708 am_indexed64, am_unscaled64, uimm12s8, dsub>;
3709 defm : LoadInsertZeroPatterns<load, v8f16, v4f16, nxv8f16, f16, LDRHui, LDURHi,
3710 am_indexed16, am_unscaled16, uimm12s2, hsub>;
3711 defm : LoadInsertZeroPatterns<load, v8bf16, v4bf16, nxv8bf16, bf16, LDRHui, LDURHi,
3712 am_indexed16, am_unscaled16, uimm12s2, hsub>;
3713 defm : LoadInsertZeroPatterns<load, v4f32, v2f32, nxv4f32, f32, LDRSui, LDURSi,
3714 am_indexed32, am_unscaled32, uimm12s4, ssub>;
3715 defm : LoadInsertZeroPatterns<load, v2f64, v1f64, nxv2f64, f64, LDRDui, LDURDi,
3716 am_indexed64, am_unscaled64, uimm12s8, dsub>;
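// For example, with these patterns, inserting a loaded i32 into lane 0 of a
// zeroed v4i32 becomes a single "ldr s0, [x0]" (illustrative registers): the
// scalar load writes lane 0 and implicitly zeroes bits [127:32].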
3719 defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
3720 [(AArch64Prefetch timm:$Rt,
3721 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
3724 // (unscaled immediate, unprivileged)
3725 defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
3726 defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;
3728 defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
3729 defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;
3731 // load sign-extended half-word
3732 defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
3733 defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;
3735 // load sign-extended byte
3736 defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
3737 defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;
3739 // load sign-extended word
3740 defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;
3743 // (immediate pre-indexed)
3744 def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
3745 def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
3746 let Predicates = [HasFPARMv8] in {
3747 def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
3748 def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
3749 def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
3750 def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}
3754 // load sign-extended half-word
3755 def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
3756 def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;
3758 // load sign-extended byte
3759 def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
3760 def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;
// load zero-extended byte and half-word
3763 def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
3764 def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;
3766 // load sign-extended word
3767 def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
3770 // (immediate post-indexed)
3771 def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
3772 def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
3773 let Predicates = [HasFPARMv8] in {
3774 def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
3775 def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
3776 def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
3777 def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}
3781 // load sign-extended half-word
3782 def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
3783 def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;
3785 // load sign-extended byte
3786 def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
3787 def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;
// load zero-extended byte and half-word
3790 def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
3791 def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;
3793 // load sign-extended word
3794 def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
3796 //===----------------------------------------------------------------------===//
3797 // Store instructions.
3798 //===----------------------------------------------------------------------===//
3800 // Pair (indexed, offset)
3801 // FIXME: Use dedicated range-checked addressing mode operand here.
3802 defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
3803 defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
3804 let Predicates = [HasFPARMv8] in {
3805 defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
3806 defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
}
3810 // Pair (pre-indexed)
3811 def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
3812 def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
3813 let Predicates = [HasFPARMv8] in {
3814 def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
3815 def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
}
3819 // Pair (post-indexed)
3820 def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
3821 def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
3822 let Predicates = [HasFPARMv8] in {
3823 def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
3824 def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
3825 def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
3826 }
3828 // Pair (no allocate)
3829 defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
3830 defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
3831 let Predicates = [HasFPARMv8] in {
3832 defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
3833 defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
3834 defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
3835 }
3837 def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
3838 (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;
3840 def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
3841 (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;
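// A rough illustration of the pair-offset encoding (example values, not from
// the source): the imm7 operand is scaled by the access size, so an X-pair
// accepts byte offsets that are multiples of 8 in [-512, 504] and a Q-pair
// multiples of 16 in [-1024, 1008], e.g.
//   stp  x0, x1, [sp, #-16]
//   stnp q0, q1, [x0, #32]    // non-temporal hint; no extra ordering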
3845 // (Register offset)
3848 defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
3849 defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
3850 defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>;
3851 defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;
3855 let Predicates = [HasFPARMv8] in {
3856 defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", i8, store>;
3857 defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
3858 defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
3859 defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
3860 defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
3861 }
3863 let Predicates = [UseSTRQro], AddedComplexity = 10 in {
3864 def : Pat<(store (f128 FPR128:$Rt),
3865 (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
3866 ro_Wextend128:$extend)),
3867 (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
3868 def : Pat<(store (f128 FPR128:$Rt),
3869 (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
3870 ro_Xextend128:$extend)),
3871 (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
3872 }
3874 multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
3875 Instruction STRW, Instruction STRX> {
3877 def : Pat<(storeop GPR64:$Rt,
3878 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
3879 (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
3880 GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
3882 def : Pat<(storeop GPR64:$Rt,
3883 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
3884 (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
3885 GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
3886 }
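// A sketch of what these patterns select (assumed assembly): a truncating
// store of an i64 value becomes a plain store of its W sub-register, e.g.
//   C:   *(uint32_t *)(base + idx) = (uint32_t)x;
//   asm: str w0, [x1, x2]
// so no separate truncation instruction is emitted.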
3888 let AddedComplexity = 10 in {
3890 defm : TruncStoreFrom64ROPat<ro8, truncstorei8, STRBBroW, STRBBroX>;
3891 defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
3892 defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW, STRWroX>;
3893 }
3895 multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
3896 Instruction STRW, Instruction STRX> {
3897 def : Pat<(store (VecTy FPR:$Rt),
3898 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
3899 (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
3901 def : Pat<(store (VecTy FPR:$Rt),
3902 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
3903 (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
3904 }
3906 let AddedComplexity = 10 in {
3907 // Match all 64-bit-wide stores whose type is compatible with FPR64
3908 let Predicates = [IsLE] in {
3909 // We must use ST1 to store vectors in big-endian.
3910 defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
3911 defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
3912 defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
3913 defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
3914 defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
3915 defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
3916 }
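// STR of a D/Q register writes the register bytes in their integer layout,
// which matches the in-memory element order of a vector type only on
// little-endian targets; big-endian targets use ST1, which stores element by
// element. The v1i64/v1f64 patterns below need no IsLE guard because a
// one-element vector has the same layout either way.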
3918 defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
3919 defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;
3921 // Match all 128-bit-wide stores whose type is compatible with FPR128
3922 let Predicates = [IsLE, UseSTRQro] in {
3923 // We must use ST1 to store vectors in big-endian.
3924 defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
3925 defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
3926 defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
3927 defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
3928 defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
3929 defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
3930 defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
3931 defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
3932 }
3933 } // AddedComplexity = 10
3935 // Match stores of lane 0 to stores of the appropriate subregister.
3936 multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
3937 ValueType VecTy, ValueType STy,
3938 ValueType SubRegTy,
3939 SubRegIndex SubRegIdx,
3940 Instruction STRW, Instruction STRX> {
3942 def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
3943 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
3944 (STRW (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
3945 GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
3947 def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
3948 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
3949 (STRX (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
3950 GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
3951 }
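// A rough example of the benefit (assumed assembly): storing lane 0 of a
// v4i32 can use the register-offset scalar store
//   str s0, [x0, x1]
// instead of the lane store "st1 {v0.s}[0], [x2]", which only supports a
// plain base register.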
3953 let AddedComplexity = 19 in {
3954 defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, f16, hsub, STRHroW, STRHroX>;
3955 defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, f16, hsub, STRHroW, STRHroX>;
3956 defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, i32, ssub, STRSroW, STRSroX>;
3957 defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, i32, ssub, STRSroW, STRSroX>;
3958 defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, i64, dsub, STRDroW, STRDroX>;
3959 defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, i64, dsub, STRDroW, STRDroX>;
3960 }
3963 // (unsigned immediate)
3964 defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
3965 [(store GPR64z:$Rt,
3966 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
3967 defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
3968 [(store GPR32z:$Rt,
3969 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
3970 let Predicates = [HasFPARMv8] in {
3971 defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
3972 [(store (i8 FPR8Op:$Rt),
3973 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
3974 defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
3975 [(store (f16 FPR16Op:$Rt),
3976 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
3977 defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
3978 [(store (f32 FPR32Op:$Rt),
3979 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
3980 defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
3981 [(store (f64 FPR64Op:$Rt),
3982 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
3983 defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;
3984 }
3986 defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
3987 [(truncstorei16 GPR32z:$Rt,
3988 (am_indexed16 GPR64sp:$Rn,
3989 uimm12s2:$offset))]>;
3990 defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
3991 [(truncstorei8 GPR32z:$Rt,
3992 (am_indexed8 GPR64sp:$Rn,
3993 uimm12s1:$offset))]>;
3995 // bf16 store pattern
3996 def : Pat<(store (bf16 FPR16Op:$Rt),
3997 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
3998 (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;
4000 let AddedComplexity = 10 in {
4002 // Match all 64-bit-wide stores whose type is compatible with FPR64
4003 def : Pat<(store (v1i64 FPR64:$Rt),
4004 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
4005 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
4006 def : Pat<(store (v1f64 FPR64:$Rt),
4007 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
4008 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
4010 let Predicates = [IsLE] in {
4011 // We must use ST1 to store vectors in big-endian.
4012 def : Pat<(store (v2f32 FPR64:$Rt),
4013 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
4014 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
4015 def : Pat<(store (v8i8 FPR64:$Rt),
4016 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
4017 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
4018 def : Pat<(store (v4i16 FPR64:$Rt),
4019 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
4020 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
4021 def : Pat<(store (v2i32 FPR64:$Rt),
4022 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
4023 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
4024 def : Pat<(store (v4f16 FPR64:$Rt),
4025 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
4026 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
4027 def : Pat<(store (v4bf16 FPR64:$Rt),
4028 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
4029 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
4030 }
4032 // Match all 128-bit-wide stores whose type is compatible with FPR128
4033 def : Pat<(store (f128 FPR128:$Rt),
4034 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
4035 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
4037 let Predicates = [IsLE] in {
4038 // We must use ST1 to store vectors in big-endian.
4039 def : Pat<(store (v4f32 FPR128:$Rt),
4040 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
4041 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
4042 def : Pat<(store (v2f64 FPR128:$Rt),
4043 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
4044 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
4045 def : Pat<(store (v16i8 FPR128:$Rt),
4046 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
4047 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
4048 def : Pat<(store (v8i16 FPR128:$Rt),
4049 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
4050 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
4051 def : Pat<(store (v4i32 FPR128:$Rt),
4052 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
4053 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
4054 def : Pat<(store (v2i64 FPR128:$Rt),
4055 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
4056 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
4057 def : Pat<(store (v8f16 FPR128:$Rt),
4058 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
4059 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
4060 def : Pat<(store (v8bf16 FPR128:$Rt),
4061 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
4062 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
4063 }
4066 def : Pat<(truncstorei32 GPR64:$Rt,
4067 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
4068 (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
4069 def : Pat<(truncstorei16 GPR64:$Rt,
4070 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
4071 (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
4072 def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
4073 (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;
4075 } // AddedComplexity = 10
4077 // Match stores of lane 0 to stores of the appropriate subregister.
4078 multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
4079 ValueType VTy, ValueType STy,
4080 ValueType SubRegTy,
4081 SubRegIndex SubRegIdx, Operand IndexType,
4082 Instruction STR> {
4083 def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), (i64 0))),
4084 (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
4085 (STR (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
4086 GPR64sp:$Rn, IndexType:$offset)>;
4087 }
4089 let AddedComplexity = 19 in {
4090 defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
4091 defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, f16, hsub, uimm12s2, STRHui>;
4092 defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, i32, ssub, uimm12s4, STRSui>;
4093 defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, i32, ssub, uimm12s4, STRSui>;
4094 defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, i64, dsub, uimm12s8, STRDui>;
4095 defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, i64, dsub, uimm12s8, STRDui>;
4096 }
4099 // (unscaled immediate)
4100 defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
4101 [(store GPR64z:$Rt,
4102 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
4103 defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
4104 [(store GPR32z:$Rt,
4105 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
4106 let Predicates = [HasFPARMv8] in {
4107 defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
4108 [(store (i8 FPR8Op:$Rt),
4109 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
4110 defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
4111 [(store (f16 FPR16Op:$Rt),
4112 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
4113 defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
4114 [(store (f32 FPR32Op:$Rt),
4115 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
4116 defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
4117 [(store (f64 FPR64Op:$Rt),
4118 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
4119 defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
4120 [(store (f128 FPR128Op:$Rt),
4121 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
4122 }
4123 defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
4124 [(truncstorei16 GPR32z:$Rt,
4125 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
4126 defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
4127 [(truncstorei8 GPR32z:$Rt,
4128 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
4130 // bf16 store pattern
4131 def : Pat<(store (bf16 FPR16Op:$Rt),
4132 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
4133 (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4135 // Armv8.4 Weaker Release Consistency enhancements
4136 // LDAPR & STLR with Immediate Offset instructions
4137 let Predicates = [HasRCPC_IMMO] in {
4138 defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>;
4139 defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>;
4140 defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>;
4141 defm STLURX : BaseStoreUnscaleV84<"stlur", 0b11, 0b00, GPR64>;
4142 defm LDAPURB : BaseLoadUnscaleV84<"ldapurb", 0b00, 0b01, GPR32>;
4143 defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
4144 defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
4145 defm LDAPURH : BaseLoadUnscaleV84<"ldapurh", 0b01, 0b01, GPR32>;
4146 defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
4147 defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
4148 defm LDAPUR : BaseLoadUnscaleV84<"ldapur", 0b10, 0b01, GPR32>;
4149 defm LDAPURSW : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
4150 defm LDAPURX : BaseLoadUnscaleV84<"ldapur", 0b11, 0b01, GPR64>;
4151 }
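// For example (illustrative only), these allow a 9-bit signed unscaled
// offset directly on the acquire/release access:
//   ldapur w0, [x1, #-8]
//   stlur  w0, [x1, #24]
// Without FEAT_LRCPC2 the offset would need a separate address computation.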
4153 // Match all 64-bit-wide stores whose type is compatible with FPR64
4154 def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
4155 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4156 def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
4157 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4159 let AddedComplexity = 10 in {
4161 let Predicates = [IsLE] in {
4162 // We must use ST1 to store vectors in big-endian.
4163 def : Pat<(store (v2f32 FPR64:$Rt),
4164 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
4165 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4166 def : Pat<(store (v8i8 FPR64:$Rt),
4167 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
4168 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4169 def : Pat<(store (v4i16 FPR64:$Rt),
4170 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
4171 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4172 def : Pat<(store (v2i32 FPR64:$Rt),
4173 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
4174 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4175 def : Pat<(store (v4f16 FPR64:$Rt),
4176 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
4177 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4178 def : Pat<(store (v4bf16 FPR64:$Rt),
4179 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
4180 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4181 }
4183 // Match all 128-bit-wide stores whose type is compatible with FPR128
4184 def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4185 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4187 let Predicates = [IsLE] in {
4188 // We must use ST1 to store vectors in big-endian.
4189 def : Pat<(store (v4f32 FPR128:$Rt),
4190 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4191 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4192 def : Pat<(store (v2f64 FPR128:$Rt),
4193 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4194 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4195 def : Pat<(store (v16i8 FPR128:$Rt),
4196 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4197 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4198 def : Pat<(store (v8i16 FPR128:$Rt),
4199 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4200 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4201 def : Pat<(store (v4i32 FPR128:$Rt),
4202 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4203 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4204 def : Pat<(store (v2i64 FPR128:$Rt),
4205 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4206 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4210 def : Pat<(store (v8f16 FPR128:$Rt),
4211 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4212 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4213 def : Pat<(store (v8bf16 FPR128:$Rt),
4214 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4215 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4216 }
4218 } // AddedComplexity = 10
4220 // unscaled i64 truncating stores
4221 def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
4222 (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
4223 def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
4224 (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
4225 def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
4226 (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
4228 // Match stores of lane 0 to stores of the appropriate subregister.
4229 multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
4230 ValueType VTy, ValueType STy,
4231 ValueType SubRegTy,
4232 SubRegIndex SubRegIdx, Instruction STR> {
4233 defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegTy, SubRegIdx, simm9, STR>;
4234 }
4236 let AddedComplexity = 19 in {
4237 defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
4238 defm : VecStoreULane0Pat<store, v8f16, f16, f16, hsub, STURHi>;
4239 defm : VecStoreULane0Pat<store, v4i32, i32, i32, ssub, STURSi>;
4240 defm : VecStoreULane0Pat<store, v4f32, f32, i32, ssub, STURSi>;
4241 defm : VecStoreULane0Pat<store, v2i64, i64, i64, dsub, STURDi>;
4242 defm : VecStoreULane0Pat<store, v2f64, f64, i64, dsub, STURDi>;
4243 }
4246 // STR mnemonics fall back to STUR for negative or unaligned offsets.
4247 def : InstAlias<"str $Rt, [$Rn, $offset]",
4248 (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
4249 def : InstAlias<"str $Rt, [$Rn, $offset]",
4250 (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
4251 let Predicates = [HasFPARMv8] in {
4252 def : InstAlias<"str $Rt, [$Rn, $offset]",
4253 (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
4254 def : InstAlias<"str $Rt, [$Rn, $offset]",
4255 (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
4256 def : InstAlias<"str $Rt, [$Rn, $offset]",
4257 (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
4258 def : InstAlias<"str $Rt, [$Rn, $offset]",
4259 (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
4260 def : InstAlias<"str $Rt, [$Rn, $offset]",
4261 (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
4262 }
4264 def : InstAlias<"strb $Rt, [$Rn, $offset]",
4265 (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
4266 def : InstAlias<"strh $Rt, [$Rn, $offset]",
4267 (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
4270 // (unscaled immediate, unprivileged)
4271 defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
4272 defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;
4274 defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
4275 defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
4278 // (immediate pre-indexed)
4279 def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>;
4280 def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>;
4281 let Predicates = [HasFPARMv8] in {
4282 def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, i8>;
4283 def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>;
4284 def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>;
4285 def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>;
4286 def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;
4287 }
4289 def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>;
4290 def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;
4293 def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4294 (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4295 simm9:$off)>;
4296 def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4297 (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4298 simm9:$off)>;
4299 def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4300 (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4301 simm9:$off)>;
4303 def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4304 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4305 def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4306 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4307 def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4308 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4309 def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4310 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4311 def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4312 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4313 def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4314 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4315 def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4316 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4318 def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4319 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4320 def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4321 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4322 def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4323 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4324 def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4325 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4326 def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4327 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4328 def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4329 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4330 def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4331 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4334 // (immediate post-indexed)
4335 def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>;
4336 def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>;
4337 let Predicates = [HasFPARMv8] in {
4338 def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, i8>;
4339 def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>;
4340 def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>;
4341 def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>;
4342 def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;
4343 }
4345 def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>;
4346 def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;
4349 def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4350 (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4351 simm9:$off)>;
4352 def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4353 (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4354 simm9:$off)>;
4355 def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4356 (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4357 simm9:$off)>;
4359 def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
4360 (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;
4362 def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4363 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4364 def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4365 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4366 def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4367 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4368 def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4369 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4370 def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4371 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4372 def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4373 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4374 def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4375 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4376 def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4377 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4379 def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4380 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4381 def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4382 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4383 def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4384 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4385 def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4386 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4387 def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4388 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4389 def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4390 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4391 def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4392 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4393 def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4394 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4396 //===----------------------------------------------------------------------===//
4397 // Load/store exclusive instructions.
4398 //===----------------------------------------------------------------------===//
4400 def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">;
4401 def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">;
4402 def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
4403 def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">;
4405 def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
4406 def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
4407 def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
4408 def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;
4410 def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
4411 def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
4412 def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
4413 def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;
4415 def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">;
4416 def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">;
4417 def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
4418 def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
4421 // Aliases for when offset=0. Note that in contrast to LoadAcquire, which has a $Rn
4422 // of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and add an
4423 // alias for the case of immediate #0. This is because new STLR versions (from the
4424 // LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is no longer
4425 // appropriate (it parses and discards the optional zero). This is not the
4426 // case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed,
4427 // and the immediate values are not inside the [] brackets and thus not accepted
4428 // by the GPR64sp0 operand.
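// For example, with the aliases below both spellings assemble to the same
// encoding (illustrative):
//   stlr w0, [x1]
//   stlr w0, [x1, #0]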
4430 def STLRW0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRW GPR32: $Rt, GPR64sp:$Rn)>;
4431 def STLRX0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRX GPR64: $Rt, GPR64sp:$Rn)>;
4432 def STLRB0 : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB GPR32: $Rt, GPR64sp:$Rn)>;
4433 def STLRH0 : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH GPR32: $Rt, GPR64sp:$Rn)>;
4435 def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
4436 def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
4437 def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
4438 def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;
4440 def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
4441 def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
4442 def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
4443 def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;
4445 def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
4446 def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;
4448 def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
4449 def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;
4451 def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
4452 def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
4454 def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
4455 def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
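// As a usage sketch (assumed assembly, not taken from this file), a typical
// compare-and-swap loop built from these exclusives:
//   0: ldaxr w8, [x0]        // load-acquire exclusive
//      cmp   w8, w1
//      b.ne  1f
//      stlxr w9, w2, [x0]    // store-release exclusive; w9 = status
//      cbnz  w9, 0b          // retry if the exclusive monitor was lost
//   1: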
4457 let Predicates = [HasLOR] in {
4458 // v8.1a "Limited Order Region" extension load-acquire instructions
4459 def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
4460 def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
4461 def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
4462 def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
4464 // v8.1a "Limited Order Region" extension store-release instructions
4465 def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">;
4466 def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">;
4467 def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
4468 def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
4470 // Aliases for when offset=0
4471 def STLLRW0 : InstAlias<"stllr\t$Rt, [$Rn, #0]", (STLLRW GPR32: $Rt, GPR64sp:$Rn)>;
4472 def STLLRX0 : InstAlias<"stllr\t$Rt, [$Rn, #0]", (STLLRX GPR64: $Rt, GPR64sp:$Rn)>;
4473 def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]", (STLLRB GPR32: $Rt, GPR64sp:$Rn)>;
4474 def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]", (STLLRH GPR32: $Rt, GPR64sp:$Rn)>;
4475 }
4477 //===----------------------------------------------------------------------===//
4478 // Scaled floating point to integer conversion instructions.
4479 //===----------------------------------------------------------------------===//
4481 defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
4482 defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
4483 defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
4484 defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
4485 defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
4486 defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
4487 defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
4488 defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
4489 defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
4490 defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
4491 defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
4492 defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
4494 // AArch64's FCVT instructions saturate when out of range.
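// For example (illustrative values): "fcvtzs w0, s0" with s0 = 3.0e9 yields
// 0x7fffffff (INT32_MAX), and a NaN input yields 0, which is exactly the
// semantics of fp_to_sint_sat/fp_to_uint_sat.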
4495 multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
4496 let Predicates = [HasFullFP16] in {
4497 def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
4498 (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
4499 def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
4500 (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
4501 }
4502 def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
4503 (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
4504 def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
4505 (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
4506 def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
4507 (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
4508 def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
4509 (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
4511 let Predicates = [HasFullFP16] in {
4512 def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
4513 (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
4514 def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
4515 (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
4516 }
4517 def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
4518 (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
4519 def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
4520 (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
4521 def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
4522 (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
4523 def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
4524 (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
4525 }
4527 defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
4528 defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
4530 multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
4531 let Predicates = [HasFullFP16] in {
4532 def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
4533 def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
4534 }
4535 def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
4536 def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
4537 def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
4538 def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
4540 let Predicates = [HasFullFP16] in {
4541 def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
4542 (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
4543 def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
4544 (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
4545 }
4546 def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
4547 (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
4548 def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
4549 (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
4550 def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
4551 (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
4552 def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
4553 (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
4554 }
4556 defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
4557 defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
4559 multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
4560 def : Pat<(i32 (to_int (round f32:$Rn))),
4561 (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
4562 def : Pat<(i64 (to_int (round f32:$Rn))),
4563 (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
4564 def : Pat<(i32 (to_int (round f64:$Rn))),
4565 (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
4566 def : Pat<(i64 (to_int (round f64:$Rn))),
4567 (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
4569 // These instructions saturate like fp_to_[su]int_sat.
4570 let Predicates = [HasFullFP16] in {
4571 def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
4572 (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
4573 def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
4574 (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
4575 }
4576 def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
4577 (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
4578 def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
4579 (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
4580 def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
4581 (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
4582 def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
4583 (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
4584 }
4586 defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">;
4587 defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">;
4588 defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
4589 defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
4590 defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
4591 defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
4592 defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
4593 defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
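// For example, (i32)ceilf(x) is selected as a single "fcvtps w0, s0" rather
// than frintp followed by fcvtzs; floor, trunc and round map onto FCVTM*,
// FCVTZ* and FCVTA* in the same way (an illustrative summary).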
4597 let Predicates = [HasFullFP16] in {
4598 def : Pat<(i32 (any_lround f16:$Rn)),
4599 (FCVTASUWHr f16:$Rn)>;
4600 def : Pat<(i64 (any_lround f16:$Rn)),
4601 (FCVTASUXHr f16:$Rn)>;
4602 def : Pat<(i64 (any_llround f16:$Rn)),
4603 (FCVTASUXHr f16:$Rn)>;
4604 }
4605 def : Pat<(i32 (any_lround f32:$Rn)),
4606 (FCVTASUWSr f32:$Rn)>;
4607 def : Pat<(i32 (any_lround f64:$Rn)),
4608 (FCVTASUWDr f64:$Rn)>;
4609 def : Pat<(i64 (any_lround f32:$Rn)),
4610 (FCVTASUXSr f32:$Rn)>;
4611 def : Pat<(i64 (any_lround f64:$Rn)),
4612 (FCVTASUXDr f64:$Rn)>;
4613 def : Pat<(i64 (any_llround f32:$Rn)),
4614 (FCVTASUXSr f32:$Rn)>;
4615 def : Pat<(i64 (any_llround f64:$Rn)),
4616 (FCVTASUXDr f64:$Rn)>;
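// For example, lround rounds to nearest with ties away from zero, which is
// precisely FCVTAS's rounding, so an i64 llround of a double is a single
// "fcvtas x0, d0" (illustrative).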
4618 //===----------------------------------------------------------------------===//
4619 // Scaled integer to floating point conversion instructions.
4620 //===----------------------------------------------------------------------===//
4622 defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
4623 defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
4625 def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
4626 (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
4627 def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
4628 (SCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
4629 def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
4630 (SCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;
4632 def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
4633 (SCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
4634 def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
4635 (SCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
4636 def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
4637 (SCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;
4639 def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
4640 (UCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
4641 def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
4642 (UCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
4643 def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
4644 (UCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;
4646 def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
4647 (UCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
4648 def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
4649 (UCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
4650 def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
4651 (UCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;
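// These match fixed-point conversions such as (float)x / 65536.0f, selecting
// the immediate form "scvtf s0, w0, #16" (a sketch; the divisor must be a
// power of two representable in the fbits field).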
4653 //===----------------------------------------------------------------------===//
4654 // Unscaled integer to floating point conversion instruction.
4655 //===----------------------------------------------------------------------===//
4657 defm FMOV : UnscaledConversion<"fmov">;
4659 // Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
4660 let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1,
4661 Predicates = [HasFPARMv8] in {
4662 def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
4663              Sched<[WriteF]>;
4664 def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
4665              Sched<[WriteF]>;
4666 def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
4667              Sched<[WriteF]>;
4668 }
4670 // Similarly add aliases
4671 def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
4672 Requires<[HasFullFP16]>;
4673 let Predicates = [HasFPARMv8] in {
4674 def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
4675 def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
4676 }
4678 def : Pat<(bf16 fpimm0),
4679 (FMOVH0)>;
4681 // Pattern for FP16 and BF16 immediates
4682 let Predicates = [HasFullFP16] in {
4683 def : Pat<(f16 fpimm:$in),
4684 (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>;
4686 def : Pat<(bf16 fpimm:$in),
4687 (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 bf16:$in)))>;
4688 }
4690 //===----------------------------------------------------------------------===//
4691 // Floating point conversion instruction.
4692 //===----------------------------------------------------------------------===//
4694 defm FCVT : FPConversion<"fcvt">;
4695 // Helper to get bf16 into fp32.
4696 def cvt_bf16_to_fp32 :
4697 OutPatFrag<(ops node:$Rn),
4698 (f32 (COPY_TO_REGCLASS
4699 (i32 (UBFMWri
4700 (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
4701 node:$Rn, hsub), GPR32)),
4702 (i64 (i32shift_a (i64 16))),
4703 (i64 (i32shift_b (i64 16))))),
4704 FPR32))>;
4705 // Pattern for bf16 -> fp32.
4706 def : Pat<(f32 (any_fpextend (bf16 FPR16:$Rn))),
4707 (cvt_bf16_to_fp32 FPR16:$Rn)>;
4708 // Pattern for bf16 -> fp64.
4709 def : Pat<(f64 (any_fpextend (bf16 FPR16:$Rn))),
4710 (FCVTDSr (f32 (cvt_bf16_to_fp32 FPR16:$Rn)))>;
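// Bit-level illustration: bf16 is the high half of an f32, so the extension
// is a 16-bit left shift of the raw bits, e.g. bf16 0x3F80 (1.0) << 16 ==
// 0x3F800000 == f32 1.0; the UBFM in the helper above performs that shift.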
4712 //===----------------------------------------------------------------------===//
4713 // Floating point single operand instructions.
4714 //===----------------------------------------------------------------------===//
4716 defm FABS : SingleOperandFPDataNoException<0b0001, "fabs", fabs>;
4717 defm FMOV : SingleOperandFPDataNoException<0b0000, "fmov">;
4718 defm FNEG : SingleOperandFPDataNoException<0b0010, "fneg", fneg>;
4719 defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>;
4720 defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>;
4721 defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>;
4722 defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>;
4723 defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>;
4725 defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>;
4726 defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>;
4728 let SchedRW = [WriteFDiv] in {
4729 defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>;
4730 }
4732 let Predicates = [HasFRInt3264] in {
4733 defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
4734 defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
4735 defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
4736 defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
4737 }
4739 // Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions
4740 def : Pat<(v1f64 (int_aarch64_neon_frint32z (v1f64 FPR64:$Rn))),
4741 (FRINT32ZDr FPR64:$Rn)>;
4742 def : Pat<(v1f64 (int_aarch64_neon_frint64z (v1f64 FPR64:$Rn))),
4743 (FRINT64ZDr FPR64:$Rn)>;
4744 def : Pat<(v1f64 (int_aarch64_neon_frint32x (v1f64 FPR64:$Rn))),
4745 (FRINT32XDr FPR64:$Rn)>;
4746 def : Pat<(v1f64 (int_aarch64_neon_frint64x (v1f64 FPR64:$Rn))),
4747 (FRINT64XDr FPR64:$Rn)>;
4749 // Emitting strict_lrint as two instructions is valid because any exception
4750 // can occur in exactly one of the two instructions: e.g. if the input is not
4751 // an integer, the inexact exception is raised by the FRINTX but not by the
4752 // subsequent FCVTZS, since the output of FRINTX is already an integer.
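// For example, an f32 -> i32 lrint lowers to the pair (assumed assembly):
//   frintx s0, s0    // round to integral, may raise inexact
//   fcvtzs w0, s0    // convert; exact since the input is now integral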
4753 let Predicates = [HasFullFP16] in {
4754 def : Pat<(i32 (any_lrint f16:$Rn)),
4755 (FCVTZSUWHr (FRINTXHr f16:$Rn))>;
4756 def : Pat<(i64 (any_lrint f16:$Rn)),
4757 (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
4758 def : Pat<(i64 (any_llrint f16:$Rn)),
4759 (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
4760 }
4761 def : Pat<(i32 (any_lrint f32:$Rn)),
4762 (FCVTZSUWSr (FRINTXSr f32:$Rn))>;
4763 def : Pat<(i32 (any_lrint f64:$Rn)),
4764 (FCVTZSUWDr (FRINTXDr f64:$Rn))>;
4765 def : Pat<(i64 (any_lrint f32:$Rn)),
4766 (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
4767 def : Pat<(i64 (any_lrint f64:$Rn)),
4768 (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
4769 def : Pat<(i64 (any_llrint f32:$Rn)),
4770 (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
4771 def : Pat<(i64 (any_llrint f64:$Rn)),
4772 (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
4774 //===----------------------------------------------------------------------===//
4775 // Floating point two operand instructions.
4776 //===----------------------------------------------------------------------===//
4778 defm FADD : TwoOperandFPData<0b0010, "fadd", any_fadd>;
4779 let SchedRW = [WriteFDiv] in {
4780 defm FDIV : TwoOperandFPData<0b0001, "fdiv", any_fdiv>;
4781 }
4782 defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>;
4783 defm FMAX : TwoOperandFPData<0b0100, "fmax", any_fmaximum>;
4784 defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>;
4785 defm FMIN : TwoOperandFPData<0b0101, "fmin", any_fminimum>;
4786 let SchedRW = [WriteFMul] in {
4787 defm FMUL : TwoOperandFPData<0b0000, "fmul", any_fmul>;
4788 defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
4789 }
4790 defm FSUB : TwoOperandFPData<0b0011, "fsub", any_fsub>;
4792 multiclass FMULScalarFromIndexedLane0Patterns<string inst,
4793 string inst_f16_suffix,
4794 string inst_f32_suffix,
4795 string inst_f64_suffix,
4796 SDPatternOperator OpNode,
4797 list<Predicate> preds = []> {
4798 let Predicates = !listconcat(preds, [HasFullFP16]) in {
4799 def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
4800 (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
4801 (!cast<Instruction>(inst # inst_f16_suffix)
4802 FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)))>;
4803 }
4804 let Predicates = preds in {
4805 def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
4806 (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))),
4807 (!cast<Instruction>(inst # inst_f32_suffix)
4808 FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>;
4809 def : Pat<(f64 (OpNode (f64 FPR64:$Rn),
4810 (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))),
4811 (!cast<Instruction>(inst # inst_f64_suffix)
4812 FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>;
4813 }
4814 }
4816 defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr",
4817 any_fmul>;
4819 // Match reassociated forms of FNMUL.
4820 def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
4821 (FNMULHrr FPR16:$a, FPR16:$b)>,
4822 Requires<[HasFullFP16]>;
4823 def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)),
4824 (FNMULSrr FPR32:$a, FPR32:$b)>;
4825 def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)),
4826 (FNMULDrr FPR64:$a, FPR64:$b)>;
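// FNMUL computes -(Rn * Rm), so a separate negate feeding a multiply folds
// into one instruction, e.g. (illustrative) "fneg s0, s0" + "fmul s0, s0, s1"
// becomes "fnmul s0, s0, s1".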
4828 def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4829 (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
4830 def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4831 (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
4832 def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4833 (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
4834 def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4835 (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
4837 //===----------------------------------------------------------------------===//
4838 // Floating point three operand instructions.
4839 //===----------------------------------------------------------------------===//
4841 defm FMADD : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
4842 defm FMSUB : ThreeOperandFPData<0, 1, "fmsub",
4843 TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
4844 defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
4845 TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
4846 defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
4847 TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
4849 // The following def pats catch the case where the LHS of an FMA is negated.
4850 // The TriOpFrag above catches the case where the middle operand is negated.
4852 // N.b. FMSUB etc. have the accumulator at the *end* of (ins), unlike
4853 // the NEON variant.
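// For reference, the scalar semantics are:
//   FMADD:  Rd = Ra + Rn*Rm        FMSUB:  Rd = Ra - Rn*Rm
//   FNMADD: Rd = -Ra - Rn*Rm       FNMSUB: Rd = -Ra + Rn*Rm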
4855 // First we handle a + (-b)*c, i.e. a - b*c, which maps onto FMSUB:
4857 let Predicates = [HasNEON, HasFullFP16] in
4858 def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
4859 (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
4861 def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
4862 (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
4864 def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
4865 (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
4867 // Now it's time for "(-a) + (-b)*c"
4869 let Predicates = [HasNEON, HasFullFP16] in
4870 def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
4871 (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
4873 def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
4874 (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
4876 def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
4877 (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
4879 //===----------------------------------------------------------------------===//
4880 // Floating point comparison instructions.
4881 //===----------------------------------------------------------------------===//
4883 defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
4884 defm FCMP : FPComparison<0, "fcmp", AArch64any_fcmp>;
4886 //===----------------------------------------------------------------------===//
4887 // Floating point conditional comparison instructions.
4888 //===----------------------------------------------------------------------===//
4890 defm FCCMPE : FPCondComparison<1, "fccmpe">;
4891 defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>;
4893 //===----------------------------------------------------------------------===//
4894 // Floating point conditional select instruction.
4895 //===----------------------------------------------------------------------===//
4897 defm FCSEL : FPCondSelect<"fcsel">;
4899 let Predicates = [HasFullFP16] in
4900 def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)),
4901 (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>;
4903 // CSEL instructions providing f128 types need to be handled by a
4904 // pseudo-instruction since the eventual code will need to introduce basic
4905 // blocks and control flow.
4906 let Predicates = [HasFPARMv8] in
4907 def F128CSEL : Pseudo<(outs FPR128:$Rd),
4908 (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
4909 [(set (f128 FPR128:$Rd),
4910 (AArch64csel FPR128:$Rn, FPR128:$Rm,
4911 (i32 imm:$cond), NZCV))]> {
4913 let usesCustomInserter = 1;
4914 let hasNoSchedulingInfo = 1;
4915 }
4917 //===----------------------------------------------------------------------===//
4918 // Instructions used for emitting unwind opcodes on ARM64 Windows.
4919 //===----------------------------------------------------------------------===//
4920 let isPseudo = 1 in {
4921 def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
4922 def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
4923 def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
4924 def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4925 def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4926 def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4927 def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4928 def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4929 def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4930 def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4931 def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4932 def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
4933 def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
4934 def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
4935 def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
4936 def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
4937 def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
4938 def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>;
4939 def SEH_SaveAnyRegQP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4940 def SEH_SaveAnyRegQPX : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4941 }
4942 //===----------------------------------------------------------------------===//
4943 // Pseudo instructions for Windows EH
4944 //===----------------------------------------------------------------------===//
4945 let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
4946 isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
4947 def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
4948 let usesCustomInserter = 1 in
4949 def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
4950 Sched<[]>;
4951 }
4953 // Pseudo instructions for homogeneous prolog/epilog
4954 let isPseudo = 1 in {
4955 // Save CSRs in order, {FPOffset}
4956 def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
4957 // Restore CSRs in order
4958 def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
4959 }
4961 //===----------------------------------------------------------------------===//
4962 // Floating point immediate move.
4963 //===----------------------------------------------------------------------===//
4965 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
4966 defm FMOV : FPMoveImmediate<"fmov">;
4967 }
4969 let Predicates = [HasFullFP16] in {
4970 def : Pat<(bf16 fpimmbf16:$in),
4971 (FMOVHi (fpimm16XForm bf16:$in))>;
4972 }
4974 //===----------------------------------------------------------------------===//
4975 // Advanced SIMD two vector instructions.
4976 //===----------------------------------------------------------------------===//
4978 defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
4979 AArch64uabd>;
4980 // Match UABDL in log2-shuffle patterns.
4981 def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
4982 (zext (v8i8 V64:$opB))))),
4983 (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
4984 def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
4985 (zext (extract_high_v16i8 (v16i8 V128:$opB)))))),
4986 (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
4987 def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
4988 (zext (v4i16 V64:$opB))))),
4989 (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
4990 def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))),
4991 (zext (extract_high_v8i16 (v8i16 V128:$opB)))))),
4992 (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
4993 def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
4994 (zext (v2i32 V64:$opB))))),
4995 (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
4996 def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))),
4997 (zext (extract_high_v4i32 (v4i32 V128:$opB)))))),
4998 (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
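// For example (illustrative), a widening sum-of-absolute-differences loop
//   acc += |(u16)a[i] - (u16)b[i]|   // a, b are u8 arrays
// vectorizes through these patterns to "uabdl v0.8h, v1.8b, v2.8b".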
5000 defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
5001 defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
5002 defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
5003 defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
5004 defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
5005 defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
5006 defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
5007 defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
5008 defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
5009 defm FABS : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>;
5011 def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
5012 (CMLTv8i8rz V64:$Rn)>;
5013 def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
5014 (CMLTv4i16rz V64:$Rn)>;
5015 def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
5016 (CMLTv2i32rz V64:$Rn)>;
5017 def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
5018 (CMLTv16i8rz V128:$Rn)>;
5019 def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
5020 (CMLTv8i16rz V128:$Rn)>;
5021 def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
5022 (CMLTv4i32rz V128:$Rn)>;
5023 def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
5024 (CMLTv2i64rz V128:$Rn)>;
5026 defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
5027 defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
5028 defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
5029 defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
5030 defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
5031 defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
5032 defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
5033 defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
5034 def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
5035 (FCVTLv4i16 V64:$Rn)>;
5036 def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
5037 (i64 4)))),
5038 (FCVTLv8i16 V128:$Rn)>;
5039 def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
5040 (FCVTLv2i32 V64:$Rn)>;
5041 def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
5042 (FCVTLv4i32 V128:$Rn)>;
5043 def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))),
5044 (FCVTLv4i16 V64:$Rn)>;
5045 def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))),
5046 (FCVTLv8i16 V128:$Rn)>;
5048 defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
5049 defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
5050 defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
5051 defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
5052 defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
5053 def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
5054 (FCVTNv4i16 V128:$Rn)>;
5055 def : Pat<(concat_vectors V64:$Rd,
5056 (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
5057 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
5058 def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
5059 (FCVTNv2i32 V128:$Rn)>;
5060 def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),
5061 (FCVTNv4i16 V128:$Rn)>;
5062 def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))),
5063 (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
5064 def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))),
5065 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
5066 defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
5067 defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
5068 defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
5069 int_aarch64_neon_fcvtxn>;
5070 defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
5071 defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
5073 // AArch64's FCVT instructions saturate when out of range.
5074 multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
5075 let Predicates = [HasFullFP16] in {
5076 def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
5077 (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
5078 def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
5079 (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
5080 }
5081 def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
5082 (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
5083 def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
5084 (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
5085 def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
5086 (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
5087 }
5088 defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
5089 defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
5091 def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
5092 def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
5093 def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
5094 def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
5095 def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;
5097 def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
5098 def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
5099 def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
5100 def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
5101 def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;
5103 defm FNEG : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
5104 defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
5105 defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
5106 defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
5107 defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>;
5108 defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>;
5109 defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
5110 defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
5111 defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;
5113 let Predicates = [HasFRInt3264] in {
5114 defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
5115 defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
5116 defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
5117 defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
5118 } // HasFRInt3264
5120 defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
5121 defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
5122 defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
5123 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
5124 defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
5125 // Aliases for MVN -> NOT.
5126 let Predicates = [HasNEON] in {
5127 def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
5128 (NOTv8i8 V64:$Vd, V64:$Vn)>;
5129 def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
5130 (NOTv16i8 V128:$Vd, V128:$Vn)>;
5131 }
5133 def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
5134 def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
5135 def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
5136 def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
5137 def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
5138 def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
5140 defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
5141 defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
5142 defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
5143 defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
5144 defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
5145 BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >;
5146 defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>;
5147 defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
5148 defm SHLL : SIMDVectorLShiftLongBySizeBHS;
5149 defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
5150 defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
5151 defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
5152 defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
5153 defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
5154 defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
5155 BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
5156 defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
5157 defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
5158 defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
5159 defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
5160 defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
5161 defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
5162 defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
5164 def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
5165 def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
5166 def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
5167 def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
5168 def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
5169 def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
5170 def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
5171 def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
5172 def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
5173 def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
5175 // Patterns for vector long shift (by element width). These need to match all
5176 // three of zext, sext and anyext so it's easier to pull the patterns out of the
5177 // definition.
5178 multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
5179 def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
5180 (SHLLv8i8 V64:$Rn)>;
5181 def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
5182 (SHLLv16i8 V128:$Rn)>;
5183 def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
5184 (SHLLv4i16 V64:$Rn)>;
5185 def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
5186 (SHLLv8i16 V128:$Rn)>;
5187 def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
5188 (SHLLv2i32 V64:$Rn)>;
5189 def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
5190 (SHLLv4i32 V128:$Rn)>;
5191 }
5193 defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
5194 defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
5195 defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
5197 // Constant vector values, used in the S/UQXTN patterns below.
5198 def VImmFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
5199 def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
5200 def VImm7F: PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
5201 def VImm80: PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
5202 def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
5203 def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
5205 // trunc(umin(X, 255)) -> UQXTN v8i8
5206 def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
5207 (UQXTNv8i8 V128:$Vn)>;
5208 // trunc(umin(X, 65535)) -> UQXTN v4i16
5209 def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
5210 (UQXTNv4i16 V128:$Vn)>;
5211 // trunc(smin(smax(X, -128), 127)) -> SQXTN
5212 // with reversed min/max
5213 def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
5214 (v8i16 VImm7F)))),
5215 (SQXTNv8i8 V128:$Vn)>;
5216 def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
5217 (v8i16 VImm80)))),
5218 (SQXTNv8i8 V128:$Vn)>;
5219 // trunc(smin(smax(X, -32768), 32767)) -> SQXTN
5220 // with reversed min/max
5221 def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
5222 (v4i32 VImm7FFF)))),
5223 (SQXTNv4i16 V128:$Vn)>;
5224 def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
5225 (v4i32 VImm8000)))),
5226 (SQXTNv4i16 V128:$Vn)>;
5228 // concat_vectors(Vd, trunc(smin(smax(Vn, -128), 127))) -> SQXTN2(Vd, Vn)
5229 // with reversed min/max
5230 def : Pat<(v16i8 (concat_vectors
5231 (v8i8 V64:$Vd),
5232 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
5233 (v8i16 VImm7F)))))),
5234 (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5235 def : Pat<(v16i8 (concat_vectors
5236 (v8i8 V64:$Vd),
5237 (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
5238 (v8i16 VImm80)))))),
5239 (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5241 // concat_vectors(Vd, trunc(smin(smax(Vn, -32768), 32767))) -> SQXTN2(Vd, Vn)
5242 // with reversed min/max
5243 def : Pat<(v8i16 (concat_vectors
5244 (v4i16 V64:$Vd),
5245 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
5246 (v4i32 VImm7FFF)))))),
5247 (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5248 def : Pat<(v8i16 (concat_vectors
5249 (v4i16 V64:$Vd),
5250 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
5251 (v4i32 VImm8000)))))),
5252 (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5254 // Select BSWAP vector instructions into REV instructions
5255 def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))),
5256 (v4i16 (REV16v8i8 (v4i16 V64:$Rn)))>;
5257 def : Pat<(v8i16 (bswap (v8i16 V128:$Rn))),
5258 (v8i16 (REV16v16i8 (v8i16 V128:$Rn)))>;
5259 def : Pat<(v2i32 (bswap (v2i32 V64:$Rn))),
5260 (v2i32 (REV32v8i8 (v2i32 V64:$Rn)))>;
5261 def : Pat<(v4i32 (bswap (v4i32 V128:$Rn))),
5262 (v4i32 (REV32v16i8 (v4i32 V128:$Rn)))>;
5263 def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))),
5264 (v2i64 (REV64v16i8 (v2i64 V128:$Rn)))>;
5266 //===----------------------------------------------------------------------===//
5267 // Advanced SIMD three vector instructions.
5268 //===----------------------------------------------------------------------===//
5270 defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
5271 defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>;
5272 defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
5273 defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
5274 defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
5275 defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
5276 defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
5277 defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
5278 foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
5279 def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
5280 }
5281 defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
5282 let Predicates = [HasNEON] in {
5283 foreach VT = [ v2f32, v4f32, v2f64 ] in
5284 def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
5285 }
5286 let Predicates = [HasNEON, HasFullFP16] in {
5287 foreach VT = [ v4f16, v8f16 ] in
5288 def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
5289 }
5290 defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>;
5291 defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>;
5292 defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
5293 defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
5294 defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
5295 defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
5296 defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
5297 defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>;
5298 defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
5299 defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>;
5300 defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
5301 defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>;
5302 defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
5303 defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
5304 defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
5305 defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;
5307 // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
5308 // instruction expects the addend first, while the fma intrinsic puts it last.
5309 defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
5310 TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
5311 defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
5312 TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
5314 defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
5315 defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
5316 defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
5317 defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
5318 defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;
5320 // MLA and MLS are generated by the MachineCombiner pass.
5321 defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
5322 defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
5324 defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
5325 defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
5326 defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
5327 TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
5328 defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
5329 defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
5330 defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
5331 defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
5332 defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
5333 defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
5334 defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
5335 defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
5336 defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
5337 defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
5338 defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
5339 defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
5340 defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
5341 defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
5342 defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
5343 defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
5344 defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
5345 defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
5346 TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
5347 defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
5348 defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
5349 defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
5350 defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
5351 defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
5352 defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
5353 defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
5354 defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
5355 defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
5356 defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
5357 defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
5358 defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
5359 defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
5360 defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
5361 defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
5362 int_aarch64_neon_sqrdmlah>;
5363 defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
5364 int_aarch64_neon_sqrdmlsh>;
5366 // Extra saturation patterns, beyond the intrinsic matches above
5367 defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
5368 defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
5369 defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
5370 defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
5372 defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
5373 defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
5374 BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
5375 defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
5376 defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
5377 BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
5378 defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
5380 // Pseudo bitwise select pattern BSP.
5381 // It is expanded into BSL/BIT/BIF after register allocation.
5382 defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
5383 (and (vnot node:$LHS), node:$RHS))>>;
5384 defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
5385 defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit">;
5386 defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;
5388 def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
5389 (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5390 def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
5391 (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5392 def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
5393 (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5394 def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
5395 (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5397 def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
5398 (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5399 def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
5400 (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5401 def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
5402 (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5403 def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
5404 (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5406 let Predicates = [HasNEON] in {
5407 def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
5408 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
5409 def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
5410 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5411 def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
5412 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5413 def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
5414 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5416 def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
5417 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
5418 def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
5419 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5420 def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
5421 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5422 def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
5423 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5425 def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
5426 "|cmls.8b\t$dst, $src1, $src2}",
5427 (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5428 def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
5429 "|cmls.16b\t$dst, $src1, $src2}",
5430 (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5431 def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
5432 "|cmls.4h\t$dst, $src1, $src2}",
5433 (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5434 def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
5435 "|cmls.8h\t$dst, $src1, $src2}",
5436 (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5437 def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
5438 "|cmls.2s\t$dst, $src1, $src2}",
5439 (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5440 def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
5441 "|cmls.4s\t$dst, $src1, $src2}",
5442 (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5443 def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
5444 "|cmls.2d\t$dst, $src1, $src2}",
5445 (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5447 def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
5448 "|cmlo.8b\t$dst, $src1, $src2}",
5449 (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5450 def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
5451 "|cmlo.16b\t$dst, $src1, $src2}",
5452 (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5453 def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
5454 "|cmlo.4h\t$dst, $src1, $src2}",
5455 (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5456 def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
5457 "|cmlo.8h\t$dst, $src1, $src2}",
5458 (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5459 def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
5460 "|cmlo.2s\t$dst, $src1, $src2}",
5461 (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5462 def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
5463 "|cmlo.4s\t$dst, $src1, $src2}",
5464 (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5465 def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
5466 "|cmlo.2d\t$dst, $src1, $src2}",
5467 (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5469 def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
5470 "|cmle.8b\t$dst, $src1, $src2}",
5471 (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5472 def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
5473 "|cmle.16b\t$dst, $src1, $src2}",
5474 (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5475 def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
5476 "|cmle.4h\t$dst, $src1, $src2}",
5477 (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5478 def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
5479 "|cmle.8h\t$dst, $src1, $src2}",
5480 (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5481 def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
5482 "|cmle.2s\t$dst, $src1, $src2}",
5483 (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5484 def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
5485 "|cmle.4s\t$dst, $src1, $src2}",
5486 (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5487 def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
5488 "|cmle.2d\t$dst, $src1, $src2}",
5489 (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5491 def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
5492 "|cmlt.8b\t$dst, $src1, $src2}",
5493 (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5494 def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
5495 "|cmlt.16b\t$dst, $src1, $src2}",
5496 (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5497 def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
5498 "|cmlt.4h\t$dst, $src1, $src2}",
5499 (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5500 def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
5501 "|cmlt.8h\t$dst, $src1, $src2}",
5502 (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5503 def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
5504 "|cmlt.2s\t$dst, $src1, $src2}",
5505 (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5506 def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
5507 "|cmlt.4s\t$dst, $src1, $src2}",
5508 (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5509 def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
5510 "|cmlt.2d\t$dst, $src1, $src2}",
5511 (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5513 let Predicates = [HasNEON, HasFullFP16] in {
5514 def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
5515 "|fcmle.4h\t$dst, $src1, $src2}",
5516 (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5517 def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
5518 "|fcmle.8h\t$dst, $src1, $src2}",
5519 (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5520 }
5521 def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
5522 "|fcmle.2s\t$dst, $src1, $src2}",
5523 (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5524 def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
5525 "|fcmle.4s\t$dst, $src1, $src2}",
5526 (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5527 def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
5528 "|fcmle.2d\t$dst, $src1, $src2}",
5529 (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5531 let Predicates = [HasNEON, HasFullFP16] in {
5532 def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
5533 "|fcmlt.4h\t$dst, $src1, $src2}",
5534 (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5535 def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
5536 "|fcmlt.8h\t$dst, $src1, $src2}",
5537 (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5538 }
5539 def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
5540 "|fcmlt.2s\t$dst, $src1, $src2}",
5541 (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5542 def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
5543 "|fcmlt.4s\t$dst, $src1, $src2}",
5544 (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5545 def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
5546 "|fcmlt.2d\t$dst, $src1, $src2}",
5547 (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5549 let Predicates = [HasNEON, HasFullFP16] in {
5550 def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
5551 "|facle.4h\t$dst, $src1, $src2}",
5552 (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5553 def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
5554 "|facle.8h\t$dst, $src1, $src2}",
5555 (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5556 }
5557 def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
5558 "|facle.2s\t$dst, $src1, $src2}",
5559 (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5560 def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
5561 "|facle.4s\t$dst, $src1, $src2}",
5562 (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5563 def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
5564 "|facle.2d\t$dst, $src1, $src2}",
5565 (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5567 let Predicates = [HasNEON, HasFullFP16] in {
5568 def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
5569 "|faclt.4h\t$dst, $src1, $src2}",
5570 (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5571 def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
5572 "|faclt.8h\t$dst, $src1, $src2}",
5573 (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5574 }
5575 def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
5576 "|faclt.2s\t$dst, $src1, $src2}",
5577 (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5578 def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
5579 "|faclt.4s\t$dst, $src1, $src2}",
5580 (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5581 def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
5582 "|faclt.2d\t$dst, $src1, $src2}",
5583 (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5584 } // Predicates = [HasNEON]
5586 //===----------------------------------------------------------------------===//
5587 // Advanced SIMD three scalar instructions.
5588 //===----------------------------------------------------------------------===//
5590 defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>;
5591 defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
5592 defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
5593 defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
5594 defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
5595 defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
5596 defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
5597 defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
5598 def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5599 (FABD64 FPR64:$Rn, FPR64:$Rm)>;
5600 let Predicates = [HasNEON, HasFullFP16] in {
5601 def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
5602 }
5603 let Predicates = [HasNEON] in {
5604 def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
5605 def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
5606 }
5607 defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
5608 int_aarch64_neon_facge>;
5609 defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
5610 int_aarch64_neon_facgt>;
5611 defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
5612 defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
5613 defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
5614 defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>;
5615 defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>;
5616 defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>;
5617 defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
5618 defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
5619 defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
5620 defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
5621 defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
5622 defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
5623 defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>;
5624 defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>;
5625 defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
5626 defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
5627 defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
5628 defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
5629 defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
5630 defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
5631 defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
5632 let Predicates = [HasRDM] in {
5633 defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
5634 defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
5635 def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
5636 (i32 FPR32:$Rm))),
5637 (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5638 def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
5639 (i32 FPR32:$Rm))),
5640 (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5641 }
5643 defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
5644 int_aarch64_neon_fmulx,
5645 [HasNEONorSME]>;
5647 let Predicates = [HasNEON] in {
5648 def : InstAlias<"cmls $dst, $src1, $src2",
5649 (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5650 def : InstAlias<"cmle $dst, $src1, $src2",
5651 (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5652 def : InstAlias<"cmlo $dst, $src1, $src2",
5653 (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5654 def : InstAlias<"cmlt $dst, $src1, $src2",
5655 (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5656 }
5657 let Predicates = [HasFPARMv8] in {
5658 def : InstAlias<"fcmle $dst, $src1, $src2",
5659 (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5660 def : InstAlias<"fcmle $dst, $src1, $src2",
5661 (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5662 def : InstAlias<"fcmlt $dst, $src1, $src2",
5663 (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5664 def : InstAlias<"fcmlt $dst, $src1, $src2",
5665 (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5666 def : InstAlias<"facle $dst, $src1, $src2",
5667 (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5668 def : InstAlias<"facle $dst, $src1, $src2",
5669 (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5670 def : InstAlias<"faclt $dst, $src1, $src2",
5671 (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5672 def : InstAlias<"faclt $dst, $src1, $src2",
5673 (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5674 }
5676 //===----------------------------------------------------------------------===//
5677 // Advanced SIMD three scalar instructions (mixed operands).
5678 //===----------------------------------------------------------------------===//
5679 defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
5680 int_aarch64_neon_sqdmulls_scalar>;
5681 defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
5682 defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;
5684 def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
5685 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
5686 (i32 FPR32:$Rm))))),
5687 (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5688 def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
5689 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
5690 (i32 FPR32:$Rm))))),
5691 (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5693 //===----------------------------------------------------------------------===//
5694 // Advanced SIMD two scalar instructions.
5695 //===----------------------------------------------------------------------===//
5697 defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs, [HasNoCSSC]>;
5698 defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
5699 defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
5700 defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
5701 defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
5702 defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
5703 defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
5704 defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
5705 defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
5706 defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
5707 defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
5708 defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
5709 defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
5710 defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
5711 defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
5712 defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
5713 defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
5714 defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
5715 defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
5716 def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
5717 defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
5718 defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
5719 defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorSME>;
5720 defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorSME>;
5721 defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorSME>;
5722 defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
5723 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
5724 defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
5725 defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
5726 defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
5727 defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
5728 defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
5729 defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
5730 int_aarch64_neon_suqadd>;
5731 defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
5732 defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
5733 defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
5734 int_aarch64_neon_usqadd>;
5736 def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
5737 (CMLTv1i64rz V64:$Rn)>;
5739 // Round FP64 to BF16.
5740 let Predicates = [HasNEONorSME, HasBF16] in
5741 def : Pat<(bf16 (any_fpround (f64 FPR64:$Rn))),
5742 (BFCVT (FCVTXNv1i64 $Rn))>;
5744 def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
5745 (FCVTASv1i64 FPR64:$Rn)>;
5746 def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
5747 (FCVTAUv1i64 FPR64:$Rn)>;
5748 def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
5749 (FCVTMSv1i64 FPR64:$Rn)>;
5750 def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
5751 (FCVTMUv1i64 FPR64:$Rn)>;
5752 def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
5753 (FCVTNSv1i64 FPR64:$Rn)>;
5754 def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
5755 (FCVTNUv1i64 FPR64:$Rn)>;
5756 def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
5757 (FCVTPSv1i64 FPR64:$Rn)>;
5758 def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
5759 (FCVTPUv1i64 FPR64:$Rn)>;
5760 def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
5761 (FCVTZSv1i64 FPR64:$Rn)>;
5762 def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
5763 (FCVTZUv1i64 FPR64:$Rn)>;
5765 def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
5766 (FRECPEv1f16 FPR16:$Rn)>;
5767 def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
5768 (FRECPEv1i32 FPR32:$Rn)>;
5769 def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
5770 (FRECPEv1i64 FPR64:$Rn)>;
5771 def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
5772 (FRECPEv1i64 FPR64:$Rn)>;
5774 def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
5775 (FRECPEv1i32 FPR32:$Rn)>;
5776 def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
5777 (FRECPEv2f32 V64:$Rn)>;
5778 def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
5779 (FRECPEv4f32 FPR128:$Rn)>;
5780 def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
5781 (FRECPEv1i64 FPR64:$Rn)>;
5782 def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
5783 (FRECPEv1i64 FPR64:$Rn)>;
5784 def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
5785 (FRECPEv2f64 FPR128:$Rn)>;
5787 def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
5788 (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
5789 def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
5790 (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
5791 def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
5792 (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
5793 def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
5794 (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
5795 def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
5796 (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;
5798 def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
5799 (FRECPXv1f16 FPR16:$Rn)>;
5800 def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
5801 (FRECPXv1i32 FPR32:$Rn)>;
5802 def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
5803 (FRECPXv1i64 FPR64:$Rn)>;
5805 def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
5806 (FRSQRTEv1f16 FPR16:$Rn)>;
5807 def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
5808 (FRSQRTEv1i32 FPR32:$Rn)>;
5809 def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
5810 (FRSQRTEv1i64 FPR64:$Rn)>;
5811 def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
5812 (FRSQRTEv1i64 FPR64:$Rn)>;
5814 def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
5815 (FRSQRTEv1i32 FPR32:$Rn)>;
5816 def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
5817 (FRSQRTEv2f32 V64:$Rn)>;
5818 def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
5819 (FRSQRTEv4f32 FPR128:$Rn)>;
5820 def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
5821 (FRSQRTEv1i64 FPR64:$Rn)>;
5822 def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
5823 (FRSQRTEv1i64 FPR64:$Rn)>;
5824 def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
5825 (FRSQRTEv2f64 FPR128:$Rn)>;
5827 def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
5828 (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
5829 def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
5830 (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
5831 def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
5832 (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
5833 def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
5834 (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
5835 def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
5836 (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;
5838 // Some float -> int -> float conversion patterns for which we want to keep the
5839 // int values in FP registers using the corresponding NEON instructions to
5840 // avoid more costly int <-> fp register transfers.
5841 let Predicates = [HasNEON] in {
5842 def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
5843 (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
5844 def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
5845 (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
5846 def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
5847 (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
5848 def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
5849 (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
5851 let Predicates = [HasFullFP16] in {
5852 def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
5853 (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
5854 def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
5855 (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
5856 }
5857 // If an integer is about to be converted to a floating point value,
5858 // just load it on the floating point unit.
5859 // Here are the patterns for 8- and 16-bit values to float.
5861 multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
5862 SDPatternOperator loadop, Instruction UCVTF,
5863 ROAddrMode ro, Instruction LDRW, Instruction LDRX,
5864 SubRegIndex sub> {
5865 def : Pat<(DstTy (uint_to_fp (SrcTy
5866 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
5867 ro.Wext:$extend))))),
5868 (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
5869 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
5870 sub))>;
5872 def : Pat<(DstTy (uint_to_fp (SrcTy
5873 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
5874 ro.Xext:$extend))))),
5875 (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
5876 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
5877 sub))>;
5878 }
5879 // 8-bits -> float.
5880 defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
5881 UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
5882 def : Pat <(f32 (uint_to_fp (i32
5883 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
5884 (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
5885 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
5886 def : Pat <(f32 (uint_to_fp (i32
5887 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
5888 (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
5889 (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
5890 // 16-bits -> float.
5891 defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
5892 UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
5893 def : Pat <(f32 (uint_to_fp (i32
5894 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
5895 (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
5896 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
5897 def : Pat <(f32 (uint_to_fp (i32
5898 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
5899 (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
5900 (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
5901 // 32-bit values are handled in the target-specific DAG combine
5902 // (performIntToFpCombine).
5903 // 64-bit integer to 32-bit floating point is not possible with
5904 // UCVTF on floating point registers (source and destination
5905 // must have the same size).
5907 // Here are the patterns for 8-, 16-, 32-, and 64-bit values to double.
5908 // 8-bits -> double.
5909 defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
5910 UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
5911 def : Pat <(f64 (uint_to_fp (i32
5912 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
5913 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5914 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
5915 def : Pat <(f64 (uint_to_fp (i32
5916 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
5917 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5918 (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
5919 // 16-bits -> double.
5920 defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
5921 UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
5922 def : Pat <(f64 (uint_to_fp (i32
5923 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
5924 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5925 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
5926 def : Pat <(f64 (uint_to_fp (i32
5927 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
5928 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5929 (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
5930 // 32-bits -> double.
5931 defm : UIntToFPROLoadPat<f64, i32, load,
5932 UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
5933 def : Pat <(f64 (uint_to_fp (i32
5934 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
5935 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5936 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
5937 def : Pat <(f64 (uint_to_fp (i32
5938 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
5939 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5940 (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
5941 // 64-bit -> double conversions are handled in the target-specific DAG
5942 // combine (performIntToFpCombine).
5943 } // let Predicates = [HasNEON]
5945 //===----------------------------------------------------------------------===//
5946 // Advanced SIMD three different-sized vector instructions.
5947 //===----------------------------------------------------------------------===//
5949 defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
5950 defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
5951 defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
5952 defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
5953 defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
5954 defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
5955 AArch64sabd>;
5956 defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
5957 AArch64sabd>;
5958 defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
5959 BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
5960 defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
5961 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
5962 defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
5963 TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
5964 defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
5965 TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
5966 defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
5967 defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
5968 int_aarch64_neon_sqadd>;
5969 defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
5970 int_aarch64_neon_sqsub>;
5971 defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
5972 int_aarch64_neon_sqdmull>;
5973 defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
5974 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
5975 defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
5976 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
5977 defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
5978 AArch64uabd>;
5979 defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
5980 BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
5981 defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
5982 BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
5983 defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
5984 TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
5985 defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
5986 TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
5987 defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
5988 defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
5989 BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
5990 defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
5991 BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
5993 // Additional patterns for [SU]ML[AS]L
5994 multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
5995 Instruction INST8B, Instruction INST4H, Instruction INST2S> {
5996 def : Pat<(v4i16 (opnode
5997 (v4i16 V64:$Ra),
5998 (v4i16 (extract_subvector
5999 (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
6000 (i64 0))))),
6001 (EXTRACT_SUBREG (v8i16 (INST8B
6002 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
6003 V64:$Rn, V64:$Rm)), dsub)>;
6004 def : Pat<(v2i32 (opnode
6005 (v2i32 V64:$Ra),
6006 (v2i32 (extract_subvector
6007 (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
6008 (i64 0))))),
6009 (EXTRACT_SUBREG (v4i32 (INST4H
6010 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
6011 V64:$Rn, V64:$Rm)), dsub)>;
6012 def : Pat<(v1i64 (opnode
6013 (v1i64 V64:$Ra),
6014 (v1i64 (extract_subvector
6015 (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
6016 (i64 0))))),
6017 (EXTRACT_SUBREG (v2i64 (INST2S
6018 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
6019 V64:$Rn, V64:$Rm)), dsub)>;
6020 }
6022 defm : Neon_mul_acc_widen_patterns<add, AArch64umull,
6023 UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
6024 defm : Neon_mul_acc_widen_patterns<add, AArch64smull,
6025 SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
6026 defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
6027 UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
6028 defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
6029 SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
6032 multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> {
6033 def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
6034 (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
6035 (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>;
6036 def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
6037 (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
6038 (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>;
6039 def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
6040 (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
6041 (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>;
6043 def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
6044 (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
6045 (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
6046 def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
6047 (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
6048 (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
6049 def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
6050 (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
6051 (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
6052 }
6054 defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
6055 defm : Neon_addl_extract_patterns<add, sext, "SADD">;
6056 defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
6057 defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
6059 // CodeGen patterns for addhn and subhn instructions, which can actually be
6060 // written in LLVM IR without too much difficulty.
6062 // Prioritize ADDHN and SUBHN over UZP2.
6063 let AddedComplexity = 10 in {
6065 // ADDHN
6066 def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
6067 (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
6068 def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6069 (i32 16))))),
6070 (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
6071 def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6072 (i32 32))))),
6073 (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
6074 def : Pat<(concat_vectors (v8i8 V64:$Rd),
6075 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6076 (i32 8))))),
6077 (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6078 V128:$Rn, V128:$Rm)>;
6079 def : Pat<(concat_vectors (v4i16 V64:$Rd),
6080 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6081 (i32 16))))),
6082 (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6083 V128:$Rn, V128:$Rm)>;
6084 def : Pat<(concat_vectors (v2i32 V64:$Rd),
6085 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6086 (i32 32))))),
6087 (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6088 V128:$Rn, V128:$Rm)>;
6090 // SUBHN
6091 def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
6092 (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
6093 def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6094 (i32 16))))),
6095 (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
6096 def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6097 (i32 32))))),
6098 (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
6099 def : Pat<(concat_vectors (v8i8 V64:$Rd),
6100 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6101 (i32 8))))),
6102 (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6103 V128:$Rn, V128:$Rm)>;
6104 def : Pat<(concat_vectors (v4i16 V64:$Rd),
6105 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6106 (i32 16))))),
6107 (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6108 V128:$Rn, V128:$Rm)>;
6109 def : Pat<(concat_vectors (v2i32 V64:$Rd),
6110 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6112 (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6113 V128:$Rn, V128:$Rm)>;
6115 } // AddedComplexity = 10
//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle extract_subvector to copy the upper 64-bits of a
  // 128-bit register.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate. The
  // top half of the other operand will be unset, but that doesn't matter as it
  // will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
                              V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                                      (AdjustExtImm imm:$imm)), dsub)>;
}

defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;
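// e.g. extracting the high half of a v16i8 becomes a single
// "ext vd.16b, vn.16b, vn.16b, #8" whose low 64 bits are then taken with a
// dsub extract; AdjustExtImm biases the immediate by 8 so a 64-bit EXT that
// starts in the high half reads the correct bytes of the 128-bit source.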
//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

def trunc_optional_assert_ext : PatFrags<(ops node:$op0),
                                         [(trunc node:$op0),
                                          (assertzext (trunc node:$op0)),
                                          (assertsext (trunc node:$op0))]>;

// concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y)
// concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y)
// concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y)
class concat_trunc_to_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy>
  : Pat<(ConcatTy (concat_vectors (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
                                  (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))),
        (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm)>;
def : concat_trunc_to_uzp1_pat<v8i16, v8i8, v16i8>;
def : concat_trunc_to_uzp1_pat<v4i32, v4i16, v8i16>;
def : concat_trunc_to_uzp1_pat<v2i64, v2i32, v4i32>;
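// e.g. concat(trunc(v4i32 %x), trunc(v4i32 %y)) is a single
// "uzp1 v0.8h, vx.8h, vy.8h": taking every even-numbered 16-bit lane of the
// two sources is exactly the truncation of each 32-bit element (little endian).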
// trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y))
// trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y))
// trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y))
class trunc_concat_trunc_to_xtn_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy,
                                         ValueType Ty>
  : Pat<(Ty (trunc_optional_assert_ext
                    (ConcatTy (concat_vectors
                                (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
                                (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))))),
        (!cast<Instruction>("XTN"#Ty) (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm))>;
def : trunc_concat_trunc_to_xtn_uzp1_pat<v4i32, v4i16, v8i16, v8i8>;
def : trunc_concat_trunc_to_xtn_uzp1_pat<v2i64, v2i32, v4i32, v4i16>;

def : Pat<(v8i8 (trunc (concat_vectors (v4i16 V64:$Vn), (v4i16 V64:$Vm)))),
          (UZP1v8i8 V64:$Vn, V64:$Vm)>;
def : Pat<(v4i16 (trunc (concat_vectors (v2i32 V64:$Vn), (v2i32 V64:$Vm)))),
          (UZP1v4i16 V64:$Vn, V64:$Vm)>;

def : Pat<(v16i8 (concat_vectors
                   (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
                   (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
          (UZP2v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors
                   (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
                   (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
          (UZP2v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors
                   (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
                   (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
          (UZP2v4i32 V128:$Vn, V128:$Vm)>;
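// e.g. pairing trunc(lshr(x, 16)) of two v4i32 sources is a single
// "uzp2 v0.8h, vx.8h, vy.8h": the odd-numbered 16-bit lanes are exactly the
// high halves of the 32-bit elements.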
//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                                       (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                                        (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
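// Architecturally, TBL writes zero for out-of-range indices while TBX leaves
// the corresponding destination byte unchanged, which is why the TBX patterns
// above are tied to $Rd.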
//----------------------------------------------------------------------------
// AdvSIMD LUT instructions
//----------------------------------------------------------------------------
let Predicates = [HasLUT] in {
  defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">;
  defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">;
}
//----------------------------------------------------------------------------
// AdvSIMD scalar DUP instruction
//----------------------------------------------------------------------------

defm DUP : SIMDScalarDUP<"mov">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
// Only the lower half of the result of the inner FADDP is used in the patterns
// below, so the second operand does not matter. Re-use the first input
// operand, so no additional dependencies need to be introduced.
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
          (FADDPv2i16p
            (EXTRACT_SUBREG
               (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
             dsub))>;
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
          (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
} // Predicates = [HasFullFP16]
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
          (FADDPv2i32p
            (EXTRACT_SUBREG
              (FADDPv4f32 V128:$Rn, V128:$Rn),
             dsub))>;
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
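// e.g. the v4f32 reduction above is a two-step pairwise tree:
//   faddp v0.4s, vn.4s, vn.4s   // lanes: n0+n1, n2+n3, (dup of same)
//   faddp s0, v0.2s             // (n0+n1) + (n2+n3)
// Reusing Vn as the second operand keeps the unused upper lanes harmless.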
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;
//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------

def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;

def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

// DUP from a 64-bit register to a 64-bit register is just a copy
def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;
def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v4bf16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v8bf16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                        imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                        imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                       imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;
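// e.g. (little-endian lane numbering) dup of trunc(extractelt(v2i64 %v, 1))
// to v8i8 becomes "dup v0.8b, vn.b[8]": VecIndex_x8 rescales the i64 lane
// index to the byte lane holding that element's least significant byte.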
// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;

def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                         VectorIndexB:$idx)))), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                         VectorIndexH:$idx)))), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
// bits of the destination register.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;

def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                       VectorIndexB:$idx)))), (i64 0xff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                       VectorIndexH:$idx)))), (i64 0xffff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;
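// UMOV into a W register zero-extends the element and implicitly zeroes bits
// [63:32] of the X register, so the 'and' mask above folds away; the i64 forms
// just wrap the 32-bit result with SUBREG_TO_REG.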
defm INS : SIMDIns;

def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

// The top bits will be zero from the FMOVWSr
def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
          (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;

def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
          (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                (i64 FPR64:$Rn), dsub))>;
def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
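// Note: INSERT_SUBREG into an IMPLICIT_DEF is just a subregister copy, so a
// scalar already held in an FPR becomes a vector for free, with the lanes
// above the scalar left undefined (which scalar_to_vector permits).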
def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>;
def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>;
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;
def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
                                 (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
                                 (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;

def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
                                (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;

def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexS:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))),
          (EXTRACT_SUBREG
            (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexH:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))),
          (EXTRACT_SUBREG
            (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                       VectorIndexB:$imm, GPR32:$Rm),
            dsub)>;

def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))),
          (EXTRACT_SUBREG
            (INSvi8lane (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexB:$imm, (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0)),
            dsub)>;
def : Pat<(v16i8 (vector_insert (v16i8 V128:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))),
          (INSvi8lane V128:$Rn, VectorIndexB:$imm,
                      (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0))>;
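// The 64-bit variants above all use the same trick: widen the 64-bit value
// into an undefined 128-bit register, perform the lane INS there, and take the
// low 64 bits back with an EXTRACT_SUBREG of dsub.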
// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME refactor to a shared class/def parameterized on vector type, vector
// index type and INS extension
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
                   VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane
                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
          )>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
                   VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane
                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
          )>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
                   VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane
                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
          )>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
                   VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane
                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
          )>;
multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  def : Pat<(VT128 (vector_insert V128:$src,
                     (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
                     (i64 imm:$Immd))),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  def : Pat<(VT128 (vector_insert V128:$src,
                     (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
                     (i64 imm:$Immd))),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                    (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
                    (i64 imm:$Immd))),
            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
                                 imm:$Immd, V128:$Rn, imm:$Immn),
                            dsub)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                    (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
                    (i64 imm:$Immd))),
            (EXTRACT_SUBREG
              (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                   (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
              dsub)>;
}

defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;

defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;
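// e.g. for the v4i32 instantiation,
// insertelement(%dst, extractelement(%src, n), m) becomes a single
// "mov v_dst.s[m], v_src.s[n]" (INSvi32lane), with SUBREG_TO_REG /
// EXTRACT_SUBREG moving 64-bit values through 128-bit registers as needed.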
// Insert from bitcast
// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))),
          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))),
          (EXTRACT_SUBREG
            (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
                         imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
            dsub)>;
def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), (i64 imm:$Immd))),
          (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;

// bitcast of an extract
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, ssub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, dsub)>;
// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka DUP here) if
// the lane number is anything other than zero.
def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))),
          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;

def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
          (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
// INS.
multiclass ConcatPat<ValueType DstTy, ValueType SrcTy> {
  def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
            (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
                         (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;

  // If the high lanes are zero we can instead emit a d->d register mov, which
  // will implicitly clear the upper bits.
  def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), immAllZerosV)),
            (SUBREG_TO_REG (i64 0), (FMOVDr V64:$Rn), dsub)>;

  // If the high lanes are undef we can just ignore them:
  def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
            (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
}

defm : ConcatPat<v2i64, v1i64>;
defm : ConcatPat<v2f64, v1f64>;
defm : ConcatPat<v4i32, v2i32>;
defm : ConcatPat<v4f32, v2f32>;
defm : ConcatPat<v8i16, v4i16>;
defm : ConcatPat<v8f16, v4f16>;
defm : ConcatPat<v8bf16, v4bf16>;
defm : ConcatPat<v16i8, v8i8>;
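// e.g. concat(v2f32 %lo, v2f32 %hi) becomes "mov v_lo.d[1], v_hi.d[0]" once
// both halves live in 128-bit registers; the immAllZerosV form instead uses
// "fmov d0, d_lo", which architecturally zeroes the top 64 bits.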
//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;
multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv
  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
              (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
              (i64 0))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;

  // Patterns for addp(addlp(x)) ==> addlv
  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
}

defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
// Patterns used for GlobalISel
multiclass SIMDAcrossLaneLongPairIntrinsicGISel<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv
  def : Pat<(i16 (vecreduce_add (v4i16 (addlp (v8i8 V64:$Rn))))),
            (!cast<Instruction>(Opc#"v8i8v") V64:$Rn)>;
  def : Pat<(i16 (vecreduce_add (v8i16 (addlp (v16i8 V128:$Rn))))),
            (!cast<Instruction>(Opc#"v16i8v") V128:$Rn)>;
  def : Pat<(i32 (vecreduce_add (v4i32 (addlp (v8i16 V128:$Rn))))),
            (!cast<Instruction>(Opc#"v8i16v") V128:$Rn)>;

  // Patterns for addp(addlp(x)) ==> addlv
  def : Pat<(i32 (vecreduce_add (v2i32 (addlp (v4i16 V64:$Rn))))),
            (!cast<Instruction>(Opc#"v4i16v") V64:$Rn)>;
  def : Pat<(i64 (vecreduce_add (v2i64 (addlp (v4i32 V128:$Rn))))),
            (!cast<Instruction>(Opc#"v4i32v") V128:$Rn)>;
}

defm : SIMDAcrossLaneLongPairIntrinsicGISel<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsicGISel<"SADDLV", AArch64saddlp>;
// Patterns for uaddlv(uaddlp(x)) ==> uaddlv
def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (i64 (EXTRACT_SUBREG
            (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)),
            dsub))>;

def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (i32 (EXTRACT_SUBREG
            (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
            ssub))>;

def : Pat<(v2i64 (AArch64uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (v2i64 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub))>;

def : Pat<(v4i32 (AArch64uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub))>;

def : Pat<(v4i32 (AArch64uaddlv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$op), hsub))>;
multiclass SIMDAcrossLaneLongReductionIntrinsic<string Opc, SDPatternOperator addlv> {
  def : Pat<(v4i32 (addlv (v8i8 V64:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v8i8v") V64:$Rn), hsub))>;

  def : Pat<(v4i32 (addlv (v4i16 V64:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v4i16v") V64:$Rn), ssub))>;

  def : Pat<(v4i32 (addlv (v16i8 V128:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v16i8v") V128:$Rn), hsub))>;

  def : Pat<(v4i32 (addlv (v8i16 V128:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v8i16v") V128:$Rn), ssub))>;

  def : Pat<(v2i64 (addlv (v4i32 V128:$Rn))),
            (v2i64 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v4i32v") V128:$Rn), dsub))>;
}

defm : SIMDAcrossLaneLongReductionIntrinsic<"UADDLV", AArch64uaddlv>;
defm : SIMDAcrossLaneLongReductionIntrinsic<"SADDLV", AArch64saddlv>;
// Patterns for across-vector intrinsics that have a node equivalent returning
// a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;

// If none did, fall back to the explicit patterns, consuming the vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
            (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,
            (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
            ssub), ssub)>;
}
multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
                                          SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it as smov
// already performed it.
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            (i64 0)))>;
}
multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
                                            SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
      (i32 (EXTRACT_SUBREG
        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
        ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
            maski8_or_more)),
      (i32 (EXTRACT_SUBREG
        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
          (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
        ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
      (i32 (EXTRACT_SUBREG
        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
          (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
        ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
            maski16_or_more)),
      (i32 (EXTRACT_SUBREG
        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
          (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
        ssub))>;
}
// For vecreduce_add, used by GlobalISel not SDAG
def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))),
          (i8 (ADDVv8i8v V64:$Rn))>;
def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))),
          (i8 (ADDVv16i8v V128:$Rn))>;
def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))),
          (i16 (ADDVv4i16v V64:$Rn))>;
def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))),
          (i16 (ADDVv8i16v V128:$Rn))>;
def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>;
def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))),
          (i32 (ADDVv4i32v V128:$Rn))>;
def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))),
          (i64 (ADDPv2i64p V128:$Rn))>;
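// e.g. an i32 vecreduce_add of v4i32 is a single "addv s0, v0.4s"; ADDV has no
// .2s form, so the v2i32 case uses a pairwise "addp v0.2s, vn.2s, vn.2s" and
// reads the sum from lane 0.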
defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
          (SMINPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
          (UMINPv2i32 V64:$Rn, V64:$Rn)>;
// For vecreduce_{opc} used by GlobalISel, not SDAG at the moment,
// because GlobalISel allows us to specify the return register to be an FPR
multiclass SIMDAcrossLanesVecReductionIntrinsic<string baseOpc,
                                                SDPatternOperator opNode> {
  def : Pat<(i8 (opNode (v8i8 FPR64:$Rn))),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) FPR64:$Rn)>;

  def : Pat<(i8 (opNode (v16i8 FPR128:$Rn))),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) FPR128:$Rn)>;

  def : Pat<(i16 (opNode (v4i16 FPR64:$Rn))),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) FPR64:$Rn)>;

  def : Pat<(i16 (opNode (v8i16 FPR128:$Rn))),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) FPR128:$Rn)>;

  def : Pat<(i32 (opNode (v4i32 V128:$Rn))),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn)>;
}

// For v2i32 source type, the pairwise instruction can be used instead
defm : SIMDAcrossLanesVecReductionIntrinsic<"UMINV", vecreduce_umin>;
def : Pat<(i32 (vecreduce_umin (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;

defm : SIMDAcrossLanesVecReductionIntrinsic<"UMAXV", vecreduce_umax>;
def : Pat<(i32 (vecreduce_umax (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;

defm : SIMDAcrossLanesVecReductionIntrinsic<"SMINV", vecreduce_smin>;
def : Pat<(i32 (vecreduce_smin (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;

defm : SIMDAcrossLanesVecReductionIntrinsic<"SMAXV", vecreduce_smax>;
def : Pat<(i32 (vecreduce_smax (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;
multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
              ssub))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
              ssub))>;

  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
              ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
              ssub))>;

  def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
            (i64 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
              dsub))>;
}
multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
                                                Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
              ssub))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
              ssub))>;

  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
              ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
              ssub))>;

  def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
            (i64 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
              dsub))>;
}
defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;

// The vaddlv_s32 intrinsic gets mapped to SADDLP.
def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
// The vaddlv_u32 intrinsic gets mapped to UADDLP.
def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
//------------------------------------------------------------------------------
// AdvSIMD modified immediate instructions
//------------------------------------------------------------------------------

// AdvSIMD BIC
defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
// AdvSIMD ORR
defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;

let Predicates = [HasNEON] in {
def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
}
// AdvSIMD FMOV
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
                                                "fmov", ".2d",
                       [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8,
                                                "fmov", ".2s",
                       [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
                                                "fmov", ".4s",
                       [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
let Predicates = [HasNEON, HasFullFP16] in {
def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8,
                                                "fmov", ".4h",
                       [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
                                                "fmov", ".8h",
                       [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
} // Predicates = [HasNEON, HasFullFP16]
// AdvSIMD MOVI

// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
                                         [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 here.
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
          (MOVID imm0_255:$shift)>;

// EDIT byte mask: 2d

// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
                                              simdimmtype10,
                                              "movi", ".2d",
                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;

def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;

// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
// extract is free and this gives better MachineCSE results.
def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>;

def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
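// The ".2d" MOVI expands each bit of its 8-bit immediate to 8 copies, so
// (i32 255) materializes 0xFFFFFFFFFFFFFFFF per lane: a single
// "movi v0.2d, #0xffffffffffffffff" yields all-ones for every vector type.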
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;

let Predicates = [HasNEON] in {
  // Using the MOVI to materialize fp constants.
  def : Pat<(f32 fpimm32SIMDModImmType4:$in),
            (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
                                       (i32 24)),
                            ssub)>;
}

let Predicates = [HasNEON] in {
def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
}
def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// EDIT per word: 2s & 4s with MSL shifter
def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
                    [(set (v2i32 V64:$Rd),
                          (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
                    [(set (v4i32 V128:$Rd),
                          (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

// Per byte: 8b & 16b
def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255,
                                              "movi", ".8b",
                   [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;

def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
                                               "movi", ".16b",
                   [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
}

// AdvSIMD MVNI
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;

let Predicates = [HasNEON] in {
def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
}

def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
// EDIT per word: 2s & 4s with MSL shifter
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
                    [(set (v2i32 V64:$Rd),
                          (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
                    [(set (v4i32 V128:$Rd),
                          (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
}

//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------
let hasSideEffects = 0 in {
  defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">;
  defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">;
}

// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
// instruction expects the addend first, while the intrinsic expects it last.

// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLA",
           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;

defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
           TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                             VectorIndexD:$idx))),
            (FMLSv2i64_indexed
                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (insert_subvector undef,
                                                    (v2f32 (fneg V64:$Rm)),
                                                    (i64 0))),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;

  // 1 variant for 64-bit scalar version: extract from .1d or from .2d
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                V128:$Rm, VectorIndexS:$idx)>;
}
7431 defm : FMLSIndexedAfterNegPatterns<
7432 TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
7433 defm : FMLSIndexedAfterNegPatterns<
7434 TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >;
defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;

def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv2i32_indexed V64:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;

def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv4i32_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;

def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
          (FMULv2i64_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
            (i64 0))>;
defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;

defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
                                     int_aarch64_neon_sqdmulh_laneq>;
defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
                                      int_aarch64_neon_sqrdmulh_laneq>;

// Generated by MachineCombine
defm MLA   : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
defm MLS   : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;

defm MUL   : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
                                           int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
                                           int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
                                          int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
                                          int_aarch64_neon_sqrdmlsh>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL   : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL   : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL   : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>;

// A scalar sqdmull with the second operand being a vector lane can be
// handled directly with the indexed instruction encoding.
def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                            (vector_extract (v4i32 V128:$Vm),
                                                            VectorIndexS:$idx)),
          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
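
// For instance (illustrative only), a call like
//   @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 extractelement(%v, 1))
// selects directly to the indexed scalar form:
//   sqdmull d0, s0, v1.s[1]
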
//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
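//
// Concretely (illustrative only): the two directions of the fixed-point
// convert share one dag shape, differing only in which side is FP:
//   (i32 (fcvtzs f32:$Rn, #fbits))   // fp -> fixed-point int
//   (f32 (scvtf  i32:$Rn, #fbits))   // fixed-point int -> fp
// so the patterns below pin the operand types explicitly instead.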
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported.

let Predicates = [HasFullFP16] in {
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (SCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
                  (and FPR32:$Rn, (i32 65535)),
                  vecshiftR16:$imm)),
          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
          (UCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
            hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
          (i64 (INSERT_SUBREG
            (i64 (IMPLICIT_DEF)),
            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FACGE16 FPR16:$Rn, FPR16:$Rm),
            hsub))>;
def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
          (i32 (INSERT_SUBREG
            (i32 (IMPLICIT_DEF)),
            (FACGT16 FPR16:$Rn, FPR16:$Rm),
            hsub))>;
} // Predicates = [HasFullFP16]
defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
                                     int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
                                     int_aarch64_neon_sqrshrun>;
defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
                                     int_aarch64_neon_sqshrn>;
defm SQSHRUN  : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
                                     int_aarch64_neon_sqshrun>;
defm SRI      : SIMDScalarRShiftDTied<1, 0b01000, "sri">;
defm SRSHR    : SIMDScalarRShiftD<   0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA    : SIMDScalarRShiftDTied<0, 0b00110, "srsra",
    TriOpFrag<(add node:$LHS,
                   (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR     : SIMDScalarRShiftD<   0, 0b00000, "sshr", AArch64vashr>;
defm SSRA     : SIMDScalarRShiftDTied<0, 0b00010, "ssra",
    TriOpFrag<(add_and_or_is_add node:$LHS,
                   (AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN  : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
                                     int_aarch64_neon_uqrshrn>;
defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN   : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
                                     int_aarch64_neon_uqshrn>;
defm URSHR    : SIMDScalarRShiftD<   1, 0b00100, "urshr", AArch64urshri>;
defm URSRA    : SIMDScalarRShiftDTied<1, 0b00110, "ursra",
    TriOpFrag<(add node:$LHS,
                   (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR     : SIMDScalarRShiftD<   1, 0b00000, "ushr", AArch64vlshr>;
defm USRA     : SIMDScalarRShiftDTied<1, 0b00010, "usra",
    TriOpFrag<(add_and_or_is_add node:$LHS,
                   (AArch64vlshr node:$MHS, node:$RHS))>>;
//----------------------------------------------------------------------------
// AdvSIMD vector shift instructions
//----------------------------------------------------------------------------
defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
                                 int_aarch64_neon_vcvtfxs2fp>;
defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>;
defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;

// shl(x, 1) can be lowered to add(x, x).
class SHLToADDPat<ValueType ty, RegisterClass regtype>
  : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
        (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>;

def : SHLToADDPat<v16i8, FPR128>;
def : SHLToADDPat<v8i16, FPR128>;
def : SHLToADDPat<v4i32, FPR128>;
def : SHLToADDPat<v2i64, FPR128>;
def : SHLToADDPat<v8i8,  FPR64>;
def : SHLToADDPat<v4i16, FPR64>;
def : SHLToADDPat<v2i32, FPR64>;
defm SHRN    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
                   BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
defm SLI     : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                              (i32 vecshiftL64:$imm))),
          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
                                         int_aarch64_neon_sqrshrn>;
defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
                                         int_aarch64_neon_sqrshrun>;
defm SQSHLU  : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL   : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
                                         int_aarch64_neon_sqshrn>;
defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
                                         int_aarch64_neon_sqshrun>;
defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                              (i32 vecshiftR64:$imm))),
          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
defm SRSHR   : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA   : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
                 TriOpFrag<(add node:$LHS,
                                (AArch64srshri node:$MHS, node:$RHS))> >;
defm SSHLL   : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
                 BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;

defm SSHR    : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA    : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
                 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF   : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
                                    int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
                                         int_aarch64_neon_uqrshrn>;
defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
                                         int_aarch64_neon_uqshrn>;
defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
                 TriOpFrag<(add node:$LHS,
                                (AArch64urshri node:$MHS, node:$RHS))> >;
defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
                 BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
                 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
def VImm0080:             PatLeaf<(AArch64movi_shift (i32 128), (i32 0))>;
def VImm00008000:         PatLeaf<(AArch64movi_shift (i32 128), (i32 8))>;
def VImm0000000080000000: PatLeaf<(AArch64NvCast (v2f64 (fneg (AArch64NvCast (v4i32 (AArch64movi_shift (i32 128), (i32 24)))))))>;

// RADDHN patterns for when RSHRN shifts by half the size of the vector element
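// For example (illustrative only), for v8i16 a rounding narrow by 8:
//   rshrn  v0.8b, v1.8h, #8
// computes (x + 0x80) >> 8 per lane, which is exactly what
//   raddhn v0.8b, v1.8h, v2.8h   // with v2 == 0
// produces, since RADDHN returns the high half of the rounded sum.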
def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))),
          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))),
          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))),
          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))))),
          (RADDHNv8i16_v16i8
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))))),
          (RADDHNv4i32_v8i16
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v4i32 (MOVIv2d_ns (i32 0))))>;
let AddedComplexity = 5 in
def : Pat<(v4i32 (concat_vectors
                 (v2i32 V64:$Vd),
                 (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))))),
          (RADDHNv2i64_v4i32
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v2i64 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
          (RADDHNv8i16_v16i8
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
          (RADDHNv4i32_v8i16
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v4i32 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i32 (concat_vectors
                 (v2i32 V64:$Vd),
                 (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
          (RADDHNv2i64_v4i32
                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                 (v2i64 (MOVIv2d_ns (i32 0))))>;
// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result so it
// doesn't matter).
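//
// For example (illustrative only), for 16-bit lanes and a shift s in 1..8:
//   (v8i8 (trunc (lshr v8i16:%x, #8)))  -->  shrn v0.8b, v1.8h, #8
// The truncation keeps bits [s..s+7] of each source lane, which are genuine
// data bits for s <= 8, so lshr and ashr give identical results here.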
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;

def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
                                                      vecshiftR16Narrow:$imm)))),
          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
                                                      vecshiftR32Narrow:$imm)))),
          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
                                                      vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;
// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
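// For example (illustrative only), a shift-by-zero long shift is the extend:
//   ushll v0.8h, v1.8b, #0   // v8i8 -> v8i16 zero extend
//   sshll v0.8h, v1.8b, #0   // v8i8 -> v8i16 sign extend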
def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
// Vector bf16 -> fp32 is implemented morally as a zext + shift.
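// For example (illustrative only):
//   shll v0.4s, v0.4h, #16
// moves each bf16 into the top 16 bits of a 32-bit lane; since bf16 shares
// f32's sign/exponent layout, that widening is exactly fpextend.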
def : Pat<(v4f32 (any_fpextend (v4bf16 V64:$Rn))), (SHLLv4i16 V64:$Rn)>;

// Also match an extend from the upper half of a 128 bit source register.
def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (sext   (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
let Predicates = [HasNEON] in {
// Vector shift sxtl aliases
def : InstAlias<"sxtl.8h $dst, $src1",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.8h, $src1.8b",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.4s $dst, $src1",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.4s, $src1.4h",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.2d $dst, $src1",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.2d, $src1.2s",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift sxtl2 aliases
def : InstAlias<"sxtl2.8h $dst, $src1",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.4s $dst, $src1",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.2d $dst, $src1",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;

// Vector shift uxtl aliases
def : InstAlias<"uxtl.8h $dst, $src1",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.8h, $src1.8b",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.4s $dst, $src1",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.4s, $src1.4h",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.2d $dst, $src1",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.2d, $src1.2s",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift uxtl2 aliases
def : InstAlias<"uxtl2.8h $dst, $src1",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.4s $dst, $src1",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.2d $dst, $src1",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
}
def abs_f16 :
  OutPatFrag<(ops node:$Rn),
             (EXTRACT_SUBREG (f32 (COPY_TO_REGCLASS
               (i32 (ANDWri
                 (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                   node:$Rn, hsub), GPR32)),
                 (i32 (logical_imm32_XFORM(i32 0x7fff))))),
               FPR32)), hsub)>;

def : Pat<(f16 (fabs (f16 FPR16:$Rn))), (f16 (abs_f16 (f16 FPR16:$Rn)))>;
def : Pat<(bf16 (fabs (bf16 FPR16:$Rn))), (bf16 (abs_f16 (bf16 FPR16:$Rn)))>;

def neg_f16 :
  OutPatFrag<(ops node:$Rn),
             (EXTRACT_SUBREG (f32 (COPY_TO_REGCLASS
               (i32 (EORWri
                 (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                   node:$Rn, hsub), GPR32)),
                 (i32 (logical_imm32_XFORM(i32 0x8000))))),
               FPR32)), hsub)>;

def : Pat<(f16 (fneg (f16 FPR16:$Rn))), (f16 (neg_f16 (f16 FPR16:$Rn)))>;
def : Pat<(bf16 (fneg (bf16 FPR16:$Rn))), (bf16 (neg_f16 (bf16 FPR16:$Rn)))>;
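
// In effect (illustrative only): fabs clears bit 15 (and w8, w8, #0x7fff)
// and fneg flips it (eor w8, w8, #0x8000), round-tripping the half through
// a GPR because there is no scalar f16/bf16 AND/EOR on the FP side.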
let Predicates = [HasNEON] in {
def : Pat<(v4f16 (fabs (v4f16 V64:$Rn))), (v4f16 (BICv4i16 (v4f16 V64:$Rn), (i32 128), (i32 8)))>;
def : Pat<(v4bf16 (fabs (v4bf16 V64:$Rn))), (v4bf16 (BICv4i16 (v4bf16 V64:$Rn), (i32 128), (i32 8)))>;
def : Pat<(v8f16 (fabs (v8f16 V128:$Rn))), (v8f16 (BICv8i16 (v8f16 V128:$Rn), (i32 128), (i32 8)))>;
def : Pat<(v8bf16 (fabs (v8bf16 V128:$Rn))), (v8bf16 (BICv8i16 (v8bf16 V128:$Rn), (i32 128), (i32 8)))>;

def : Pat<(v4f16 (fneg (v4f16 V64:$Rn))), (v4f16 (EORv8i8 (v4f16 V64:$Rn), (MOVIv4i16 (i32 128), (i32 8))))>;
def : Pat<(v4bf16 (fneg (v4bf16 V64:$Rn))), (v4bf16 (EORv8i8 (v4bf16 V64:$Rn), (v4i16 (MOVIv4i16 (i32 0x80), (i32 8)))))>;
def : Pat<(v8f16 (fneg (v8f16 V128:$Rn))), (v8f16 (EORv16i8 (v8f16 V128:$Rn), (MOVIv8i16 (i32 128), (i32 8))))>;
def : Pat<(v8bf16 (fneg (v8bf16 V128:$Rn))), (v8bf16 (EORv16i8 (v8bf16 V128:$Rn), (v8i16 (MOVIv8i16 (i32 0x80), (i32 8)))))>;
}
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// These patterns are more complex because floating point loads do not
// support sign extension.
// The sign extension has to be explicitly added and is only supported for
// one step: byte-to-half, half-to-word, word-to-doubleword.
// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
// However, this is not good for code size.
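//
// Illustrative lowering for (f32 (sint_to_fp (sextloadi8 addr))) under these
// patterns (a sketch; register names are not the emitted ones):
//   ldr   b0, [x0]           // byte load straight to the FP/SIMD unit
//   sshll v0.8h, v0.8b, #0   // sign extend 8 -> 16
//   sshll v0.4s, v0.4h, #0   // sign extend 16 -> 32
//   scvtf s0, s0             // FPR -> FPR convert
//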
// 8-bits -> float. 2 sizes step-up.
class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                          (SSHLLv4i16_shift
                            (f64
                              (EXTRACT_SUBREG
                                (SSHLLv8i8_shift
                                  (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                                 INST,
                                                 bsub),
                                  0),
                                dsub)),
                            0),
                          ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                          (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
                          (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;
// 16-bits -> float. 1 size step-up.
class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                          (SSHLLv4i16_shift
                            (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                           INST,
                                           hsub),
                            0),
                          ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bits to 32-bits are handled in target specific dag combine:
// performIntToFpCombine.
// 64-bit integer to 32-bit floating point is not possible with
// SCVTF on floating point registers (both source and destination
// must have the same size).

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double. 3 size step-up: give up.
// 16-bits -> double. 2 size step-up.
class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                           (SSHLLv2i32_shift
                             (f64
                               (EXTRACT_SUBREG
                                 (SSHLLv4i16_shift
                                   (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                                  INST,
                                                  hsub),
                                   0),
                                 dsub)),
                             0),
                           dsub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bits -> double. 1 size step-up.
class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                           (SSHLLv2i32_shift
                             (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                            INST,
                                            ssub),
                             0),
                           dsub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;

// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.
//----------------------------------------------------------------------------
// AdvSIMD Load-Store Structure
//----------------------------------------------------------------------------
defm LD1 : SIMDLd1Multiple<"ld1">;
defm LD2 : SIMDLd2Multiple<"ld2">;
defm LD3 : SIMDLd3Multiple<"ld3">;
defm LD4 : SIMDLd4Multiple<"ld4">;

defm ST1 : SIMDSt1Multiple<"st1">;
defm ST2 : SIMDSt2Multiple<"st2">;
defm ST3 : SIMDSt3Multiple<"st3">;
defm ST4 : SIMDSt4Multiple<"st4">;

class Ld1Pat<ValueType ty, Instruction INST>
  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;

def : Ld1Pat<v16i8, LD1Onev16b>;
def : Ld1Pat<v8i16, LD1Onev8h>;
def : Ld1Pat<v4i32, LD1Onev4s>;
def : Ld1Pat<v2i64, LD1Onev2d>;
def : Ld1Pat<v8i8,  LD1Onev8b>;
def : Ld1Pat<v4i16, LD1Onev4h>;
def : Ld1Pat<v2i32, LD1Onev2s>;
def : Ld1Pat<v1i64, LD1Onev1d>;

class St1Pat<ValueType ty, Instruction INST>
  : Pat<(store ty:$Vt, GPR64sp:$Rn),
        (INST ty:$Vt, GPR64sp:$Rn)>;

def : St1Pat<v16i8, ST1Onev16b>;
def : St1Pat<v8i16, ST1Onev8h>;
def : St1Pat<v4i32, ST1Onev4s>;
def : St1Pat<v2i64, ST1Onev2d>;
def : St1Pat<v8i8,  ST1Onev8b>;
def : St1Pat<v4i16, ST1Onev4h>;
def : St1Pat<v2i32, ST1Onev2s>;
def : St1Pat<v1i64, ST1Onev1d>;

defm LD1R  : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
defm LD2R  : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
defm LD3R  : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
defm LD4R  : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
let mayLoad = 1, hasSideEffects = 0 in {
defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
}
def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
          (LD1Rv8b GPR64sp:$Rn)>;
def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
          (LD1Rv16b GPR64sp:$Rn)>;
def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;

def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
          (LD1Rv8b GPR64sp:$Rn)>;
def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))),
          (LD1Rv16b GPR64sp:$Rn)>;
def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))),
          (LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))),
          (LD1Rv2d GPR64sp:$Rn)>;

// Grab the floating point version too
def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;
def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
                    ValueType VTy, ValueType STy, Instruction LD1>
  : Pat<(vector_insert (VTy VecListOne128:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;

def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;
def : Ld1Lane128Pat<load,       VectorIndexH, v8f16, f16, LD1i16>;
def : Ld1Lane128Pat<load,       VectorIndexH, v8bf16, bf16, LD1i16>;
// Generate LD1 for extload if memory type does not match the
// destination type, for example:
//
//   (v4i32 (insert_vector_elt (load anyext from i8) idx))
//
// In this case, the index must be adjusted to match LD1 type.
//
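// For example (illustrative only): inserting an i8 load into lane 1 of a
// v4i32 uses the byte-indexed form with the lane index rescaled by 4:
//   ld1 { v0.b }[4], [x0]   // byte 4 == start of 32-bit lane 1
//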
class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
                         VecIndex, ValueType VTy, ValueType STy,
                         Instruction LD1, SDNodeXForm IdxOp>
  : Pat<(vector_insert (VTy VecListOne128:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
        (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;

class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex,
                        ValueType VTy, ValueType STy, Instruction LD1,
                        SDNodeXForm IdxOp>
  : Pat<(vector_insert (VTy VecListOne64:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
        (EXTRACT_SUBREG
            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
                 (IdxOp VecIndex:$idx), GPR64sp:$Rn),
            dsub)>;

def VectorIndexStoH : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
}]>;
def VectorIndexStoB : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
}]>;
def VectorIndexHtoB : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
}]>;

def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>;
def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>;
def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>;

def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>;
def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>;
def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>;
// Same as above, but the first element is populated using
// scalar_to_vector + insert_subvector instead of insert_vector_elt.
let Predicates = [IsNeonAvailable] in {
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
                         SDPatternOperator ExtLoad, Instruction LD1>
  : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
        (ResultTy (EXTRACT_SUBREG
                      (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;

def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;
}
class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
                   ValueType VTy, ValueType STy, Instruction LD1>
  : Pat<(vector_insert (VTy VecListOne64:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
        (EXTRACT_SUBREG
            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
                 VecIndex:$idx, GPR64sp:$Rn),
            dsub)>;

def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;
def : Ld1Lane64Pat<load,       VectorIndexH, v4f16, f16, LD1i16>;
def : Ld1Lane64Pat<load,       VectorIndexH, v4bf16, bf16, LD1i16>;
defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
defm LD4 : SIMDLdSt4SingleAliases<"ld4">;

// Stores
defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
let AddedComplexity = 19 in
class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
                    ValueType VTy, ValueType STy, Instruction ST1>
  : Pat<(scalar_store
             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
             GPR64sp:$Rn),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;

def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;
def : St1Lane128Pat<store,         VectorIndexH, v8f16, f16, ST1i16>;
def : St1Lane128Pat<store,         VectorIndexH, v8bf16, bf16, ST1i16>;
let AddedComplexity = 19 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
                   ValueType VTy, ValueType STy, Instruction ST1>
  : Pat<(scalar_store
             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
             GPR64sp:$Rn),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn)>;

def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8,  i32, ST1i8>;
def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;
def : St1Lane64Pat<store,         VectorIndexH, v4f16, f16, ST1i16>;
def : St1Lane64Pat<store,         VectorIndexH, v4bf16, bf16, ST1i16>;
multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
                            ValueType VTy, ValueType STy, Instruction ST1,
                            int offset> {
  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, offset),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn, XZR)>;

  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, GPR64:$Rm),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}

defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
                        2>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>;
multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
                             ValueType VTy, ValueType STy, Instruction ST1,
                             int offset> {
  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, offset),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;

  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, GPR64:$Rm),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}

defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
                         1>;
defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
                         2>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>;
let mayStore = 1, hasSideEffects = 0 in {
defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
}

defm ST1 : SIMDLdSt1SingleAliases<"st1">;
defm ST2 : SIMDLdSt2SingleAliases<"st2">;
defm ST3 : SIMDLdSt3SingleAliases<"st3">;
defm ST4 : SIMDLdSt4SingleAliases<"st4">;
//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------

let Predicates = [HasAES] in {
let isCommutable = 1 in {
def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
}
def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
}

// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
// for AES fusion on some CPUs.
let hasSideEffects = 0, mayStore = 0, mayLoad = 0, Predicates = [HasAES] in {
def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                 Sched<[WriteVq]>;
def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                  Sched<[WriteVq]>;
}

// Only use constrained versions of AES(I)MC instructions if they are paired with
// AESE/AESD.
def : Pat<(v16i8 (int_aarch64_crypto_aesmc
            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
                                             (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

def : Pat<(v16i8 (int_aarch64_crypto_aesimc
            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
                                              (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;
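
// The tied pseudo forces the AESMC/AESIMC source to be the AESE/AESD result
// register so the pair can issue as a fused op, e.g. (illustrative only):
//   aese  v0.16b, v1.16b
//   aesmc v0.16b, v0.16b   // same register: eligible for fusion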
let Predicates = [HasSHA2] in {
def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>;
def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>;

def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;
}
//----------------------------------------------------------------------------
// Compiler-pseudos
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.

// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
def : Pat<(i64 (anyext GPR32:$src)),
          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
// then assert the extension has happened.
def : Pat<(i64 (zext GPR32:$src)),
          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
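
// For example (illustrative only), zext i32 %w1 to i64 becomes:
//   mov w0, w1   // any write to a W register zeroes bits [63:32] of X0
// and the SUBREG_TO_REG merely records that the upper half is known zero.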
// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
def : Pat<(i64 (sext GPR32:$src)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;
def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                   (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                   (i64 (i64shift_sext_i8 imm0_63:$imm)))>;

def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                   (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                   (i64 (i64shift_sext_i16 imm0_63:$imm)))>;

def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 (i64shift_a imm0_63:$imm)),
                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;

def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
          (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 (i64shift_a imm0_63:$imm)),
                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
// sra patterns have an AddedComplexity of 10, so make sure we have a higher
// AddedComplexity for the following patterns since we want to match sext + sra
// patterns before we attempt to match a single sra node.
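//
// For example (illustrative only), sign_extend_inreg from i8 followed by an
// sra by 3 folds into a single bitfield extract:
//   sbfx w0, w1, #3, #5    // == (sext_inreg(w1, i8)) >> 3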
let AddedComplexity = 20 in {
// We support all sext + sra combinations which preserve at least one bit of the
// original value which is to be sign extended. E.g. we support shifts up to
// bitwidth-1 bits.
def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;

def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;

def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 imm0_31:$imm), 31)>;
} // AddedComplexity = 20
// To truncate, we can simply extract from a subregister.
def : Pat<(i32 (trunc GPR64sp:$src)),
          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;

// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
def : Pat<(debugtrap), (BRK 0xF000)>;

def ubsan_trap_xform : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
}]>;

def gi_ubsan_trap_xform : GICustomOperandRenderer<"renderUbsanTrap">,
                          GISDNodeXFormEquiv<ubsan_trap_xform>;

def ubsan_trap_imm : TImmLeaf<i32, [{
  return isUInt<8>(Imm);
}], ubsan_trap_xform>;

def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2. Then shuffle the high part of
// both results together.
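//
// For example (illustrative only), v8i16 mulhs becomes:
//   smull  v2.4s, v0.4h, v1.4h     // low halves
//   smull2 v3.4s, v0.8h, v1.8h     // high halves
//   uzp2   v0.8h, v2.8h, v3.8h     // keep the high 16 bits of each product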
def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
//
// Consider a simple memory load followed by a bitconvert then a store.
//   v0 = load v2i32
//   v1 = BITCAST v2i32 v0 to v4i16
//        store v4i16 v1
//
// In big endian mode every memory access has an implicit byte swap. LDR and
// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
// is, they treat the vector as a sequence of elements to be byte-swapped.
// The two pairs of instructions are fundamentally incompatible. We've decided
// to use LD1/ST1 only to simplify compiler implementation.
//
// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
// the original code sequence:
//   v0 = load v2i32
//   v1 = REV v2i32                  (implicit)
//   v2 = BITCAST v2i32 v1 to v4i16
//   v3 = REV v4i16 v2               (implicit)
//        store v4i16 v3
//
// But this is now broken - the value stored is different to the value loaded
// due to lane reordering. To fix this, on every BITCAST we must perform two
// other REVs:
//   v0 = load v2i32
//   v1 = REV v2i32                  (implicit)
//   v2 = REV v2i32
//   v3 = BITCAST v2i32 v2 to v4i16
//   v4 = REV v4i16
//   v5 = REV v4i16 v4               (implicit)
//        store v4i16 v5
//
// This means an extra two instructions, but actually in most cases the two REV
// instructions can be combined into one. For example:
//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
//
// There is also no 128-bit REV instruction. This must be synthesized with an
// EXT instruction.
//
// Most bitconverts require some sort of conversion. The only exceptions are:
//   a) Identity conversions -  vNfX <-> vNiX
//   b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
//
// Natural vector casts (64 bit)
foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
  foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
    def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))),
              (VT FPR64:$src)>;

// Natural vector casts (128 bit)
foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
  foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
    def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))),
              (VT FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v8i8  (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;

def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
}
8590 let Predicates = [IsBE] in {
8591 def : Pat<(v8i8 (bitconvert GPR64:$Xn)),
8592 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8593 def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
8594 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8595 def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
8596 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8597 def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
8598 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8599 def : Pat<(v4bf16 (bitconvert GPR64:$Xn)),
8600 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8601 def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
8602 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8604 def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
8605 (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8606 def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
8607 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8608 def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
8609 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8610 def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
8611 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8612 def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
8613 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8614 def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
8617 def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8618 def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8619 def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
8620 (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8621 def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
8622 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8623 def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
8624 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8625 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
8627 def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
8628 (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
8629 def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
8630 (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
8631 def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
8632 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8633 def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
8634 (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
8635 def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
8636 (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8638 def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>;
8639 def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>;
8641 let Predicates = [IsLE] in {
8642 def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
8643 def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
8644 def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
8645 def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
8646 def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
8649 let Predicates = [IsBE] in {
8650 def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
8651 (v1i64 (REV64v2i32 FPR64:$src))>;
8652 def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
8653 (v1i64 (REV64v4i16 FPR64:$src))>;
8654 def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
8655 (v1i64 (REV64v8i8 FPR64:$src))>;
8656 def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
8657 (v1i64 (REV64v4i16 FPR64:$src))>;
8658 def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))),
8659 (v1i64 (REV64v4i16 FPR64:$src))>;
8660 def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
          (v1i64 (REV64v2i32 FPR64:$src))>;
}
8663 def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
8664 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
8666 let Predicates = [IsLE] in {
8667 def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
8668 def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
8669 def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
8670 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
8671 def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
8672 def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
8675 let Predicates = [IsBE] in {
8676 def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
8677 (v2i32 (REV64v2i32 FPR64:$src))>;
8678 def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
8679 (v2i32 (REV32v4i16 FPR64:$src))>;
8680 def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))),
8681 (v2i32 (REV32v8i8 FPR64:$src))>;
8682 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
8683 (v2i32 (REV64v2i32 FPR64:$src))>;
8684 def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
8685 (v2i32 (REV64v2i32 FPR64:$src))>;
8686 def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
8687 (v2i32 (REV32v4i16 FPR64:$src))>;
8688 def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
}
8691 def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
8693 let Predicates = [IsLE] in {
8694 def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
8695 def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
8696 def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
8697 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
8698 def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
8701 let Predicates = [IsBE] in {
8702 def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
8703 (v4i16 (REV64v4i16 FPR64:$src))>;
8704 def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
8705 (v4i16 (REV32v4i16 FPR64:$src))>;
8706 def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
8707 (v4i16 (REV16v8i8 FPR64:$src))>;
8708 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
8709 (v4i16 (REV64v4i16 FPR64:$src))>;
8710 def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
8711 (v4i16 (REV32v4i16 FPR64:$src))>;
8712 def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
}
8715 def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
8716 def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;
8718 let Predicates = [IsLE] in {
8719 def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
8720 def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
8721 def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
8722 def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
8723 def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
8724 def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
8726 def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
8727 def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
8728 def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>;
8729 def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
8730 def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
}
8733 let Predicates = [IsBE] in {
8734 def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
8735 (v4f16 (REV64v4i16 FPR64:$src))>;
8736 def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
8737 (v4f16 (REV32v4i16 FPR64:$src))>;
8738 def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
8739 (v4f16 (REV16v8i8 FPR64:$src))>;
8740 def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
8741 (v4f16 (REV64v4i16 FPR64:$src))>;
8742 def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
8743 (v4f16 (REV32v4i16 FPR64:$src))>;
8744 def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
8745 (v4f16 (REV64v4i16 FPR64:$src))>;
8747 def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
8748 (v4bf16 (REV64v4i16 FPR64:$src))>;
8749 def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
8750 (v4bf16 (REV32v4i16 FPR64:$src))>;
8751 def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))),
8752 (v4bf16 (REV16v8i8 FPR64:$src))>;
8753 def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))),
8754 (v4bf16 (REV64v4i16 FPR64:$src))>;
8755 def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
8756 (v4bf16 (REV32v4i16 FPR64:$src))>;
8757 def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
          (v4bf16 (REV64v4i16 FPR64:$src))>;
}
8760 def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
8761 def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;
8763 let Predicates = [IsLE] in {
8764 def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
8765 def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
8766 def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
8767 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
8768 def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
8769 def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
8770 def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>;
}
8773 let Predicates = [IsBE] in {
8774 def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
8775 (v8i8 (REV64v8i8 FPR64:$src))>;
8776 def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))),
8777 (v8i8 (REV32v8i8 FPR64:$src))>;
8778 def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))),
8779 (v8i8 (REV16v8i8 FPR64:$src))>;
8780 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
8781 (v8i8 (REV64v8i8 FPR64:$src))>;
8782 def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))),
8783 (v8i8 (REV32v8i8 FPR64:$src))>;
8784 def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
8785 (v8i8 (REV64v8i8 FPR64:$src))>;
8786 def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
8787 (v8i8 (REV16v8i8 FPR64:$src))>;
8788 def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
}
8792 let Predicates = [IsLE] in {
8793 def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
8794 def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
8795 def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
8796 def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
8797 def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>;
}
8800 let Predicates = [IsBE] in {
8801 def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
8802 (f64 (REV64v2i32 FPR64:$src))>;
8803 def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))),
8804 (f64 (REV64v4i16 FPR64:$src))>;
8805 def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))),
8806 (f64 (REV64v2i32 FPR64:$src))>;
8807 def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
8808 (f64 (REV64v8i8 FPR64:$src))>;
8809 def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
8810 (f64 (REV64v4i16 FPR64:$src))>;
8811 def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
}
8814 def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
8815 def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
8817 let Predicates = [IsLE] in {
8818 def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
8819 def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
8820 def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
8821 def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
8822 def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
8825 let Predicates = [IsBE] in {
8826 def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
8827 (v1f64 (REV64v2i32 FPR64:$src))>;
8828 def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
8829 (v1f64 (REV64v4i16 FPR64:$src))>;
8830 def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
8831 (v1f64 (REV64v8i8 FPR64:$src))>;
8832 def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
8833 (v1f64 (REV64v2i32 FPR64:$src))>;
8834 def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
8835 (v1f64 (REV64v4i16 FPR64:$src))>;
8836 def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
}
8839 def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
8840 def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
8842 let Predicates = [IsLE] in {
8843 def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
8844 def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
8845 def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
8846 def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
8847 def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
8848 def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
8851 let Predicates = [IsBE] in {
8852 def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
8853 (v2f32 (REV64v2i32 FPR64:$src))>;
8854 def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
8855 (v2f32 (REV32v4i16 FPR64:$src))>;
8856 def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))),
8857 (v2f32 (REV32v8i8 FPR64:$src))>;
8858 def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
8859 (v2f32 (REV64v2i32 FPR64:$src))>;
8860 def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
8861 (v2f32 (REV64v2i32 FPR64:$src))>;
8862 def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
8863 (v2f32 (REV32v4i16 FPR64:$src))>;
8864 def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
}
8867 def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
8869 let Predicates = [IsLE] in {
8870 def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
8871 def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
8872 def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
8873 def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
8874 def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
8875 def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
8876 def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
8879 let Predicates = [IsBE] in {
8880 def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
8881 (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
8882 def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
8883 (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
8884 (REV64v4i32 FPR128:$src), (i32 8)))>;
8885 def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
8886 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
8887 (REV64v8i16 FPR128:$src), (i32 8)))>;
8888 def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
8889 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
8890 (REV64v8i16 FPR128:$src), (i32 8)))>;
8891 def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
8892 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
8893 (REV64v8i16 FPR128:$src), (i32 8)))>;
8894 def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
8895 (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
8896 def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
8897 (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
8898 (REV64v4i32 FPR128:$src), (i32 8)))>;
8899 def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
8900 (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                           (REV64v16i8 FPR128:$src), (i32 8)))>;
}
8904 let Predicates = [IsLE] in {
8905 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
8906 def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
8907 def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
8908 def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
8909 def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
8910 def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
8913 let Predicates = [IsBE] in {
8914 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
8915 (v2f64 (EXTv16i8 FPR128:$src,
8916 FPR128:$src, (i32 8)))>;
8917 def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
8918 (v2f64 (REV64v4i32 FPR128:$src))>;
8919 def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
8920 (v2f64 (REV64v8i16 FPR128:$src))>;
8921 def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
8922 (v2f64 (REV64v8i16 FPR128:$src))>;
8923 def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
8924 (v2f64 (REV64v8i16 FPR128:$src))>;
8925 def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
8926 (v2f64 (REV64v16i8 FPR128:$src))>;
8927 def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
}
8930 def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
8932 let Predicates = [IsLE] in {
8933 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
8934 def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
8935 def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
8936 def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
8937 def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
8938 def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
}
8941 let Predicates = [IsBE] in {
8942 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
8943 (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
8944 (REV64v4i32 FPR128:$src), (i32 8)))>;
8945 def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
8946 (v4f32 (REV32v8i16 FPR128:$src))>;
8947 def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
8948 (v4f32 (REV32v8i16 FPR128:$src))>;
8949 def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
8950 (v4f32 (REV32v8i16 FPR128:$src))>;
8951 def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
8952 (v4f32 (REV32v16i8 FPR128:$src))>;
8953 def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
8954 (v4f32 (REV64v4i32 FPR128:$src))>;
8955 def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
}
8958 def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
8960 let Predicates = [IsLE] in {
8961 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
8962 def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
8963 def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
8964 def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
8965 def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
8966 def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
8969 let Predicates = [IsBE] in {
8970 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
8971 (v2i64 (EXTv16i8 FPR128:$src,
8972 FPR128:$src, (i32 8)))>;
8973 def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
8974 (v2i64 (REV64v4i32 FPR128:$src))>;
8975 def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
8976 (v2i64 (REV64v8i16 FPR128:$src))>;
8977 def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
8978 (v2i64 (REV64v16i8 FPR128:$src))>;
8979 def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
8980 (v2i64 (REV64v4i32 FPR128:$src))>;
8981 def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
8982 (v2i64 (REV64v8i16 FPR128:$src))>;
8983 def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
}
8986 def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
8988 let Predicates = [IsLE] in {
8989 def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
8990 def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
8991 def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
8992 def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
8993 def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
8994 def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
8997 let Predicates = [IsBE] in {
8998 def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
8999 (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src),
                           (i32 8)))>;
9002 def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
9003 (v4i32 (REV64v4i32 FPR128:$src))>;
9004 def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
9005 (v4i32 (REV32v8i16 FPR128:$src))>;
9006 def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
9007 (v4i32 (REV32v16i8 FPR128:$src))>;
9008 def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
9009 (v4i32 (REV64v4i32 FPR128:$src))>;
9010 def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
9011 (v4i32 (REV32v8i16 FPR128:$src))>;
9012 def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
}
9015 def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
9017 let Predicates = [IsLE] in {
9018 def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
9019 def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
9020 def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
9021 def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
9022 def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
}
9025 let Predicates = [IsBE] in {
9026 def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
9027 (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
9030 def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
9031 (v8i16 (REV64v8i16 FPR128:$src))>;
9032 def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
9033 (v8i16 (REV32v8i16 FPR128:$src))>;
9034 def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
9035 (v8i16 (REV16v16i8 FPR128:$src))>;
9036 def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
9037 (v8i16 (REV64v8i16 FPR128:$src))>;
9038 def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
}
9041 def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
9042 def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;
9044 let Predicates = [IsLE] in {
9045 def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>;
9046 def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
9047 def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
9048 def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
9049 def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
9050 def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
9052 def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>;
9053 def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
9054 def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
9055 def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
9056 def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
}
9059 let Predicates = [IsBE] in {
9060 def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
9061 (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
9064 def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
9065 (v8f16 (REV64v8i16 FPR128:$src))>;
9066 def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
9067 (v8f16 (REV32v8i16 FPR128:$src))>;
9068 def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
9069 (v8f16 (REV16v16i8 FPR128:$src))>;
9070 def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
9071 (v8f16 (REV64v8i16 FPR128:$src))>;
9072 def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
9073 (v8f16 (REV32v8i16 FPR128:$src))>;
9075 def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))),
9076 (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                            (REV64v8i16 FPR128:$src),
                            (i32 8)))>;
9079 def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
9080 (v8bf16 (REV64v8i16 FPR128:$src))>;
9081 def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
9082 (v8bf16 (REV32v8i16 FPR128:$src))>;
9083 def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
9084 (v8bf16 (REV16v16i8 FPR128:$src))>;
9085 def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
9086 (v8bf16 (REV64v8i16 FPR128:$src))>;
9087 def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
          (v8bf16 (REV32v8i16 FPR128:$src))>;
}
9090 def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
9091 def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;
9093 let Predicates = [IsLE] in {
9094 def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
9095 def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
9096 def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
9097 def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
9098 def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
9099 def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
9100 def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
9103 let Predicates = [IsBE] in {
9104 def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
9105 (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                           (REV64v16i8 FPR128:$src),
                           (i32 8)))>;
9108 def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
9109 (v16i8 (REV64v16i8 FPR128:$src))>;
9110 def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
9111 (v16i8 (REV32v16i8 FPR128:$src))>;
9112 def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
9113 (v16i8 (REV16v16i8 FPR128:$src))>;
9114 def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
9115 (v16i8 (REV64v16i8 FPR128:$src))>;
9116 def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
9117 (v16i8 (REV32v16i8 FPR128:$src))>;
9118 def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
9119 (v16i8 (REV16v16i8 FPR128:$src))>;
9120 def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
}
9124 def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
9125 (EXTRACT_SUBREG V128:$Rn, dsub)>;
9126 def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
9127 (EXTRACT_SUBREG V128:$Rn, dsub)>;
9128 def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
9129 (EXTRACT_SUBREG V128:$Rn, dsub)>;
9130 def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
9131 (EXTRACT_SUBREG V128:$Rn, dsub)>;
9132 def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
9133 (EXTRACT_SUBREG V128:$Rn, dsub)>;
9134 def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
9135 (EXTRACT_SUBREG V128:$Rn, dsub)>;
9136 def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
9137 (EXTRACT_SUBREG V128:$Rn, dsub)>;
9138 def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
9139 (EXTRACT_SUBREG V128:$Rn, dsub)>;
9141 def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
9142 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
9143 def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
9144 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
9145 def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
9146 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
9147 def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
9148 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
9150 // A 64-bit subvector insert to the first 128-bit vector position
9151 // is a subregister copy that needs no instruction.
9152 multiclass InsertSubvectorUndef<ValueType Ty> {
9153 def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
9154 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
9155 def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
9156 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
9157 def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
9158 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
9159 def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
9160 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
9161 def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
9162 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
9163 def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
9164 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
9165 def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
9166 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
9167 def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}
9171 defm : InsertSubvectorUndef<i32>;
9172 defm : InsertSubvectorUndef<i64>;
// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// and v4i32.
9176 def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
9177 (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
9178 (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
9179 def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
9180 (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
9181 (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
// vector_extract on 64-bit vectors gets promoted to a 128-bit vector,
9183 // so we match on v4f32 here, not v2f32. This will also catch adding
9184 // the low two lanes of a true v4f32 vector.
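// For example, (fadd (extractelt V, 0), (extractelt V, 1)) on a v2f32 value
// arrives here as a v4f32 extract after promotion and selects a single
// "faddp s0, v0.2s" (schematic; actual registers depend on allocation).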
9185 def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
9186 (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
9187 (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
9188 def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
9189 (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
9190 (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
9192 // Prefer using the bottom lanes of addp Rn, Rn compared to
9193 // addp extractlow(Rn), extracthigh(Rn)
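// E.g. for v4i32 this selects "addp v0.4s, v0.4s, v0.4s" and keeps only the
// low 64 bits of the result, rather than first splitting the input into two
// 64-bit halves (schematic; the high half of the addp result is ignored).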
9194 def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))),
9195 (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))),
9196 (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>;
9197 def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))),
9198 (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))),
9199 (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>;
9200 def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))),
9201 (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))),
9202 (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>;
9204 def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))),
9205 (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))),
9206 (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>;
9207 def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))),
9208 (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))),
9209 (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>;
9211 // Scalar 64-bit shifts in FPR64 registers.
9212 def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
9213 (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
9214 def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
9215 (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
9216 def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
9217 (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
9218 def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
9219 (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
9221 // Patterns for nontemporal/no-allocate stores.
9222 // We have to resort to tricks to turn a single-input store into a store pair,
9223 // because there is no single-input nontemporal store, only STNP.
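// Schematically, a 128-bit nontemporal store of q0 becomes:
//   mov  d1, v0.d[1]           // DUPi64, lane 1
//   stnp d0, d1, [x0, #off]    // store the two halves as a pair
// (registers and offset shown only for illustration).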
9224 let Predicates = [IsLE] in {
9225 let AddedComplexity = 15 in {
9226 class NTStore128Pat<ValueType VT> :
9227 Pat<(nontemporalstore (VT FPR128:$Rt),
9228 (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
9229 (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
9230 (DUPi64 FPR128:$Rt, (i64 1)),
9231 GPR64sp:$Rn, simm7s8:$offset)>;
9233 def : NTStore128Pat<v2i64>;
9234 def : NTStore128Pat<v4i32>;
9235 def : NTStore128Pat<v8i16>;
9236 def : NTStore128Pat<v16i8>;
9238 class NTStore64Pat<ValueType VT> :
9239 Pat<(nontemporalstore (VT FPR64:$Rt),
9240 (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
9241 (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
9242 (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
9243 GPR64sp:$Rn, simm7s4:$offset)>;
9245 // FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
9246 def : NTStore64Pat<v1f64>;
9247 def : NTStore64Pat<v1i64>;
9248 def : NTStore64Pat<v2i32>;
9249 def : NTStore64Pat<v4i16>;
9250 def : NTStore64Pat<v8i8>;
9252 def : Pat<(nontemporalstore GPR64:$Rt,
9253 (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
9254 (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
9255 (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
9256 GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity=15
9258 } // Predicates = [IsLE]
9260 // Tail call return handling. These are all compiler pseudo-instructions,
9261 // so no encoding information or anything like that.
9262 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
9263 def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
9264 Sched<[WriteBrReg]>;
9265 def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
9266 Sched<[WriteBrReg]>;
9267 // Indirect tail-call with any register allowed, used by MachineOutliner when
9268 // this is proven safe.
9269 // FIXME: If we have to add any more hacks like this, we should instead relax
9270 // some verifier checks for outlined functions.
9271 def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
9272 Sched<[WriteBrReg]>;
// Indirect tail-calls with reduced register classes, needed for BTI and
// PAuthLR.
9276 def TCRETURNrix16x17 : Pseudo<(outs), (ins tcGPRx16x17:$dst, i32imm:$FPDiff), []>,
9277 Sched<[WriteBrReg]>;
9278 def TCRETURNrix17 : Pseudo<(outs), (ins tcGPRx17:$dst, i32imm:$FPDiff), []>,
9279 Sched<[WriteBrReg]>;
9280 def TCRETURNrinotx16 : Pseudo<(outs), (ins tcGPRnotx16:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
}
9284 def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
9285 (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
9286 Requires<[TailCallAny]>;
9287 def : Pat<(AArch64tcret tcGPRx16x17:$dst, (i32 timm:$FPDiff)),
9288 (TCRETURNrix16x17 tcGPRx16x17:$dst, imm:$FPDiff)>,
9289 Requires<[TailCallX16X17]>;
9290 def : Pat<(AArch64tcret tcGPRx17:$dst, (i32 timm:$FPDiff)),
9291 (TCRETURNrix17 tcGPRx17:$dst, imm:$FPDiff)>,
9292 Requires<[TailCallX17]>;
9293 def : Pat<(AArch64tcret tcGPRnotx16:$dst, (i32 timm:$FPDiff)),
9294 (TCRETURNrinotx16 tcGPRnotx16:$dst, imm:$FPDiff)>,
9295 Requires<[TailCallNotX16]>;
9297 def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
9298 (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
9299 def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
9300 (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
9302 def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
9303 def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;
9305 // Extracting lane zero is a special case where we can just use a plain
9306 // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
9307 // rest of the compiler, especially the register allocator and copy propagation,
9308 // to reason about, so is preferred when it's possible to use it.
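// For example, (i32 (extractelt (v4i32 V), (i64 0))) becomes a copy of the
// ssub subregister, typically printed as "fmov w0, s0" (registers are
// illustrative; allocation decides the actual ones).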
9309 let AddedComplexity = 10 in {
9310 def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
9311 def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
}
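// The dot_v4i8/dot_v8i8/dot_v16i8 classes below recognise a sum of widened
// i8 products and select a single [su]dot. A sketch of the C idiom they
// target, assuming it survives to the DAG in this shape:
//   int32_t r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3]; // -> sdot/udot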
9316 class mul_v4i8<SDPatternOperator ldop> :
9317 PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
9318 (mul (ldop (add node:$Rn, node:$offset)),
9319 (ldop (add node:$Rm, node:$offset)))>;
9320 class mulz_v4i8<SDPatternOperator ldop> :
9321 PatFrag<(ops node:$Rn, node:$Rm),
9322 (mul (ldop node:$Rn), (ldop node:$Rm))>;
def load_v4i8 :
  OutPatFrag<(ops node:$R),
    (INSERT_SUBREG
     (v2i32 (IMPLICIT_DEF)),
      (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
      ssub)>;
9331 class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
9332 Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
9333 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
9334 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
9335 (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
9336 (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
9337 (load_v4i8 GPR64sp:$Rn),
9338 (load_v4i8 GPR64sp:$Rm))),
9339 sub_32)>, Requires<[HasDotProd]>;
9342 class ee_v8i8<SDPatternOperator extend> :
9343 PatFrag<(ops node:$V, node:$K),
9344 (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;
9346 class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
9347 PatFrag<(ops node:$M, node:$N, node:$K),
9348 (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
9349 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;
class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
                 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
           (i64 0)))>;
9359 // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
9360 def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;
class odot_v8i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (EXTRACT_SUBREG
              (VADDV_32
               (i64 (DOT (DUPv2i32gpr WZR),
                         (v8i8 node:$Vm),
                         (v8i8 node:$Vn)))),
              ssub)>;
9371 class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
9372 SDPatternOperator extend> :
9373 Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
9374 (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
9375 Requires<[HasDotProd]>;
class ee_v16i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K1, node:$K2),
          (v4i16 (extract_subvector
           (v8i16 (extend
            (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;
class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
          (v4i32
           (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
                  (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;
class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
           (i64 0)))>;
class odot_v16i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (i32 (ADDVv4i32v
              (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;
9406 class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
9407 SDPatternOperator extend> :
9408 Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
9409 (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
9410 Requires<[HasDotProd]>;
9412 let AddedComplexity = 10 in {
9413 def : dot_v4i8<SDOTv8i8, sextloadi8>;
9414 def : dot_v4i8<UDOTv8i8, zextloadi8>;
9415 def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
9416 def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
9417 def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;
}
9420 // FIXME: add patterns to generate vector by element dot product.
9421 // FIXME: add SVE dot-product patterns.
9424 // Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
9425 // so that it can be used as input to inline asm, and vice versa.
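// For example, "ld64b x0, [x1]" fills x0..x7 with one 64-byte unit; the
// nodes below model that eight-register tuple so inline asm can consume or
// produce it.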
9426 def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
9427 def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
9428 def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
9429 GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
9430 (REG_SEQUENCE GPR64x8Class,
9431 $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
9432 $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
9433 foreach i = 0-7 in {
9434 def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
            (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
}
9438 let Predicates = [HasLS64] in {
9439 def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
9440 (outs GPR64x8:$Rt)>;
def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
                        (outs)>;
9443 def ST64BV: Store64BV<0b011, "st64bv">;
9444 def ST64BV0: Store64BV<0b010, "st64bv0">;
9446 class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
9447 : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
9448 (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;
9450 def : ST64BPattern<int_aarch64_st64b, ST64B>;
9451 def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}
9455 let Predicates = [HasMOPS] in {
9456 let Defs = [NZCV] in {
9457 defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;
9459 defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;
defm SETP : MOPSMemorySetInsns<0b00, "setp">;
}
9463 let Uses = [NZCV] in {
9464 defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
9465 defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;
9467 defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
9468 defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;
9470 defm SETM : MOPSMemorySetInsns<0b01, "setm">;
defm SETE : MOPSMemorySetInsns<0b10, "sete">;
}
}
9474 let Predicates = [HasMOPS, HasMTE] in {
9475 let Defs = [NZCV] in {
defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">;
}
9478 let Uses = [NZCV] in {
9479 defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">;
9480 // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
}
}
9485 // MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
9486 // MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
9487 def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
9488 def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
9489 def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
9490 def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
9491 def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;
9493 // MOPS operations always contain three 4-byte instructions
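// E.g. a single memcpy expands to the cpyfp/cpyfm/cpyfe triple (prologue,
// main loop, epilogue), 4 bytes each, hence Size = 12 on the pseudos below.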
9494 let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
9495 let mayLoad = 1 in {
9496 def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
9497 (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
9498 [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
9499 def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
9500 (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                                   [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
}
9503 let mayLoad = 0 in {
9504 def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
9505 (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                                  [], "$Rd = $Rd_wb,$Rn = $Rn_wb,@earlyclobber $Rn_wb">, Sched<[]>;
}
}
9509 let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
9510 def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
9511 (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                                         [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}
9515 //-----------------------------------------------------------------------------
// v8.3 Pointer Authentication late patterns
//-----------------------------------------------------------------------------
9518 def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm),
9519 (PAUTH_BLEND GPR64:$Rd, (trunc_imm imm64_0_65535:$imm))>;
9520 def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
9521 (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>;
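// Schematically, blend(addr, disc) keeps the low 48 address bits and moves a
// 16-bit discriminator into bits 63:48; the BFM form above is the same
// operation as "bfi Xd, Xn, #48, #16".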
9523 //-----------------------------------------------------------------------------
9525 // This gets lowered into an instruction sequence of 20 bytes
9526 let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
9527 def StoreSwiftAsyncContext
      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
               []>, Sched<[]>;
9531 def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
          (i32 GPR32:$op)>;
9535 //===----------------------------===//
9536 // 2022 Architecture Extensions:
9537 //===----------------------------===//
9539 def : InstAlias<"clrbhb", (HINT 22), 0>;
9540 let Predicates = [HasCLRBHB] in {
def : InstAlias<"clrbhb", (HINT 22), 1>;
}
9544 //===----------------------------------------------------------------------===//
9545 // Translation Hardening Extension (FEAT_THE)
9546 //===----------------------------------------------------------------------===//
9547 defm RCW : ReadCheckWriteCompareAndSwap;
9549 defm RCWCLR : ReadCheckWriteOperation<0b001, "clr">;
9550 defm RCWSET : ReadCheckWriteOperation<0b011, "set">;
9551 defm RCWSWP : ReadCheckWriteOperation<0b010, "swp">;
9553 //===----------------------------------------------------------------------===//
9554 // General Data-Processing Instructions (FEAT_V94_DP)
9555 //===----------------------------------------------------------------------===//
9556 defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>;
9557 defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
9558 defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;
9560 defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
9561 defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
9562 defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
9563 defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;
def RPRFM:
    I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
      "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
    Sched<[]> {
  bits<6> Rt;
  bits<5> Rn;
  bits<5> Rm;
  let Inst{2-0} = Rt{2-0};
  let Inst{4-3} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{11-10} = 0b10;
  let Inst{13-12} = Rt{4-3};
  let Inst{14} = 0b1;
  let Inst{15} = Rt{5};
  let Inst{20-16} = Rm;
  let Inst{31-21} = 0b11111000101;

  let hasSideEffects = 1;
  // RPRFM overlaps with PRFM (reg); when the decoder method of PRFM returns
  // Fail, the decoder should attempt to decode RPRFM. This requires setting
  // the decoder namespace to "Fallback".
  let DecoderNamespace = "Fallback";
}
9590 //===----------------------------------------------------------------------===//
9591 // 128-bit Atomics (FEAT_LSE128)
9592 //===----------------------------------------------------------------------===//
9593 let Predicates = [HasLSE128] in {
9594 def SWPP : LSE128Base<0b000, 0b00, 0b1, "swpp">;
9595 def SWPPA : LSE128Base<0b000, 0b10, 0b1, "swppa">;
9596 def SWPPAL : LSE128Base<0b000, 0b11, 0b1, "swppal">;
9597 def SWPPL : LSE128Base<0b000, 0b01, 0b1, "swppl">;
9598 def LDCLRP : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
9599 def LDCLRPA : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
9600 def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
9601 def LDCLRPL : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;
9602 def LDSETP : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
9603 def LDSETPA : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
9604 def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
def LDSETPL : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
}
9608 //===----------------------------------------------------------------------===//
9609 // RCPC Instructions (FEAT_LRCPC3)
9610 //===----------------------------------------------------------------------===//
9612 let Predicates = [HasRCPC3] in {
9614 def STILPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
9615 def STILPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
9616 def STILPW: BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
9617 def STILPX: BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
9618 def LDIAPPWpost: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
9619 def LDIAPPXpost: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
9620 def LDIAPPW: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
9621 def LDIAPPX: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
9623 def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
9624 def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;
9626 // Aliases for when offset=0
def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn)>;
def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;
9631 def STLRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
9632 def STLRXpre: BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
9633 def LDAPRWpost: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #4", "$Rn = $wback">;
def LDAPRXpost: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #8", "$Rn = $wback">;
}
9637 let Predicates = [HasRCPC3, HasNEON] in {
9639 defm STLURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8 , (outs), (ins FPR8 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9640 defm STLURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9641 defm STLURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9642 defm STLURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9643 defm STLURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9644 defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8 , (outs FPR8 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9645 defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9646 defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9647 defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9648 defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
def STL1: LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn), "stl1", "">;
9652 def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">;
9654 // Aliases for when offset=0
def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
}
9658 //===----------------------------------------------------------------------===//
9659 // 128-bit System Instructions (FEAT_SYSINSTR128)
9660 //===----------------------------------------------------------------------===//
9661 let Predicates = [HasD128] in {
9662 def SYSPxt : SystemPXtI<0, "sysp">;
def SYSPxt_XZR
  : BaseSystemI<0, (outs),
      (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
      "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
    Sched<[WriteSys]> {
9670 // Had to use a custom decoder because tablegen interprets this as having 4 fields (why?)
9671 // and therefore autogenerates a decoder that builds an MC representation that has 4 fields
9672 // (decodeToMCInst), but when printing we expect the MC representation to have 5 fields (one
9673 // extra for the XZR) because AArch64InstPrinter::printInstruction in AArch64GenAsmWriter.inc
9674 // is based off of the asm template (maybe) and therefore wants to print 5 operands.
9675 // I could add a bits<5> xzr_pair. But without a way to constrain it to 0b11111 here it would
9676 // overlap with the main SYSP instruction.
  let DecoderMethod = "DecodeSyspXzrInstruction";

  bits<3> op1;
  bits<4> Cn;
  bits<4> Cm;
  bits<3> op2;
9682 let Inst{22} = 0b1; // override BaseSystemI
9683 let Inst{20-19} = 0b01;
9684 let Inst{18-16} = op1;
9685 let Inst{15-12} = Cn;
9686 let Inst{11-8} = Cm;
9687 let Inst{7-5} = op2;
  let Inst{4-0} = 0b11111;
}
9691 def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
                (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
}
//===----------------------------------------------------------------------===//
// 128-bit System Registers (FEAT_SYSREG128)
//===----------------------------------------------------------------------===//
9699 // Instruction encoding:
9701 // 31 22|21|20|19|18 16|15 12|11 8|7 5|4 0
9702 // MRRS 1101010101| 1| 1|o0| op1| Cn| Cm|op2| Rt
9703 // MSRR 1101010101| 0| 1|o0| op1| Cn| Cm|op2| Rt
9705 // Instruction syntax:
9707 // MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
9708 // MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
9710 // ...where t is even (X0, X2, etc).
9712 let Predicates = [HasD128] in {
9713 def MRRS : RtSystemI128<1,
9714 (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
                     "mrrs", "\t$Rt, $systemreg">
{
  let Inst{20-5} = systemreg;
}
9721 def MSRR : RtSystemI128<0,
9722 (outs), (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
                     "msrr", "\t$systemreg, $Rt">
{
  let Inst{20-5} = systemreg;
}
}
9730 //===----------------------------===//
9731 // 2023 Architecture Extensions:
9732 //===----------------------------===//
9734 let Predicates = [HasFP8] in {
9735 defm F1CVTL : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
9736 defm F2CVTL : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
9737 defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">;
9738 defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">;
9739 defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">;
9740 defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">;
9741 defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
9742 } // End let Predicates = [HasFP8]
9744 let Predicates = [HasFAMINMAX] in {
9745 defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
9746 defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
} // End let Predicates = [HasFAMINMAX]
9749 let Predicates = [HasFP8FMA] in {
9750 defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
9751 defm FMLALTlane : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt">;
9752 defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb">;
9753 defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt">;
9754 defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb">;
9755 defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt">;
9757 defm FMLALB : SIMDThreeSameVectorMLA<0b0, "fmlalb">;
9758 defm FMLALT : SIMDThreeSameVectorMLA<0b1, "fmlalt">;
9759 defm FMLALLBB : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb">;
9760 defm FMLALLBT : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt">;
9761 defm FMLALLTB : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb">;
9762 defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">;
9763 } // End let Predicates = [HasFP8FMA]
9765 let Predicates = [HasFP8DOT2] in {
9766 defm FDOTlane : SIMDThreeSameVectorFP8DOT2Index<"fdot">;
9767 defm FDOT : SIMDThreeSameVectorDOT2<"fdot">;
9768 } // End let Predicates = [HasFP8DOT2]
9770 let Predicates = [HasFP8DOT4] in {
9771 defm FDOTlane : SIMDThreeSameVectorFP8DOT4Index<"fdot">;
9772 defm FDOT : SIMDThreeSameVectorDOT4<"fdot">;
9773 } // End let Predicates = [HasFP8DOT4]
9775 //===----------------------------------------------------------------------===//
9776 // Checked Pointer Arithmetic (FEAT_CPA)
9777 //===----------------------------------------------------------------------===//
9778 let Predicates = [HasCPA] in {
9779 // Scalar add/subtract
9780 defm ADDPT : AddSubCPA<0, "addpt">;
9781 defm SUBPT : AddSubCPA<1, "subpt">;
9783 // Scalar multiply-add/subtract
9784 def MADDPT : MulAccumCPA<0, "maddpt">;
def MSUBPT : MulAccumCPA<1, "msubpt">;
}
9788 def round_v4fp32_to_v4bf16 :
9789 OutPatFrag<(ops node:$Rn),
9790 // NaN? Round : Quiet(NaN)
             (BSPv16i8 (FCMEQv4f32 $Rn, $Rn),
                       (ADDv4i32
                        (ADDv4i32 $Rn,
9794 // Extract the LSB of the fp32 *truncated* to bf16.
9795 (ANDv16i8 (USHRv4i32_shift V128:$Rn, (i32 16)),
9796 (MOVIv4i32 (i32 1), (i32 0)))),
9797 // Bias which will help us break ties correctly.
9798 (MOVIv4s_msl (i32 127), (i32 264))),
9799 // Set the quiet bit in the NaN.
9800 (ORRv4i32 $Rn, (i32 64), (i32 16)))>;
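// A sketch of the arithmetic above, assuming the MOVI forms the 0x7FFF bias
// (#127, msl #8): for non-NaN x it computes x + 0x7FFF + ((x >> 16) & 1);
// the callers then take the high halves (UZP2), which yields
// round-to-nearest-even bf16. NaNs instead get their quiet bit (0x400000) set.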
multiclass PromoteUnaryv8f16Tov4f32<SDPatternOperator InOp, Instruction OutInst> {
  let Predicates = [HasNoFullFP16] in
  def : Pat<(InOp (v8f16 V128:$Rn)),
            (v8f16 (FCVTNv8i16
              (INSERT_SUBREG (IMPLICIT_DEF),
                 (v4f16 (FCVTNv4i16
                   (v4f32 (OutInst
                     (v4f32 (FCVTLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))))))),
                dsub),
              (v4f32 (OutInst (v4f32 (FCVTLv8i16 V128:$Rn))))))>;

  let Predicates = [HasBF16] in
  def : Pat<(InOp (v8bf16 V128:$Rn)),
            (v8bf16 (BFCVTN2
              (v8bf16 (BFCVTN
                (v4f32 (OutInst
                  (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))))))),
              (v4f32 (OutInst (v4f32 (SHLLv8i16 V128:$Rn))))))>;

  let Predicates = [HasNoBF16] in
  def : Pat<(InOp (v8bf16 V128:$Rn)),
            (UZP2v8i16
              (round_v4fp32_to_v4bf16 (v4f32 (OutInst
                (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub))))))),
              (round_v4fp32_to_v4bf16 (v4f32 (OutInst
                (v4f32 (SHLLv8i16 V128:$Rn))))))>;
}
9829 defm : PromoteUnaryv8f16Tov4f32<any_fceil, FRINTPv4f32>;
9830 defm : PromoteUnaryv8f16Tov4f32<any_ffloor, FRINTMv4f32>;
9831 defm : PromoteUnaryv8f16Tov4f32<any_fnearbyint, FRINTIv4f32>;
9832 defm : PromoteUnaryv8f16Tov4f32<any_fround, FRINTAv4f32>;
9833 defm : PromoteUnaryv8f16Tov4f32<any_froundeven, FRINTNv4f32>;
9834 defm : PromoteUnaryv8f16Tov4f32<any_frint, FRINTXv4f32>;
9835 defm : PromoteUnaryv8f16Tov4f32<any_ftrunc, FRINTZv4f32>;
multiclass PromoteBinaryv8f16Tov4f32<SDPatternOperator InOp, Instruction OutInst> {
  let Predicates = [HasNoFullFP16] in
  def : Pat<(InOp (v8f16 V128:$Rn), (v8f16 V128:$Rm)),
            (v8f16 (FCVTNv8i16
              (INSERT_SUBREG (IMPLICIT_DEF),
                 (v4f16 (FCVTNv4i16
                   (v4f32 (OutInst
                     (v4f32 (FCVTLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))),
                     (v4f32 (FCVTLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rm, dsub)))))))),
                dsub),
              (v4f32 (OutInst (v4f32 (FCVTLv8i16 V128:$Rn)),
                              (v4f32 (FCVTLv8i16 V128:$Rm))))))>;

  let Predicates = [HasBF16] in
  def : Pat<(InOp (v8bf16 V128:$Rn), (v8bf16 V128:$Rm)),
            (v8bf16 (BFCVTN2
              (v8bf16 (BFCVTN
                (v4f32 (OutInst
                  (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))),
                  (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rm, dsub)))))))),
              (v4f32 (OutInst (v4f32 (SHLLv8i16 V128:$Rn)),
                              (v4f32 (SHLLv8i16 V128:$Rm))))))>;

  let Predicates = [HasNoBF16] in
  def : Pat<(InOp (v8bf16 V128:$Rn), (v8bf16 V128:$Rm)),
            (UZP2v8i16
              (round_v4fp32_to_v4bf16 (v4f32 (OutInst
                (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))),
                (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rm, dsub))))))),
              (round_v4fp32_to_v4bf16 (v4f32 (OutInst
                (v4f32 (SHLLv8i16 V128:$Rn)),
                (v4f32 (SHLLv8i16 V128:$Rm))))))>;
}
9870 defm : PromoteBinaryv8f16Tov4f32<any_fadd, FADDv4f32>;
9871 defm : PromoteBinaryv8f16Tov4f32<any_fdiv, FDIVv4f32>;
9872 defm : PromoteBinaryv8f16Tov4f32<any_fmul, FMULv4f32>;
9873 defm : PromoteBinaryv8f16Tov4f32<any_fsub, FSUBv4f32>;
9875 include "AArch64InstrAtomics.td"
9876 include "AArch64SVEInstrInfo.td"
9877 include "AArch64SMEInstrInfo.td"
9878 include "AArch64InstrGISel.td"