From 5c86e58fb2ba5bc54b8aa56441f46cf3c29d8ece Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 13 Sep 2019 11:20:17 +0000 Subject: [PATCH] [ARM] Add earlyclobber for cross beat MVE instructions rL367544 added @earlyclobbers for the MVE VREV64 instruction. This adds the same for a number of other 32bit instructions that are similarly unpredictable if the destination equals the source (due to the cross beat nature of the instructions). This includes: VCADD.f32 VCADD.i32 VCMUL.f32 VHCADD.s32 VMULLT/B.s/u32 VQDMLADH{X}.s32 VQRDMLADH{X}.s32 VQDMLSDH{X}.s32 VQRDMLSDH{X}.s32 VQDMULLT/B.s32 with Qm and Rm No tests here as this would require intrinsics (or very interesting codegen) to manifest. The tests will follow naturally as the intrinsics are added. Differential Revision: https://reviews.llvm.org/D67462 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@371838 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrMVE.td | 79 +++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td index c83ca203b54..93c976a85e1 100644 --- a/lib/Target/ARM/ARMInstrMVE.td +++ b/lib/Target/ARM/ARMInstrMVE.td @@ -2725,10 +2725,10 @@ let Predicates = [HasMVEFloat] in { (v8f16 (MVE_VSUBf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; } -class MVE_VCADD pattern=[]> +class MVE_VCADD pattern=[]> : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot), - "$Qd, $Qn, $Qm, $rot", vpred_r, "", pattern> { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> { bits<4> Qd; bits<4> Qn; bit rot; @@ -2747,7 +2747,7 @@ class MVE_VCADD pattern=[]> } def MVE_VCADDf16 : MVE_VCADD<"f16", 0b0>; -def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1>; +def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1, "@earlyclobber $Qd">; class MVE_VABD_fp : MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), @@ -3300,10 +3300,10 @@ class MVE_qDest_qSrc size, list pattern=[]> + string suffix, bits<2> size, string cstr="", list pattern=[]> : MVE_qDest_qSrc { + vpred_n, "$Qd = $Qd_src"#cstr, pattern> { bits<4> Qn; let Inst{28} = subtract; @@ -3320,7 +3320,7 @@ multiclass MVE_VQxDMLxDH_multi { def s8 : MVE_VQxDMLxDH; def s16 : MVE_VQxDMLxDH; - def s32 : MVE_VQxDMLxDH; + def s32 : MVE_VQxDMLxDH; } defm MVE_VQDMLADH : MVE_VQxDMLxDH_multi<"vqdmladh", 0b0, 0b0, 0b0>; @@ -3332,10 +3332,10 @@ defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>; defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>; defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>; -class MVE_VCMUL pattern=[]> +class MVE_VCMUL pattern=[]> : MVE_qDest_qSrc { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> { bits<4> Qn; bits<2> rot; @@ -3352,13 +3352,13 @@ class MVE_VCMUL pattern=[]> } def MVE_VCMULf16 : MVE_VCMUL<"vcmul", "f16", 0b0>; -def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1>; +def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1, "@earlyclobber $Qd">; class MVE_VMULL bits_21_20, - bit T, list pattern=[]> + bit T, string cstr, list pattern=[]> : MVE_qDest_qSrc { + vpred_r, cstr, pattern> { bits<4> Qd; bits<4> Qn; bits<4> Qm; @@ -3374,9 +3374,9 @@ class MVE_VMULL bits_21_20, } multiclass MVE_VMULL_multi bits_21_20> { - def bh : MVE_VMULL; - def th : MVE_VMULL; + bit bit_28, bits<2> bits_21_20, string cstr=""> { + def bh : MVE_VMULL; + def th : MVE_VMULL; } // For integer multiplies, bits 21:20 encode size, and bit 28 signedness. @@ -3385,10 +3385,10 @@ multiclass MVE_VMULL_multi; defm MVE_VMULLs16 : MVE_VMULL_multi<"vmull", "s16", 0b0, 0b01>; -defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10>; +defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10, "@earlyclobber $Qd">; defm MVE_VMULLu8 : MVE_VMULL_multi<"vmull", "u8", 0b1, 0b00>; defm MVE_VMULLu16 : MVE_VMULL_multi<"vmull", "u16", 0b1, 0b01>; -defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10>; +defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10, "@earlyclobber $Qd">; defm MVE_VMULLp8 : MVE_VMULL_multi<"vmull", "p8", 0b0, 0b11>; defm MVE_VMULLp16 : MVE_VMULL_multi<"vmull", "p16", 0b1, 0b11>; @@ -3477,11 +3477,10 @@ defm MVE_VCVTf16f32 : MVE_VCVT_ff_halves<"f16.f32", 0b0>; defm MVE_VCVTf32f16 : MVE_VCVT_ff_halves<"f32.f16", 0b1>; class MVE_VxCADD size, bit halve, - list pattern=[]> + string cstr="", list pattern=[]> : MVE_qDest_qSrc { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> { bits<4> Qn; bit rot; @@ -3497,11 +3496,11 @@ class MVE_VxCADD size, bit halve, def MVE_VCADDi8 : MVE_VxCADD<"vcadd", "i8", 0b00, 0b1>; def MVE_VCADDi16 : MVE_VxCADD<"vcadd", "i16", 0b01, 0b1>; -def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1>; +def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1, "@earlyclobber $Qd">; def MVE_VHCADDs8 : MVE_VxCADD<"vhcadd", "s8", 0b00, 0b0>; def MVE_VHCADDs16 : MVE_VxCADD<"vhcadd", "s16", 0b01, 0b0>; -def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0>; +def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0, "@earlyclobber $Qd">; class MVE_VADCSBC pattern=[]> @@ -3531,10 +3530,10 @@ def MVE_VSBC : MVE_VADCSBC<"vsbc", 0b0, 0b1, (ins cl_FPSCR_NZCV:$carryin)>; def MVE_VSBCI : MVE_VADCSBC<"vsbci", 0b1, 0b1, (ins)>; class MVE_VQDMULL pattern=[]> + string cstr="", list pattern=[]> : MVE_qDest_qSrc { + vpred_r, cstr, pattern> { bits<4> Qn; let Inst{28} = size; @@ -3547,13 +3546,13 @@ class MVE_VQDMULL { - def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0>; - def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1>; +multiclass MVE_VQDMULL_halves { + def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0, cstr>; + def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1, cstr>; } defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<"s16", 0b0>; -defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1>; +defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1, "@earlyclobber $Qd">; // end of mve_qDest_qSrc @@ -3578,9 +3577,9 @@ class MVE_qr_base pattern=[]> +class MVE_qDest_rSrc pattern=[]> : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm), - NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, "", + NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr, pattern>; class MVE_qDestSrc_rSrc pattern=[]> @@ -3602,7 +3601,7 @@ class MVE_qDest_single_rSrc pattern=[]> class MVE_VADDSUB_qr size, bit bit_5, bit bit_12, bit bit_16, bit bit_28, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = bit_28; let Inst{21-20} = size; @@ -3650,8 +3649,8 @@ let Predicates = [HasMVEInt] in { } class MVE_VQDMULL_qr pattern=[]> - : MVE_qDest_rSrc { + bit T, string cstr="", list pattern=[]> + : MVE_qDest_rSrc { let Inst{28} = size; let Inst{21-20} = 0b11; @@ -3661,18 +3660,18 @@ class MVE_VQDMULL_qr { - def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0>; - def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1>; +multiclass MVE_VQDMULL_qr_halves { + def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0, cstr>; + def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1, cstr>; } defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<"s16", 0b0>; -defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1>; +defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1, "@earlyclobber $Qd">; class MVE_VxADDSUB_qr bits_21_20, bit subtract, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = bit_28; let Inst{21-20} = bits_21_20; @@ -3750,7 +3749,7 @@ let Predicates = [HasMVEInt] in { } class MVE_VBRSR size, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = 0b1; let Inst{21-20} = size; @@ -3766,7 +3765,7 @@ def MVE_VBRSR32 : MVE_VBRSR<"vbrsr", "32", 0b10>; class MVE_VMUL_qr_int size, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = 0b0; let Inst{21-20} = size; @@ -3791,7 +3790,7 @@ let Predicates = [HasMVEInt] in { class MVE_VxxMUL_qr bits_21_20, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = bit_28; let Inst{21-20} = bits_21_20; -- 2.11.4.GIT