From 9c4cd7971382ecbaf8e1530e381aa54ed23a2b0e Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Mon, 15 Jan 2024 08:55:30 +0000 Subject: [PATCH] [Clang][SME2] Fix PSEL builtin predicates (#77097) PSEL intrinsics which return a predicate-as-counter are available in SVE2p1 & SME2. --- clang/include/clang/Basic/arm_sve.td | 17 ++-- .../aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c | 94 +--------------------- ...le_sve2p1_psel.c => acle_sve2p1_psel_svcount.c} | 72 +---------------- 3 files changed, 14 insertions(+), 169 deletions(-) copy clang/test/CodeGen/aarch64-sve2p1-intrinsics/{acle_sve2p1_psel.c => acle_sve2p1_psel_svcount.c} (66%) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 6de940b4da60..628e325c8581 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1948,14 +1948,10 @@ def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sv } let TargetGuard = "sve2p1|sme" in { -def SVPSEL_B : SInst<"svpsel_lane_b8", "PPPm", "Pc", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8", "}}Pm", "Pc", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, "", [IsStreamingCompatible], []>; -def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, "", [IsStreamingCompatible], []>; +def SVPSEL_B : SInst<"svpsel_lane_b8", "PPPm", "Pc", MergeNone, "", [IsStreamingOrSVE2p1], []>; +def SVPSEL_H : SInst<"svpsel_lane_b16", "PPPm", "Ps", MergeNone, "", [IsStreamingOrSVE2p1], []>; +def SVPSEL_S : SInst<"svpsel_lane_b32", "PPPm", "Pi", MergeNone, "", [IsStreamingOrSVE2p1], []>; +def SVPSEL_D : SInst<"svpsel_lane_b64", "PPPm", "Pl", MergeNone, "", [IsStreamingOrSVE2p1], []>; } // Standalone sve2.1 builtins @@ -1979,6 +1975,11 @@ let TargetGuard = "sve2p1|sme2" in { def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingOrSVE2p1], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingOrSVE2p1], [ImmCheck<1, ImmCheck0_1>]>; +def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8", "}}Pm", "Pc", MergeNone, "", [IsStreamingOrSVE2p1], []>; +def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, "", [IsStreamingOrSVE2p1], []>; +def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, "", [IsStreamingOrSVE2p1], []>; +def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, "", [IsStreamingOrSVE2p1], []>; + def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c index de3f6a9a57bf..a61d874a1d9e 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c @@ -11,16 +11,16 @@ // RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +sme2 -S -DTEST_SME2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: -target-feature +sme -S -DTEST_SME -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -S -DTEST_SME -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include -#ifndef TEST_SME2 +#ifndef TEST_SME #define ATTR #else -#define ATTR __arm_streaming_compatible +#define ATTR __arm_streaming #endif // CHECK-LABEL: @test_svpsel_lane_b8( @@ -92,89 +92,3 @@ svbool_t test_svpsel_lane_b32(svbool_t p1, svbool_t p2, uint32_t idx) ATTR { svbool_t test_svpsel_lane_b64(svbool_t p1, svbool_t p2, uint32_t idx) ATTR { return svpsel_lane_b64(p1, p2, idx + 1); } - -// CHECK-LABEL: @test_svpsel_lane_c8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv16i1( [[TMP0]], [[P2:%.*]], i32 [[ADD]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP1]]) -// CHECK-NEXT: ret target("aarch64.svcount") [[TMP2]] -// -// CPP-CHECK-LABEL: @_Z19test_svpsel_lane_c8u11__SVCount_tu10__SVBool_tj( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv16i1( [[TMP0]], [[P2:%.*]], i32 [[ADD]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP1]]) -// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP2]] -// -svcount_t test_svpsel_lane_c8(svcount_t p1, svbool_t p2, uint32_t idx) ATTR { - return svpsel_lane_c8(p1, p2, idx + 15); -} - -// CHECK-LABEL: @test_svpsel_lane_c16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 7 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P2:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv8i1( [[TMP0]], [[TMP1]], i32 [[ADD]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP2]]) -// CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]] -// -// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_c16u11__SVCount_tu10__SVBool_tj( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 7 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P2:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv8i1( [[TMP0]], [[TMP1]], i32 [[ADD]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP2]]) -// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]] -// -svcount_t test_svpsel_lane_c16(svcount_t p1, svbool_t p2, uint32_t idx) ATTR { - return svpsel_lane_c16(p1, p2, idx + 7); -} - -// CHECK-LABEL: @test_svpsel_lane_c32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 3 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[P2:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv4i1( [[TMP0]], [[TMP1]], i32 [[ADD]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP2]]) -// CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]] -// -// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_c32u11__SVCount_tu10__SVBool_tj( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 3 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[P2:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv4i1( [[TMP0]], [[TMP1]], i32 [[ADD]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP2]]) -// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]] -// -svcount_t test_svpsel_lane_c32(svcount_t p1, svbool_t p2, uint32_t idx) ATTR { - return svpsel_lane_c32(p1, p2, idx + 3); -} - -// CHECK-LABEL: @test_svpsel_lane_c64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 1 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[P2:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv2i1( [[TMP0]], [[TMP1]], i32 [[ADD]]) -// CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP2]]) -// CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]] -// -// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_c64u11__SVCount_tu10__SVBool_tj( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[P1:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[P2:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv2i1( [[TMP0]], [[TMP1]], i32 [[ADD]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt( [[TMP2]]) -// CPP-CHECK-NEXT: ret target("aarch64.svcount") [[TMP3]] -// -svcount_t test_svpsel_lane_c64(svcount_t p1, svbool_t p2, uint32_t idx) ATTR { - return svpsel_lane_c64(p1, p2, idx + 1); -} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel_svcount.c similarity index 66% copy from clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c copy to clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel_svcount.c index de3f6a9a57bf..4b8a582be301 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel_svcount.c @@ -20,79 +20,9 @@ #ifndef TEST_SME2 #define ATTR #else -#define ATTR __arm_streaming_compatible +#define ATTR __arm_streaming #endif -// CHECK-LABEL: @test_svpsel_lane_b8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.psel.nxv16i1( [[P1:%.*]], [[P2:%.*]], i32 [[ADD]]) -// CHECK-NEXT: ret [[TMP0]] -// -// CPP-CHECK-LABEL: @_Z19test_svpsel_lane_b8u10__SVBool_tS_j( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.psel.nxv16i1( [[P1:%.*]], [[P2:%.*]], i32 [[ADD]]) -// CPP-CHECK-NEXT: ret [[TMP0]] -// -svbool_t test_svpsel_lane_b8(svbool_t p1, svbool_t p2, uint32_t idx) ATTR { - return svpsel_lane_b8(p1, p2, idx + 15); -} - -// CHECK-LABEL: @test_svpsel_lane_b16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 7 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P2:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv8i1( [[P1:%.*]], [[TMP0]], i32 [[ADD]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_b16u10__SVBool_tS_j( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 7 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P2:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv8i1( [[P1:%.*]], [[TMP0]], i32 [[ADD]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbool_t test_svpsel_lane_b16(svbool_t p1, svbool_t p2, uint32_t idx) ATTR { - return svpsel_lane_b16(p1, p2, idx + 7); -} - -// CHECK-LABEL: @test_svpsel_lane_b32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 3 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[P2:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv4i1( [[P1:%.*]], [[TMP0]], i32 [[ADD]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_b32u10__SVBool_tS_j( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 3 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[P2:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv4i1( [[P1:%.*]], [[TMP0]], i32 [[ADD]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbool_t test_svpsel_lane_b32(svbool_t p1, svbool_t p2, uint32_t idx) ATTR { - return svpsel_lane_b32(p1, p2, idx + 3); -} - -// CHECK-LABEL: @test_svpsel_lane_b64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 1 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[P2:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv2i1( [[P1:%.*]], [[TMP0]], i32 [[ADD]]) -// CHECK-NEXT: ret [[TMP1]] -// -// CPP-CHECK-LABEL: @_Z20test_svpsel_lane_b64u10__SVBool_tS_j( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 1 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[P2:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.psel.nxv2i1( [[P1:%.*]], [[TMP0]], i32 [[ADD]]) -// CPP-CHECK-NEXT: ret [[TMP1]] -// -svbool_t test_svpsel_lane_b64(svbool_t p1, svbool_t p2, uint32_t idx) ATTR { - return svpsel_lane_b64(p1, p2, idx + 1); -} - // CHECK-LABEL: @test_svpsel_lane_c8( // CHECK-NEXT: entry: // CHECK-NEXT: [[ADD:%.*]] = add i32 [[IDX:%.*]], 15 -- 2.11.4.GIT