clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mla.c

   1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
   2 // REQUIRES: aarch64-registered-target
   3
   4 // RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
   5 // RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
   6 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
   7 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
   8 // RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
   9 #include <arm_sme.h>
  10
   11 #ifdef SVE_OVERLOADED_FORMS
   12 // Overloaded spelling: only A1, A3 and A5 are pasted together; the A2 and A4 slots are accepted but dropped. Five slots is long enough to represent any SVE builtin.
   13 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
   14 #else
   15 #define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 // Fully-suffixed spelling: all five pieces are pasted together.
   16 #endif
  17
  18 //
  19 // Multi, multi
  20 // CHECK-LABEL: @test_svmla2_f32(
  21 // CHECK-NEXT:  entry:
  22 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]])
  23 // CHECK-NEXT:    ret void
  24 //
  25 // CPP-CHECK-LABEL: @_Z15test_svmla2_f32j13svfloat32x2_tS_(
  26 // CPP-CHECK-NEXT:  entry:
  27 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]])
  28 // CPP-CHECK-NEXT:    ret void
  29 //
   30 void test_svmla2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming __arm_inout("za") { // Multi, multi with x2 f32 tuples: the assertions above expect one call to @llvm.aarch64.sme.fmla.vg1x2.nxv4f32.
   31   SVE_ACLE_FUNC(svmla_za32,_f32,_vg1x2,,)(slice_base, zn, zm); // Spells svmla_za32_f32_vg1x2, or svmla_za32_vg1x2 when SVE_OVERLOADED_FORMS is defined.
   32 }
  33
  34 // CHECK-LABEL: @test_svmla4_f32(
  35 // CHECK-NEXT:  entry:
  36 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE2:%.*]], <vscale x 4 x float> [[ZM_COERCE3:%.*]])
  37 // CHECK-NEXT:    ret void
  38 //
  39 // CPP-CHECK-LABEL: @_Z15test_svmla4_f32j13svfloat32x4_tS_(
  40 // CPP-CHECK-NEXT:  entry:
  41 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE2:%.*]], <vscale x 4 x float> [[ZM_COERCE3:%.*]])
  42 // CPP-CHECK-NEXT:    ret void
  43 //
   44 void test_svmla4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __arm_streaming __arm_inout("za") { // Multi, multi with x4 f32 tuples: the assertions above expect one call to @llvm.aarch64.sme.fmla.vg1x4.nxv4f32.
   45   SVE_ACLE_FUNC(svmla_za32,_f32,_vg1x4,,)(slice_base, zn, zm); // Spells svmla_za32_f32_vg1x4, or svmla_za32_vg1x4 when SVE_OVERLOADED_FORMS is defined.
   46 }
  47
  48 //
  49 // Multi, single
  50 // CHECK-LABEL: @test_svmla_single2_f32(
  51 // CHECK-NEXT:  entry:
  52 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]])
  53 // CHECK-NEXT:    ret void
  54 //
  55 // CPP-CHECK-LABEL: @_Z22test_svmla_single2_f32j13svfloat32x2_tu13__SVFloat32_t(
  56 // CPP-CHECK-NEXT:  entry:
  57 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]])
  58 // CPP-CHECK-NEXT:    ret void
  59 //
   60 void test_svmla_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { // Multi, single with an x2 f32 tuple and one f32 vector: the assertions above expect one call to @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32.
   61   SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2)(slice_base, zn, zm); // Spells svmla_single_za32_f32_vg1x2, or svmla_za32_vg1x2 when SVE_OVERLOADED_FORMS is defined.
   62 }
  63
  64 // CHECK-LABEL: @test_svmla_single4_f32(
  65 // CHECK-NEXT:  entry:
  66 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]], <vscale x 4 x float> [[ZM:%.*]])
  67 // CHECK-NEXT:    ret void
  68 //
  69 // CPP-CHECK-LABEL: @_Z22test_svmla_single4_f32j13svfloat32x4_tu13__SVFloat32_t(
  70 // CPP-CHECK-NEXT:  entry:
  71 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]], <vscale x 4 x float> [[ZM:%.*]])
  72 // CPP-CHECK-NEXT:    ret void
  73 //
   74 void test_svmla_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { // Multi, single with an x4 f32 tuple and one f32 vector: the assertions above expect one call to @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32.
   75   SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x4)(slice_base, zn, zm); // Spells svmla_single_za32_f32_vg1x4, or svmla_za32_vg1x4 when SVE_OVERLOADED_FORMS is defined.
   76 }
  77
  78 //
  79 // Multi, indexed
  80 // CHECK-LABEL: @test_svmla_lane2_f32(
  81 // CHECK-NEXT:  entry:
  82 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]], i32 3)
  83 // CHECK-NEXT:    ret void
  84 //
  85 // CPP-CHECK-LABEL: @_Z20test_svmla_lane2_f32j13svfloat32x2_tu13__SVFloat32_t(
  86 // CPP-CHECK-NEXT:  entry:
  87 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]], i32 3)
  88 // CPP-CHECK-NEXT:    ret void
  89 //
   90 void test_svmla_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { // Multi, indexed with an x2 f32 tuple: the assertions above expect one call to @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32.
   91   SVE_ACLE_FUNC(svmla_lane_za32,_f32,_vg1x2,,)(slice_base, zn, zm, 3); // Spells svmla_lane_za32_f32_vg1x2, or svmla_lane_za32_vg1x2 when overloaded; the trailing 3 is expected unchanged as the final i32 operand.
   92 }
  93
  94 // CHECK-LABEL: @test_svmla_lane4_f32(
  95 // CHECK-NEXT:  entry:
  96 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]], <vscale x 4 x float> [[ZM:%.*]], i32 3)
  97 // CHECK-NEXT:    ret void
  98 //
  99 // CPP-CHECK-LABEL: @_Z20test_svmla_lane4_f32j13svfloat32x4_tu13__SVFloat32_t(
 100 // CPP-CHECK-NEXT:  entry:
 101 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]], <vscale x 4 x float> [[ZM:%.*]], i32 3)
 102 // CPP-CHECK-NEXT:    ret void
 103 //
  104 void test_svmla_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { // Multi, indexed with an x4 f32 tuple: the assertions above expect one call to @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32.
  105   SVE_ACLE_FUNC(svmla_lane_za32,_f32,_vg1x4,,)(slice_base, zn, zm, 3); // Spells svmla_lane_za32_f32_vg1x4, or svmla_lane_za32_vg1x4 when overloaded; the trailing 3 is expected unchanged as the final i32 operand.
  106 }
 107
 108 //
 109 // Multi, multi
 110 // CHECK-LABEL: @test_svmla2_f64(
 111 // CHECK-NEXT:  entry:
 112 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]])
 113 // CHECK-NEXT:    ret void
 114 //
 115 // CPP-CHECK-LABEL: @_Z15test_svmla2_f64j13svfloat64x2_tS_(
 116 // CPP-CHECK-NEXT:  entry:
 117 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]])
 118 // CPP-CHECK-NEXT:    ret void
 119 //
  120 void test_svmla2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) __arm_streaming __arm_inout("za") { // Multi, multi with x2 f64 tuples: the assertions above expect one call to @llvm.aarch64.sme.fmla.vg1x2.nxv2f64.
  121   SVE_ACLE_FUNC(svmla_za64,_f64,_vg1x2,,)(slice_base, zn, zm); // Spells svmla_za64_f64_vg1x2, or svmla_za64_vg1x2 when SVE_OVERLOADED_FORMS is defined.
  122 }
 123
 124 // CHECK-LABEL: @test_svmla4_f64(
 125 // CHECK-NEXT:  entry:
 126 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZN_COERCE2:%.*]], <vscale x 2 x double> [[ZN_COERCE3:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE2:%.*]], <vscale x 2 x double> [[ZM_COERCE3:%.*]])
 127 // CHECK-NEXT:    ret void
 128 //
 129 // CPP-CHECK-LABEL: @_Z15test_svmla4_f64j13svfloat64x4_tS_(
 130 // CPP-CHECK-NEXT:  entry:
 131 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZN_COERCE2:%.*]], <vscale x 2 x double> [[ZN_COERCE3:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE2:%.*]], <vscale x 2 x double> [[ZM_COERCE3:%.*]])
 132 // CPP-CHECK-NEXT:    ret void
 133 //
  134 void test_svmla4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __arm_streaming __arm_inout("za") { // Multi, multi with x4 f64 tuples: the assertions above expect one call to @llvm.aarch64.sme.fmla.vg1x4.nxv2f64.
  135   SVE_ACLE_FUNC(svmla_za64,_f64,_vg1x4,,)(slice_base, zn, zm); // Spells svmla_za64_f64_vg1x4, or svmla_za64_vg1x4 when SVE_OVERLOADED_FORMS is defined.
  136 }
 137
 138 //
 139 // Multi, single
 140 // CHECK-LABEL: @test_svmla_single2_f64(
 141 // CHECK-NEXT:  entry:
 142 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZM:%.*]])
 143 // CHECK-NEXT:    ret void
 144 //
 145 // CPP-CHECK-LABEL: @_Z22test_svmla_single2_f64j13svfloat64x2_tu13__SVFloat64_t(
 146 // CPP-CHECK-NEXT:  entry:
 147 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZM:%.*]])
 148 // CPP-CHECK-NEXT:    ret void
 149 //
  150 void test_svmla_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { // Multi, single with an x2 f64 tuple and one f64 vector: the assertions above expect one call to @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64.
  151   SVE_ACLE_FUNC(svmla,_single,_za64,_f64,_vg1x2)(slice_base, zn, zm); // Spells svmla_single_za64_f64_vg1x2, or svmla_za64_vg1x2 when SVE_OVERLOADED_FORMS is defined.
  152 }
 153
 154 // CHECK-LABEL: @test_svmla_single4_f64(
 155 // CHECK-NEXT:  entry:
 156 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZN_COERCE2:%.*]], <vscale x 2 x double> [[ZN_COERCE3:%.*]], <vscale x 2 x double> [[ZM:%.*]])
 157 // CHECK-NEXT:    ret void
 158 //
 159 // CPP-CHECK-LABEL: @_Z22test_svmla_single4_f64j13svfloat64x4_tu13__SVFloat64_t(
 160 // CPP-CHECK-NEXT:  entry:
 161 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZN_COERCE2:%.*]], <vscale x 2 x double> [[ZN_COERCE3:%.*]], <vscale x 2 x double> [[ZM:%.*]])
 162 // CPP-CHECK-NEXT:    ret void
 163 //
  164 void test_svmla_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { // Multi, single with an x4 f64 tuple and one f64 vector: the assertions above expect one call to @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64.
  165   SVE_ACLE_FUNC(svmla,_single,_za64,_f64,_vg1x4)(slice_base, zn, zm); // Spells svmla_single_za64_f64_vg1x4, or svmla_za64_vg1x4 when SVE_OVERLOADED_FORMS is defined.
  166 }
 167
 168 //
 169 // Multi, indexed
 170 // CHECK-LABEL: @test_svmla_lane2_f64(
 171 // CHECK-NEXT:  entry:
 172 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZM:%.*]], i32 1)
 173 // CHECK-NEXT:    ret void
 174 //
 175 // CPP-CHECK-LABEL: @_Z20test_svmla_lane2_f64j13svfloat64x2_tu13__SVFloat64_t(
 176 // CPP-CHECK-NEXT:  entry:
 177 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZM:%.*]], i32 1)
 178 // CPP-CHECK-NEXT:    ret void
 179 //
  180 void test_svmla_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { // Multi, indexed with an x2 f64 tuple: the assertions above expect one call to @llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64.
  181   SVE_ACLE_FUNC(svmla_lane_za64,_f64,_vg1x2,,)(slice_base, zn, zm, 1); // Spells svmla_lane_za64_f64_vg1x2, or svmla_lane_za64_vg1x2 when overloaded; the trailing 1 is expected unchanged as the final i32 operand.
  182 }
 183
 184 // CHECK-LABEL: @test_svmla_lane4_f64(
 185 // CHECK-NEXT:  entry:
 186 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZN_COERCE2:%.*]], <vscale x 2 x double> [[ZN_COERCE3:%.*]], <vscale x 2 x double> [[ZM:%.*]], i32 1)
 187 // CHECK-NEXT:    ret void
 188 //
 189 // CPP-CHECK-LABEL: @_Z20test_svmla_lane4_f64j13svfloat64x4_tu13__SVFloat64_t(
 190 // CPP-CHECK-NEXT:  entry:
 191 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZN_COERCE2:%.*]], <vscale x 2 x double> [[ZN_COERCE3:%.*]], <vscale x 2 x double> [[ZM:%.*]], i32 1)
 192 // CPP-CHECK-NEXT:    ret void
 193 //
  194 void test_svmla_lane4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { // Multi, indexed with an x4 f64 tuple: the assertions above expect one call to @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64.
  195   SVE_ACLE_FUNC(svmla_lane_za64,_f64,_vg1x4,,)(slice_base, zn, zm, 1); // Spells svmla_lane_za64_f64_vg1x4, or svmla_lane_za64_vg1x4 when overloaded; the trailing 1 is expected unchanged as the final i32 operand.
  196 }