// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py

// REQUIRES: aarch64-registered-target

// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

#include <arm_sme.h>

// CHECK-LABEL: @test_svluti4_lane_zt_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_u8u11__SVUint8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
//
22 svuint8_t
test_svluti4_lane_zt_u8(svuint8_t zn
) __arm_streaming
__arm_in("zt0") {
23 return svluti4_lane_zt_u8(0, zn
, 7);
// CHECK-LABEL: @test_svluti4_lane_zt_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_s8u11__SVUint8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
//
37 svint8_t
test_svluti4_lane_zt_s8(svuint8_t zn
) __arm_streaming
__arm_in("zt0") {
38 return svluti4_lane_zt_s8(0, zn
, 7);
// CHECK-LABEL: @test_svluti4_lane_zt_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
//
51 svuint16_t
test_svluti4_lane_zt_u16(svuint8_t zn
) __arm_streaming
__arm_in("zt0") {
52 return svluti4_lane_zt_u16(0, zn
, 7);
// CHECK-LABEL: @test_svluti4_lane_zt_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
//
65 svint16_t
test_svluti4_lane_zt_s16(svuint8_t zn
) __arm_streaming
__arm_in("zt0") {
66 return svluti4_lane_zt_s16(0, zn
, 7);
// CHECK-LABEL: @test_svluti4_lane_zt_f16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.luti4.lane.zt.nxv8f16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.luti4.lane.zt.nxv8f16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
//
79 svfloat16_t
test_svluti4_lane_zt_f16(svuint8_t zn
) __arm_streaming
__arm_in("zt0") {
80 return svluti4_lane_zt_f16(0, zn
, 7);
// CHECK-LABEL: @test_svluti4_lane_zt_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.luti4.lane.zt.nxv8bf16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.luti4.lane.zt.nxv8bf16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
//
93 svbfloat16_t
test_svluti4_lane_zt_bf16(svuint8_t zn
) __arm_streaming
__arm_in("zt0") {
94 return svluti4_lane_zt_bf16(0, zn
, 7);
// CHECK-LABEL: @test_svluti4_lane_zt_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
//
107 svuint32_t
test_svluti4_lane_zt_u32(svuint8_t zn
) __arm_streaming
__arm_in("zt0") {
108 return svluti4_lane_zt_u32(0, zn
, 7);
// CHECK-LABEL: @test_svluti4_lane_zt_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
//
121 svint32_t
test_svluti4_lane_zt_s32(svuint8_t zn
) __arm_streaming
__arm_in("zt0") {
122 return svluti4_lane_zt_s32(0, zn
, 7);
// CHECK-LABEL: @test_svluti4_lane_zt_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.luti4.lane.zt.nxv4f32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t(
// CPP-CHECK-NEXT:  entry:
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.luti4.lane.zt.nxv4f32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
//
135 svfloat32_t
test_svluti4_lane_zt_f32(svuint8_t zn
) __arm_streaming
__arm_in("zt0") {
136 return svluti4_lane_zt_f32(0, zn
, 7);