1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
4 define void @bfmopa(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
7 ; CHECK-NEXT: bfmopa za3.s, p0/m, p1/m, z0.h, z1.h
9 call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
13 define void @fmopa(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
16 ; CHECK-NEXT: fmopa za3.s, p0/m, p1/m, z0.h, z1.h
18 call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
22 define void @smopa_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
23 ; CHECK-LABEL: smopa_s:
25 ; CHECK-NEXT: smopa za3.s, p0/m, p1/m, z0.b, z1.b
27 call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
31 define void @smopa_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
32 ; CHECK-LABEL: smopa_d:
34 ; CHECK-NEXT: smopa za7.d, p0/m, p1/m, z0.h, z1.h
36 call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
40 define void @umopa_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
41 ; CHECK-LABEL: umopa_s:
43 ; CHECK-NEXT: umopa za3.s, p0/m, p1/m, z0.b, z1.b
45 call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
49 define void @umopa_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
50 ; CHECK-LABEL: umopa_d:
52 ; CHECK-NEXT: umopa za7.d, p0/m, p1/m, z0.h, z1.h
54 call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
58 define void @fmopa_s(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
59 ; CHECK-LABEL: fmopa_s:
61 ; CHECK-NEXT: fmopa za3.s, p0/m, p1/m, z0.s, z1.s
63 call void @llvm.aarch64.sme.mopa.nxv4f32(i32 3, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
67 define void @fmopa_d(<vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm) #1 {
68 ; CHECK-LABEL: fmopa_d:
70 ; CHECK-NEXT: fmopa za7.d, p0/m, p1/m, z0.d, z1.d
72 call void @llvm.aarch64.sme.mopa.nxv2f64(i32 7, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
76 define void @sumopa_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
77 ; CHECK-LABEL: sumopa_s:
79 ; CHECK-NEXT: sumopa za3.s, p0/m, p1/m, z0.b, z1.b
81 call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
85 define void @sumopa_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
86 ; CHECK-LABEL: sumopa_d:
88 ; CHECK-NEXT: sumopa za7.d, p0/m, p1/m, z0.h, z1.h
90 call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
94 define void @usmopa_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
95 ; CHECK-LABEL: usmopa_s:
97 ; CHECK-NEXT: usmopa za3.s, p0/m, p1/m, z0.b, z1.b
99 call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
103 define void @usmopa_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
104 ; CHECK-LABEL: usmopa_d:
106 ; CHECK-NEXT: usmopa za7.d, p0/m, p1/m, z0.h, z1.h
108 call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
112 attributes #0 = { "target-features"="+sme-i16i64" }
113 attributes #1 = { "target-features"="+sme-f64f64" }
115 declare void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
116 declare void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
117 declare void @llvm.aarch64.sme.mopa.nxv4f32(i32, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
118 declare void @llvm.aarch64.sme.mopa.nxv2f64(i32, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
119 declare void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
120 declare void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
121 declare void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
122 declare void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
123 declare void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
124 declare void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
125 declare void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
126 declare void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)