; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
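
;
; ABS
;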

define <vscale x 16 x i8> @abs_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: abs_i8:
; CHECK: abs z0.b, p0/m, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @abs_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: abs_i16:
; CHECK: abs z0.h, p0/m, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @abs_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: abs_i32:
; CHECK: abs z0.s, p0/m, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @abs_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: abs_i64:
; CHECK: abs z0.d, p0/m, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
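
;
; NEG
;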

define <vscale x 16 x i8> @neg_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: neg_i8:
; CHECK: neg z0.b, p0/m, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @neg_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: neg_i16:
; CHECK: neg z0.h, p0/m, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @neg_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: neg_i32:
; CHECK: neg z0.s, p0/m, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @neg_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: neg_i64:
; CHECK: neg z0.d, p0/m, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}
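
;
; SDOT
;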

define <vscale x 4 x i32> @sdot_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sdot_i32:
; CHECK: sdot z0.s, z1.b, z2.b
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 16 x i8> %b,
                                                                <vscale x 16 x i8> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sdot_i64(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sdot_i64:
; CHECK: sdot z0.d, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 8 x i16> %b,
                                                                <vscale x 8 x i16> %c)
  ret <vscale x 2 x i64> %out
}
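
;
; SDOT (Indexed)
;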

define <vscale x 4 x i32> @sdot_lane_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sdot_lane_i32:
; CHECK: sdot z0.s, z1.b, z2.b[2]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                     <vscale x 16 x i8> %b,
                                                                     <vscale x 16 x i8> %c,
                                                                     i32 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sdot_lane_i64(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sdot_lane_i64:
; CHECK: sdot z0.d, z1.h, z2.h[1]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                     <vscale x 8 x i16> %b,
                                                                     <vscale x 8 x i16> %c,
                                                                     i32 1)
  ret <vscale x 2 x i64> %out
}
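
;
; UDOT
;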

define <vscale x 4 x i32> @udot_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: udot_i32:
; CHECK: udot z0.s, z1.b, z2.b
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 16 x i8> %b,
                                                                <vscale x 16 x i8> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @udot_i64(<vscale x 2 x i64> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: udot_i64:
; CHECK: udot z0.d, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 8 x i16> %b,
                                                                <vscale x 8 x i16> %c)
  ret <vscale x 2 x i64> %out
}
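
;
; UDOT (Indexed)
;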

define <vscale x 4 x i32> @udot_lane_i32(<vscale x 4 x i32> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: udot_lane_i32:
; CHECK: udot z0.s, z1.b, z2.b[2]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                     <vscale x 16 x i8> %b,
                                                                     <vscale x 16 x i8> %c,
                                                                     i32 2)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)

declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.udot.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)