1 // RUN: mlir-opt %s -convert-vector-to-arm-sme -split-input-file -allow-unregistered-dialect | FileCheck %s
3 //===----------------------------------------------------------------------===//
4 // vector.transfer_read
5 //===----------------------------------------------------------------------===//
/// In-bounds, unmasked 2-D transfer_read of i8 into a [16]x[16] scalable vector; expected to become arm_sme.tile_load.
7 // CHECK-LABEL: @transfer_read_2d_i8
8 // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi8>, vector<[16]x[16]xi8>
9 func.func @transfer_read_2d_i8(%src : memref<?x?xi8>) {
10 %c0 = arith.constant 0 : index
11 %pad = arith.constant 0 : i8
12 %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi8>, vector<[16]x[16]xi8>
13 "prevent.dce"(%0) : (vector<[16]x[16]xi8>) -> ()
/// In-bounds, unmasked 2-D transfer_read of i16 into a [8]x[8] scalable vector; expected to become arm_sme.tile_load.
19 // CHECK-LABEL: @transfer_read_2d_i16
20 // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi16>, vector<[8]x[8]xi16>
21 func.func @transfer_read_2d_i16(%src : memref<?x?xi16>) {
22 %c0 = arith.constant 0 : index
23 %pad = arith.constant 0 : i16
24 %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi16>, vector<[8]x[8]xi16>
25 "prevent.dce"(%0) : (vector<[8]x[8]xi16>) -> ()
/// In-bounds, unmasked 2-D transfer_read of i32 into a [4]x[4] scalable vector; expected to become arm_sme.tile_load.
31 // CHECK-LABEL: @transfer_read_2d_i32
32 // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi32>, vector<[4]x[4]xi32>
33 func.func @transfer_read_2d_i32(%src : memref<?x?xi32>) {
34 %c0 = arith.constant 0 : index
35 %pad = arith.constant 0 : i32
36 %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi32>, vector<[4]x[4]xi32>
37 "prevent.dce"(%0) : (vector<[4]x[4]xi32>) -> ()
/// In-bounds, unmasked 2-D transfer_read of i64 into a [2]x[2] scalable vector; expected to become arm_sme.tile_load.
43 // CHECK-LABEL: @transfer_read_2d_i64
44 // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi64>, vector<[2]x[2]xi64>
45 func.func @transfer_read_2d_i64(%src : memref<?x?xi64>) {
46 %c0 = arith.constant 0 : index
47 %pad = arith.constant 0 : i64
48 %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi64>, vector<[2]x[2]xi64>
49 "prevent.dce"(%0) : (vector<[2]x[2]xi64>) -> ()
/// In-bounds, unmasked 2-D transfer_read of i128 into a [1]x[1] scalable vector; expected to become arm_sme.tile_load.
55 // CHECK-LABEL: @transfer_read_2d_i128
56 // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xi128>, vector<[1]x[1]xi128>
57 func.func @transfer_read_2d_i128(%src : memref<?x?xi128>) {
58 %c0 = arith.constant 0 : index
59 %pad = arith.constant 0 : i128
60 %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xi128>, vector<[1]x[1]xi128>
61 "prevent.dce"(%0) : (vector<[1]x[1]xi128>) -> ()
/// In-bounds, unmasked 2-D transfer_read of f16 into a [8]x[8] scalable vector; expected to become arm_sme.tile_load.
67 // CHECK-LABEL: @transfer_read_2d_f16
68 // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xf16>, vector<[8]x[8]xf16>
69 func.func @transfer_read_2d_f16(%src : memref<?x?xf16>) {
70 %c0 = arith.constant 0 : index
71 %pad = arith.constant 0.0 : f16
72 %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf16>, vector<[8]x[8]xf16>
73 "prevent.dce"(%0) : (vector<[8]x[8]xf16>) -> ()
/// In-bounds, unmasked 2-D transfer_read of bf16 into a [8]x[8] scalable vector; expected to become arm_sme.tile_load.
79 // CHECK-LABEL: @transfer_read_2d_bf16
80 // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
81 func.func @transfer_read_2d_bf16(%src : memref<?x?xbf16>) {
82 %c0 = arith.constant 0 : index
83 %pad = arith.constant 0.0 : bf16
84 %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xbf16>, vector<[8]x[8]xbf16>
85 "prevent.dce"(%0) : (vector<[8]x[8]xbf16>) -> ()
/// In-bounds, unmasked 2-D transfer_read of f32 into a [4]x[4] scalable vector; expected to become arm_sme.tile_load.
91 // CHECK-LABEL: @transfer_read_2d_f32
92 // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xf32>, vector<[4]x[4]xf32>
93 func.func @transfer_read_2d_f32(%src : memref<?x?xf32>) {
94 %c0 = arith.constant 0 : index
95 %pad = arith.constant 0.0 : f32
96 %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
97 "prevent.dce"(%0) : (vector<[4]x[4]xf32>) -> ()
/// In-bounds, unmasked 2-D transfer_read of f64 into a [2]x[2] scalable vector; expected to become arm_sme.tile_load.
103 // CHECK-LABEL: @transfer_read_2d_f64
104 // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}] : memref<?x?xf64>, vector<[2]x[2]xf64>
105 func.func @transfer_read_2d_f64(%src : memref<?x?xf64>) {
106 %c0 = arith.constant 0 : index
107 %pad = arith.constant 0.0 : f64
108 %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf64>, vector<[2]x[2]xf64>
109 "prevent.dce"(%0) : (vector<[2]x[2]xf64>) -> ()
/// Masked 2-D transfer_read of i16: the resulting arm_sme.tile_load is expected to carry two extra operands after the indices (matched loosely).
115 // CHECK-LABEL: @transfer_read_2d_with_mask_i16
116 // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}], {{.*}}, {{.*}} : memref<?x?xi16>, vector<[8]x[8]xi16>
117 func.func @transfer_read_2d_with_mask_i16(%src : memref<?x?xi16>, %mask : vector<[8]x[8]xi1>) {
118 %c0 = arith.constant 0 : index
119 %pad = arith.constant 0 : i16
120 %0 = vector.transfer_read %src[%c0, %c0], %pad, %mask {in_bounds = [true, true]} : memref<?x?xi16>, vector<[8]x[8]xi16>
121 "prevent.dce"(%0) : (vector<[8]x[8]xi16>) -> ()
127 /// In-flight transpose: a transfer_read with a transposing permutation_map is expected to become a layout<vertical> tile_load.
129 // CHECK-LABEL: @transfer_read_2d_transpose_i8
130 // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi8>, vector<[16]x[16]xi8>
131 func.func @transfer_read_2d_transpose_i8(%src : memref<?x?xi8>) {
132 %c0 = arith.constant 0 : index
133 %pad = arith.constant 0 : i8
134 %0 = vector.transfer_read %src[%c0, %c0], %pad {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xi8>, vector<[16]x[16]xi8>
135 "prevent.dce"(%0) : (vector<[16]x[16]xi8>) -> ()
/// Masked in-flight transpose: the layout<vertical> tile_load must also carry the two extra operands
/// (after the indices) that a masked read produces, so the expected line matches them explicitly.
141 // CHECK-LABEL: @transfer_read_2d_transpose_with_mask_f32
142 // CHECK: arm_sme.tile_load %{{.*}}[{{.*}}], {{.*}}, {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
143 func.func @transfer_read_2d_transpose_with_mask_f32(%src : memref<?x?xf32>, %mask : vector<[4]x[4]xi1>) {
144 %c0 = arith.constant 0 : index
145 %pad = arith.constant 0.0 : f32
146 %0 = vector.transfer_read %src[%c0, %c0], %pad, %mask {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
147 "prevent.dce"(%0) : (vector<[4]x[4]xf32>) -> ()
/// A transfer_read whose only use is a vector.transpose is expected to fold into one layout<vertical> tile_load, with no tile_store emitted.
153 // CHECK-LABEL: @fold_transpose_into_load
154 // CHECK-NOT: arm_sme.tile_store
155 // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
156 // CHECK-NOT: arm_sme.tile_store
157 func.func @fold_transpose_into_load(%src : memref<?x?xf32>) {
158 %c0 = arith.constant 0 : index
159 %pad = arith.constant 0.0 : f32
160 %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
161 %1 = vector.transpose %0, [1, 0] : vector<[4]x[4]xf32> to vector<[4]x[4]xf32>
162 "prevent.dce"(%1) : (vector<[4]x[4]xf32>) -> ()
167 /// When the transfer_read result has more than a single use, the transpose cannot be folded
168 /// into the load; it is instead performed via memory (tile_store, then a layout<vertical> tile_load).
170 // CHECK-LABEL: @fold_transpose_into_load_multi_use
171 // CHECK: arm_sme.tile_load {{.*}} : memref<?x?xf32>, vector<[4]x[4]xf32>
172 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xf32>, vector<[4]x[4]xf32>
173 // CHECK: %[[TILE_TRANSPOSED_VIA_MEM:.*]] = arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
174 // CHECK: "prevent.dce"(%[[TILE_TRANSPOSED_VIA_MEM]]) : (vector<[4]x[4]xf32>) -> ()
175 func.func @fold_transpose_into_load_multi_use(%src : memref<?x?xf32>) {
176 %c0 = arith.constant 0 : index
177 %pad = arith.constant 0.0 : f32
178 %0 = vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?xf32>, vector<[4]x[4]xf32>
179 "test.some_use"(%0) : (vector<[4]x[4]xf32>) -> ()
180 %1 = vector.transpose %0, [1, 0] : vector<[4]x[4]xf32> to vector<[4]x[4]xf32>
181 "prevent.dce"(%1) : (vector<[4]x[4]xf32>) -> ()
186 //===----------------------------------------------------------------------===//
187 // vector.transfer_write
188 //===----------------------------------------------------------------------===//
/// In-bounds, unmasked 2-D transfer_write of a [16]x[16]xi8 tile; expected to become arm_sme.tile_store with the same destination and indices.
190 // CHECK-LABEL: func.func @transfer_write_2d_i8(
191 // CHECK-SAME: %[[VECTOR:.*]]: vector<[16]x[16]xi8>,
192 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xi8>) {
193 // CHECK: %[[C0:.*]] = arith.constant 0 : index
194 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xi8>, vector<[16]x[16]xi8>
195 func.func @transfer_write_2d_i8(%vector : vector<[16]x[16]xi8>, %dest : memref<?x?xi8>) {
196 %c0 = arith.constant 0 : index
197 vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[16]x[16]xi8>, memref<?x?xi8>
/// In-bounds, unmasked 2-D transfer_write of a [8]x[8]xi16 tile; expected to become arm_sme.tile_store with the same destination and indices.
203 // CHECK-LABEL: func.func @transfer_write_2d_i16(
204 // CHECK-SAME: %[[VECTOR:.*]]: vector<[8]x[8]xi16>,
205 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xi16>) {
206 // CHECK: %[[C0:.*]] = arith.constant 0 : index
207 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xi16>, vector<[8]x[8]xi16>
208 func.func @transfer_write_2d_i16(%vector : vector<[8]x[8]xi16>, %dest : memref<?x?xi16>) {
209 %c0 = arith.constant 0 : index
210 vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xi16>, memref<?x?xi16>
/// In-bounds, unmasked 2-D transfer_write of a [4]x[4]xi32 tile; expected to become arm_sme.tile_store with the same destination and indices.
216 // CHECK-LABEL: func.func @transfer_write_2d_i32(
217 // CHECK-SAME: %[[VECTOR:.*]]: vector<[4]x[4]xi32>,
218 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xi32>) {
219 // CHECK: %[[C0:.*]] = arith.constant 0 : index
220 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xi32>, vector<[4]x[4]xi32>
221 func.func @transfer_write_2d_i32(%vector : vector<[4]x[4]xi32>, %dest : memref<?x?xi32>) {
222 %c0 = arith.constant 0 : index
223 vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[4]x[4]xi32>, memref<?x?xi32>
/// In-bounds, unmasked 2-D transfer_write of a [2]x[2]xi64 tile; expected to become arm_sme.tile_store with the same destination and indices.
229 // CHECK-LABEL: func.func @transfer_write_2d_i64(
230 // CHECK-SAME: %[[VECTOR:.*]]: vector<[2]x[2]xi64>,
231 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xi64>) {
232 // CHECK: %[[C0:.*]] = arith.constant 0 : index
233 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xi64>, vector<[2]x[2]xi64>
234 func.func @transfer_write_2d_i64(%vector : vector<[2]x[2]xi64>, %dest : memref<?x?xi64>) {
235 %c0 = arith.constant 0 : index
236 vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[2]x[2]xi64>, memref<?x?xi64>
/// In-bounds, unmasked 2-D transfer_write of a [8]x[8]xf16 tile; expected to become arm_sme.tile_store with the same destination and indices.
242 // CHECK-LABEL: func.func @transfer_write_2d_f16(
243 // CHECK-SAME: %[[VECTOR:.*]]: vector<[8]x[8]xf16>,
244 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xf16>) {
245 // CHECK: %[[C0:.*]] = arith.constant 0 : index
246 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xf16>, vector<[8]x[8]xf16>
247 func.func @transfer_write_2d_f16(%vector : vector<[8]x[8]xf16>, %dest : memref<?x?xf16>) {
248 %c0 = arith.constant 0 : index
249 vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf16>, memref<?x?xf16>
/// In-bounds, unmasked 2-D transfer_write of a [8]x[8]xbf16 tile; expected to become arm_sme.tile_store with the same destination and indices.
255 // CHECK-LABEL: func.func @transfer_write_2d_bf16(
256 // CHECK-SAME: %[[VECTOR:.*]]: vector<[8]x[8]xbf16>,
257 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xbf16>) {
258 // CHECK: %[[C0:.*]] = arith.constant 0 : index
259 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
260 func.func @transfer_write_2d_bf16(%vector : vector<[8]x[8]xbf16>, %dest : memref<?x?xbf16>) {
261 %c0 = arith.constant 0 : index
262 vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xbf16>, memref<?x?xbf16>
/// In-bounds, unmasked 2-D transfer_write of a [4]x[4]xf32 tile; expected to become arm_sme.tile_store with the same destination and indices.
268 // CHECK-LABEL: func.func @transfer_write_2d_f32(
269 // CHECK-SAME: %[[VECTOR:.*]]: vector<[4]x[4]xf32>,
270 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xf32>) {
271 // CHECK: %[[C0:.*]] = arith.constant 0 : index
272 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xf32>, vector<[4]x[4]xf32>
273 func.func @transfer_write_2d_f32(%vector : vector<[4]x[4]xf32>, %dest : memref<?x?xf32>) {
274 %c0 = arith.constant 0 : index
275 vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[4]x[4]xf32>, memref<?x?xf32>
/// In-bounds, unmasked 2-D transfer_write of a [2]x[2]xf64 tile; expected to become arm_sme.tile_store with the same destination and indices.
281 // CHECK-LABEL: func.func @transfer_write_2d_f64(
282 // CHECK-SAME: %[[VECTOR:.*]]: vector<[2]x[2]xf64>,
283 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xf64>) {
284 // CHECK: %[[C0:.*]] = arith.constant 0 : index
285 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xf64>, vector<[2]x[2]xf64>
286 func.func @transfer_write_2d_f64(%vector : vector<[2]x[2]xf64>, %dest : memref<?x?xf64>) {
287 %c0 = arith.constant 0 : index
288 vector.transfer_write %vector, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[2]x[2]xf64>, memref<?x?xf64>
/// Masked 2-D transfer_write of a [2]x[2]xf64 tile: the mask argument is expected to be forwarded to arm_sme.tile_store.
294 // CHECK-LABEL: func.func @transfer_write_2d_with_mask_f64(
295 // CHECK-SAME: %[[VECTOR:.*]]: vector<[2]x[2]xf64>,
296 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xf64>,
297 // CHECK-SAME: %[[MASK:.*]]: vector<[2]x[2]xi1>) {
298 // CHECK: %[[C0:.*]] = arith.constant 0 : index
299 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]], %[[MASK]] : memref<?x?xf64>, vector<[2]x[2]xf64>
300 func.func @transfer_write_2d_with_mask_f64(%vector : vector<[2]x[2]xf64>, %dest : memref<?x?xf64>, %mask : vector<[2]x[2]xi1>) {
301 %c0 = arith.constant 0 : index
302 vector.transfer_write %vector, %dest[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[2]x[2]xf64>, memref<?x?xf64>
308 /// In-flight transpose via vertical store: a transposing permutation_map is expected to yield a layout<vertical> tile_store.
310 // CHECK-LABEL: func.func @transfer_write_2d_transpose_i64(
311 // CHECK-SAME: %[[VECTOR:.*]]: vector<[2]x[2]xi64>,
312 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xi64>) {
313 // CHECK: %[[C0:.*]] = arith.constant 0 : index
314 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]] layout<vertical> : memref<?x?xi64>, vector<[2]x[2]xi64>
315 func.func @transfer_write_2d_transpose_i64(%vector : vector<[2]x[2]xi64>, %dest : memref<?x?xi64>) {
316 %c0 = arith.constant 0 : index
317 vector.transfer_write %vector, %dest[%c0, %c0] {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : vector<[2]x[2]xi64>, memref<?x?xi64>
323 /// In-flight transpose via vertical store with mask: the mask is expected to be forwarded alongside layout<vertical>.
325 // CHECK-LABEL: func.func @transfer_write_2d_transpose_with_mask_bf16(
326 // CHECK-SAME: %[[VECTOR:.*]]: vector<[8]x[8]xbf16>,
327 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xbf16>,
328 // CHECK-SAME: %[[MASK:.*]]: vector<[8]x[8]xi1>) {
329 // CHECK: %[[C0:.*]] = arith.constant 0 : index
330 // CHECK: arm_sme.tile_store %[[VECTOR]], %[[DEST]]{{\[}}%[[C0]], %[[C0]]], %[[MASK]] layout<vertical> : memref<?x?xbf16>, vector<[8]x[8]xbf16>
331 func.func @transfer_write_2d_transpose_with_mask_bf16(%vector : vector<[8]x[8]xbf16>, %dest : memref<?x?xbf16>, %mask : vector<[8]x[8]xi1>) {
332 %c0 = arith.constant 0 : index
333 vector.transfer_write %vector, %dest[%c0, %c0], %mask {permutation_map = affine_map<(d0, d1) -> (d1, d0)>, in_bounds = [true, true]} : vector<[8]x[8]xbf16>, memref<?x?xbf16>
/// Writing a 1-D slice taken from a tile with vector.extract is expected to become arm_sme.store_tile_slice with an all-true mask.
339 // CHECK-LABEL: func.func @transfer_write_slice(
340 // CHECK-SAME: %[[VECTOR:.*]]: vector<[4]x[4]xf32>,
341 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xf32>,
342 // CHECK-SAME: %[[INDEX:.*]]: index) {
343 // CHECK: %[[C0:.*]] = arith.constant 0 : index
344 // CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<[4]xi1>
345 // CHECK: arm_sme.store_tile_slice %[[VECTOR]], %[[INDEX]], %[[MASK]], %[[DEST]][%[[INDEX]], %[[C0]]] : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
346 func.func @transfer_write_slice(%vector: vector<[4]x[4]xf32>, %dest : memref<?x?xf32>, %slice_index: index) {
347 %c0 = arith.constant 0 : index
348 %slice = vector.extract %vector[%slice_index] : vector<[4]xf32> from vector<[4]x[4]xf32>
349 vector.transfer_write %slice, %dest[%slice_index, %c0] { in_bounds = [true] }: vector<[4]xf32>, memref<?x?xf32>
/// Masked variant of the slice write: the transfer_write mask is expected to be passed straight to arm_sme.store_tile_slice.
355 // CHECK-LABEL: func.func @transfer_write_slice_with_mask(
356 // CHECK-SAME: %[[VECTOR:.*]]: vector<[4]x[4]xf32>,
357 // CHECK-SAME: %[[DEST:.*]]: memref<?x?xf32>,
358 // CHECK-SAME: %[[MASK:.*]]: vector<[4]xi1>,
359 // CHECK-SAME: %[[INDEX:.*]]: index) {
360 // CHECK: %[[C0:.*]] = arith.constant 0 : index
361 // CHECK: arm_sme.store_tile_slice %[[VECTOR]], %[[INDEX]], %[[MASK]], %[[DEST]][%[[INDEX]], %[[C0]]] : memref<?x?xf32>, vector<[4]xi1>, vector<[4]x[4]xf32>
362 func.func @transfer_write_slice_with_mask(%vector: vector<[4]x[4]xf32>, %dest : memref<?x?xf32>, %mask: vector<[4]xi1>, %slice_index: index) {
363 %c0 = arith.constant 0 : index
364 %slice = vector.extract %vector[%slice_index] : vector<[4]xf32> from vector<[4]x[4]xf32>
365 vector.transfer_write %slice, %dest[%slice_index, %c0], %mask { in_bounds = [true] }: vector<[4]xf32>, memref<?x?xf32>
/// A slice taken with arm_sme.extract_tile_slice layout<vertical> is expected to be written with a layout<vertical> store_tile_slice.
371 // CHECK-LABEL: func.func @transfer_write_vertical_slice
372 // CHECK: arm_sme.store_tile_slice {{.*}} layout<vertical>
373 func.func @transfer_write_vertical_slice(%vector: vector<[4]x[4]xf32>, %dest : memref<?x?xf32>, %slice_index: index) {
374 %c0 = arith.constant 0 : index
375 %slice = arm_sme.extract_tile_slice %vector[%slice_index] layout<vertical>
376 : vector<[4]xf32> from vector<[4]x[4]xf32>
377 vector.transfer_write %slice, %dest[%slice_index, %c0] { in_bounds = [true] }: vector<[4]xf32>, memref<?x?xf32>
381 //===----------------------------------------------------------------------===//
382 // vector.broadcast
383 //===----------------------------------------------------------------------===//
/// Broadcasting a scalar i32 to a [4]x[4] tile: the scalar is expected to be broadcast to a 1-D vector first,
/// then inserted into every tile slice by an scf.for loop that runs vscale * 4 times.
387 // CHECK-LABEL: func.func @broadcast_vec2d_from_i32(
388 // CHECK-SAME: %[[SRC:.*]]: i32) {
389 // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
390 // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
391 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
392 // CHECK: %[[SRC_1D:.*]] = vector.broadcast %[[SRC]] : i32 to vector<[4]xi32>
393 // CHECK: %[[INIT_TILE:.*]] = arm_sme.get_tile : vector<[4]x[4]xi32>
394 // CHECK: %[[VSCALE:.*]] = vector.vscale
395 // CHECK: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[VSCALE]], %[[C4]] : index
396 // CHECK: %[[TILE:.*]] = scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[NUM_TILE_SLICES]] step %[[C1]] iter_args(%[[CURRENT_TILE:.*]] = %[[INIT_TILE]]) -> (vector<[4]x[4]xi32>) {
397 // CHECK: %[[NEW_TILE:.*]] = arm_sme.insert_tile_slice %[[SRC_1D]], %[[CURRENT_TILE]][%[[TILE_SLICE_INDEX]]] : vector<[4]xi32> into vector<[4]x[4]xi32>
398 // CHECK: scf.yield %[[NEW_TILE]] : vector<[4]x[4]xi32>
399 // CHECK: "prevent.dce"(%[[TILE]]) : (vector<[4]x[4]xi32>) -> ()
400 func.func @broadcast_vec2d_from_i32(%arg0: i32) {
401 %0 = vector.broadcast %arg0 : i32 to vector<[4]x[4]xi32>
402 "prevent.dce"(%0) : (vector<[4]x[4]xi32>) -> ()
/// Broadcasting a 0-D vector to a tile: the source is expected to be broadcast to 1-D first, then used as the inserted tile slice.
408 // CHECK-LABEL: func.func @broadcast_vec2d_from_vec0d(
409 // CHECK-SAME: %[[SRC:.*]]: vector<f32>) {
410 // CHECK: %[[SRC_1D:.*]] = vector.broadcast %[[SRC]] : vector<f32> to vector<[4]xf32>
412 // CHECK: arm_sme.insert_tile_slice %[[SRC_1D]], {{.*}}
413 func.func @broadcast_vec2d_from_vec0d(%arg0: vector<f32>) {
414 %0 = vector.broadcast %arg0 : vector<f32> to vector<[4]x[4]xf32>
415 "prevent.dce"(%0) : (vector<[4]x[4]xf32>) -> ()
/// Broadcasting a 1-D vector to a tile: no intermediate vector.broadcast is expected; the source itself is the inserted tile slice.
421 // CHECK-LABEL: func.func @broadcast_vec2d_from_vec1d(
422 // CHECK-SAME: %[[SRC:.*]]: vector<[8]xi16>) {
423 // CHECK-NOT: vector.broadcast
425 // CHECK: arm_sme.insert_tile_slice %[[SRC]], {{.*}}
426 func.func @broadcast_vec2d_from_vec1d(%arg0: vector<[8]xi16>) {
427 %0 = vector.broadcast %arg0 : vector<[8]xi16> to vector<[8]x[8]xi16>
428 "prevent.dce"(%0) : (vector<[8]x[8]xi16>) -> ()
432 //===----------------------------------------------------------------------===//
433 // vector.splat
434 //===----------------------------------------------------------------------===//
/// Splatting a scalar i32 to a [4]x[4] tile: expected to broadcast the scalar to 1-D, then insert it into each tile slice inside an scf.for loop.
438 // CHECK-LABEL: func.func @splat_vec2d_from_i32(
439 // CHECK-SAME: %[[SRC:.*]]: i32) {
440 // CHECK: %[[BCST:.*]] = vector.broadcast %[[SRC]] : i32 to vector<[4]xi32>
441 // CHECK: arm_sme.get_tile : vector<[4]x[4]xi32>
442 // CHECK: %[[VSCALE:.*]] = vector.vscale
443 // CHECK: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[VSCALE]], %{{.*}} : index
444 // CHECK: scf.for {{.*}} to %[[NUM_TILE_SLICES]] {{.*}} {
445 // CHECK: arm_sme.insert_tile_slice %[[BCST]], {{.*}} : vector<[4]xi32> into vector<[4]x[4]xi32>
446 func.func @splat_vec2d_from_i32(%arg0: i32) {
447 %0 = vector.splat %arg0 : vector<[4]x[4]xi32>
448 "prevent.dce"(%0) : (vector<[4]x[4]xi32>) -> ()
/// Splatting a scalar f16 to a [8]x[8] tile: expected to broadcast the scalar to 1-D and insert that vector as a tile slice.
454 // CHECK-LABEL: func.func @splat_vec2d_from_f16(
455 // CHECK-SAME: %[[SRC:.*]]: f16) {
456 // CHECK: %[[BCST:.*]] = vector.broadcast %[[SRC]] : f16 to vector<[8]xf16>
458 // CHECK: arm_sme.insert_tile_slice %[[BCST]], {{.*}} : vector<[8]xf16> into vector<[8]x[8]xf16>
459 func.func @splat_vec2d_from_f16(%arg0: f16) {
460 %0 = vector.splat %arg0 : vector<[8]x[8]xf16>
461 "prevent.dce"(%0) : (vector<[8]x[8]xf16>) -> ()
465 //===----------------------------------------------------------------------===//
466 // vector.transpose
467 //===----------------------------------------------------------------------===//
/// Standalone vector.transpose of a [16]x[16]xi8 tile is expected to round-trip through memory:
/// a (vscale * 16) x (vscale * 16) alloca is created, the tile is stored to it, and it is re-loaded with layout<vertical>.
471 // CHECK-LABEL: func.func @transpose_i8(
472 // CHECK-SAME: %[[TILE:.*]]: vector<[16]x[16]xi8>)
473 // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
474 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
475 // CHECK: %[[VSCALE:.*]] = vector.vscale
476 // CHECK: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[VSCALE]], %[[C16]] : index
477 // CHECK: %[[TILE_ALLOCA:.*]] = memref.alloca(%[[NUM_TILE_SLICES]], %[[NUM_TILE_SLICES]]) : memref<?x?xi8>
478 // CHECK: arm_sme.tile_store %[[TILE]], %[[TILE_ALLOCA]]{{\[}}%[[C0]], %[[C0]]] : memref<?x?xi8>, vector<[16]x[16]xi8>
479 // CHECK: arm_sme.tile_load %[[TILE_ALLOCA]]{{\[}}%[[C0]], %[[C0]]] layout<vertical> : memref<?x?xi8>, vector<[16]x[16]xi8>
480 func.func @transpose_i8(%arg0: vector<[16]x[16]xi8>) {
481 %0 = vector.transpose %arg0, [1, 0] : vector<[16]x[16]xi8> to vector<[16]x[16]xi8>
482 "prevent.dce"(%0) : (vector<[16]x[16]xi8>) -> ()
/// Standalone vector.transpose of a [8]x[8]xi16 tile: expected to go via memory (tile_store, then a layout<vertical> tile_load).
488 // CHECK-LABEL: @transpose_i16
489 // CHECK: arith.constant 8
490 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xi16>, vector<[8]x[8]xi16>
491 // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi16>, vector<[8]x[8]xi16>
492 func.func @transpose_i16(%arg0: vector<[8]x[8]xi16>) {
493 %0 = vector.transpose %arg0, [1, 0] : vector<[8]x[8]xi16> to vector<[8]x[8]xi16>
494 "prevent.dce"(%0) : (vector<[8]x[8]xi16>) -> ()
/// Standalone vector.transpose of a [4]x[4]xi32 tile: expected to go via memory (tile_store, then a layout<vertical> tile_load).
500 // CHECK-LABEL: @transpose_i32
501 // CHECK: arith.constant 4
502 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xi32>, vector<[4]x[4]xi32>
503 // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi32>, vector<[4]x[4]xi32>
504 func.func @transpose_i32(%arg0: vector<[4]x[4]xi32>) {
505 %0 = vector.transpose %arg0, [1, 0] : vector<[4]x[4]xi32> to vector<[4]x[4]xi32>
506 "prevent.dce"(%0) : (vector<[4]x[4]xi32>) -> ()
/// Standalone vector.transpose of a [2]x[2]xi64 tile: expected to go via memory (tile_store, then a layout<vertical> tile_load).
512 // CHECK-LABEL: @transpose_i64
513 // CHECK: arith.constant 2
514 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xi64>, vector<[2]x[2]xi64>
515 // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi64>, vector<[2]x[2]xi64>
516 func.func @transpose_i64(%arg0: vector<[2]x[2]xi64>) {
517 %0 = vector.transpose %arg0, [1, 0] : vector<[2]x[2]xi64> to vector<[2]x[2]xi64>
518 "prevent.dce"(%0) : (vector<[2]x[2]xi64>) -> ()
/// Standalone vector.transpose of a [1]x[1]xi128 tile: expected to go via memory. The temporary
/// buffer is sized by vscale directly (no multiplication is matched for the single-element base tile).
524 // CHECK-LABEL: @transpose_i128
525 // CHECK: %[[VSCALE:.*]] = vector.vscale
526 // CHECK: %[[TILE_ALLOCA:.*]] = memref.alloca(%[[VSCALE]], %[[VSCALE]]) : memref<?x?xi128>
527 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xi128>, vector<[1]x[1]xi128>
528 // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xi128>, vector<[1]x[1]xi128>
529 func.func @transpose_i128(%arg0: vector<[1]x[1]xi128>) {
530 %0 = vector.transpose %arg0, [1, 0] : vector<[1]x[1]xi128> to vector<[1]x[1]xi128>
531 "prevent.dce"(%0) : (vector<[1]x[1]xi128>) -> ()
/// Standalone vector.transpose of a [8]x[8]xf16 tile: expected to go via memory (tile_store, then a layout<vertical> tile_load).
537 // CHECK-LABEL: @transpose_f16
538 // CHECK: arith.constant 8
539 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xf16>, vector<[8]x[8]xf16>
540 // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf16>, vector<[8]x[8]xf16>
541 func.func @transpose_f16(%arg0: vector<[8]x[8]xf16>) {
542 %0 = vector.transpose %arg0, [1, 0] : vector<[8]x[8]xf16> to vector<[8]x[8]xf16>
543 "prevent.dce"(%0) : (vector<[8]x[8]xf16>) -> ()
/// Standalone vector.transpose of a [8]x[8]xbf16 tile: expected to go via memory (tile_store, then a layout<vertical> tile_load).
549 // CHECK-LABEL: @transpose_bf16
550 // CHECK: arith.constant 8
551 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xbf16>, vector<[8]x[8]xbf16>
552 // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xbf16>, vector<[8]x[8]xbf16>
553 func.func @transpose_bf16(%arg0: vector<[8]x[8]xbf16>) {
554 %0 = vector.transpose %arg0, [1, 0] : vector<[8]x[8]xbf16> to vector<[8]x[8]xbf16>
555 "prevent.dce"(%0) : (vector<[8]x[8]xbf16>) -> ()
/// Standalone vector.transpose of a [4]x[4]xf32 tile: expected to go via memory (tile_store, then a layout<vertical> tile_load).
561 // CHECK-LABEL: @transpose_f32
562 // CHECK: arith.constant 4
563 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xf32>, vector<[4]x[4]xf32>
564 // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf32>, vector<[4]x[4]xf32>
565 func.func @transpose_f32(%arg0: vector<[4]x[4]xf32>) {
566 %0 = vector.transpose %arg0, [1, 0] : vector<[4]x[4]xf32> to vector<[4]x[4]xf32>
567 "prevent.dce"(%0) : (vector<[4]x[4]xf32>) -> ()
/// Standalone vector.transpose of a [2]x[2]xf64 tile: expected to go via memory (tile_store, then a layout<vertical> tile_load).
573 // CHECK-LABEL: @transpose_f64
574 // CHECK: arith.constant 2
575 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xf64>, vector<[2]x[2]xf64>
576 // CHECK: arm_sme.tile_load {{.*}} layout<vertical> : memref<?x?xf64>, vector<[2]x[2]xf64>
577 func.func @transpose_f64(%arg0: vector<[2]x[2]xf64>) {
578 %0 = vector.transpose %arg0, [1, 0] : vector<[2]x[2]xf64> to vector<[2]x[2]xf64>
579 "prevent.dce"(%0) : (vector<[2]x[2]xf64>) -> ()
583 //===----------------------------------------------------------------------===//
584 // vector.outerproduct
585 //===----------------------------------------------------------------------===//
/// A vector.mask-wrapped f16 outerproduct whose mask comes from a 2-D vector.create_mask is expected to become
/// arm_sme.outerproduct with two 1-D masks, one built from each create_mask operand.
589 // CHECK-LABEL: @vector_outerproduct_masked_f16
590 // CHECK-SAME: (%[[LHS:.*]]: vector<[8]xf16>, %[[RHS:.*]]: vector<[8]xf16>, %[[ACC:.*]]: vector<[8]x[8]xf16>, %[[DIM0:.*]]: index, %[[DIM1:.*]]: index
591 func.func @vector_outerproduct_masked_f16(%lhs : vector<[8]xf16>, %rhs : vector<[8]xf16>, %acc : vector<[8]x[8]xf16>, %dim0 : index, %dim1 : index) {
592 %mask = vector.create_mask %dim0, %dim1 : vector<[8]x[8]xi1>
593 // CHECK: %[[LHS_MASK:.*]] = vector.create_mask %[[DIM0]] : vector<[8]xi1>
594 // CHECK: %[[RHS_MASK:.*]] = vector.create_mask %[[DIM1]] : vector<[8]xi1>
595 // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) masks(%[[LHS_MASK]], %[[RHS_MASK]]) : vector<[8]xf16>, vector<[8]xf16>
596 %result = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<[8]xf16>, vector<[8]xf16> } : vector<[8]x[8]xi1> -> vector<[8]x[8]xf16>
597 "prevent.dce"(%result) : (vector<[8]x[8]xf16>) -> ()
/// A vector.mask-wrapped bf16 outerproduct whose mask comes from a 2-D vector.create_mask is expected to become
/// arm_sme.outerproduct with two 1-D masks, one built from each create_mask operand.
602 // CHECK-LABEL: @vector_outerproduct_masked_bf16
603 // CHECK-SAME: (%[[LHS:.*]]: vector<[8]xbf16>, %[[RHS:.*]]: vector<[8]xbf16>, %[[ACC:.*]]: vector<[8]x[8]xbf16>, %[[DIM0:.*]]: index, %[[DIM1:.*]]: index
604 func.func @vector_outerproduct_masked_bf16(%lhs : vector<[8]xbf16>, %rhs : vector<[8]xbf16>, %acc : vector<[8]x[8]xbf16>, %dim0 : index, %dim1 : index) {
605 %mask = vector.create_mask %dim0, %dim1 : vector<[8]x[8]xi1>
606 // CHECK: %[[LHS_MASK:.*]] = vector.create_mask %[[DIM0]] : vector<[8]xi1>
607 // CHECK: %[[RHS_MASK:.*]] = vector.create_mask %[[DIM1]] : vector<[8]xi1>
608 // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) masks(%[[LHS_MASK]], %[[RHS_MASK]]) : vector<[8]xbf16>, vector<[8]xbf16>
609 %result = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<[8]xbf16>, vector<[8]xbf16> } : vector<[8]x[8]xi1> -> vector<[8]x[8]xbf16>
610 "prevent.dce"(%result) : (vector<[8]x[8]xbf16>) -> ()
/// A vector.mask-wrapped f32 outerproduct whose mask comes from a 2-D vector.create_mask is expected to become
/// arm_sme.outerproduct with two 1-D masks, one built from each create_mask operand.
615 // CHECK-LABEL: @vector_outerproduct_masked_f32
616 // CHECK-SAME: (%[[LHS:.*]]: vector<[4]xf32>, %[[RHS:.*]]: vector<[4]xf32>, %[[ACC:.*]]: vector<[4]x[4]xf32>, %[[DIM0:.*]]: index, %[[DIM1:.*]]: index
617 func.func @vector_outerproduct_masked_f32(%lhs : vector<[4]xf32>, %rhs : vector<[4]xf32>, %acc : vector<[4]x[4]xf32>, %dim0 : index, %dim1 : index) {
618 %mask = vector.create_mask %dim0, %dim1 : vector<[4]x[4]xi1>
619 // CHECK: %[[LHS_MASK:.*]] = vector.create_mask %[[DIM0]] : vector<[4]xi1>
620 // CHECK: %[[RHS_MASK:.*]] = vector.create_mask %[[DIM1]] : vector<[4]xi1>
621 // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) masks(%[[LHS_MASK]], %[[RHS_MASK]]) : vector<[4]xf32>, vector<[4]xf32>
622 %result = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<[4]xf32>, vector<[4]xf32> } : vector<[4]x[4]xi1> -> vector<[4]x[4]xf32>
623 "prevent.dce"(%result) : (vector<[4]x[4]xf32>) -> ()
/// A vector.mask-wrapped f64 outerproduct whose mask comes from a 2-D vector.create_mask is expected to become
/// arm_sme.outerproduct with two 1-D masks, one built from each create_mask operand.
628 // CHECK-LABEL: @vector_outerproduct_masked_f64
629 // CHECK-SAME: (%[[LHS:.*]]: vector<[2]xf64>, %[[RHS:.*]]: vector<[2]xf64>, %[[ACC:.*]]: vector<[2]x[2]xf64>, %[[DIM0:.*]]: index, %[[DIM1:.*]]: index
630 func.func @vector_outerproduct_masked_f64(%lhs : vector<[2]xf64>, %rhs : vector<[2]xf64>, %acc : vector<[2]x[2]xf64>, %dim0 : index, %dim1 : index) {
631 %mask = vector.create_mask %dim0, %dim1 : vector<[2]x[2]xi1>
632 // CHECK: %[[LHS_MASK:.*]] = vector.create_mask %[[DIM0]] : vector<[2]xi1>
633 // CHECK: %[[RHS_MASK:.*]] = vector.create_mask %[[DIM1]] : vector<[2]xi1>
634 // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) masks(%[[LHS_MASK]], %[[RHS_MASK]]) : vector<[2]xf64>, vector<[2]xf64>
635 %result = vector.mask %mask { vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<[2]xf64>, vector<[2]xf64> } : vector<[2]x[2]xi1> -> vector<[2]x[2]xf64>
636 "prevent.dce"(%result) : (vector<[2]x[2]xf64>) -> ()
/// Unmasked f16 vector.outerproduct with an accumulator is expected to map directly to arm_sme.outerproduct with the same operands.
641 // CHECK-LABEL: @vector_outerproduct_f16
642 // CHECK-SAME: (%[[LHS:.*]]: vector<[8]xf16>, %[[RHS:.*]]: vector<[8]xf16>, %[[ACC:.*]]: vector<[8]x[8]xf16>
643 func.func @vector_outerproduct_f16(%lhs : vector<[8]xf16>, %rhs : vector<[8]xf16>, %acc : vector<[8]x[8]xf16>) {
644 // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) : vector<[8]xf16>, vector<[8]xf16>
645 %result = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<[8]xf16>, vector<[8]xf16>
646 "prevent.dce"(%result) : (vector<[8]x[8]xf16>) -> ()
/// Unmasked bf16 vector.outerproduct with an accumulator is expected to map directly to arm_sme.outerproduct with the same operands.
651 // CHECK-LABEL: @vector_outerproduct_bf16
652 // CHECK-SAME: (%[[LHS:.*]]: vector<[8]xbf16>, %[[RHS:.*]]: vector<[8]xbf16>, %[[ACC:.*]]: vector<[8]x[8]xbf16>
653 func.func @vector_outerproduct_bf16(%lhs : vector<[8]xbf16>, %rhs : vector<[8]xbf16>, %acc : vector<[8]x[8]xbf16>) {
654 // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) : vector<[8]xbf16>, vector<[8]xbf16>
655 %result = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<[8]xbf16>, vector<[8]xbf16>
656 "prevent.dce"(%result) : (vector<[8]x[8]xbf16>) -> ()
/// Unmasked f32 vector.outerproduct with an accumulator is expected to map directly to arm_sme.outerproduct with the same operands.
661 // CHECK-LABEL: @vector_outerproduct_f32
662 // CHECK-SAME: (%[[LHS:.*]]: vector<[4]xf32>, %[[RHS:.*]]: vector<[4]xf32>, %[[ACC:.*]]: vector<[4]x[4]xf32>
663 func.func @vector_outerproduct_f32(%lhs : vector<[4]xf32>, %rhs : vector<[4]xf32>, %acc : vector<[4]x[4]xf32>) {
664 // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) : vector<[4]xf32>, vector<[4]xf32>
665 %result = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<[4]xf32>, vector<[4]xf32>
666 "prevent.dce"(%result) : (vector<[4]x[4]xf32>) -> ()
/// Unmasked f64 vector.outerproduct with an accumulator is expected to map directly to arm_sme.outerproduct with the same operands.
671 // CHECK-LABEL: @vector_outerproduct_f64
672 // CHECK-SAME: (%[[LHS:.*]]: vector<[2]xf64>, %[[RHS:.*]]: vector<[2]xf64>, %[[ACC:.*]]: vector<[2]x[2]xf64>
673 func.func @vector_outerproduct_f64(%lhs : vector<[2]xf64>, %rhs : vector<[2]xf64>, %acc : vector<[2]x[2]xf64>) {
674 // CHECK: arm_sme.outerproduct %[[LHS]], %[[RHS]] acc(%[[ACC]]) : vector<[2]xf64>, vector<[2]xf64>
675 %result = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<[2]xf64>, vector<[2]xf64>
676 "prevent.dce"(%result) : (vector<[2]x[2]xf64>) -> ()
679 //===----------------------------------------------------------------------===//
680 // vector.print
681 //===----------------------------------------------------------------------===//
/// vector.print of a 2-D tile is expected to be expanded into an scf.for loop over vscale * 4 tile slices
/// that extracts and prints one 1-D slice per iteration.
685 func.func @vector_print_tile(%tile: vector<[4]x[4]xf32>)
687 vector.print %tile : vector<[4]x[4]xf32>
690 // CHECK-LABEL: func.func @vector_print_tile(
691 // CHECK-SAME: %[[TILE:.*]]: vector<[4]x[4]xf32>) {
692 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
693 // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
694 // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
695 // CHECK-DAG: %[[VSCALE:.*]] = vector.vscale
696 // CHECK-DAG: %[[NUM_TILE_SLICES:.*]] = arith.muli %[[VSCALE]], %[[C4]] : index
697 // CHECK-NEXT: scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[NUM_TILE_SLICES]] step %[[C1]] {
698 // CHECK-NEXT: %[[TILE_SLICE:.*]] = arm_sme.extract_tile_slice %[[TILE]][%[[TILE_SLICE_INDEX]]] : vector<[4]xf32> from vector<[4]x[4]xf32>
699 // CHECK-NEXT: vector.print %[[TILE_SLICE]] : vector<[4]xf32>
701 //===----------------------------------------------------------------------===//
703 //===----------------------------------------------------------------------===//
// Verifies that a vector.load of a vector<[16]x[16]xi8> tile at indices [123, 0]
// is converted to arm_sme.tile_load on the same memref with the same indices.
707 // CHECK-LABEL: @vector_load_i8_with_offset(
708 // CHECK-SAME: %[[MEMREF:.*]]: memref<?x?xi8>)
709 // CHECK: %[[C0:.*]] = arith.constant 0 : index
710 // CHECK: %[[C123:.*]] = arith.constant 123 : index
711 // CHECK: arm_sme.tile_load %[[MEMREF]][%[[C123]], %[[C0]]] : memref<?x?xi8>, vector<[16]x[16]xi8>
712 func.func @vector_load_i8_with_offset(%arg0 : memref<?x?xi8>) -> vector<[16]x[16]xi8> {
713 %c0 = arith.constant 0 : index
714 %c123 = arith.constant 123 : index
715 %tile = vector.load %arg0[%c123, %c0] : memref<?x?xi8>, vector<[16]x[16]xi8>
716 return %tile : vector<[16]x[16]xi8>
// Verifies that a vector.load of a vector<[16]x[16]xi8> tile from a rank-1
// memref<?xi8> is converted to arm_sme.tile_load using the single index.
721 // CHECK-LABEL: @vector_load_i8_from_rank_1_memref(
722 // CHECK-SAME: %[[MEMREF:.*]]: memref<?xi8>)
723 // CHECK: %[[C0:.*]] = arith.constant 0 : index
724 // CHECK: arm_sme.tile_load %[[MEMREF]][%[[C0]]] : memref<?xi8>, vector<[16]x[16]xi8>
725 func.func @vector_load_i8_from_rank_1_memref(%arg0 : memref<?xi8>) -> vector<[16]x[16]xi8> {
726 %c0 = arith.constant 0 : index
727 %tile = vector.load %arg0[%c0] : memref<?xi8>, vector<[16]x[16]xi8>
728 return %tile : vector<[16]x[16]xi8>
// Verifies that vector.load of a vector<[8]x[8]xi16> tile from memref<?x?xi16>
// is converted to arm_sme.tile_load.
733 // CHECK-LABEL: @vector_load_i16(
734 // CHECK: arm_sme.tile_load {{.*}} : memref<?x?xi16>, vector<[8]x[8]xi16>
735 func.func @vector_load_i16(%arg0 : memref<?x?xi16>) -> vector<[8]x[8]xi16> {
736 %c0 = arith.constant 0 : index
737 %tile = vector.load %arg0[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
738 return %tile : vector<[8]x[8]xi16>
// Verifies that vector.load of a vector<[4]x[4]xi32> tile from memref<?x?xi32>
// is converted to arm_sme.tile_load.
743 // CHECK-LABEL: @vector_load_i32(
744 // CHECK: arm_sme.tile_load {{.*}} : memref<?x?xi32>, vector<[4]x[4]xi32>
745 func.func @vector_load_i32(%arg0 : memref<?x?xi32>) -> vector<[4]x[4]xi32> {
746 %c0 = arith.constant 0 : index
747 %tile = vector.load %arg0[%c0, %c0] : memref<?x?xi32>, vector<[4]x[4]xi32>
748 return %tile : vector<[4]x[4]xi32>
// Verifies that vector.load of a vector<[2]x[2]xi64> tile from memref<?x?xi64>
// is converted to arm_sme.tile_load.
753 // CHECK-LABEL: @vector_load_i64(
754 // CHECK: arm_sme.tile_load {{.*}} : memref<?x?xi64>, vector<[2]x[2]xi64>
755 func.func @vector_load_i64(%arg0 : memref<?x?xi64>) -> vector<[2]x[2]xi64> {
756 %c0 = arith.constant 0 : index
757 %tile = vector.load %arg0[%c0, %c0] : memref<?x?xi64>, vector<[2]x[2]xi64>
758 return %tile : vector<[2]x[2]xi64>
// Verifies that vector.load of a vector<[8]x[8]xf16> tile from memref<?x?xf16>
// is converted to arm_sme.tile_load.
763 // CHECK-LABEL: @vector_load_f16(
764 // CHECK: arm_sme.tile_load {{.*}} : memref<?x?xf16>, vector<[8]x[8]xf16>
765 func.func @vector_load_f16(%arg0 : memref<?x?xf16>) -> vector<[8]x[8]xf16> {
766 %c0 = arith.constant 0 : index
767 %tile = vector.load %arg0[%c0, %c0] : memref<?x?xf16>, vector<[8]x[8]xf16>
768 return %tile : vector<[8]x[8]xf16>
// Verifies that vector.load of a vector<[8]x[8]xbf16> tile from memref<?x?xbf16>
// is converted to arm_sme.tile_load.
773 // CHECK-LABEL: @vector_load_bf16(
774 // CHECK: arm_sme.tile_load {{.*}} : memref<?x?xbf16>, vector<[8]x[8]xbf16>
775 func.func @vector_load_bf16(%arg0 : memref<?x?xbf16>) -> vector<[8]x[8]xbf16> {
776 %c0 = arith.constant 0 : index
777 %tile = vector.load %arg0[%c0, %c0] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
778 return %tile : vector<[8]x[8]xbf16>
// Verifies that vector.load of a vector<[4]x[4]xf32> tile from memref<?x?xf32>
// is converted to arm_sme.tile_load.
783 // CHECK-LABEL: @vector_load_f32(
784 // CHECK: arm_sme.tile_load {{.*}} : memref<?x?xf32>, vector<[4]x[4]xf32>
785 func.func @vector_load_f32(%arg0 : memref<?x?xf32>) -> vector<[4]x[4]xf32> {
786 %c0 = arith.constant 0 : index
787 %tile = vector.load %arg0[%c0, %c0] : memref<?x?xf32>, vector<[4]x[4]xf32>
788 return %tile : vector<[4]x[4]xf32>
// Verifies that vector.load of a vector<[2]x[2]xf64> tile from memref<?x?xf64>
// is converted to arm_sme.tile_load.
793 // CHECK-LABEL: @vector_load_f64(
794 // CHECK: arm_sme.tile_load {{.*}} : memref<?x?xf64>, vector<[2]x[2]xf64>
795 func.func @vector_load_f64(%arg0 : memref<?x?xf64>) -> vector<[2]x[2]xf64> {
796 %c0 = arith.constant 0 : index
797 %tile = vector.load %arg0[%c0, %c0] : memref<?x?xf64>, vector<[2]x[2]xf64>
798 return %tile : vector<[2]x[2]xf64>
// Verifies that vector.load of a vector<[1]x[1]xi128> tile from memref<?x?xi128>
// is converted to arm_sme.tile_load.
803 // CHECK-LABEL: @vector_load_i128(
804 // CHECK: arm_sme.tile_load {{.*}} : memref<?x?xi128>, vector<[1]x[1]xi128>
805 func.func @vector_load_i128(%arg0 : memref<?x?xi128>) -> vector<[1]x[1]xi128> {
806 %c0 = arith.constant 0 : index
807 %tile = vector.load %arg0[%c0, %c0] : memref<?x?xi128>, vector<[1]x[1]xi128>
808 return %tile : vector<[1]x[1]xi128>
812 //===----------------------------------------------------------------------===//
814 //===----------------------------------------------------------------------===//
// Verifies that vector.store of a vector<[16]x[16]xi8> tile (produced by
// arm_sme.get_tile) at indices [0, 0] is converted to arm_sme.tile_store with
// the same tile, memref and indices.
818 // CHECK-LABEL: @vector_store_i8(
819 // CHECK-SAME: %[[MEMREF:.*]]: memref<?x?xi8>) {
820 // CHECK: %[[C0:.*]] = arith.constant 0 : index
821 // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[16]x[16]xi8>
822 // CHECK: arm_sme.tile_store %[[TILE]], %[[MEMREF]][%[[C0]], %[[C0]]] : memref<?x?xi8>, vector<[16]x[16]xi8>
823 func.func @vector_store_i8(%arg0 : memref<?x?xi8>) {
824 %c0 = arith.constant 0 : index
825 %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
826 vector.store %tile, %arg0[%c0, %c0] : memref<?x?xi8>, vector<[16]x[16]xi8>
// Verifies that vector.store of a vector<[8]x[8]xi16> tile to memref<?x?xi16>
// is converted to arm_sme.tile_store.
832 // CHECK-LABEL: @vector_store_i16
833 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xi16>, vector<[8]x[8]xi16>
834 func.func @vector_store_i16(%arg0 : memref<?x?xi16>) {
835 %c0 = arith.constant 0 : index
836 %tile = arm_sme.get_tile : vector<[8]x[8]xi16>
837 vector.store %tile, %arg0[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
// Verifies that vector.store of a vector<[4]x[4]xi32> tile to memref<?x?xi32>
// is converted to arm_sme.tile_store.
843 // CHECK-LABEL: @vector_store_i32
844 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xi32>, vector<[4]x[4]xi32>
845 func.func @vector_store_i32(%arg0 : memref<?x?xi32>) {
846 %c0 = arith.constant 0 : index
847 %tile = arm_sme.get_tile : vector<[4]x[4]xi32>
848 vector.store %tile, %arg0[%c0, %c0] : memref<?x?xi32>, vector<[4]x[4]xi32>
// Verifies that vector.store of a vector<[2]x[2]xi64> tile to memref<?x?xi64>
// is converted to arm_sme.tile_store.
854 // CHECK-LABEL: @vector_store_i64
855 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xi64>, vector<[2]x[2]xi64>
856 func.func @vector_store_i64(%arg0 : memref<?x?xi64>) {
857 %c0 = arith.constant 0 : index
858 %tile = arm_sme.get_tile : vector<[2]x[2]xi64>
859 vector.store %tile, %arg0[%c0, %c0] : memref<?x?xi64>, vector<[2]x[2]xi64>
// Verifies that vector.store of a vector<[8]x[8]xf16> tile to memref<?x?xf16>
// is converted to arm_sme.tile_store.
865 // CHECK-LABEL: @vector_store_f16
866 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xf16>, vector<[8]x[8]xf16>
867 func.func @vector_store_f16(%arg0 : memref<?x?xf16>) {
868 %c0 = arith.constant 0 : index
869 %tile = arm_sme.get_tile : vector<[8]x[8]xf16>
870 vector.store %tile, %arg0[%c0, %c0] : memref<?x?xf16>, vector<[8]x[8]xf16>
// Verifies that vector.store of a vector<[8]x[8]xbf16> tile to memref<?x?xbf16>
// is converted to arm_sme.tile_store.
876 // CHECK-LABEL: @vector_store_bf16
877 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xbf16>, vector<[8]x[8]xbf16>
878 func.func @vector_store_bf16(%arg0 : memref<?x?xbf16>) {
879 %c0 = arith.constant 0 : index
880 %tile = arm_sme.get_tile : vector<[8]x[8]xbf16>
881 vector.store %tile, %arg0[%c0, %c0] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
// Verifies that vector.store of a vector<[4]x[4]xf32> tile to memref<?x?xf32>
// is converted to arm_sme.tile_store.
886 // CHECK-LABEL: @vector_store_f32
887 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xf32>, vector<[4]x[4]xf32>
888 func.func @vector_store_f32(%arg0 : memref<?x?xf32>) {
889 %c0 = arith.constant 0 : index
890 %tile = arm_sme.get_tile : vector<[4]x[4]xf32>
891 vector.store %tile, %arg0[%c0, %c0] : memref<?x?xf32>, vector<[4]x[4]xf32>
// Verifies that vector.store of a vector<[2]x[2]xf64> tile to memref<?x?xf64>
// is converted to arm_sme.tile_store.
897 // CHECK-LABEL: @vector_store_f64
898 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xf64>, vector<[2]x[2]xf64>
899 func.func @vector_store_f64(%arg0 : memref<?x?xf64>) {
900 %c0 = arith.constant 0 : index
901 %tile = arm_sme.get_tile : vector<[2]x[2]xf64>
902 vector.store %tile, %arg0[%c0, %c0] : memref<?x?xf64>, vector<[2]x[2]xf64>
// Verifies that vector.store of a vector<[1]x[1]xi128> tile to memref<?x?xi128>
// is converted to arm_sme.tile_store.
908 // CHECK-LABEL: @vector_store_i128
909 // CHECK: arm_sme.tile_store {{.*}} : memref<?x?xi128>, vector<[1]x[1]xi128>
910 func.func @vector_store_i128(%arg0 : memref<?x?xi128>) {
911 %c0 = arith.constant 0 : index
912 %tile = arm_sme.get_tile : vector<[1]x[1]xi128>
913 vector.store %tile, %arg0[%c0, %c0] : memref<?x?xi128>, vector<[1]x[1]xi128>
917 //===----------------------------------------------------------------------===//
919 //===----------------------------------------------------------------------===//
// Verifies that vector.insert of a vector<[4]xi32> slice at a dynamic row of a
// vector<[4]x[4]xi32> tile is converted to arm_sme.insert_tile_slice with the
// same slice, tile and row index.
923 // CHECK-LABEL: @vector_insert_slice_i32(
924 // CHECK-SAME: %[[SLICE:.*]]: vector<[4]xi32>,
925 // CHECK-SAME: %[[INDEX:.*]]: index)
926 func.func @vector_insert_slice_i32(%slice: vector<[4]xi32>, %row: index) -> vector<[4]x[4]xi32>{
927 // CHECK-NEXT: %[[TILE:.*]] = arm_sme.get_tile : vector<[4]x[4]xi32>
928 // CHECK-NEXT: arm_sme.insert_tile_slice %[[SLICE]], %[[TILE]][%[[INDEX]]] : vector<[4]xi32> into vector<[4]x[4]xi32>
929 %tile = arm_sme.get_tile : vector<[4]x[4]xi32>
930 %new_tile = vector.insert %slice, %tile[%row] : vector<[4]xi32> into vector<[4]x[4]xi32>
931 return %new_tile : vector<[4]x[4]xi32>
// Verifies that vector.insert of a vector<[16]xi8> slice into a
// vector<[16]x[16]xi8> tile is converted to arm_sme.insert_tile_slice.
936 // CHECK-LABEL: @vector_insert_slice_i8
937 func.func @vector_insert_slice_i8(%slice: vector<[16]xi8>, %row: index) -> vector<[16]x[16]xi8> {
938 // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[16]xi8> into vector<[16]x[16]xi8>
939 %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
940 %new_tile = vector.insert %slice, %tile[%row] : vector<[16]xi8> into vector<[16]x[16]xi8>
941 return %new_tile : vector<[16]x[16]xi8>
// Verifies that vector.insert of a vector<[8]xi16> slice into a
// vector<[8]x[8]xi16> tile is converted to arm_sme.insert_tile_slice.
946 // CHECK-LABEL: @vector_insert_slice_i16
947 func.func @vector_insert_slice_i16(%slice: vector<[8]xi16>, %row: index) -> vector<[8]x[8]xi16> {
948 // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[8]xi16> into vector<[8]x[8]xi16>
949 %tile = arm_sme.get_tile : vector<[8]x[8]xi16>
950 %new_tile = vector.insert %slice, %tile[%row] : vector<[8]xi16> into vector<[8]x[8]xi16>
951 return %new_tile : vector<[8]x[8]xi16>
// Verifies that vector.insert of a vector<[2]xi64> slice into a
// vector<[2]x[2]xi64> tile is converted to arm_sme.insert_tile_slice.
956 // CHECK-LABEL: @vector_insert_slice_i64
957 func.func @vector_insert_slice_i64(%slice: vector<[2]xi64>, %row: index) -> vector<[2]x[2]xi64> {
958 // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[2]xi64> into vector<[2]x[2]xi64>
959 %tile = arm_sme.get_tile : vector<[2]x[2]xi64>
960 %new_tile = vector.insert %slice, %tile[%row] : vector<[2]xi64> into vector<[2]x[2]xi64>
961 return %new_tile : vector<[2]x[2]xi64>
// Verifies that vector.insert of a vector<[1]xi128> slice into a
// vector<[1]x[1]xi128> tile is converted to arm_sme.insert_tile_slice.
966 // CHECK-LABEL: @vector_insert_slice_i128
967 func.func @vector_insert_slice_i128(%slice: vector<[1]xi128>, %row: index) -> vector<[1]x[1]xi128> {
968 // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[1]xi128> into vector<[1]x[1]xi128>
969 %tile = arm_sme.get_tile : vector<[1]x[1]xi128>
970 %new_tile = vector.insert %slice, %tile[%row] : vector<[1]xi128> into vector<[1]x[1]xi128>
971 return %new_tile : vector<[1]x[1]xi128>
// Verifies that vector.insert of a vector<[8]xf16> slice into a
// vector<[8]x[8]xf16> tile is converted to arm_sme.insert_tile_slice.
976 // CHECK-LABEL: @vector_insert_slice_f16
977 func.func @vector_insert_slice_f16(%slice: vector<[8]xf16>, %row: index) -> vector<[8]x[8]xf16> {
978 // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[8]xf16> into vector<[8]x[8]xf16>
979 %tile = arm_sme.get_tile : vector<[8]x[8]xf16>
980 %new_tile = vector.insert %slice, %tile[%row] : vector<[8]xf16> into vector<[8]x[8]xf16>
981 return %new_tile : vector<[8]x[8]xf16>
// Verifies that vector.insert of a vector<[8]xbf16> slice into a
// vector<[8]x[8]xbf16> tile is converted to arm_sme.insert_tile_slice.
986 // CHECK-LABEL: @vector_insert_slice_bf16
987 func.func @vector_insert_slice_bf16(%slice: vector<[8]xbf16>, %row: index) -> vector<[8]x[8]xbf16> {
988 // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[8]xbf16> into vector<[8]x[8]xbf16>
989 %tile = arm_sme.get_tile : vector<[8]x[8]xbf16>
990 %new_tile = vector.insert %slice, %tile[%row] : vector<[8]xbf16> into vector<[8]x[8]xbf16>
991 return %new_tile : vector<[8]x[8]xbf16>
// Verifies that vector.insert of a vector<[4]xf32> slice into a
// vector<[4]x[4]xf32> tile is converted to arm_sme.insert_tile_slice.
996 // CHECK-LABEL: @vector_insert_slice_f32
997 func.func @vector_insert_slice_f32(%slice: vector<[4]xf32>, %row: index) -> vector<[4]x[4]xf32> {
998 // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[4]xf32> into vector<[4]x[4]xf32>
999 %tile = arm_sme.get_tile : vector<[4]x[4]xf32>
1000 %new_tile = vector.insert %slice, %tile[%row] : vector<[4]xf32> into vector<[4]x[4]xf32>
1001 return %new_tile : vector<[4]x[4]xf32>
// Verifies that vector.insert of a vector<[2]xf64> slice into a
// vector<[2]x[2]xf64> tile is converted to arm_sme.insert_tile_slice.
1006 // CHECK-LABEL: @vector_insert_slice_f64
1007 func.func @vector_insert_slice_f64(%slice: vector<[2]xf64>, %row: index) -> vector<[2]x[2]xf64> {
1008 // CHECK: arm_sme.insert_tile_slice %{{.*}} : vector<[2]xf64> into vector<[2]x[2]xf64>
1009 %tile = arm_sme.get_tile : vector<[2]x[2]xf64>
1010 %new_tile = vector.insert %slice, %tile[%row] : vector<[2]xf64> into vector<[2]x[2]xf64>
1011 return %new_tile : vector<[2]x[2]xf64>
// Verifies that vector.insert of a scalar i32 at [row, col] of a
// vector<[4]x[4]xi32> tile is lowered to three steps: extract the row slice with
// arm_sme.extract_tile_slice, vector.insert the element at col into that slice,
// then write the updated slice back at row with arm_sme.insert_tile_slice.
1016 // CHECK-LABEL: @vector_insert_element_i32(
1017 // CHECK-SAME: %[[EL:.*]]: i32,
1018 // CHECK-SAME: %[[ROW:.*]]: index,
1019 // CHECK-SAME: %[[COL:.*]]: index)
1020 func.func @vector_insert_element_i32(%el: i32, %row: index, %col: index) -> vector<[4]x[4]xi32> {
1021 // CHECK-NEXT: %[[TILE:.*]] = arm_sme.get_tile : vector<[4]x[4]xi32>
1022 // CHECK-NEXT: %[[SLICE:.*]] = arm_sme.extract_tile_slice %[[TILE]][%[[ROW]]] : vector<[4]xi32> from vector<[4]x[4]xi32>
1023 // CHECK-NEXT: %[[NEW_SLICE:.*]] = vector.insert %[[EL]], %[[SLICE]] [%[[COL]]] : i32 into vector<[4]xi32>
1024 // CHECK-NEXT: arm_sme.insert_tile_slice %[[NEW_SLICE]], %[[TILE]][%[[ROW]]] : vector<[4]xi32> into vector<[4]x[4]xi32>
1025 %tile = arm_sme.get_tile : vector<[4]x[4]xi32>
1026 %new_tile = vector.insert %el, %tile[%row, %col] : i32 into vector<[4]x[4]xi32>
1027 return %new_tile : vector<[4]x[4]xi32>
// Verifies that inserting a scalar i8 into a vector<[16]x[16]xi8> tile produces
// an arm_sme.extract_tile_slice from the tile followed by an
// arm_sme.insert_tile_slice back into the same tile.
1032 // CHECK-LABEL: @vector_insert_element_i8
1033 func.func @vector_insert_element_i8(%el: i8, %row: index, %col: index) -> vector<[16]x[16]xi8> {
1034 // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[16]x[16]xi8>
1035 // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8>
1036 // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[16]xi8> into vector<[16]x[16]xi8>
1037 %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
1038 %new_tile = vector.insert %el, %tile[%row, %col] : i8 into vector<[16]x[16]xi8>
1039 return %new_tile : vector<[16]x[16]xi8>
// Verifies that inserting a scalar i16 into a vector<[8]x[8]xi16> tile produces
// an arm_sme.extract_tile_slice from the tile followed by an
// arm_sme.insert_tile_slice back into the same tile.
1044 // CHECK-LABEL: @vector_insert_element_i16
1045 func.func @vector_insert_element_i16(%el: i16, %row: index, %col: index) -> vector<[8]x[8]xi16> {
1046 // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[8]x[8]xi16>
1047 // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[8]xi16> from vector<[8]x[8]xi16>
1048 // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[8]xi16> into vector<[8]x[8]xi16>
1049 %tile = arm_sme.get_tile : vector<[8]x[8]xi16>
1050 %new_tile = vector.insert %el, %tile[%row, %col] : i16 into vector<[8]x[8]xi16>
1051 return %new_tile : vector<[8]x[8]xi16>
// Verifies that inserting a scalar i64 into a vector<[2]x[2]xi64> tile produces
// an arm_sme.extract_tile_slice from the tile followed by an
// arm_sme.insert_tile_slice back into the same tile.
1056 // CHECK-LABEL: @vector_insert_element_i64
1057 func.func @vector_insert_element_i64(%el: i64, %row: index, %col: index) -> vector<[2]x[2]xi64> {
1058 // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[2]x[2]xi64>
1059 // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64>
1060 // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[2]xi64> into vector<[2]x[2]xi64>
1061 %tile = arm_sme.get_tile : vector<[2]x[2]xi64>
1062 %new_tile = vector.insert %el, %tile[%row, %col] : i64 into vector<[2]x[2]xi64>
1063 return %new_tile : vector<[2]x[2]xi64>
// Verifies that inserting a scalar i128 into a vector<[1]x[1]xi128> tile produces
// an arm_sme.extract_tile_slice from the tile followed by an
// arm_sme.insert_tile_slice back into the same tile.
1068 // CHECK-LABEL: @vector_insert_element_i128
1069 func.func @vector_insert_element_i128(%el: i128, %row: index, %col: index) -> vector<[1]x[1]xi128> {
1070 // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[1]x[1]xi128>
1071 // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128>
1072 // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[1]xi128> into vector<[1]x[1]xi128>
1073 %tile = arm_sme.get_tile : vector<[1]x[1]xi128>
1074 %new_tile = vector.insert %el, %tile[%row, %col] : i128 into vector<[1]x[1]xi128>
1075 return %new_tile : vector<[1]x[1]xi128>
// Verifies that inserting a scalar f16 into a vector<[8]x[8]xf16> tile produces
// an arm_sme.extract_tile_slice from the tile followed by an
// arm_sme.insert_tile_slice back into the same tile.
1080 // CHECK-LABEL: @vector_insert_element_f16
1081 func.func @vector_insert_element_f16(%el: f16, %row: index, %col: index) -> vector<[8]x[8]xf16> {
1082 // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[8]x[8]xf16>
1083 // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16>
1084 // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[8]xf16> into vector<[8]x[8]xf16>
1085 %tile = arm_sme.get_tile : vector<[8]x[8]xf16>
1086 %new_tile = vector.insert %el, %tile[%row, %col] : f16 into vector<[8]x[8]xf16>
1087 return %new_tile : vector<[8]x[8]xf16>
// Verifies that inserting a scalar bf16 into a vector<[8]x[8]xbf16> tile produces
// an arm_sme.extract_tile_slice from the tile followed by an
// arm_sme.insert_tile_slice back into the same tile.
1092 // CHECK-LABEL: @vector_insert_element_bf16
1093 func.func @vector_insert_element_bf16(%el: bf16, %row: index, %col: index) -> vector<[8]x[8]xbf16> {
1094 // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[8]x[8]xbf16>
1095 // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16>
1096 // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[8]xbf16> into vector<[8]x[8]xbf16>
1097 %tile = arm_sme.get_tile : vector<[8]x[8]xbf16>
1098 %new_tile = vector.insert %el, %tile[%row, %col] : bf16 into vector<[8]x[8]xbf16>
1099 return %new_tile : vector<[8]x[8]xbf16>
// Verifies that inserting a scalar f32 into a vector<[4]x[4]xf32> tile produces
// an arm_sme.extract_tile_slice from the tile followed by an
// arm_sme.insert_tile_slice back into the same tile.
1104 // CHECK-LABEL: @vector_insert_element_f32
1105 func.func @vector_insert_element_f32(%el: f32, %row: index, %col: index) -> vector<[4]x[4]xf32> {
1106 // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[4]x[4]xf32>
1107 // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32>
1108 // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[4]xf32> into vector<[4]x[4]xf32>
1109 %tile = arm_sme.get_tile : vector<[4]x[4]xf32>
1110 %new_tile = vector.insert %el, %tile[%row, %col] : f32 into vector<[4]x[4]xf32>
1111 return %new_tile : vector<[4]x[4]xf32>
// Verifies that inserting a scalar f64 into a vector<[2]x[2]xf64> tile produces
// an arm_sme.extract_tile_slice from the tile followed by an
// arm_sme.insert_tile_slice back into the same tile.
1116 // CHECK-LABEL: @vector_insert_element_f64
1117 func.func @vector_insert_element_f64(%el: f64, %row: index, %col: index) -> vector<[2]x[2]xf64> {
1118 // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[2]x[2]xf64>
1119 // CHECK: arm_sme.extract_tile_slice %[[TILE]]{{.*}} : vector<[2]xf64> from vector<[2]x[2]xf64>
1120 // CHECK: arm_sme.insert_tile_slice %{{.*}}, %[[TILE]][%{{.*}}] : vector<[2]xf64> into vector<[2]x[2]xf64>
1121 %tile = arm_sme.get_tile : vector<[2]x[2]xf64>
1122 %new_tile = vector.insert %el, %tile[%row, %col] : f64 into vector<[2]x[2]xf64>
1123 return %new_tile : vector<[2]x[2]xf64>
1126 //===----------------------------------------------------------------------===//
1127 // vector.extract --> arm_sme.extract_tile_slice
1128 //===----------------------------------------------------------------------===//
// Verifies that vector.extract of a vector<[4]xi32> row at a dynamic index from
// a vector<[4]x[4]xi32> tile is converted to arm_sme.extract_tile_slice with the
// same tile and index.
1132 // CHECK-LABEL: @vector_extract_slice_i32(
1133 // CHECK-SAME: %[[INDEX:.*]]: index)
1134 func.func @vector_extract_slice_i32(%row: index) -> vector<[4]xi32> {
1135 // CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[4]x[4]xi32>
1136 // CHECK: arm_sme.extract_tile_slice %[[TILE]][%[[INDEX]]] : vector<[4]xi32> from vector<[4]x[4]xi32>
1137 %tile = arm_sme.get_tile : vector<[4]x[4]xi32>
1138 %slice = vector.extract %tile[%row] : vector<[4]xi32> from vector<[4]x[4]xi32>
1139 return %slice : vector<[4]xi32>
// Verifies that vector.extract of a vector<[16]xi8> row from a
// vector<[16]x[16]xi8> tile is converted to arm_sme.extract_tile_slice.
1144 // CHECK-LABEL: @vector_extract_slice_i8
1145 func.func @vector_extract_slice_i8(%row: index) -> vector<[16]xi8> {
1146 // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8>
1147 %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
1148 %slice = vector.extract %tile[%row] : vector<[16]xi8> from vector<[16]x[16]xi8>
1149 return %slice : vector<[16]xi8>
// Verifies that vector.extract of a vector<[8]xi16> row from a
// vector<[8]x[8]xi16> tile is converted to arm_sme.extract_tile_slice.
1154 // CHECK-LABEL: @vector_extract_slice_i16
1155 func.func @vector_extract_slice_i16(%row: index) -> vector<[8]xi16> {
1156 // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xi16> from vector<[8]x[8]xi16>
1157 %tile = arm_sme.get_tile : vector<[8]x[8]xi16>
1158 %slice = vector.extract %tile[%row] : vector<[8]xi16> from vector<[8]x[8]xi16>
1159 return %slice : vector<[8]xi16>
// Verifies that vector.extract of a vector<[2]xi64> row from a
// vector<[2]x[2]xi64> tile is converted to arm_sme.extract_tile_slice.
1164 // CHECK-LABEL: @vector_extract_slice_i64
1165 func.func @vector_extract_slice_i64(%row: index) -> vector<[2]xi64> {
1166 // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64>
1167 %tile = arm_sme.get_tile : vector<[2]x[2]xi64>
1168 %slice = vector.extract %tile[%row] : vector<[2]xi64> from vector<[2]x[2]xi64>
1169 return %slice : vector<[2]xi64>
// Verifies that vector.extract of a vector<[1]xi128> row from a
// vector<[1]x[1]xi128> tile is converted to arm_sme.extract_tile_slice.
1174 // CHECK-LABEL: @vector_extract_slice_i128
1175 func.func @vector_extract_slice_i128(%row: index) -> vector<[1]xi128> {
1176 // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128>
1177 %tile = arm_sme.get_tile : vector<[1]x[1]xi128>
1178 %slice = vector.extract %tile[%row] : vector<[1]xi128> from vector<[1]x[1]xi128>
1179 return %slice : vector<[1]xi128>
// Verifies that vector.extract of a vector<[8]xf16> row from a
// vector<[8]x[8]xf16> tile is converted to arm_sme.extract_tile_slice.
1184 // CHECK-LABEL: @vector_extract_slice_f16
1185 func.func @vector_extract_slice_f16(%row: index) -> vector<[8]xf16> {
1186 // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16>
1187 %tile = arm_sme.get_tile : vector<[8]x[8]xf16>
1188 %slice = vector.extract %tile[%row] : vector<[8]xf16> from vector<[8]x[8]xf16>
1189 return %slice : vector<[8]xf16>
// Verifies that vector.extract of a vector<[8]xbf16> row from a
// vector<[8]x[8]xbf16> tile is converted to arm_sme.extract_tile_slice.
1194 // CHECK-LABEL: @vector_extract_slice_bf16
1195 func.func @vector_extract_slice_bf16(%row: index) -> vector<[8]xbf16> {
1196 // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16>
1197 %tile = arm_sme.get_tile : vector<[8]x[8]xbf16>
1198 %slice = vector.extract %tile[%row] : vector<[8]xbf16> from vector<[8]x[8]xbf16>
1199 return %slice : vector<[8]xbf16>
// Verifies that vector.extract of a vector<[4]xf32> row from a
// vector<[4]x[4]xf32> tile is converted to arm_sme.extract_tile_slice.
1204 // CHECK-LABEL: @vector_extract_slice_f32
1205 func.func @vector_extract_slice_f32(%row: index) -> vector<[4]xf32> {
1206 // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32>
1207 %tile = arm_sme.get_tile : vector<[4]x[4]xf32>
1208 %slice = vector.extract %tile[%row] : vector<[4]xf32> from vector<[4]x[4]xf32>
1209 return %slice : vector<[4]xf32>
// Verifies that vector.extract of a vector<[2]xf64> row from a
// vector<[2]x[2]xf64> tile is converted to arm_sme.extract_tile_slice.
1214 // CHECK-LABEL: @vector_extract_slice_f64
1215 func.func @vector_extract_slice_f64(%row: index) -> vector<[2]xf64> {
1216 // CHECK: arm_sme.extract_tile_slice {{.*}} : vector<[2]xf64> from vector<[2]x[2]xf64>
1217 %tile = arm_sme.get_tile : vector<[2]x[2]xf64>
1218 %slice = vector.extract %tile[%row] : vector<[2]xf64> from vector<[2]x[2]xf64>
1219 return %slice : vector<[2]xf64>
// Verifies that vector.extract of a scalar i32 at [row, col] from a
// vector<[4]x[4]xi32> tile is lowered to arm_sme.extract_tile_slice at row
// followed by a 1-D vector.extract at col from the resulting vector<[4]xi32>.
1224 // CHECK-LABEL: @vector_extract_element(
1225 // CHECK-SAME: %[[ROW:.*]]: index,
1226 // CHECK-SAME: %[[COL:.*]]: index)
1227 func.func @vector_extract_element(%row: index, %col: index) -> i32 {
1228 // CHECK-NEXT: %[[TILE:.*]] = arm_sme.get_tile : vector<[4]x[4]xi32>
1229 // CHECK-NEXT: %[[SLICE:.*]] = arm_sme.extract_tile_slice %[[TILE]][%[[ROW]]] : vector<[4]xi32> from vector<[4]x[4]xi32>
1230 // CHECK-NEXT: %[[EL:.*]] = vector.extract %[[SLICE]]{{\[}}%[[COL]]] : i32 from vector<[4]xi32>
1231 %tile = arm_sme.get_tile : vector<[4]x[4]xi32>
1232 %el = vector.extract %tile[%row, %col] : i32 from vector<[4]x[4]xi32>
// Verifies that extracting a scalar i8 from a vector<[16]x[16]xi8> tile becomes
// arm_sme.extract_tile_slice immediately followed by a 1-D vector.extract on
// the extracted vector<[16]xi8> slice.
1238 // CHECK-LABEL: @vector_extract_element_i8
1239 func.func @vector_extract_element_i8(%row: index, %col: index) -> i8 {
1240 // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[16]xi8> from vector<[16]x[16]xi8>
1241 // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : i8 from vector<[16]xi8>
1242 %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
1243 %el = vector.extract %tile[%row, %col] : i8 from vector<[16]x[16]xi8>
// Verifies that extracting a scalar i16 from a vector<[8]x[8]xi16> tile becomes
// arm_sme.extract_tile_slice immediately followed by a 1-D vector.extract on
// the extracted vector<[8]xi16> slice.
1249 // CHECK-LABEL: @vector_extract_element_i16
1250 func.func @vector_extract_element_i16(%row: index, %col: index) -> i16 {
1251 // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[8]xi16> from vector<[8]x[8]xi16>
1252 // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : i16 from vector<[8]xi16>
1253 %tile = arm_sme.get_tile : vector<[8]x[8]xi16>
1254 %el = vector.extract %tile[%row, %col] : i16 from vector<[8]x[8]xi16>
// Verifies that extracting a scalar i64 from a vector<[2]x[2]xi64> tile becomes
// arm_sme.extract_tile_slice immediately followed by a 1-D vector.extract on
// the extracted vector<[2]xi64> slice.
1260 // CHECK-LABEL: @vector_extract_element_i64
1261 func.func @vector_extract_element_i64(%row: index, %col: index) -> i64 {
1262 // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[2]xi64> from vector<[2]x[2]xi64>
1263 // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : i64 from vector<[2]xi64>
1264 %tile = arm_sme.get_tile : vector<[2]x[2]xi64>
1265 %el = vector.extract %tile[%row, %col] : i64 from vector<[2]x[2]xi64>
// Verifies that extracting a scalar i128 from a vector<[1]x[1]xi128> tile becomes
// arm_sme.extract_tile_slice immediately followed by a 1-D vector.extract on
// the extracted vector<[1]xi128> slice.
1271 // CHECK-LABEL: @vector_extract_element_i128
1272 func.func @vector_extract_element_i128(%row: index, %col: index) -> i128 {
1273 // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[1]xi128> from vector<[1]x[1]xi128>
1274 // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : i128 from vector<[1]xi128>
1275 %tile = arm_sme.get_tile : vector<[1]x[1]xi128>
1276 %el = vector.extract %tile[%row, %col] : i128 from vector<[1]x[1]xi128>
// Verifies that extracting a scalar f16 from a vector<[8]x[8]xf16> tile becomes
// arm_sme.extract_tile_slice immediately followed by a 1-D vector.extract on
// the extracted vector<[8]xf16> slice.
1282 // CHECK-LABEL: @vector_extract_element_f16
1283 func.func @vector_extract_element_f16(%row: index, %col: index) -> f16 {
1284 // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[8]xf16> from vector<[8]x[8]xf16>
1285 // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : f16 from vector<[8]xf16>
1286 %tile = arm_sme.get_tile : vector<[8]x[8]xf16>
1287 %el = vector.extract %tile[%row, %col] : f16 from vector<[8]x[8]xf16>
// Verifies that extracting a scalar bf16 from a vector<[8]x[8]xbf16> tile becomes
// arm_sme.extract_tile_slice immediately followed by a 1-D vector.extract on
// the extracted vector<[8]xbf16> slice.
1293 // CHECK-LABEL: @vector_extract_element_bf16
1294 func.func @vector_extract_element_bf16(%row: index, %col: index) -> bf16 {
1295 // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[8]xbf16> from vector<[8]x[8]xbf16>
1296 // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : bf16 from vector<[8]xbf16>
1297 %tile = arm_sme.get_tile : vector<[8]x[8]xbf16>
1298 %el = vector.extract %tile[%row, %col] : bf16 from vector<[8]x[8]xbf16>
// Verifies that extracting a scalar f32 from a vector<[4]x[4]xf32> tile becomes
// arm_sme.extract_tile_slice immediately followed by a 1-D vector.extract on
// the extracted vector<[4]xf32> slice.
1304 // CHECK-LABEL: @vector_extract_element_f32
1305 func.func @vector_extract_element_f32(%row: index, %col: index) -> f32 {
1306 // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[4]xf32> from vector<[4]x[4]xf32>
1307 // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : f32 from vector<[4]xf32>
1308 %tile = arm_sme.get_tile : vector<[4]x[4]xf32>
1309 %el = vector.extract %tile[%row, %col] : f32 from vector<[4]x[4]xf32>
// Verifies that extracting a scalar f64 from a vector<[2]x[2]xf64> tile becomes
// arm_sme.extract_tile_slice immediately followed by a 1-D vector.extract on
// the extracted vector<[2]xf64> slice.
1315 // CHECK-LABEL: @vector_extract_element_f64
1316 func.func @vector_extract_element_f64(%row: index, %col: index) -> f64 {
1317 // CHECK: %[[SLICE:.*]] = arm_sme.extract_tile_slice %{{.*}} : vector<[2]xf64> from vector<[2]x[2]xf64>
1318 // CHECK-NEXT: %{{.*}} = vector.extract %[[SLICE]]{{\[}}%{{.*}}] : f64 from vector<[2]xf64>
1319 %tile = arm_sme.get_tile : vector<[2]x[2]xf64>
1320 %el = vector.extract %tile[%row, %col] : f64 from vector<[2]x[2]xf64>
1324 //===----------------------------------------------------------------------===//
1325 // vector.extract --> arm_sve.psel
1326 //===----------------------------------------------------------------------===//
// Verifies that extracting a row at a dynamic index from a 2-D
// vector.create_mask of type vector<[4]x[8]xi1> is rewritten into two 1-D masks
// (a vector<[4]xi1> row mask built from %a and a vector<[8]xi1> column mask built
// from %b) combined by arm_sve.psel at that index.
1330 // CHECK-LABEL: @dynamic_vector_extract_mask_to_psel(
1331 // CHECK-SAME: %[[A:.*]]: index, %[[B:.*]]: index, %[[INDEX:.*]]: index)
1332 func.func @dynamic_vector_extract_mask_to_psel(%a: index, %b: index, %index: index) -> vector<[8]xi1>
1334 // CHECK: %[[MASK_ROWS:.*]] = vector.create_mask %[[A]] : vector<[4]xi1>
1335 // CHECK: %[[MASK_COLS:.*]] = vector.create_mask %[[B]] : vector<[8]xi1>
1336 // CHECK: arm_sve.psel %[[MASK_COLS]], %[[MASK_ROWS]][%[[INDEX]]] : vector<[8]xi1>, vector<[4]xi1>
1337 %mask = vector.create_mask %a, %b : vector<[4]x[8]xi1>
1338 %slice = vector.extract %mask[%index] : vector<[8]xi1> from vector<[4]x[8]xi1>
1339 return %slice : vector<[8]xi1>
// Verifies that extracting row 1 (a constant position) from a 2-D
// vector.create_mask of type vector<[16]x[2]xi1> is rewritten into two 1-D masks
// (a vector<[16]xi1> row mask built from %a and a vector<[2]xi1> column mask built
// from %b) combined by arm_sve.psel, with the position supplied as an
// arith.constant 1 : index.
1344 // CHECK-LABEL: @vector_extract_mask_to_psel(
1345 // CHECK-SAME: %[[A:.*]]: index,
1346 // CHECK-SAME: %[[B:.*]]: index)
1347 func.func @vector_extract_mask_to_psel(%a: index, %b: index) -> vector<[2]xi1>
1349 // CHECK: %[[C1:.*]] = arith.constant 1 : index
1350 // CHECK: %[[MASK_ROWS:.*]] = vector.create_mask %[[A]] : vector<[16]xi1>
1351 // CHECK: %[[MASK_COLS:.*]] = vector.create_mask %[[B]] : vector<[2]xi1>
1352 // CHECK: arm_sve.psel %[[MASK_COLS]], %[[MASK_ROWS]][%[[C1]]] : vector<[2]xi1>, vector<[16]xi1>
1353 %mask = vector.create_mask %a, %b : vector<[16]x[2]xi1>
1354 %slice = vector.extract %mask[1] : vector<[2]xi1> from vector<[16]x[2]xi1>
1355 return %slice : vector<[2]xi1>