1 // RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file -allow-unregistered-dialect | FileCheck %s
// A create_mask whose operands are all constants (3 and 2) is expected to be
// canonicalized into a constant_mask carrying the same dim sizes.
3 // CHECK-LABEL: create_vector_mask_to_constant_mask
4 func.func @create_vector_mask_to_constant_mask() -> (vector<4x3xi1>) {
5 %c2 = arith.constant 2 : index
6 %c3 = arith.constant 3 : index
7 // CHECK: vector.constant_mask [3, 2] : vector<4x3xi1>
8 %0 = vector.create_mask %c3, %c2 : vector<4x3xi1>
9 return %0 : vector<4x3xi1>
// Scalable 1-D mask built from the constant -1: the expected result is the
// all-false constant_mask [0].
13 // CHECK-LABEL: create_scalable_vector_mask_to_constant_mask
14 func.func @create_scalable_vector_mask_to_constant_mask() -> (vector<[8]xi1>) {
15 %c-1 = arith.constant -1 : index
16 // CHECK: vector.constant_mask [0] : vector<[8]xi1>
17 %0 = vector.create_mask %c-1 : vector<[8]xi1>
18 return %0 : vector<[8]xi1>
// The constant operand 5 exceeds the size of dim 0 (4); the expected
// constant_mask uses 4 for that dim.
23 // CHECK-LABEL: create_vector_mask_to_constant_mask_truncation
24 func.func @create_vector_mask_to_constant_mask_truncation() -> (vector<4x3xi1>) {
25 %c2 = arith.constant 2 : index
26 %c5 = arith.constant 5 : index
27 // CHECK: vector.constant_mask [4, 2] : vector<4x3xi1>
28 %0 = vector.create_mask %c5, %c2 : vector<4x3xi1>
29 return %0 : vector<4x3xi1>
// One operand is negative (-2): the expected constant_mask is [0, 0], i.e.
// every dim size is zero even though the other operand is 5.
34 // CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_neg
35 func.func @create_vector_mask_to_constant_mask_truncation_neg() -> (vector<4x3xi1>) {
36 %cneg2 = arith.constant -2 : index
37 %c5 = arith.constant 5 : index
38 // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1>
39 %0 = vector.create_mask %c5, %cneg2 : vector<4x3xi1>
40 return %0 : vector<4x3xi1>
// One operand is zero: the expected constant_mask is [0, 0], i.e. every dim
// size is zero even though the other operand is 2.
45 // CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_zero
46 func.func @create_vector_mask_to_constant_mask_truncation_zero() -> (vector<4x3xi1>) {
47 %c2 = arith.constant 2 : index
48 %c0 = arith.constant 0 : index
49 // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1>
50 %0 = vector.create_mask %c0, %c2 : vector<4x3xi1>
51 return %0 : vector<4x3xi1>
// Mask on vector<8x[16]xi1> built from the constant 8 and the product
// %0 * 16; the expected result is the all-true constant_mask [8, 16].
// NOTE(review): %0 is not defined in the lines visible here. Given the
// expected all-true result it is presumably vector.vscale -- confirm.
56 // CHECK-LABEL: create_vector_mask_to_constant_mask_scalable_all_true
57 func.func @create_vector_mask_to_constant_mask_scalable_all_true() -> (vector<8x[16]xi1>) {
58 %c8 = arith.constant 8 : index
59 %c16 = arith.constant 16 : index
61 %1 = arith.muli %0, %c16 : index
62 // CHECK: vector.constant_mask [8, 16] : vector<8x[16]xi1>
63 %10 = vector.create_mask %c8, %1 : vector<8x[16]xi1>
64 return %10 : vector<8x[16]xi1>
// A transpose of a create_mask is expected to be replaced by a create_mask
// whose operands are permuted with [2, 0, 1]. The original create_mask is
// also returned, so it is expected to remain and no transpose should be left.
69 // CHECK-LABEL: create_mask_transpose_to_transposed_create_mask
70 // CHECK-SAME: %[[DIM0:.*]]: index, %[[DIM1:.*]]: index, %[[DIM2:.*]]: index
71 func.func @create_mask_transpose_to_transposed_create_mask(
72 %dim0: index, %dim1: index, %dim2: index) -> (vector<2x3x4xi1>, vector<4x2x3xi1>) {
73 // CHECK: vector.create_mask %[[DIM0]], %[[DIM1]], %[[DIM2]] : vector<2x3x4xi1>
74 // CHECK: vector.create_mask %[[DIM2]], %[[DIM0]], %[[DIM1]] : vector<4x2x3xi1>
75 // CHECK-NOT: vector.transpose
76 %0 = vector.create_mask %dim0, %dim1, %dim2 : vector<2x3x4xi1>
77 %1 = vector.transpose %0, [2, 0, 1] : vector<2x3x4xi1> to vector<4x2x3xi1>
78 return %0, %1 : vector<2x3x4xi1>, vector<4x2x3xi1>
// Extract at static index 1 from a mask whose leading bound is the constant 2.
// The expected output is a create_mask over the two remaining (scalable) dims
// with no extract left.
83 // CHECK-LABEL: extract_from_create_mask
84 // CHECK-SAME: %[[DIM0:.*]]: index, %[[DIM1:.*]]: index
85 func.func @extract_from_create_mask(%dim0: index, %dim1: index) -> vector<[4]x[4]xi1> {
86 %c2 = arith.constant 2 : index
87 %mask = vector.create_mask %c2, %dim0, %dim1 : vector<4x[4]x[4]xi1>
88 // CHECK: vector.create_mask %[[DIM0]], %[[DIM1]] : vector<[4]x[4]xi1>
89 // CHECK-NOT: vector.extract
90 %extract = vector.extract %mask[1] : vector<[4]x[4]xi1> from vector<4x[4]x[4]xi1>
91 return %extract : vector<[4]x[4]xi1>
// Extract at static index 2 from a mask whose leading bound is the constant 2
// (index equals the bound). The expected output is a dense<false> constant
// with no extract left.
96 // CHECK-LABEL: extract_from_create_mask_all_false
97 func.func @extract_from_create_mask_all_false(%dim0: index, %dim1: index) -> vector<[4]x[4]xi1> {
98 %c2 = arith.constant 2 : index
99 %mask = vector.create_mask %c2, %dim0, %dim1 : vector<4x[4]x[4]xi1>
100 // CHECK: arith.constant dense<false> : vector<[4]x[4]xi1>
101 // CHECK-NOT: vector.extract
102 %extract = vector.extract %mask[2] : vector<[4]x[4]xi1> from vector<4x[4]x[4]xi1>
103 return %extract : vector<[4]x[4]xi1>
// Same pattern with a scalable leading dim ([4]) bounded by the constant 3:
// extracting index 1 is expected to yield a create_mask over the trailing
// fixed dim with no extract left.
108 // CHECK-LABEL: extract_from_create_mask_leading_scalable
109 // CHECK-SAME: %[[DIM0:.*]]: index
110 func.func @extract_from_create_mask_leading_scalable(%dim0: index) -> vector<8xi1> {
111 %c3 = arith.constant 3 : index
112 %mask = vector.create_mask %c3, %dim0 : vector<[4]x8xi1>
113 // CHECK: vector.create_mask %[[DIM0]] : vector<8xi1>
114 // CHECK-NOT: vector.extract
115 %extract = vector.extract %mask[1] : vector<8xi1> from vector<[4]x8xi1>
116 return %extract : vector<8xi1>
// Extract with a dynamic index on dim 1. The constant bound on that dim (4)
// equals the dim size (4), and the static index 2 is below the leading bound
// 3. The expected output is a create_mask over the last dim only.
121 // CHECK-LABEL: extract_from_create_mask_dynamic_position
122 // CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
123 func.func @extract_from_create_mask_dynamic_position(%dim0: index, %index: index) -> vector<6xi1> {
124 %c4 = arith.constant 4 : index
125 %c3 = arith.constant 3 : index
126 %mask = vector.create_mask %c3, %c4, %dim0 : vector<4x4x6xi1>
127 // CHECK: vector.create_mask %[[DIM0]] : vector<6xi1>
128 // CHECK-NOT: vector.extract
129 %extract = vector.extract %mask[2, %index] : vector<6xi1> from vector<4x4x6xi1>
130 return %extract : vector<6xi1>
// Extract with a dynamic index on dim 1, whose constant bound is 0. The
// expected output is a dense<false> constant with no extract left.
135 // CHECK-LABEL: extract_from_create_mask_dynamic_position_all_false
136 // CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
137 func.func @extract_from_create_mask_dynamic_position_all_false(%dim0: index, %index: index) -> vector<6xi1> {
138 %c0 = arith.constant 0 : index
139 %c1 = arith.constant 1 : index
140 %mask = vector.create_mask %c1, %c0, %dim0 : vector<1x4x6xi1>
141 // CHECK: arith.constant dense<false> : vector<6xi1>
142 // CHECK-NOT: vector.extract
143 %extract = vector.extract %mask[0, %index] : vector<6xi1> from vector<1x4x6xi1>
144 return %extract : vector<6xi1>
// Negative case: the dynamically indexed dim has constant bound 2 but size 4,
// and the expected output keeps the constant, the create_mask and the extract
// unchanged.
149 // CHECK-LABEL: extract_from_create_mask_dynamic_position_unknown
150 // CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
151 func.func @extract_from_create_mask_dynamic_position_unknown(%dim0: index, %index: index) -> vector<6xi1> {
152 %c2 = arith.constant 2 : index
153 %mask = vector.create_mask %c2, %dim0 : vector<4x6xi1>
154 // CHECK: %[[C2:.*]] = arith.constant 2 : index
155 // CHECK-NEXT: %[[MASK:.*]] = vector.create_mask %[[C2]], %[[DIM0]] : vector<4x6xi1>
156 // CHECK-NEXT: vector.extract %[[MASK]][%[[INDEX]]] : vector<6xi1> from vector<4x6xi1>
157 %extract = vector.extract %mask[%index] : vector<6xi1> from vector<4x6xi1>
158 return %extract : vector<6xi1>
// Negative case with a mixed static/dynamic position: the dynamically indexed
// dim has constant bound 2 but size 4, and the expected output keeps all three
// ops unchanged.
163 // CHECK-LABEL: extract_from_create_mask_mixed_position_unknown
164 // CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
165 func.func @extract_from_create_mask_mixed_position_unknown(%dim0: index, %index0: index) -> vector<4xi1> {
166 %c2 = arith.constant 2 : index
167 %mask = vector.create_mask %c2, %c2, %dim0 : vector<2x4x4xi1>
168 // CHECK: %[[C2:.*]] = arith.constant 2 : index
169 // CHECK-NEXT: %[[MASK:.*]] = vector.create_mask %[[C2]], %[[C2]], %[[DIM0]] : vector<2x4x4xi1>
170 // CHECK-NEXT: vector.extract %[[MASK]][1, %[[INDEX]]] : vector<4xi1> from vector<2x4x4xi1>
171 %extract = vector.extract %mask[1, %index0] : vector<4xi1> from vector<2x4x4xi1>
172 return %extract : vector<4xi1>
// Negative case: the bound of the extracted (leading) dim is the function
// argument rather than a constant, and the expected output keeps both the
// create_mask and the extract.
177 // CHECK-LABEL: extract_from_non_constant_create_mask
178 // CHECK-SAME: %[[DIM0:.*]]: index
179 func.func @extract_from_non_constant_create_mask(%dim0: index) -> vector<[2]xi1> {
180 %mask = vector.create_mask %dim0, %dim0 : vector<[2]x[2]xi1>
181 // CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM0]], %[[DIM0]] : vector<[2]x[2]xi1>
182 // CHECK-NEXT: vector.extract %[[MASK]][0] : vector<[2]xi1> from vector<[2]x[2]xi1>
183 %extract = vector.extract %mask[0] : vector<[2]xi1> from vector<[2]x[2]xi1>
184 return %extract : vector<[2]xi1>
// A transpose of a constant_mask is expected to be replaced by a constant_mask
// whose dim sizes are permuted with [2, 0, 1]. The original mask is also
// returned, so it is expected to remain and no transpose should be left.
189 // CHECK-LABEL: constant_mask_transpose_to_transposed_constant_mask
190 func.func @constant_mask_transpose_to_transposed_constant_mask() -> (vector<2x3x4xi1>, vector<4x2x3xi1>) {
191 // CHECK: vector.constant_mask [1, 2, 3] : vector<2x3x4xi1>
192 // CHECK: vector.constant_mask [3, 1, 2] : vector<4x2x3xi1>
193 // CHECK-NOT: vector.transpose
194 %0 = vector.constant_mask [1, 2, 3] : vector<2x3x4xi1>
195 %1 = vector.transpose %0, [2, 0, 1] : vector<2x3x4xi1> to vector<4x2x3xi1>
196 return %0, %1 : vector<2x3x4xi1>, vector<4x2x3xi1>
// Seven cases of extract_strided_slice applied to constant_mask [2, 2] on
// vector<4x3xi1>. Each case is expected to fold into a constant_mask on the
// slice type whose dim sizes are the part of the mask region that falls inside
// the slice (all zeros when the slice lies outside the mask region).
//
// Every case carries its own label so that its expectation is anchored to its
// own function; all seven labels must stay together, since the functions share
// one name and labels are matched in order.

// Slice at offsets [0, 0], sizes [2, 2]: fully inside the mask region.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
201 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
202 %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
203 %1 = vector.extract_strided_slice %0
204 {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]}
205 : vector<4x3xi1> to vector<2x2xi1>
206 // CHECK: vector.constant_mask [2, 2] : vector<2x2xi1>
207 return %1 : vector<2x2xi1>
// Slice at offsets [1, 0]: only one row of the mask region is covered.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
212 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
213 %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
214 %1 = vector.extract_strided_slice %0
215 {offsets = [1, 0], sizes = [2, 2], strides = [1, 1]}
216 : vector<4x3xi1> to vector<2x2xi1>
217 // CHECK: vector.constant_mask [1, 2] : vector<2x2xi1>
218 return %1 : vector<2x2xi1>
// Slice at offsets [0, 1]: only one column of the mask region is covered.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
223 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
224 %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
225 %1 = vector.extract_strided_slice %0
226 {offsets = [0, 1], sizes = [2, 2], strides = [1, 1]}
227 : vector<4x3xi1> to vector<2x2xi1>
228 // CHECK: vector.constant_mask [2, 1] : vector<2x2xi1>
229 return %1 : vector<2x2xi1>
// Slice at offsets [2, 0]: starts past the masked rows, so all-false.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
234 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
235 %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
236 %1 = vector.extract_strided_slice %0
237 {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]}
238 : vector<4x3xi1> to vector<2x2xi1>
239 // CHECK: vector.constant_mask [0, 0] : vector<2x2xi1>
240 return %1 : vector<2x2xi1>
// Slice at offsets [0, 2]: starts past the masked columns, so all-false.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
245 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
246 %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
247 %1 = vector.extract_strided_slice %0
248 {offsets = [0, 2], sizes = [2, 1], strides = [1, 1]}
249 : vector<4x3xi1> to vector<2x1xi1>
250 // CHECK: vector.constant_mask [0, 0] : vector<2x1xi1>
251 return %1 : vector<2x1xi1>
// Slice at offsets [0, 1], sizes [2, 1]: single masked column, both rows.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
256 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
257 %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
258 %1 = vector.extract_strided_slice %0
259 {offsets = [0, 1], sizes = [2, 1], strides = [1, 1]}
260 : vector<4x3xi1> to vector<2x1xi1>
261 // CHECK: vector.constant_mask [2, 1] : vector<2x1xi1>
262 return %1 : vector<2x1xi1>
// Slice at offsets [1, 1], sizes [2, 1]: single masked column, one row.
// CHECK-LABEL: extract_strided_slice_of_constant_mask
267 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
268 %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
269 %1 = vector.extract_strided_slice %0
270 {offsets = [1, 1], sizes = [2, 1], strides = [1, 1]}
271 : vector<4x3xi1> to vector<2x1xi1>
272 // CHECK: vector.constant_mask [1, 1] : vector<2x1xi1>
273 return %1 : vector<2x1xi1>
// An extract_strided_slice that covers the whole source (offsets [0, 0],
// sizes equal to the source shape, unit strides) is expected to fold to the
// source value itself.
278 // CHECK-LABEL: extract_strided_fold
279 // CHECK-SAME: (%[[ARG:.*]]: vector<4x3xi1>)
280 // CHECK-NEXT: return %[[ARG]] : vector<4x3xi1>
281 func.func @extract_strided_fold(%arg : vector<4x3xi1>) -> (vector<4x3xi1>) {
282 %0 = vector.extract_strided_slice %arg
283 {offsets = [0, 0], sizes = [4, 3], strides = [1, 1]}
284 : vector<4x3xi1> to vector<4x3xi1>
285 return %0 : vector<4x3xi1>
// Case where the extracted region (offsets [2, 2], sizes [4, 4]) is exactly
// the region written by the preceding insert_strided_slice: the extract is
// expected to fold to the inserted vector.
290 // CHECK-LABEL: extract_strided_fold_insert
291 // CHECK-SAME: (%[[ARG:.*]]: vector<4x4xf32>
292 // CHECK-NEXT: return %[[ARG]] : vector<4x4xf32>
293 func.func @extract_strided_fold_insert(%a: vector<4x4xf32>, %b: vector<8x16xf32>)
294 -> (vector<4x4xf32>) {
295 %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
296 : vector<4x4xf32> into vector<8x16xf32>
297 %1 = vector.extract_strided_slice %0
298 {offsets = [2, 2], sizes = [4, 4], strides = [1, 1]}
299 : vector<8x16xf32> to vector<4x4xf32>
300 return %1 : vector<4x4xf32>
305 // Case where the vector extracted (4x4) is a subset of the vector inserted
// (6x4): the extract is expected to be rewritten to read directly from the
// inserted vector at offsets [0, 0].
306 // CHECK-LABEL: extract_strided_fold_insert
307 // CHECK-SAME: (%[[ARG0:.*]]: vector<6x4xf32>
308 // CHECK-NEXT: %[[EXT:.*]] = vector.extract_strided_slice %[[ARG0]]
309 // CHECK-SAME: {offsets = [0, 0], sizes = [4, 4], strides = [1, 1]}
310 // CHECK-SAME: : vector<6x4xf32> to vector<4x4xf32>
311 // CHECK-NEXT: return %[[EXT]] : vector<4x4xf32>
312 func.func @extract_strided_fold_insert(%a: vector<6x4xf32>, %b: vector<8x16xf32>)
313 -> (vector<4x4xf32>) {
314 %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
315 : vector<6x4xf32> into vector<8x16xf32>
316 %1 = vector.extract_strided_slice %0
317 {offsets = [2, 2], sizes = [4, 4], strides = [1, 1]}
318 : vector<8x16xf32> to vector<4x4xf32>
319 return %1 : vector<4x4xf32>
324 // Negative test where the extract is not a subset of the element inserted:
// the extracted region is 6x4 while the inserted vector is only 4x4, and both
// ops are expected to remain unchanged.
325 // CHECK-LABEL: extract_strided_fold_negative
326 // CHECK-SAME: (%[[ARG0:.*]]: vector<4x4xf32>, %[[ARG1:.*]]: vector<8x16xf32>
327 // CHECK: %[[INS:.*]] = vector.insert_strided_slice %[[ARG0]], %[[ARG1]]
328 // CHECK-SAME: {offsets = [2, 2], strides = [1, 1]}
329 // CHECK-SAME: : vector<4x4xf32> into vector<8x16xf32>
330 // CHECK: %[[EXT:.*]] = vector.extract_strided_slice %[[INS]]
331 // CHECK-SAME: {offsets = [2, 2], sizes = [6, 4], strides = [1, 1]}
332 // CHECK-SAME: : vector<8x16xf32> to vector<6x4xf32>
333 // CHECK-NEXT: return %[[EXT]] : vector<6x4xf32>
334 func.func @extract_strided_fold_negative(%a: vector<4x4xf32>, %b: vector<8x16xf32>)
335 -> (vector<6x4xf32>) {
336 %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
337 : vector<4x4xf32> into vector<8x16xf32>
338 %1 = vector.extract_strided_slice %0
339 {offsets = [2, 2], sizes = [6, 4], strides = [1, 1]}
340 : vector<8x16xf32> to vector<6x4xf32>
341 return %1 : vector<6x4xf32>
346 // Case where we need to go through 2 levels of insert_strided_slice: the
// extracted element at [0, 1] is not touched by the outer insert (row 1) and
// is expected to be read from the inner inserted vector at offsets [0, 0].
347 // CHECK-LABEL: extract_strided_fold_insert
348 // CHECK-SAME: (%[[ARG0:.*]]: vector<2x8xf32>, %[[ARG1:.*]]: vector<1x4xf32>,
349 // CHECK-NEXT: %[[EXT:.*]] = vector.extract_strided_slice %[[ARG1]]
350 // CHECK-SAME: {offsets = [0, 0], sizes = [1, 1], strides = [1, 1]}
351 // CHECK-SAME: : vector<1x4xf32> to vector<1x1xf32>
352 // CHECK-NEXT: return %[[EXT]] : vector<1x1xf32>
353 func.func @extract_strided_fold_insert(%a: vector<2x8xf32>, %b: vector<1x4xf32>,
354 %c : vector<1x4xf32>) -> (vector<1x1xf32>) {
355 %0 = vector.insert_strided_slice %b, %a {offsets = [0, 1], strides = [1, 1]}
356 : vector<1x4xf32> into vector<2x8xf32>
357 %1 = vector.insert_strided_slice %c, %0 {offsets = [1, 0], strides = [1, 1]}
358 : vector<1x4xf32> into vector<2x8xf32>
359 %2 = vector.extract_strided_slice %1
360 {offsets = [0, 1], sizes = [1, 1], strides = [1, 1]}
361 : vector<2x8xf32> to vector<1x1xf32>
362 return %2 : vector<1x1xf32>
// A 1-D transpose with the identity permutation [0] is expected to fold away,
// returning the argument directly.
367 // CHECK-LABEL: transpose_1D_identity
368 // CHECK-SAME: ([[ARG:%.*]]: vector<4xf32>)
369 func.func @transpose_1D_identity(%arg : vector<4xf32>) -> vector<4xf32> {
370 // CHECK-NOT: transpose
371 %0 = vector.transpose %arg, [0] : vector<4xf32> to vector<4xf32>
372 // CHECK-NEXT: return [[ARG]]
373 return %0 : vector<4xf32>
// A 2-D transpose with the identity permutation [0, 1] is expected to fold
// away, returning the argument directly.
378 // CHECK-LABEL: transpose_2D_identity
379 // CHECK-SAME: ([[ARG:%.*]]: vector<4x3xf32>)
380 func.func @transpose_2D_identity(%arg : vector<4x3xf32>) -> vector<4x3xf32> {
381 // CHECK-NOT: transpose
382 %0 = vector.transpose %arg, [0, 1] : vector<4x3xf32> to vector<4x3xf32>
383 // CHECK-NEXT: return [[ARG]]
384 return %0 : vector<4x3xf32>
// A 3-D transpose with the identity permutation [0, 1, 2] is expected to fold
// away, returning the argument directly.
389 // CHECK-LABEL: transpose_3D_identity
390 // CHECK-SAME: ([[ARG:%.*]]: vector<4x3x2xf32>)
391 func.func @transpose_3D_identity(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> {
392 // CHECK-NOT: transpose
393 %0 = vector.transpose %arg, [0, 1, 2] : vector<4x3x2xf32> to vector<4x3x2xf32>
394 // CHECK-NEXT: return [[ARG]]
395 return %0 : vector<4x3x2xf32>
// A chain of 2-D transposes ([1, 0], identity, [1, 0], identity) composes to
// the identity: no transpose is expected to remain, and both addf operands are
// expected to be the function argument.
400 // CHECK-LABEL: transpose_2D_sequence
401 // CHECK-SAME: ([[ARG:%.*]]: vector<4x3xf32>)
402 func.func @transpose_2D_sequence(%arg : vector<4x3xf32>) -> vector<4x3xf32> {
403 // CHECK-NOT: transpose
404 %0 = vector.transpose %arg, [1, 0] : vector<4x3xf32> to vector<3x4xf32>
405 %1 = vector.transpose %0, [0, 1] : vector<3x4xf32> to vector<3x4xf32>
406 %2 = vector.transpose %1, [1, 0] : vector<3x4xf32> to vector<4x3xf32>
407 %3 = vector.transpose %2, [0, 1] : vector<4x3xf32> to vector<4x3xf32>
408 // CHECK: [[ADD:%.*]] = arith.addf [[ARG]], [[ARG]]
409 %4 = arith.addf %2, %3 : vector<4x3xf32>
410 // CHECK-NEXT: return [[ADD]]
411 return %4 : vector<4x3xf32>
// Chains of 3-D transposes are expected to be composed into single transposes
// of the function argument, and chains that compose to the identity are
// expected to disappear (the last addf operand is the argument itself).
// All expectations refer to the argument through the captured [[ARG]] rather
// than a hard-coded printed SSA name.
416 // CHECK-LABEL: transpose_3D_sequence
417 // CHECK-SAME: ([[ARG:%.*]]: vector<4x3x2xf32>)
418 func.func @transpose_3D_sequence(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> {
419 // CHECK: [[T0:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
420 %0 = vector.transpose %arg, [1, 2, 0] : vector<4x3x2xf32> to vector<3x2x4xf32>
421 %1 = vector.transpose %0, [1, 0, 2] : vector<3x2x4xf32> to vector<2x3x4xf32>
422 // CHECK: [[T1:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
423 %2 = vector.transpose %1, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
424 %3 = vector.transpose %2, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
425 // CHECK: [[MUL:%.*]] = arith.mulf [[T0]], [[T1]]
426 %4 = arith.mulf %1, %3 : vector<2x3x4xf32>
427 // CHECK: [[T5:%.*]] = vector.transpose [[MUL]], [2, 1, 0]
428 %5 = vector.transpose %4, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
429 // CHECK-NOT: transpose
430 %6 = vector.transpose %3, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
431 // CHECK: [[ADD:%.*]] = arith.addf [[T5]], [[ARG]]
432 %7 = arith.addf %5, %6 : vector<4x3x2xf32>
433 // CHECK-NEXT: return [[ADD]]
434 return %7 : vector<4x3x2xf32>
// transfer_read / transfer_write through a memref.cast to a dynamic shape:
// the expected output operates on the static memref<4x8xf32> type and carries
// in_bounds = [true, true] on both transfers.
439 // CHECK-LABEL: cast_transfers
440 func.func @cast_transfers(%A: memref<4x8xf32>) -> (vector<4x8xf32>) {
441 %c0 = arith.constant 0 : index
442 %f0 = arith.constant 0.0 : f32
443 %0 = memref.cast %A : memref<4x8xf32> to memref<?x?xf32>
445 // CHECK: vector.transfer_read %{{.*}} {in_bounds = [true, true]} : memref<4x8xf32>, vector<4x8xf32>
446 %1 = vector.transfer_read %0[%c0, %c0], %f0 : memref<?x?xf32>, vector<4x8xf32>
448 // CHECK: vector.transfer_write %{{.*}} {in_bounds = [true, true]} : vector<4x8xf32>, memref<4x8xf32>
449 vector.transfer_write %1, %0[%c0, %c0] : vector<4x8xf32>, memref<?x?xf32>
450 return %1 : vector<4x8xf32>
// Tensor variant: a transfer_read through a tensor.cast to a dynamic shape is
// expected to read from the static tensor<4x8xf32> type with
// in_bounds = [true, true].
455 // CHECK-LABEL: cast_transfers
456 func.func @cast_transfers(%A: tensor<4x8xf32>) -> (vector<4x8xf32>) {
457 %c0 = arith.constant 0 : index
458 %f0 = arith.constant 0.0 : f32
459 %0 = tensor.cast %A : tensor<4x8xf32> to tensor<?x?xf32>
461 // CHECK: vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor<4x8xf32>, vector<4x8xf32>
462 %1 = vector.transfer_read %0[%c0, %c0], %f0 : tensor<?x?xf32>, vector<4x8xf32>
464 return %1 : vector<4x8xf32>
// Scalar extracts that read through chains of scalar inserts and 2-D
// transposes: each extract is expected to fold to the scalar function argument
// that was inserted at the corresponding position, so the function returns
// arguments directly.
469 // CHECK-LABEL: func @insert_extract_transpose_2d(
470 // CHECK-SAME: %[[V:[a-zA-Z0-9]*]]: vector<2x3xf32>,
471 // CHECK-SAME: %[[F0:[a-zA-Z0-9]*]]: f32,
472 // CHECK-SAME: %[[F1:[a-zA-Z0-9]*]]: f32,
473 // CHECK-SAME: %[[F2:[a-zA-Z0-9]*]]: f32,
474 // CHECK-SAME: %[[F3:[a-zA-Z0-9]*]]: f32
475 func.func @insert_extract_transpose_2d(
476 %v: vector<2x3xf32>, %f0: f32, %f1: f32, %f2: f32, %f3: f32)
479 %0 = vector.insert %f0, %v[0, 0] : f32 into vector<2x3xf32>
480 %1 = vector.insert %f1, %0[0, 1] : f32 into vector<2x3xf32>
481 %2 = vector.insert %f2, %1[1, 0] : f32 into vector<2x3xf32>
482 %3 = vector.insert %f3, %2[1, 1] : f32 into vector<2x3xf32>
483 %4 = vector.transpose %3, [1, 0] : vector<2x3xf32> to vector<3x2xf32>
484 %5 = vector.insert %f3, %4[1, 0] : f32 into vector<3x2xf32>
485 %6 = vector.transpose %5, [1, 0] : vector<3x2xf32> to vector<2x3xf32>
487 // Expected %f2 from %2 = vector.insert %f2, %1[1, 0].
488 %r1 = vector.extract %3[1, 0] : f32 from vector<2x3xf32>
490 // Expected %f1 from %1 = vector.insert %f1, %0[0, 1] followed by
492 %r2 = vector.extract %4[1, 0] : f32 from vector<3x2xf32>
494 // Expected %f2 from %2 = vector.insert %f2, %1[1, 0] followed by double
496 %r3 = vector.extract %6[1, 0] : f32 from vector<2x3xf32>
498 // CHECK-NEXT: return %[[F2]], %[[F1]], %[[F2]] : f32, f32, f32
499 return %r1, %r2, %r3 : f32, f32, f32
// Extracts reading through a chain of four inserts. Depending on how the
// extract position relates to each insert position (equal, insert position is
// a prefix, extract position is a prefix, or disjoint), the extract is expected
// to fold to an inserted value, be rewritten to read from it, or stay as is.
504 // CHECK-LABEL: insert_extract_chain
505 // CHECK-SAME: %[[V234:[a-zA-Z0-9]*]]: vector<2x3x4xf32>
506 // CHECK-SAME: %[[V34:[a-zA-Z0-9]*]]: vector<3x4xf32>
507 // CHECK-SAME: %[[V4:[a-zA-Z0-9]*]]: vector<4xf32>
508 func.func @insert_extract_chain(%v234: vector<2x3x4xf32>, %v34: vector<3x4xf32>, %v4: vector<4xf32>)
509 -> (vector<4xf32>, vector<4xf32>, vector<3x4xf32>, vector<3x4xf32>) {
510 // CHECK-NEXT: %[[A34:.*]] = vector.insert
511 %A34 = vector.insert %v34, %v234[0]: vector<3x4xf32> into vector<2x3x4xf32>
512 // CHECK-NEXT: %[[B34:.*]] = vector.insert
513 %B34 = vector.insert %v34, %A34[1]: vector<3x4xf32> into vector<2x3x4xf32>
514 // CHECK-NEXT: %[[A4:.*]] = vector.insert
515 %A4 = vector.insert %v4, %B34[1, 0]: vector<4xf32> into vector<2x3x4xf32>
516 // CHECK-NEXT: %[[B4:.*]] = vector.insert
517 %B4 = vector.insert %v4, %A4[1, 1]: vector<4xf32> into vector<2x3x4xf32>
519 // Case 2.a. [1, 1] == insertpos ([1, 1])
520 // Match %B4 insertionpos and fold to its source(i.e. %V4).
521 %r0 = vector.extract %B4[1, 1]: vector<4xf32> from vector<2x3x4xf32>
523 // Case 3.a. insertpos ([1]) is a prefix of [1, 0].
524 // Traverse %B34 to its source(i.e. %V34@[*0*]).
525 // CHECK-NEXT: %[[R1:.*]] = vector.extract %[[V34]][0]
526 %r1 = vector.extract %B34[1, 0]: vector<4xf32> from vector<2x3x4xf32>
528 // Case 4. [1] is a prefix of insertpos ([1, 1]).
529 // Cannot traverse %B4.
530 // CHECK-NEXT: %[[R2:.*]] = vector.extract %[[B4]][1]
531 %r2 = vector.extract %B4[1]: vector<3x4xf32> from vector<2x3x4xf32>
533 // Case 5. [0] is disjoint from insertpos ([1, 1]).
534 // Traverse %B4 to its dest(i.e. %A4@[0]).
535 // Traverse %A4 to its dest(i.e. %B34@[0]).
536 // Traverse %B34 to its dest(i.e. %A34@[0]).
537 // Match %A34 insertionpos and fold to its source(i.e. %V34).
538 %r3 = vector.extract %B4[0]: vector<3x4xf32> from vector<2x3x4xf32>
540 // CHECK: return %[[V4]], %[[R1]], %[[R2]], %[[V34]]
541 return %r0, %r1, %r2, %r3:
542 vector<4xf32>, vector<4xf32>, vector<3x4xf32>, vector<3x4xf32>
// Extracts reading through transpose -> insert -> transpose chains on 3-D
// vectors. Only the first chain is expected to fold (to an extract from the
// function argument); the other three are expected to keep their
// transpose/insert/transpose ops and extract from the final transpose.
547 // CHECK-LABEL: func @insert_extract_transpose_3d(
548 // CHECK-SAME: %[[V234:[a-zA-Z0-9]*]]: vector<2x3x4xf32>
549 func.func @insert_extract_transpose_3d(
550 %v234: vector<2x3x4xf32>, %v43: vector<4x3xf32>, %f0: f32)
551 -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<3x4xf32>) {
553 %a432 = vector.transpose %v234, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
554 %b432 = vector.insert %f0, %a432[0, 0, 1] : f32 into vector<4x3x2xf32>
555 %c234 = vector.transpose %b432, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
556 // Case 1. %c234 = transpose [2,1,0] posWithSentinels [1,2,-1] -> [-1,2,1]
557 // Case 5. %b432 = insert [0,0,1] (inter([.,2,1], [.,0,1]) == 0) prop to %a432
558 // Case 1. %a432 = transpose [2,1,0] posWithSentinels [-1,2,1] -> [1,2,-1]
559 // can extract directly from %v234, the rest folds.
560 // CHECK: %[[R0:.*]] = vector.extract %[[V234]][1, 2]
561 %r0 = vector.extract %c234[1, 2] : vector<4xf32> from vector<2x3x4xf32>
563 // CHECK-NEXT: vector.transpose
564 // CHECK-NEXT: vector.insert
565 // CHECK-NEXT: %[[F234:.*]] = vector.transpose
566 %d432 = vector.transpose %v234, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
567 %e432 = vector.insert %f0, %d432[0, 2, 1] : f32 into vector<4x3x2xf32>
568 %f234 = vector.transpose %e432, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
569 // Case 1. %f234 = transpose [2,1,0] posWithSentinels [1,2,-1] -> [-1,2,1]
570 // Case 4. %e432 = insert [0,2,1] (inter([.,2,1], [.,2,1]) != 0)
571 // Bail, cannot do better than the current.
572 // CHECK: %[[R1:.*]] = vector.extract %[[F234]]
573 %r1 = vector.extract %f234[1, 2] : vector<4xf32> from vector<2x3x4xf32>
575 // CHECK-NEXT: vector.transpose
576 // CHECK-NEXT: vector.insert
577 // CHECK-NEXT: %[[H234:.*]] = vector.transpose
578 %g243 = vector.transpose %v234, [0, 2, 1] : vector<2x3x4xf32> to vector<2x4x3xf32>
579 %h243 = vector.insert %v43, %g243[0] : vector<4x3xf32> into vector<2x4x3xf32>
580 %i234 = vector.transpose %h243, [0, 2, 1] : vector<2x4x3xf32> to vector<2x3x4xf32>
581 // Case 1. %i234 = transpose [0,2,1] posWithSentinels [0,-1,-2] -> [0,-2,-1]
582 // Case 3.b. %h243 = insert [0] is prefix of [0,.,.] but internal transpose.
583 // Bail, cannot do better than the current.
584 // CHECK: %[[R2:.*]] = vector.extract %[[H234]][0, 1]
585 %r2 = vector.extract %i234[0, 1] : vector<4xf32> from vector<2x3x4xf32>
587 // CHECK-NEXT: vector.transpose
588 // CHECK-NEXT: vector.insert
589 // CHECK-NEXT: %[[K234:.*]] = vector.transpose
590 %j243 = vector.transpose %v234, [0, 2, 1] : vector<2x3x4xf32> to vector<2x4x3xf32>
591 %k243 = vector.insert %v43, %j243[0] : vector<4x3xf32> into vector<2x4x3xf32>
592 %l234 = vector.transpose %k243, [0, 2, 1] : vector<2x4x3xf32> to vector<2x3x4xf32>
593 // Case 1. %l234 = transpose [0,2,1] posWithSentinels [0,-1,-2] -> [0,-2,-1]
594 // Case 2.b. %k243 = insert [0] == [0,.,.] but internal transpose.
595 // Bail, cannot do better than the current.
596 // CHECK: %[[R3:.*]] = vector.extract %[[K234]][0]
597 %r3 = vector.extract %l234[0] : vector<3x4xf32> from vector<2x3x4xf32>
599 // CHECK-NEXT: return %[[R0]], %[[R1]], %[[R2]], %[[R3]]
600 return %r0, %r1, %r2, %r3: vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<3x4xf32>
// A chain of extracts ([0], then [1, 2], then [3]) is expected to be merged
// into one extract with the concatenated position [0, 1, 2, 3]. A single
// extract with nothing to merge into is expected to stay as written.
605 // CHECK-LABEL: fold_extracts
606 // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: vector<3x4x5x6xf32>
607 func.func @fold_extracts(%a : vector<3x4x5x6xf32>) -> (f32, vector<4x5x6xf32>) {
608 %b = vector.extract %a[0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
609 %c = vector.extract %b[1, 2] : vector<6xf32> from vector<4x5x6xf32>
610 // CHECK-NEXT: vector.extract %[[A]][0, 1, 2, 3] : f32 from vector<3x4x5x6xf32>
611 %d = vector.extract %c[3] : f32 from vector<6xf32>
613 // CHECK-NEXT: vector.extract %[[A]][0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
614 %e = vector.extract %a[0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
616 // CHECK-NEXT: return
617 return %d, %e : f32, vector<4x5x6xf32>
// Extracts from transposes. When the permutation leaves the innermost dim in
// place, the extract is expected to read directly from the transpose source
// with a permuted position; when the innermost dim is permuted, both the
// transpose and the extract are expected to remain.
622 // CHECK-LABEL: fold_extract_transpose
623 // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: vector<3x4x5x6xf32>
624 // CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: vector<3x6x5x6xf32>
625 func.func @fold_extract_transpose(
626 %a : vector<3x4x5x6xf32>, %b : vector<3x6x5x6xf32>) -> (
627 vector<6xf32>, vector<6xf32>, vector<6xf32>) {
628 // [3] is a proper most minor identity map in transpose.
629 // Permutation is a self inverse and we have.
630 // [0, 2, 1] ^ -1 o [0, 1, 2] = [0, 2, 1] o [0, 1, 2]
632 // CHECK-NEXT: vector.extract %[[A]][0, 2, 1] : vector<6xf32> from vector<3x4x5x6xf32>
633 %0 = vector.transpose %a, [0, 2, 1, 3] : vector<3x4x5x6xf32> to vector<3x5x4x6xf32>
634 %1 = vector.extract %0[0, 1, 2] : vector<6xf32> from vector<3x5x4x6xf32>
636 // [3] is a proper most minor identity map in transpose.
637 // Permutation is not a self inverse and we have.
638 // [1, 2, 0] ^ -1 o [0, 1, 2] = [2, 0, 1] o [0, 1, 2]
640 // CHECK-NEXT: vector.extract %[[A]][2, 0, 1] : vector<6xf32> from vector<3x4x5x6xf32>
641 %2 = vector.transpose %a, [1, 2, 0, 3] : vector<3x4x5x6xf32> to vector<4x5x3x6xf32>
642 %3 = vector.extract %2[0, 1, 2] : vector<6xf32> from vector<4x5x3x6xf32>
644 // Not a minor identity map so intra-vector level has been permuted
645 // CHECK-NEXT: vector.transpose %[[B]], [0, 2, 3, 1]
646 // CHECK-NEXT: vector.extract %{{.*}}[0, 1, 2]
647 %4 = vector.transpose %b, [0, 2, 3, 1] : vector<3x6x5x6xf32> to vector<3x5x6x6xf32>
648 %5 = vector.extract %4[0, 1, 2] : vector<6xf32> from vector<3x5x6x6xf32>
650 return %1, %3, %5 : vector<6xf32>, vector<6xf32>, vector<6xf32>
// Extracting a scalar from a broadcast of a scalar is expected to fold to the
// broadcast source, so the function returns its argument.
655 // CHECK-LABEL: fold_extract_broadcast
656 // CHECK-SAME: %[[A:.*]]: f32
657 // CHECK: return %[[A]] : f32
658 func.func @fold_extract_broadcast(%a : f32) -> f32 {
659 %b = vector.broadcast %a : f32 to vector<1x2x4xf32>
660 %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
// Extracting a scalar from a broadcast of a 0-D vector is expected to become
// an extractelement on the 0-D source vector.
666 // CHECK-LABEL: fold_extract_broadcast_0dvec
667 // CHECK-SAME: %[[A:.*]]: vector<f32>
668 // CHECK: %[[B:.+]] = vector.extractelement %[[A]][] : vector<f32>
669 // CHECK: return %[[B]] : f32
670 func.func @fold_extract_broadcast_0dvec(%a : vector<f32>) -> f32 {
671 %b = vector.broadcast %a : vector<f32> to vector<1x2x4xf32>
672 %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
// Negative case: the broadcast source is vector<1x1xf32> and the extract
// yields vector<4xf32>; both the broadcast and the extract are expected to
// remain.
678 // CHECK-LABEL: fold_extract_broadcast_negative
679 // CHECK: vector.broadcast %{{.*}} : vector<1x1xf32> to vector<1x1x4xf32>
680 // CHECK: vector.extract %{{.*}}[0, 0] : vector<4xf32> from vector<1x1x4xf32>
681 func.func @fold_extract_broadcast_negative(%a : vector<1x1xf32>) -> vector<4xf32> {
682 %b = vector.broadcast %a : vector<1x1xf32> to vector<1x1x4xf32>
683 %r = vector.extract %b[0, 0] : vector<4xf32> from vector<1x1x4xf32>
684 return %r : vector<4xf32>
// Extracting a scalar from a splat is expected to fold to the splat operand,
// so the function returns its argument.
689 // CHECK-LABEL: fold_extract_splat
690 // CHECK-SAME: %[[A:.*]]: f32
691 // CHECK: return %[[A]] : f32
692 func.func @fold_extract_splat(%a : f32) -> f32 {
693 %b = vector.splat %a : vector<1x2x4xf32>
694 %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
// Extracting a vector<4xf32> from a broadcast whose source is that same
// vector<4xf32> is expected to fold to the broadcast source.
700 // CHECK-LABEL: fold_extract_broadcast_vector
701 // CHECK-SAME: %[[A:.*]]: vector<4xf32>
702 // CHECK: return %[[A]] : vector<4xf32>
703 func.func @fold_extract_broadcast_vector(%a : vector<4xf32>) -> vector<4xf32> {
704 %b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32>
705 %r = vector.extract %b[0, 1] : vector<4xf32> from vector<1x2x4xf32>
706 return %r : vector<4xf32>
// Extracting a scalar from a broadcast of vector<4xf32> is expected to become
// an extract from the broadcast source that keeps only the trailing index [2].
711 // CHECK-LABEL: fold_extract_broadcast
712 // CHECK-SAME: %[[A:.*]]: vector<4xf32>
713 // CHECK: %[[R:.*]] = vector.extract %[[A]][2] : f32 from vector<4xf32>
714 // CHECK: return %[[R]] : f32
715 func.func @fold_extract_broadcast(%a : vector<4xf32>) -> f32 {
716 %b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32>
717 %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
// Extracting a vector<4xf32> from a broadcast of a scalar is expected to
// become a smaller broadcast of the scalar directly to vector<4xf32>.
723 // CHECK-LABEL: fold_extract_broadcast
724 // CHECK: %[[B:.*]] = vector.broadcast %{{.*}} : f32 to vector<4xf32>
725 // CHECK: return %[[B]] : vector<4xf32>
726 func.func @fold_extract_broadcast(%a : f32) -> vector<4xf32> {
727 %b = vector.broadcast %a : f32 to vector<1x2x4xf32>
728 %r = vector.extract %b[0, 1] : vector<4xf32> from vector<1x2x4xf32>
729 return %r : vector<4xf32>
// Extracting a vector<8xf32> from a broadcast of vector<1xf32> is expected to
// become a broadcast of the source directly to vector<8xf32>.
734 // CHECK-LABEL: fold_extract_broadcast
735 // CHECK-SAME: %[[A:.*]]: vector<1xf32>
736 // CHECK: %[[R:.*]] = vector.broadcast %[[A]] : vector<1xf32> to vector<8xf32>
737 // CHECK: return %[[R]] : vector<8xf32>
738 func.func @fold_extract_broadcast(%a : vector<1xf32>) -> vector<8xf32> {
739 %b = vector.broadcast %a : vector<1xf32> to vector<1x8xf32>
740 %r = vector.extract %b[0] : vector<8xf32> from vector<1x8xf32>
741 return %r : vector<8xf32>
// Scalar extracts from a shuffle with mask [0, 8, 7, 15] over two 8-element
// vectors: each is expected to read directly from the matching shuffle operand
// (mask values 0 and 7 from the first, 8 and 15 as elements 0 and 7 of the
// second), leaving no shuffle.
745 // CHECK-LABEL: @fold_extract_shuffle
746 // CHECK-SAME: %[[A:.*]]: vector<8xf32>, %[[B:.*]]: vector<8xf32>
747 // CHECK-NOT: vector.shuffle
748 // CHECK: vector.extract %[[A]][0] : f32 from vector<8xf32>
749 // CHECK: vector.extract %[[B]][0] : f32 from vector<8xf32>
750 // CHECK: vector.extract %[[A]][7] : f32 from vector<8xf32>
751 // CHECK: vector.extract %[[B]][7] : f32 from vector<8xf32>
752 func.func @fold_extract_shuffle(%a : vector<8xf32>, %b : vector<8xf32>)
753 -> (f32, f32, f32, f32) {
754 %shuffle = vector.shuffle %a, %b [0, 8, 7, 15] : vector<8xf32>, vector<8xf32>
755 %e0 = vector.extract %shuffle[0] : f32 from vector<4xf32>
756 %e1 = vector.extract %shuffle[1] : f32 from vector<4xf32>
757 %e2 = vector.extract %shuffle[2] : f32 from vector<4xf32>
758 %e3 = vector.extract %shuffle[3] : f32 from vector<4xf32>
759 return %e0, %e1, %e2, %e3 : f32, f32, f32, f32
// Extracts from shape_cast results are expected to be rewritten to extract
// from the shape_cast source at the equivalent position. The last extract
// (index 0 of a cast that only adds a leading unit dim) is expected to fold to
// the source itself.
764 // CHECK-LABEL: func @fold_extract_shapecast
765 // CHECK-SAME: (%[[A0:.*]]: vector<5x1x3x2xf32>, %[[A1:.*]]: vector<8x4x2xf32>
766 // CHECK: %[[R0:.*]] = vector.extract %[[A0]][1, 0, 1, 1] : f32 from vector<5x1x3x2xf32>
767 // CHECK: %[[R1:.*]] = vector.extract %[[A0]][1, 0, 2] : vector<2xf32> from vector<5x1x3x2xf32>
768 // CHECK: %[[R2:.*]] = vector.extract %[[A1]][7] : vector<4x2xf32> from vector<8x4x2xf32>
769 // CHECK: return %[[R0]], %[[R1]], %[[R2]], %[[A1]] : f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>
770 func.func @fold_extract_shapecast(%arg0 : vector<5x1x3x2xf32>,
771 %arg1 : vector<8x4x2xf32>)
772 -> (f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>) {
773 %0 = vector.shape_cast %arg0 : vector<5x1x3x2xf32> to vector<15x2xf32>
774 %1 = vector.shape_cast %arg1 : vector<8x4x2xf32> to vector<4x2x4x2xf32>
775 %2 = vector.shape_cast %arg1 : vector<8x4x2xf32> to vector<1x8x4x2xf32>
776 %r1 = vector.extract %0[4, 1] : f32 from vector<15x2xf32>
777 %r2 = vector.extract %0[5] : vector<2xf32> from vector<15x2xf32>
778 %r3 = vector.extract %1[3, 1] : vector<4x2xf32> from vector<4x2x4x2xf32>
779 %r4 = vector.extract %2[0] : vector<8x4x2xf32> from vector<1x8x4x2xf32>
780 return %r1, %r2, %r3, %r4 : f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>
// -----

// Negative case: the extracted vector<4x2xf32> cannot be taken directly from
// the rank-1 cast source, so both the shape_cast and the extract must stay.
// CHECK-LABEL: fold_extract_shapecast_negative
//       CHECK:   %[[V:.*]] = vector.shape_cast %{{.*}} : vector<16xf32> to vector<2x4x2xf32>
//       CHECK:   %[[R:.*]] = vector.extract %[[V]][1] : vector<4x2xf32> from vector<2x4x2xf32>
//       CHECK:   return %[[R]] : vector<4x2xf32>
func.func @fold_extract_shapecast_negative(%arg0 : vector<16xf32>) -> vector<4x2xf32> {
  %0 = vector.shape_cast %arg0 : vector<16xf32> to vector<2x4x2xf32>
  %r = vector.extract %0[1] : vector<4x2xf32> from vector<2x4x2xf32>
  return %r : vector<4x2xf32>
}
// -----

// Negative case: a scalar extract through a shape_cast whose source is a 0-d
// vector is expected to be left untouched.
// CHECK-LABEL: dont_fold_0d_extract_shapecast
//       CHECK:   %[[V:.*]] = vector.shape_cast %{{.*}} : vector<f32> to vector<1xf32>
//       CHECK:   %[[R:.*]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
//       CHECK:   return %[[R]] : f32
func.func @dont_fold_0d_extract_shapecast(%arg0 : vector<f32>) -> f32 {
  %0 = vector.shape_cast %arg0 : vector<f32> to vector<1xf32>
  %r = vector.extract %0[0] : f32 from vector<1xf32>
  return %r : f32
}
// -----

// Extracting the single leading-dimension slice of a shape_cast result is
// expected to collapse into one shape_cast from the original source.
// CHECK-LABEL: fold_extract_shapecast_to_shapecast
//  CHECK-SAME: (%[[ARG:.+]]: vector<3x4xf32>)
//       CHECK:   %[[R:.+]] = vector.shape_cast %[[ARG]] : vector<3x4xf32> to vector<12xf32>
//       CHECK:   return %[[R]]
func.func @fold_extract_shapecast_to_shapecast(%arg0 : vector<3x4xf32>) -> vector<12xf32> {
  %0 = vector.shape_cast %arg0 : vector<3x4xf32> to vector<1x12xf32>
  %r = vector.extract %0[0] : vector<12xf32> from vector<1x12xf32>
  return %r : vector<12xf32>
}
// -----

// Negative case: extracting the scalar out of a 0-d vector with an empty
// position is expected to remain as-is.
// CHECK-LABEL: func @extract_no_fold_scalar_to_0d(
//  CHECK-SAME:     %[[v:.*]]: vector<f32>)
//       CHECK:   %[[extract:.*]] = vector.extract %[[v]][] : f32 from vector<f32>
//       CHECK:   return %[[extract]]
func.func @extract_no_fold_scalar_to_0d(%v: vector<f32>) -> f32 {
  %0 = vector.extract %v[] : f32 from vector<f32>
  return %0 : f32
}
// -----

// Inserting a value of the full destination type at an empty position is
// expected to fold to the inserted value (here the constant).
// CHECK-LABEL: func @insert_fold_same_rank(
//  CHECK-SAME:     %[[v:.*]]: vector<2x2xf32>)
//       CHECK:   %[[CST:.+]] = arith.constant
//  CHECK-SAME:     : vector<2x2xf32>
//   CHECK-NOT:   vector.insert
//       CHECK:   return %[[CST]]
func.func @insert_fold_same_rank(%v: vector<2x2xf32>) -> vector<2x2xf32> {
  %cst = arith.constant dense<0.000000e+00> : vector<2x2xf32>
  %0 = vector.insert %cst, %v [] : vector<2x2xf32> into vector<2x2xf32>
  return %0 : vector<2x2xf32>
}
// -----

// Negative case: inserting a scalar into a 0-d vector is expected to remain
// a vector.insert.
// CHECK-LABEL: func @insert_no_fold_scalar_to_0d(
//  CHECK-SAME:     %[[v:.*]]: vector<f32>)
//       CHECK:   %[[extract:.*]] = vector.insert %{{.*}}, %[[v]] [] : f32 into vector<f32>
//       CHECK:   return %[[extract]]
func.func @insert_no_fold_scalar_to_0d(%v: vector<f32>) -> vector<f32> {
  %cst = arith.constant 0.000000e+00 : f32
  %0 = vector.insert %cst, %v [] : f32 into vector<f32>
  return %0 : vector<f32>
}
// -----

// Negative case: this expanding shape_cast followed by a collapsing
// shape_cast is expected to survive as two separate ops.
// CHECK-LABEL: dont_fold_expand_collapse
//       CHECK:   %[[A:.*]] = vector.shape_cast %{{.*}} : vector<1x1x64xf32> to vector<1x1x8x8xf32>
//       CHECK:   %[[B:.*]] = vector.shape_cast %{{.*}} : vector<1x1x8x8xf32> to vector<8x8xf32>
//       CHECK:   return %[[B]] : vector<8x8xf32>
func.func @dont_fold_expand_collapse(%arg0: vector<1x1x64xf32>) -> vector<8x8xf32> {
  %0 = vector.shape_cast %arg0 : vector<1x1x64xf32> to vector<1x1x8x8xf32>
  %1 = vector.shape_cast %0 : vector<1x1x8x8xf32> to vector<8x8xf32>
  return %1 : vector<8x8xf32>
}
// -----

// A broadcast that only adds unit dimensions, followed by a shape_cast back
// to the source type, is expected to fold to the source value.
// CHECK-LABEL: func @fold_broadcast_shapecast
//  CHECK-SAME: (%[[V:.+]]: vector<4xf32>)
//       CHECK:   return %[[V]]
func.func @fold_broadcast_shapecast(%arg0: vector<4xf32>) -> vector<4xf32> {
  %0 = vector.broadcast %arg0 : vector<4xf32> to vector<1x1x4xf32>
  %1 = vector.shape_cast %0 : vector<1x1x4xf32> to vector<4xf32>
  return %1 : vector<4xf32>
}
// -----

// A scalar broadcast followed by a shape_cast is expected to leave a
// broadcast with no shape_cast after it.
// CHECK-LABEL: func @canonicalize_broadcast_shapecast_scalar
//       CHECK:   vector.broadcast
//   CHECK-NOT:   vector.shape_cast
func.func @canonicalize_broadcast_shapecast_scalar(%arg0: f32) -> vector<1xf32> {
  %0 = vector.broadcast %arg0 : f32 to vector<1x1x1xf32>
  %1 = vector.shape_cast %0 : vector<1x1x1xf32> to vector<1xf32>
  return %1 : vector<1xf32>
}
// -----

// Negative case: the broadcast / shape_cast pair below is expected to be
// kept, since both ops must still appear in the output.
// CHECK-LABEL: func @dont_fold_broadcast_shapecast_diff_shape
//       CHECK:   vector.broadcast
//       CHECK:   vector.shape_cast
func.func @dont_fold_broadcast_shapecast_diff_shape(%arg0: vector<4xf32>) -> vector<8xf32> {
  %0 = vector.broadcast %arg0 : vector<4xf32> to vector<1x2x4xf32>
  %1 = vector.shape_cast %0 : vector<1x2x4xf32> to vector<8xf32>
  return %1 : vector<8xf32>
}
// -----

// The broadcast / shape_cast pair below is expected to leave a broadcast
// with no shape_cast after it.
// CHECK-LABEL: func @canonicalize_broadcast_shapecast_to_broadcast
//       CHECK:   vector.broadcast
//   CHECK-NOT:   vector.shape_cast
func.func @canonicalize_broadcast_shapecast_to_broadcast(%arg0: vector<3xf32>) -> vector<8x3xf32> {
  %0 = vector.broadcast %arg0 : vector<3xf32> to vector<2x4x3xf32>
  %1 = vector.shape_cast %0 : vector<2x4x3xf32> to vector<8x3xf32>
  return %1 : vector<8x3xf32>
}
// -----

// A broadcast that only prepends unit dimensions, followed by a shape_cast,
// is expected to become one shape_cast from the source with no broadcast.
// CHECK-LABEL: func @canonicalize_broadcast_shapecast_to_shapecast
//   CHECK-NOT:   vector.broadcast
//       CHECK:   vector.shape_cast {{.+}} : vector<3x4xf32> to vector<1x12xf32>
func.func @canonicalize_broadcast_shapecast_to_shapecast(%arg0: vector<3x4xf32>) -> vector<1x12xf32> {
  %0 = vector.broadcast %arg0 : vector<3x4xf32> to vector<1x1x3x4xf32>
  %1 = vector.shape_cast %0 : vector<1x1x3x4xf32> to vector<1x12xf32>
  return %1 : vector<1x12xf32>
}
// -----

// Transfer ops whose mask is statically all-true (a full constant_mask, or a
// dense<true> arith constant) are expected to be printed without the mask
// operand.
// CHECK-LABEL: fold_vector_transfer_masks
func.func @fold_vector_transfer_masks(%A: memref<?x?xf32>) -> (vector<4x8xf32>, vector<4x[4]xf32>) {
  // CHECK: %[[C0:.+]] = arith.constant 0 : index
  %c0 = arith.constant 0 : index
  // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
  %f0 = arith.constant 0.0 : f32

  %mask = vector.constant_mask [8, 4] : vector<8x4xi1>

  %arith_all_true_mask = arith.constant dense<true> : vector<4x[4]xi1>

  // CHECK: vector.transfer_read %{{.*}}, %[[F0]] {permutation_map
  %1 = vector.transfer_read %A[%c0, %c0], %f0, %mask
      {permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : memref<?x?xf32>, vector<4x8xf32>

  // CHECK: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] {permutation_map
  vector.transfer_write %1, %A[%c0, %c0], %mask
      {permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<4x8xf32>, memref<?x?xf32>

  // CHECK: vector.transfer_read %{{.*}}, %[[F0]] :
  %2 = vector.transfer_read %A[%c0, %c0], %f0, %arith_all_true_mask : memref<?x?xf32>, vector<4x[4]xf32>

  // CHECK: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] :
  vector.transfer_write %2, %A[%c0, %c0], %arith_all_true_mask : vector<4x[4]xf32>, memref<?x?xf32>

  return %1, %2 : vector<4x8xf32>, vector<4x[4]xf32>
}
// -----

// Transfers on memref<?x8xf32> are expected to gain in_bounds = true only
// for the trailing dimension, and only when the vector's size there (8)
// matches the static memref size; the 4x9 transfers get no attribute.
// CHECK-LABEL: fold_vector_transfers
func.func @fold_vector_transfers(%A: memref<?x8xf32>) -> (vector<4x8xf32>, vector<4x9xf32>) {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

  // CHECK: vector.transfer_read %{{.*}} {in_bounds = [false, true]}
  %1 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x8xf32>, vector<4x8xf32>

  // CHECK: vector.transfer_write %{{.*}} {in_bounds = [false, true]}
  vector.transfer_write %1, %A[%c0, %c0] : vector<4x8xf32>, memref<?x8xf32>

  // Both dims may be out-of-bounds, attribute is elided.
  // CHECK: vector.transfer_read %{{.*}}
  // CHECK-NOT: in_bounds
  %2 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x8xf32>, vector<4x9xf32>

  // Both dims may be out-of-bounds, attribute is elided.
  // CHECK: vector.transfer_write %{{.*}}
  // CHECK-NOT: in_bounds
  vector.transfer_write %2, %A[%c0, %c0] : vector<4x9xf32>, memref<?x8xf32>

  return %1, %2 : vector<4x8xf32>, vector<4x9xf32>
}
// -----

// An identity bitcast and a bitcast round trip (i32 -> i16 -> i32) are both
// expected to fold away, returning the function arguments directly.
// CHECK-LABEL: bitcast_folding
//  CHECK-SAME:   %[[A:.*]]: vector<4x8xf32>
//  CHECK-SAME:   %[[B:.*]]: vector<2xi32>
//       CHECK:   return %[[A]], %[[B]] : vector<4x8xf32>, vector<2xi32>
func.func @bitcast_folding(%I1: vector<4x8xf32>, %I2: vector<2xi32>) -> (vector<4x8xf32>, vector<2xi32>) {
  %0 = vector.bitcast %I1 : vector<4x8xf32> to vector<4x8xf32>
  %1 = vector.bitcast %I2 : vector<2xi32> to vector<4xi16>
  %2 = vector.bitcast %1 : vector<4xi16> to vector<2xi32>
  return %0, %2 : vector<4x8xf32>, vector<2xi32>
}
// Bitcasting splat f16 constants to f32 is expected to constant-fold; each
// f32 lane is made of two copies of the f16 bit pattern.
// CHECK-LABEL: func @bitcast_f16_to_f32
//              bit pattern: 0x40004000
//   CHECK-DAG: %[[CST1:.+]] = arith.constant dense<2.00390625> : vector<4xf32>
//              bit pattern: 0x00000000
//   CHECK-DAG: %[[CST0:.+]] = arith.constant dense<0.000000e+00> : vector<4xf32>
//       CHECK: return %[[CST0]], %[[CST1]]
func.func @bitcast_f16_to_f32() -> (vector<4xf32>, vector<4xf32>) {
  %cst0 = arith.constant dense<0.0> : vector<8xf16> // bit pattern: 0x0000
  %cst1 = arith.constant dense<2.0> : vector<8xf16> // bit pattern: 0x4000
  %cast0 = vector.bitcast %cst0: vector<8xf16> to vector<4xf32>
  %cast1 = vector.bitcast %cst1: vector<8xf16> to vector<4xf32>
  return %cast0, %cast1: vector<4xf32>, vector<4xf32>
}
// Bitcasting splat i8 constants to i32 is expected to constant-fold; each
// i32 lane is made of four copies of the i8 bit pattern.
// CHECK-LABEL: func @bitcast_i8_to_i32
//              bit pattern: 0xA0A0A0A0
//   CHECK-DAG: %[[CST1:.+]] = arith.constant dense<-1600085856> : vector<4xi32>
//              bit pattern: 0x00000000
//   CHECK-DAG: %[[CST0:.+]] = arith.constant dense<0> : vector<4xi32>
//       CHECK: return %[[CST0]], %[[CST1]]
func.func @bitcast_i8_to_i32() -> (vector<4xi32>, vector<4xi32>) {
  %cst0 = arith.constant dense<0> : vector<16xi8> // bit pattern: 0x00
  %cst1 = arith.constant dense<160> : vector<16xi8> // bit pattern: 0xA0
  %cast0 = vector.bitcast %cst0: vector<16xi8> to vector<4xi32>
  %cast1 = vector.bitcast %cst1: vector<16xi8> to vector<4xi32>
  return %cast0, %cast1: vector<4xi32>, vector<4xi32>
}
// -----

// Broadcasting a scalar constant is expected to fold into a splat vector
// constant.
// CHECK-LABEL: broadcast_folding1
//       CHECK: %[[CST:.*]] = arith.constant dense<42> : vector<4xi32>
//   CHECK-NOT: vector.broadcast
//       CHECK: return %[[CST]]
func.func @broadcast_folding1() -> vector<4xi32> {
  %0 = arith.constant 42 : i32
  %1 = vector.broadcast %0 : i32 to vector<4xi32>
  return %1 : vector<4xi32>
}
// -----

// Two chained broadcasts of a scalar constant are expected to fold into one
// splat constant of the final vector type.
// CHECK-LABEL: @broadcast_folding2
//       CHECK: %[[CST:.*]] = arith.constant dense<42> : vector<4x16xi32>
//   CHECK-NOT: vector.broadcast
//       CHECK: return %[[CST]]
func.func @broadcast_folding2() -> vector<4x16xi32> {
  %0 = arith.constant 42 : i32
  %1 = vector.broadcast %0 : i32 to vector<16xi32>
  %2 = vector.broadcast %1 : vector<16xi32> to vector<4x16xi32>
  return %2 : vector<4x16xi32>
}
// -----

// Two chained broadcasts of a non-constant scalar are expected to merge into
// one broadcast to the final type.
// CHECK-LABEL: @fold_consecutive_broadcasts(
//  CHECK-SAME:                              %[[ARG0:.*]]: i32
//       CHECK: %[[RESULT:.*]] = vector.broadcast %[[ARG0]] : i32 to vector<4x16xi32>
//       CHECK: return %[[RESULT]]
func.func @fold_consecutive_broadcasts(%a : i32) -> vector<4x16xi32> {
  %1 = vector.broadcast %a : i32 to vector<16xi32>
  %2 = vector.broadcast %1 : vector<16xi32> to vector<4x16xi32>
  return %2 : vector<4x16xi32>
}
// -----

// shape_cast of a splat constant is expected to fold into a splat constant
// of the result type.
// CHECK-LABEL: shape_cast_constant
//   CHECK-DAG: %[[CST1:.*]] = arith.constant dense<1> : vector<3x4x2xi32>
//   CHECK-DAG: %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<20x2xf32>
//       CHECK: return %[[CST0]], %[[CST1]] : vector<20x2xf32>, vector<3x4x2xi32>
func.func @shape_cast_constant() -> (vector<20x2xf32>, vector<3x4x2xi32>) {
  %cst = arith.constant dense<2.000000e+00> : vector<5x4x2xf32>
  %cst_1 = arith.constant dense<1> : vector<12x2xi32>
  %0 = vector.shape_cast %cst : vector<5x4x2xf32> to vector<20x2xf32>
  %1 = vector.shape_cast %cst_1 : vector<12x2xi32> to vector<3x4x2xi32>
  return %0, %1 : vector<20x2xf32>, vector<3x4x2xi32>
}
// -----

// extract_strided_slice of a splat constant is expected to fold into a splat
// constant of the slice type.
// CHECK-LABEL: extract_strided_constant
//   CHECK-DAG: %[[CST1:.*]] = arith.constant dense<1> : vector<2x13x3xi32>
//   CHECK-DAG: %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<12x2xf32>
//       CHECK: return %[[CST0]], %[[CST1]] : vector<12x2xf32>, vector<2x13x3xi32>
func.func @extract_strided_constant() -> (vector<12x2xf32>, vector<2x13x3xi32>) {
  %cst = arith.constant dense<2.000000e+00> : vector<29x7xf32>
  %cst_1 = arith.constant dense<1> : vector<4x37x9xi32>
  %0 = vector.extract_strided_slice %cst
    {offsets = [2, 3], sizes = [12, 2], strides = [1, 1]}
      : vector<29x7xf32> to vector<12x2xf32>
  %1 = vector.extract_strided_slice %cst_1
    {offsets = [1, 2, 5], sizes = [2, 13, 3], strides = [1, 1, 1]}
      : vector<4x37x9xi32> to vector<2x13x3xi32>
  return %0, %1 : vector<12x2xf32>, vector<2x13x3xi32>
}
// -----

// A strided slice that keeps the full inner dimension of a broadcast is
// expected to become a smaller broadcast of the same source.
// CHECK-LABEL: extract_strided_broadcast
//       CHECK:   %[[B:.*]] = vector.broadcast %{{.*}} : vector<4xf16> to vector<2x4xf16>
//  CHECK-NEXT:   return %[[B]] : vector<2x4xf16>
func.func @extract_strided_broadcast(%arg0: vector<4xf16>) -> vector<2x4xf16> {
  %0 = vector.broadcast %arg0 : vector<4xf16> to vector<16x4xf16>
  %1 = vector.extract_strided_slice %0
   {offsets = [0, 0], sizes = [2, 4], strides = [1, 1]} :
   vector<16x4xf16> to vector<2x4xf16>
  return %1 : vector<2x4xf16>
}
// -----

// When the slice also narrows the inner (non-broadcast) dimension, the
// expected result is a strided slice of the source followed by a broadcast.
// CHECK-LABEL: extract_strided_broadcast2
//       CHECK:   %[[E:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0], sizes = [2], strides = [1]} : vector<4xf16> to vector<2xf16>
//  CHECK-NEXT:   %[[B:.*]] = vector.broadcast %[[E]] : vector<2xf16> to vector<2x2xf16>
//  CHECK-NEXT:   return %[[B]] : vector<2x2xf16>
func.func @extract_strided_broadcast2(%arg0: vector<4xf16>) -> vector<2x2xf16> {
  %0 = vector.broadcast %arg0 : vector<4xf16> to vector<16x4xf16>
  %1 = vector.extract_strided_slice %0
   {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} :
   vector<16x4xf16> to vector<2x2xf16>
  return %1 : vector<2x2xf16>
}
// -----

// Slicing a dimension that was stretched from a unit source dimension is
// expected to become a direct broadcast of the source to the slice type.
// CHECK-LABEL: func @extract_strided_broadcast3
//  CHECK-SAME: (%[[ARG:.+]]: vector<1xf32>)
//       CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : vector<1xf32> to vector<1x4xf32>
//       CHECK: return %[[V]]
func.func @extract_strided_broadcast3(%arg0: vector<1xf32>) -> vector<1x4xf32> {
  %0 = vector.broadcast %arg0 : vector<1xf32> to vector<1x8xf32>
  %1 = vector.extract_strided_slice %0
      {offsets = [0, 4], sizes = [1, 4], strides = [1, 1]}
      : vector<1x8xf32> to vector<1x4xf32>
  return %1 : vector<1x4xf32>
}
// -----

// A strided slice of a scalar broadcast is expected to become a direct
// broadcast of the scalar to the slice type.
// CHECK-LABEL: func @extract_strided_broadcast4
//  CHECK-SAME: (%[[ARG:.+]]: f32)
//       CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : f32 to vector<1x4xf32>
//       CHECK: return %[[V]]
func.func @extract_strided_broadcast4(%arg0: f32) -> vector<1x4xf32> {
  %0 = vector.broadcast %arg0 : f32 to vector<1x8xf32>
  %1 = vector.extract_strided_slice %0
      {offsets = [0, 4], sizes = [1, 4], strides = [1, 1]}
      : vector<1x8xf32> to vector<1x4xf32>
  return %1 : vector<1x4xf32>
}
// -----

// Two chained shape_casts are expected to merge into one shape_cast from the
// original source to the final type.
// CHECK-LABEL: consecutive_shape_cast
//       CHECK:   %[[C:.*]] = vector.shape_cast %{{.*}} : vector<16xf16> to vector<4x4xf16>
//  CHECK-NEXT:   return %[[C]] : vector<4x4xf16>
func.func @consecutive_shape_cast(%arg0: vector<16xf16>) -> vector<4x4xf16> {
  %0 = vector.shape_cast %arg0 : vector<16xf16> to vector<2x8xf16>
  %1 = vector.shape_cast %0 : vector<2x8xf16> to vector<4x4xf16>
  return %1 : vector<4x4xf16>
}
// -----

// A transfer_read whose result is unused and a tensor transfer_write whose
// result is unused are both expected to be erased.
// CHECK-LABEL: func @dead_transfer_op
//   CHECK-NOT:   vector.transfer_read
//   CHECK-NOT:   vector.transfer_write
func.func @dead_transfer_op(%arg0 : tensor<4x4xf32>, %arg1 : memref<4x4xf32>,
                       %v0 : vector<1x4xf32>) {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %r = vector.transfer_read %arg1[%c0, %c0], %cf0 :
    memref<4x4xf32>, vector<1x4xf32>
  %w = vector.transfer_write %v0, %arg0[%c0, %c0] :
    vector<1x4xf32>, tensor<4x4xf32>
  return
}
// -----

// maskedload, gather and expandload ops whose results are unused are all
// expected to be erased.
// CHECK-LABEL: func @dead_load
//   CHECK-NOT:   vector.maskedload
//   CHECK-NOT:   vector.gather
//   CHECK-NOT:   vector.expandload
func.func @dead_load(%base: memref<?xf32>, %indices: vector<16xi32>,
                          %mask: vector<16xi1>, %passthru: vector<16xf32>) {
  %c0 = arith.constant 0 : index
  %0 = vector.maskedload %base[%c0], %mask, %passthru :
    memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
    memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  %2 = vector.expandload %base[%c0], %mask, %passthru :
    memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
  return
}
// -----

// A contraction with a zero-constant accumulator whose result feeds an add
// is expected to take the other add operand as its accumulator, removing
// both the constant and the add. Covered for f32 (addf) and i8 (addi).
#contraction_accesses0 = [
  affine_map<(i, j, k) -> (i, k)>,
  affine_map<(i, j, k) -> (k, j)>,
  affine_map<(i, j, k) -> (i, j)>
]
#contraction_trait0 = {
  indexing_maps = #contraction_accesses0,
  iterator_types = ["parallel", "parallel", "reduction"]
}

// CHECK-LABEL: func @contractions
//  CHECK-SAME:   %[[A:[0-9a-zA-Z]+]]: vector<2x3xf32>
//  CHECK-SAME:   %[[B:[0-9a-zA-Z]+]]: vector<3x4xf32>
//  CHECK-SAME:   %[[C:[0-9a-zA-Z]+]]: vector<2x4xf32>
//  CHECK-SAME:   %[[A_I8:[0-9a-zA-Z]+]]: vector<2x3xi8>
//  CHECK-SAME:   %[[B_I8:[0-9a-zA-Z]+]]: vector<3x4xi8>
//  CHECK-SAME:   %[[C_I8:[0-9a-zA-Z]+]]: vector<2x4xi8>
func.func @contractions(%a: vector<2x3xf32>, %b: vector<3x4xf32>, %c: vector<2x4xf32>,
                   %a_i8: vector<2x3xi8>, %b_i8: vector<3x4xi8>, %c_i8: vector<2x4xi8>)
  -> (vector<2x4xf32>, vector<2x4xi8>)
{
  // CHECK-NOT: arith.constant
  %vf_0 = arith.constant dense <0.0>: vector<2x4xf32>
  // CHECK-NOT: arith.addf
  //     CHECK: %[[D:.*]] = vector.contract {{.*}} %[[A]], %[[B]], %[[C]]
  %0 = vector.contract #contraction_trait0 %a, %b, %vf_0:
    vector<2x3xf32>, vector<3x4xf32> into vector<2x4xf32>
  // CHECK-NOT: arith.addf
  %1 = arith.addf %0, %c: vector<2x4xf32>

  // CHECK-NOT: arith.constant
  %vi8_0 = arith.constant dense <0>: vector<2x4xi8>
  // CHECK-NOT: arith.addi
  //     CHECK: %[[D_I8:.*]] = vector.contract {{.*}} %[[A_I8]], %[[B_I8]], %[[C_I8]]
  %i8_0 = vector.contract #contraction_trait0 %a_i8, %b_i8, %vi8_0:
    vector<2x3xi8>, vector<3x4xi8> into vector<2x4xi8>
  // CHECK-NOT: arith.addi
  %i8_1 = arith.addi %i8_0, %c_i8: vector<2x4xi8>

  // CHECK: return %[[D]], %[[D_I8]]
  return %1, %i8_1: vector<2x4xf32>, vector<2x4xi8>
}
// -----

// A full-tensor in-bounds transfer_read of %t0 is written back into three
// same-shaped tensors; every write result is expected to fold to %t0, so
// only the opaque side-effecting op remains before the return.
// CHECK-LABEL: func @transfer_folding_1
//  CHECK-SAME:   %[[T0:[0-9a-zA-Z]+]]: tensor<2x3x4xf32>
//  CHECK-SAME:   %[[T1:[0-9a-zA-Z]+]]: tensor<2x3x4xf32>
func.func @transfer_folding_1(%t0: tensor<2x3x4xf32>, %t1: tensor<2x3x4xf32>)
  -> (tensor<2x3x4xf32>, tensor<2x3x4xf32>, tensor<2x3x4xf32>)
{
  %c0 = arith.constant 0 : index
  %pad = arith.constant 0.0 : f32
  %v = vector.transfer_read %t0[%c0, %c0, %c0], %pad {in_bounds = [true, true, true]} :
    tensor<2x3x4xf32>, vector<2x3x4xf32>

  %r0 = vector.transfer_write %v, %t1[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
    vector<2x3x4xf32>, tensor<2x3x4xf32>

  %t2 = "test.constant"() { value = dense<6.0> : tensor<2x3x4xf32>} : () -> (tensor<2x3x4xf32>)
  %r1 = vector.transfer_write %v, %t2[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
    vector<2x3x4xf32>, tensor<2x3x4xf32>

  // CHECK-NEXT: some_op_that_may_have_side_effects
  %t3 = "some_op_that_may_have_side_effects"() : () -> (tensor<2x3x4xf32>)
  %r2 = vector.transfer_write %v, %t0[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
    vector<2x3x4xf32>, tensor<2x3x4xf32>

  // CHECK-NEXT: return %[[T0]], %[[T0]], %[[T0]]
  return %r0, %r1, %r2: tensor<2x3x4xf32>, tensor<2x3x4xf32>, tensor<2x3x4xf32>
}
// -----

// Writing back exactly what was just read from the same tensor at the same
// indices is expected to fold to the original tensor.
// CHECK-LABEL: func @store_after_load_tensor
//  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>)
//   CHECK-NOT:   vector.transfer_read
//   CHECK-NOT:   vector.transfer_write
//       CHECK:   return %[[ARG]] : tensor<4x4xf32>
func.func @store_after_load_tensor(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %0 = vector.transfer_read %arg0[%c1, %c0], %cf0 :
    tensor<4x4xf32>, vector<1x4xf32>
  %w0 = vector.transfer_write %0, %arg0[%c1, %c0] :
    vector<1x4xf32>, tensor<4x4xf32>
  return %w0 : tensor<4x4xf32>
}
// -----

// Negative case: the write targets different indices than the read, so both
// transfer ops are expected to remain.
// CHECK-LABEL: func @store_after_load_tensor_negative
//       CHECK:   vector.transfer_read
//       CHECK:   vector.transfer_write
func.func @store_after_load_tensor_negative(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %0 = vector.transfer_read %arg0[%c1, %c0], %cf0 :
    tensor<4x4xf32>, vector<1x4xf32>
  %w0 = vector.transfer_write %0, %arg0[%c0, %c0] :
    vector<1x4xf32>, tensor<4x4xf32>
  return %w0 : tensor<4x4xf32>
}
// -----

// A read at [1, 0] after writes at [1, 0] and then [2, 0] is expected to be
// forwarded the vector written at [1, 0], leaving no transfer ops.
// CHECK-LABEL: func @store_to_load_tensor
//  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>, %[[V0:.*]]: vector<1x4xf32>, %[[V1:.*]]: vector<1x4xf32>)
//   CHECK-NOT:   vector.transfer_write
//   CHECK-NOT:   vector.transfer_read
//       CHECK:   return %[[V0]] : vector<1x4xf32>
func.func @store_to_load_tensor(%arg0 : tensor<4x4xf32>,
  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>) -> vector<1x4xf32> {
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, tensor<4x4xf32>
  %w1 = vector.transfer_write %v1, %w0[%c2, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, tensor<4x4xf32>
  %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
    tensor<4x4xf32>, vector<1x4xf32>
  return %0 : vector<1x4xf32>
}
// -----

// Negative case: the intervening write uses dynamic indices [%i, %i], so the
// read is not forwarded and all three transfer ops are expected to remain.
// CHECK-LABEL: func @store_to_load_negative_tensor
//       CHECK:   vector.transfer_write
//       CHECK:   vector.transfer_write
//       CHECK:   %[[V:.*]] = vector.transfer_read
//       CHECK:   return %[[V]] : vector<1x4xf32>
func.func @store_to_load_negative_tensor(%arg0 : tensor<4x4xf32>,
  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> vector<1x4xf32> {
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, tensor<4x4xf32>
  %w1 = vector.transfer_write %v0, %w0[%i, %i] {in_bounds = [true, true]} :
    vector<1x4xf32>, tensor<4x4xf32>
  %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
    tensor<4x4xf32>, vector<1x4xf32>
  return %0 : vector<1x4xf32>
}
// -----

// A read-after-write whose read permutation map adds a broadcast dimension
// is expected to be rewritten as a broadcast of the written vector followed
// by a transpose.
// CHECK-LABEL: func @store_to_load_tensor_broadcast
//  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>, %[[V0:.*]]: vector<4x2xf32>)
//       CHECK:   %[[B:.*]] = vector.broadcast %[[V0]] : vector<4x2xf32> to vector<6x4x2xf32>
//       CHECK:   %[[T:.*]] = vector.transpose %[[B]], [1, 2, 0] : vector<6x4x2xf32> to vector<4x2x6xf32>
//       CHECK:   return %[[T]] : vector<4x2x6xf32>
func.func @store_to_load_tensor_broadcast(%arg0 : tensor<4x4xf32>,
  %v0 : vector<4x2xf32>) -> vector<4x2x6xf32> {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %w0 = vector.transfer_write %v0, %arg0[%c0, %c0] {in_bounds = [true, true]} :
    vector<4x2xf32>, tensor<4x4xf32>
  %0 = vector.transfer_read %w0[%c0, %c0], %cf0 {in_bounds = [true, true, true],
  permutation_map = affine_map<(d0, d1) -> (d0, d1, 0)>} :
    tensor<4x4xf32>, vector<4x2x6xf32>
  return %0 : vector<4x2x6xf32>
}
// -----

// Scalable-vector variant: the broadcasting read-after-write is expected to
// become a single broadcast of the written vector (no transpose needed).
// CHECK-LABEL: func @store_to_load_tensor_broadcast_scalable
//  CHECK-SAME: (%[[ARG:.*]]: tensor<?xf32>, %[[V0:.*]]: vector<[4]xf32>)
//       CHECK:   %[[B:.*]] = vector.broadcast %[[V0]] : vector<[4]xf32> to vector<6x[4]xf32>
//       CHECK:   return %[[B]] : vector<6x[4]xf32>
func.func @store_to_load_tensor_broadcast_scalable(%arg0 : tensor<?xf32>,
  %v0 : vector<[4]xf32>) -> vector<6x[4]xf32> {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %w0 = vector.transfer_write %v0, %arg0[%c0] {in_bounds = [true]} :
    vector<[4]xf32>, tensor<?xf32>
  %0 = vector.transfer_read %w0[%c0], %cf0 {in_bounds = [true, true],
  permutation_map = affine_map<(d0) -> (0, d0)>} :
    tensor<?xf32>, vector<6x[4]xf32>
  return %0 : vector<6x[4]xf32>
}
// -----

// Both the write and the read carry permutation maps, and the read also
// broadcasts; the pair is expected to become a broadcast of the written
// vector followed by a transpose.
// CHECK-LABEL: func @store_to_load_tensor_perm_broadcast
//  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4x4xf32>, %[[V0:.*]]: vector<4x1xf32>)
//       CHECK:   %[[B:.*]] = vector.broadcast %[[V0]] : vector<4x1xf32> to vector<100x5x4x1xf32>
//       CHECK:   %[[T:.*]] = vector.transpose %[[B]], [3, 0, 2, 1] : vector<100x5x4x1xf32> to vector<1x100x4x5xf32>
//       CHECK:   return %[[T]] : vector<1x100x4x5xf32>
func.func @store_to_load_tensor_perm_broadcast(%arg0 : tensor<4x4x4xf32>,
  %v0 : vector<4x1xf32>) -> vector<1x100x4x5xf32> {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %w0 = vector.transfer_write %v0, %arg0[%c0, %c0, %c0] {in_bounds = [true, true],
  permutation_map = affine_map<(d0, d1, d2) -> (d2, d1)>} :
    vector<4x1xf32>, tensor<4x4x4xf32>
  %0 = vector.transfer_read %w0[%c0, %c0, %c0], %cf0 {in_bounds = [true, true, true, true],
  permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} :
    tensor<4x4x4xf32>, vector<1x100x4x5xf32>
  return %0 : vector<1x100x4x5xf32>
}
// -----

// The first write at [1, 0] is overwritten by the last write at the same
// indices with no read in between, so it is expected to be removed; the
// write at [2, 0] and the final write at [1, 0] remain.
// CHECK-LABEL: func @dead_store_tensor
//   CHECK-DAG:      %[[C0:.*]] = arith.constant 0 : index
//   CHECK-DAG:      %[[C1:.*]] = arith.constant 1 : index
//   CHECK-DAG:      %[[C2:.*]] = arith.constant 2 : index
//   CHECK-NOT:   vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]
//       CHECK:   vector.transfer_write {{.*}}, {{.*}}[%[[C2]], %[[C0]]
//       CHECK:   %[[VTW:.*]] = vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]
//       CHECK:   return %[[VTW]] : tensor<4x4xf32>
func.func @dead_store_tensor(%arg0 : tensor<4x4xf32>,
  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> tensor<4x4xf32> {
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, tensor<4x4xf32>
  %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, tensor<4x4xf32>
  %w2 = vector.transfer_write %v1, %w1[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, tensor<4x4xf32>
  return %w2 : tensor<4x4xf32>
}
// -----

// Negative case: a read at dynamic indices sits between the writes and its
// value feeds the last write, so no write is removed; all three writes and
// the read are expected to remain.
// CHECK-LABEL: func @dead_store_tensor_negative
//   CHECK-DAG:      %[[C0:.*]] = arith.constant 0 : index
//   CHECK-DAG:      %[[C1:.*]] = arith.constant 1 : index
//       CHECK:   vector.transfer_write
//       CHECK:   vector.transfer_write
//       CHECK:   vector.transfer_read
//       CHECK:   %[[VTW:.*]] = vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]]
//       CHECK:   return %[[VTW]] : tensor<4x4xf32>
func.func @dead_store_tensor_negative(%arg0 : tensor<4x4xf32>,
  %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> tensor<4x4xf32> {
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, tensor<4x4xf32>
  %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, tensor<4x4xf32>
  %0 = vector.transfer_read %w1[%i, %i], %cf0 {in_bounds = [true, true]} :
    tensor<4x4xf32>, vector<1x4xf32>
  %x = arith.addf %0, %0 : vector<1x4xf32>
  %w2 = vector.transfer_write %x, %w0[%c1, %c0] {in_bounds = [true, true]} :
    vector<1x4xf32>, tensor<4x4xf32>
  return %w2 : tensor<4x4xf32>
}
// -----

// A transfer_write followed by a matching extract_slice / insert_slice pair
// is expected to be reordered so that the write targets a slice extracted
// from the insertion destination. The split marker above matters: the
// affine_map alias checked first is printed at the top of this split.
// CHECK: #[[$MAP:[0-9a-z]+]] = affine_map<(d0, d1) -> (d1, d0)>

// CHECK-LABEL: func @swap_extract_slice_transfer_write
//  CHECK-SAME:   %[[VEC:.*]]: vector<8x4xf32>
//  CHECK-SAME:   %[[INIT_TENSOR:.*]]: tensor<4x8xf32>,
//  CHECK-SAME:   %[[ITER_ARG:.*]]: tensor<64x64xf32>,
//  CHECK-SAME:   %[[IV:.*]]: index, %[[SZ:.*]]: index)
func.func @swap_extract_slice_transfer_write(%arg0 : vector<8x4xf32>,
                                        %arg1 : tensor<4x8xf32>,
                                        %arg2 : tensor<64x64xf32>,
                                        %iv : index, %sz : index) -> tensor<64x64xf32> {
  //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
  %c0 = arith.constant 0 : index

  //       CHECK:   %[[T0:.*]] = tensor.extract_slice %[[ITER_ARG]]
  //  CHECK-SAME:                 [%[[IV]], 16] [%[[SZ]], 8]
  //       CHECK:   %[[T1:.*]] = vector.transfer_write %[[VEC]]
  //  CHECK-SAME:                 %[[T0]][%[[C0]], %[[C0]]]
  //  CHECK-SAME:                 in_bounds = [true, false]
  //  CHECK-SAME:                 permutation_map = #[[$MAP]]
  //       CHECK:   %[[T2:.*]] = tensor.insert_slice %[[T1]] into %[[ITER_ARG]]
  //  CHECK-SAME:                 [%[[IV]], 16] [%[[SZ]], 8]
  %0 = vector.transfer_write %arg0, %arg1[%c0, %c0] {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<8x4xf32>, tensor<4x8xf32>
  %1 = tensor.extract_slice %0[0, 0] [%sz, 8] [1, 1] : tensor<4x8xf32> to tensor<?x8xf32>
  %2 = tensor.insert_slice %1 into %arg2[%iv, 16] [%sz, 8] [1, 1] : tensor<?x8xf32> into tensor<64x64xf32>

  //       CHECK:   return %[[T2]]
  func.return %2 : tensor<64x64xf32>
}
// -----

// Negative cases for reordering a transfer_write with an extract_slice /
// insert_slice pair: in each of the three sequences below the original op
// order is expected to be preserved.
// CHECK-LABEL: func @do_not_swap_extract_slice_transfer_write
//  CHECK-SAME:   %[[VEC:.*]]: vector<8xf32>,
//  CHECK-SAME:   %[[VEC_SMALL:.*]]: vector<4xf32>,
//  CHECK-SAME:   %[[INIT_TENSOR:.*]]: tensor<8xf32>,
//  CHECK-SAME:   %[[ITER_ARG:.*]]: tensor<64xf32>,
//  CHECK-SAME:   %[[IV:.*]]: index, %[[SZ:.*]]: index)
func.func @do_not_swap_extract_slice_transfer_write(%arg0 : vector<8xf32>,
                                               %arg1 : vector<4xf32>,
                                               %arg2 : tensor<8xf32>,
                                               %arg3 : tensor<64xf32>,
                                               %iv : index, %sz : index) -> (tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) {
  //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
  %c0 = arith.constant 0 : index

  // Don't swap if the extracted and inserted slices do not match.
  //       CHECK:   %[[T0:.*]] = vector.transfer_write %[[VEC]]
  //       CHECK:   %[[T1:.*]] = tensor.extract_slice %[[T0]]
  //       CHECK:   %[[T2:.*]] = tensor.insert_slice %[[T1]]
  %0 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = [true]} : vector<8xf32>, tensor<8xf32>
  %1 = tensor.extract_slice %0[0] [%iv] [1] : tensor<8xf32> to tensor<?xf32>
  %2 = tensor.insert_slice %1 into %arg3[%iv] [%sz] [1] : tensor<?xf32> into tensor<64xf32>

  // Don't swap if the TransferWriteOp takes a small vector.
  //       CHECK:   %[[T3:.*]] = vector.transfer_write %[[VEC_SMALL]]
  //       CHECK:   %[[T4:.*]] = tensor.extract_slice %[[T3]]
  //       CHECK:   %[[T5:.*]] = tensor.insert_slice %[[T4]]
  %3 = vector.transfer_write %arg1, %arg2[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<8xf32>
  %4 = tensor.extract_slice %3[0] [%sz] [1] : tensor<8xf32> to tensor<?xf32>
  %5 = tensor.insert_slice %4 into %arg3[%iv] [%sz] [1] : tensor<?xf32> into tensor<64xf32>

  // Don't swap if one of the operations is rank-reducing.
  //       CHECK:   %[[T6:.*]] = vector.transfer_write %[[VEC]]
  //       CHECK:   %[[T7:.*]] = tensor.extract_slice %[[T6]]
  //       CHECK:   %[[T8:.*]] = tensor.insert_slice %[[T7]]
  %6 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = [true]} : vector<8xf32>, tensor<8xf32>
  %7 = tensor.extract_slice %6[0] [1] [1] : tensor<8xf32> to tensor<f32>
  %8 = tensor.insert_slice %7 into %arg3[%iv] [1] [1] : tensor<f32> into tensor<64xf32>

  //       CHECK:   return %[[T2]], %[[T5]], %[[T8]]
  func.return %2, %5, %8 : tensor<64xf32>, tensor<64xf32>, tensor<64xf32>
}
1536 // CHECK-LABEL: func @vector_multi_reduction_single_parallel(
1537 // CHECK-SAME: %[[v:.*]]: vector<2xf32>,
1538 func.func @vector_multi_reduction_single_parallel(%arg0: vector<2xf32>, %acc: vector<2xf32>) -> vector<2xf32> {
1539 %0 = vector.multi_reduction <mul>, %arg0, %acc [] : vector<2xf32> to vector<2xf32>
1541 // CHECK: return %[[v]] : vector<2xf32>
1542 return %0 : vector<2xf32>
1547 // CHECK-LABEL: func @masked_vector_multi_reduction_single_parallel(
1548 // CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, %{{.*}}: vector<2xf32>,
1549 func.func @masked_vector_multi_reduction_single_parallel(%arg0: vector<2xf32>, %acc: vector<2xf32>, %mask: vector<2xi1>) -> vector<2xf32> {
1550 %0 = vector.mask %mask { vector.multi_reduction <mul>, %arg0, %acc [] : vector<2xf32> to vector<2xf32> } : vector<2xi1> -> vector<2xf32>
1551 // CHECK: return %[[VAL_0]] : vector<2xf32>
1552 return %0 : vector<2xf32>
1557 // CHECK-LABEL: func @vector_multi_reduction_unit_dimensions(
1558 // CHECK-SAME: %[[SOURCE:.+]]: vector<5x1x4x1x20xf32>, %[[ACC:.+]]: vector<5x4x20xf32>
1559 func.func @vector_multi_reduction_unit_dimensions(%source: vector<5x1x4x1x20xf32>, %acc: vector<5x4x20xf32>) -> vector<5x4x20xf32> {
1560 // CHECK: %[[CAST:.+]] = vector.shape_cast %[[SOURCE]] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
1561 // CHECK: %[[RESULT:.+]] = arith.mulf %[[ACC]], %[[CAST]] : vector<5x4x20xf32>
1562 %0 = vector.multi_reduction <mul>, %source, %acc [1, 3] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
1564 // CHECK: return %[[RESULT]] : vector<5x4x20xf32>
1565 return %0 : vector<5x4x20xf32>
1569 // CHECK-LABEL: func.func @vector_multi_reduction_scalable(
1570 // CHECK-SAME: %[[VAL_0:.*]]: vector<1x[4]x1xf32>,
1571 // CHECK-SAME: %[[VAL_1:.*]]: vector<1x[4]xf32>,
1572 // CHECK-SAME: %[[VAL_2:.*]]: vector<1x[4]x1xi1>)
1573 func.func @vector_multi_reduction_scalable(%source: vector<1x[4]x1xf32>,
1574 %acc: vector<1x[4]xf32>,
1575 %mask: vector<1x[4]x1xi1>) -> vector<1x[4]xf32> {
1576 // CHECK: %[[VAL_3:.*]] = vector.shape_cast %[[VAL_2]] : vector<1x[4]x1xi1> to vector<1x[4]xi1>
1577 // CHECK: %[[VAL_4:.*]] = vector.shape_cast %[[VAL_0]] : vector<1x[4]x1xf32> to vector<1x[4]xf32>
1578 // CHECK: %[[VAL_5:.*]] = arith.addf %[[VAL_1]], %[[VAL_4]] : vector<1x[4]xf32>
1579 // CHECK: %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : vector<1x[4]xi1>, vector<1x[4]xf32>
1580 %0 = vector.mask %mask { vector.multi_reduction <add>, %source, %acc [2] : vector<1x[4]x1xf32> to vector<1x[4]xf32> } :
1581 vector<1x[4]x1xi1> -> vector<1x[4]xf32>
1583 return %0 : vector<1x[4]xf32>
1588 // CHECK-LABEL: func @masked_vector_multi_reduction_unit_dimensions
1589 // CHECK-SAME: %[[VAL_0:.*]]: vector<5x1x4x1x20xf32>, %[[VAL_1:.*]]: vector<5x4x20xf32>,
1590 // CHECK-SAME: %[[VAL_2:.*]]: vector<5x1x4x1x20xi1>)
1591 func.func @masked_vector_multi_reduction_unit_dimensions(%source: vector<5x1x4x1x20xf32>,
1592 %acc: vector<5x4x20xf32>,
1593 %mask: vector<5x1x4x1x20xi1>) -> vector<5x4x20xf32> {
1594 // CHECK: %[[VAL_3:.*]] = vector.shape_cast %[[VAL_2]] : vector<5x1x4x1x20xi1> to vector<5x4x20xi1>
1595 // CHECK: %[[VAL_4:.*]] = vector.shape_cast %[[VAL_0]] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
1596 // CHECK: %[[VAL_5:.*]] = arith.mulf %[[VAL_1]], %[[VAL_4]] : vector<5x4x20xf32>
1597 // CHECK: %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : vector<5x4x20xi1>, vector<5x4x20xf32>
1598 %0 = vector.mask %mask { vector.multi_reduction <mul>, %source, %acc [1, 3] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32> } :
1599 vector<5x1x4x1x20xi1> -> vector<5x4x20xf32>
1600 return %0 : vector<5x4x20xf32>
1605 // CHECK-LABEL: func @vector_multi_reduction_unit_dimensions_fail(
1606 // CHECK-SAME: %[[SRC:.+]]: vector<5x1x4x1x20xf32>, %[[ACCUM:.+]]: vector<5x1x20xf32>
1607 func.func @vector_multi_reduction_unit_dimensions_fail(%source: vector<5x1x4x1x20xf32>, %acc: vector<5x1x20xf32>) -> vector<5x1x20xf32> {
1608 // CHECK: %[[RES:.+]] = vector.multi_reduction <mul>, %[[SRC]], %[[ACCUM]] [1, 2] : vector<5x1x4x1x20xf32> to vector<5x1x20xf32>
1609 %0 = vector.multi_reduction <mul>, %source, %acc [1, 2] : vector<5x1x4x1x20xf32> to vector<5x1x20xf32>
1611 // CHECK: return %[[RES]] : vector<5x1x20xf32>
1612 return %0 : vector<5x1x20xf32>
1617 // CHECK-LABEL: func @vector_multi_reduction_unit_dimensions_single_elem(
1618 // CHECK-SAME: %[[SOURCE:.+]]: vector<1x1x1xf32>, %[[ACC:.+]]: f32
1619 func.func @vector_multi_reduction_unit_dimensions_single_elem(%source: vector<1x1x1xf32>, %acc: f32) -> f32 {
1620 // CHECK: %[[CAST:.+]] = vector.extract %[[SOURCE]][0, 0, 0] : f32 from vector<1x1x1xf32>
1621 // CHECK: %[[RESULT:.+]] = arith.mulf %[[ACC]], %[[CAST]] : f32
1622 %0 = vector.multi_reduction <mul>, %source, %acc [0,1,2] : vector<1x1x1xf32> to f32
1624 // CHECK: return %[[RESULT]] : f32
1630 // CHECK-LABEL: func @masked_vector_multi_reduction_unit_dimensions_single_elem(
1631 // CHECK-SAME: %[[VAL_0:.*]]: vector<1x1x1xf32>, %[[VAL_1:.*]]: f32,
1632 // CHECK-SAME: %[[VAL_2:.*]]: vector<1x1x1xi1>)
1633 func.func @masked_vector_multi_reduction_unit_dimensions_single_elem(%source: vector<1x1x1xf32>, %acc: f32, %mask: vector<1x1x1xi1>) -> f32 {
1634 // CHECK: %[[VAL_3:.*]] = vector.extract %[[VAL_2]][0, 0, 0] : i1 from vector<1x1x1xi1>
1635 // CHECK: %[[VAL_4:.*]] = vector.extract %[[VAL_0]][0, 0, 0] : f32 from vector<1x1x1xf32>
1636 // CHECK: %[[VAL_5:.*]] = arith.mulf %[[VAL_1]], %[[VAL_4]] : f32
1637 // CHECK: %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : f32
1638 %0 = vector.mask %mask { vector.multi_reduction <mul>, %source, %acc [0,1,2] : vector<1x1x1xf32> to f32 } : vector<1x1x1xi1> -> f32
1644 // CHECK-LABEL: func @insert_strided_slice_full_range
1645 // CHECK-SAME: %[[SOURCE:.+]]: vector<16x16xf16>, %{{.+}}: vector<16x16xf16>
1646 func.func @insert_strided_slice_full_range(%source: vector<16x16xf16>, %dest: vector<16x16xf16>) -> vector<16x16xf16> {
1647 %0 = vector.insert_strided_slice %source, %dest {offsets = [0, 0], strides = [1, 1]} : vector<16x16xf16> into vector<16x16xf16>
1648 // CHECK: return %[[SOURCE]]
1649 return %0: vector<16x16xf16>
1654 // CHECK-LABEL: extract_strided_splat
1655 // CHECK: %[[B:.*]] = vector.splat %{{.*}} : vector<2x4xf16>
1656 // CHECK-NEXT: return %[[B]] : vector<2x4xf16>
1657 func.func @extract_strided_splat(%arg0: f16) -> vector<2x4xf16> {
1658 %0 = vector.splat %arg0 : vector<16x4xf16>
1659 %1 = vector.extract_strided_slice %0
1660 {offsets = [1, 0], sizes = [2, 4], strides = [1, 1]} :
1661 vector<16x4xf16> to vector<2x4xf16>
1662 return %1 : vector<2x4xf16>
1667 // CHECK-LABEL: func @insert_extract_to_broadcast
1668 // CHECK-SAME: (%[[ARG0:.*]]: vector<1x1x4xf32>, %[[ARG1:.*]]: vector<4xf32>)
1669 // CHECK: %[[V0:.*]] = vector.extract %[[ARG0]][0, 0] : vector<4xf32> from vector<1x1x4xf32>
1670 // CHECK: %[[V1:.*]] = vector.broadcast %[[ARG1]] : vector<4xf32> to vector<1x1x4xf32>
1671 // CHECK: return %[[V0]], %[[V1]] : vector<4xf32>, vector<1x1x4xf32>
1672 func.func @insert_extract_to_broadcast(%arg0 : vector<1x1x4xf32>,
1673 %arg1 : vector<4xf32>) -> (vector<4xf32>, vector<1x1x4xf32>) {
1674 %0 = vector.extract %arg0[0, 0] : vector<4xf32> from vector<1x1x4xf32>
1675 %1 = vector.insert %arg1, %arg0 [0, 0] : vector<4xf32> into vector<1x1x4xf32>
1676 return %0, %1 : vector<4xf32>, vector<1x1x4xf32>
1681 // CHECK-LABEL: func.func @extract_splat_constant
1682 // CHECK-DAG: %[[CST1:.*]] = arith.constant 1 : i32
1683 // CHECK-DAG: %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<7xf32>
1684 // CHECK-NEXT: return %[[CST0]], %[[CST1]] : vector<7xf32>, i32
1685 func.func @extract_splat_constant() -> (vector<7xf32>, i32) {
1686 %cst = arith.constant dense<2.000000e+00> : vector<29x7xf32>
1687 %cst_1 = arith.constant dense<1> : vector<4x37x9xi32>
1688 %0 = vector.extract %cst[2] : vector<7xf32> from vector<29x7xf32>
1689 %1 = vector.extract %cst_1[1, 4, 5] : i32 from vector<4x37x9xi32>
1690 return %0, %1 : vector<7xf32>, i32
1695 // CHECK-LABEL: func.func @extract_1d_constant
1696 // CHECK-DAG: %[[I32CST:.*]] = arith.constant 3 : i32
1697 // CHECK-DAG: %[[IDXCST:.*]] = arith.constant 1 : index
1698 // CHECK-DAG: %[[F32CST:.*]] = arith.constant 2.000000e+00 : f32
1699 // CHECK-NEXT: return %[[I32CST]], %[[IDXCST]], %[[F32CST]] : i32, index, f32
1700 func.func @extract_1d_constant() -> (i32, index, f32) {
1701 %icst = arith.constant dense<[1, 2, 3, 4]> : vector<4xi32>
1702 %e = vector.extract %icst[2] : i32 from vector<4xi32>
1703 %idx_cst = arith.constant dense<[0, 1, 2]> : vector<3xindex>
1704 %f = vector.extract %idx_cst[1] : index from vector<3xindex>
1705 %fcst = arith.constant dense<[2.000000e+00, 3.000000e+00, 4.000000e+00]> : vector<3xf32>
1706 %g = vector.extract %fcst[0] : f32 from vector<3xf32>
1707 return %e, %f, %g : i32, index, f32
1712 // CHECK-LABEL: func.func @extract_2d_constant
1713 // CHECK-DAG: %[[ACST:.*]] = arith.constant 0 : i32
1714 // CHECK-DAG: %[[BCST:.*]] = arith.constant 2 : i32
1715 // CHECK-DAG: %[[CCST:.*]] = arith.constant 3 : i32
1716 // CHECK-DAG: %[[DCST:.*]] = arith.constant 5 : i32
1717 // CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : i32, i32, i32, i32
1718 func.func @extract_2d_constant() -> (i32, i32, i32, i32) {
1719 %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
1720 %a = vector.extract %cst[0, 0] : i32 from vector<2x3xi32>
1721 %b = vector.extract %cst[0, 2] : i32 from vector<2x3xi32>
1722 %c = vector.extract %cst[1, 0] : i32 from vector<2x3xi32>
1723 %d = vector.extract %cst[1, 2] : i32 from vector<2x3xi32>
1724 return %a, %b, %c, %d : i32, i32, i32, i32
1729 // CHECK-LABEL: func.func @extract_vector_2d_constant
1730 // CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1731 // CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[3, 4, 5]> : vector<3xi32>
1732 // CHECK-NEXT: return %[[ACST]], %[[BCST]] : vector<3xi32>, vector<3xi32>
1733 func.func @extract_vector_2d_constant() -> (vector<3xi32>, vector<3xi32>) {
1734 %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
1735 %a = vector.extract %cst[0] : vector<3xi32> from vector<2x3xi32>
1736 %b = vector.extract %cst[1] : vector<3xi32> from vector<2x3xi32>
1737 return %a, %b : vector<3xi32>, vector<3xi32>
1742 // CHECK-LABEL: func.func @extract_3d_constant
1743 // CHECK-DAG: %[[ACST:.*]] = arith.constant 0 : i32
1744 // CHECK-DAG: %[[BCST:.*]] = arith.constant 1 : i32
1745 // CHECK-DAG: %[[CCST:.*]] = arith.constant 9 : i32
1746 // CHECK-DAG: %[[DCST:.*]] = arith.constant 10 : i32
1747 // CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : i32, i32, i32, i32
1748 func.func @extract_3d_constant() -> (i32, i32, i32, i32) {
1749 %cst = arith.constant dense<[[[0, 1], [2, 3], [4, 5]], [[6, 7], [8, 9], [10, 11]]]> : vector<2x3x2xi32>
1750 %a = vector.extract %cst[0, 0, 0] : i32 from vector<2x3x2xi32>
1751 %b = vector.extract %cst[0, 0, 1] : i32 from vector<2x3x2xi32>
1752 %c = vector.extract %cst[1, 1, 1] : i32 from vector<2x3x2xi32>
1753 %d = vector.extract %cst[1, 2, 0] : i32 from vector<2x3x2xi32>
1754 return %a, %b, %c, %d : i32, i32, i32, i32
1759 // CHECK-LABEL: func.func @extract_vector_3d_constant
1760 // CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
1761 // CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[6, 7\], \[8, 9\], \[10, 11\]\]}}> : vector<3x2xi32>
1762 // CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[8, 9]> : vector<2xi32>
1763 // CHECK-DAG: %[[DCST:.*]] = arith.constant dense<[10, 11]> : vector<2xi32>
1764 // CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>
1765 func.func @extract_vector_3d_constant() -> (vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>) {
1766 %cst = arith.constant dense<[[[0, 1], [2, 3], [4, 5]], [[6, 7], [8, 9], [10, 11]]]> : vector<2x3x2xi32>
1767 %a = vector.extract %cst[0] : vector<3x2xi32> from vector<2x3x2xi32>
1768 %b = vector.extract %cst[1] : vector<3x2xi32> from vector<2x3x2xi32>
1769 %c = vector.extract %cst[1, 1] : vector<2xi32> from vector<2x3x2xi32>
1770 %d = vector.extract %cst[1, 2] : vector<2xi32> from vector<2x3x2xi32>
1771 return %a, %b, %c, %d : vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>
1776 // CHECK-LABEL: func.func @extract_splat_vector_3d_constant
1777 // CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<2xi32>
1778 // CHECK-DAG: %[[BCST:.*]] = arith.constant dense<4> : vector<2xi32>
1779 // CHECK-DAG: %[[CCST:.*]] = arith.constant dense<5> : vector<2xi32>
1780 // CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<2xi32>, vector<2xi32>, vector<2xi32>
1781 func.func @extract_splat_vector_3d_constant() -> (vector<2xi32>, vector<2xi32>, vector<2xi32>) {
1782 %cst = arith.constant dense<[[[0, 0], [1, 1], [2, 2]], [[3, 3], [4, 4], [5, 5]]]> : vector<2x3x2xi32>
1783 %a = vector.extract %cst[0, 0] : vector<2xi32> from vector<2x3x2xi32>
1784 %b = vector.extract %cst[1, 1] : vector<2xi32> from vector<2x3x2xi32>
1785 %c = vector.extract %cst[1, 2] : vector<2xi32> from vector<2x3x2xi32>
1786 return %a, %b, %c : vector<2xi32>, vector<2xi32>, vector<2xi32>
1791 // CHECK-LABEL: func.func @extract_strided_slice_1d_constant
1792 // CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1793 // CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[1, 2]> : vector<2xi32>
1794 // CHECK-DAG: %[[CCST:.*]] = arith.constant dense<2> : vector<1xi32>
1795 // CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<3xi32>, vector<2xi32>, vector<1xi32>
1796 func.func @extract_strided_slice_1d_constant() -> (vector<3xi32>, vector<2xi32>, vector<1xi32>) {
1797 %cst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1798 %a = vector.extract_strided_slice %cst
1799 {offsets = [0], sizes = [3], strides = [1]} : vector<3xi32> to vector<3xi32>
1800 %b = vector.extract_strided_slice %cst
1801 {offsets = [1], sizes = [2], strides = [1]} : vector<3xi32> to vector<2xi32>
1802 %c = vector.extract_strided_slice %cst
1803 {offsets = [2], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32>
1804 return %a, %b, %c : vector<3xi32>, vector<2xi32>, vector<1xi32>
1809 // CHECK-LABEL: func.func @extract_strided_slice_2d_constant
1810 // CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<1x1xi32>
1811 // CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[4, 5\]\]}}> : vector<1x2xi32>
1812 // CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[1, 2\], \[4, 5\]\]}}> : vector<2x2xi32>
1813 // CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>
1814 func.func @extract_strided_slice_2d_constant() -> (vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>) {
1815 %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
1816 %a = vector.extract_strided_slice %cst
1817 {offsets = [0, 0], sizes = [1, 1], strides = [1, 1]} : vector<2x3xi32> to vector<1x1xi32>
1818 %b = vector.extract_strided_slice %cst
1819 {offsets = [1, 1], sizes = [1, 2], strides = [1, 1]} : vector<2x3xi32> to vector<1x2xi32>
1820 %c = vector.extract_strided_slice %cst
1821 {offsets = [0, 1], sizes = [2, 2], strides = [1, 1]} : vector<2x3xi32> to vector<2x2xi32>
1822 return %a, %b, %c : vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>
1827 // CHECK-LABEL: func.func @extract_strided_slice_3d_constant
1828 // CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[\[8, 9\], \[10, 11\]\]\]}}> : vector<1x2x2xi32>
1829 // CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[\[2, 3\]\]\]}}> : vector<1x1x2xi32>
1830 // CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[\[6, 7\]\], \[\[10, 11\]\]\]}}> : vector<2x1x2xi32>
1831 // CHECK-DAG: %[[DCST:.*]] = arith.constant dense<11> : vector<1x1x1xi32>
1832 // CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]]
1833 func.func @extract_strided_slice_3d_constant() -> (vector<1x2x2xi32>, vector<1x1x2xi32>, vector<2x1x2xi32>, vector<1x1x1xi32>) {
1834 %cst = arith.constant dense<[[[0, 1], [2, 3]], [[4, 5], [6, 7]], [[8, 9], [10, 11]]]> : vector<3x2x2xi32>
1835 %a = vector.extract_strided_slice %cst
1836 {offsets = [2], sizes = [1], strides = [1]} : vector<3x2x2xi32> to vector<1x2x2xi32>
1837 %b = vector.extract_strided_slice %cst
1838 {offsets = [0, 1], sizes = [1, 1], strides = [1, 1]} : vector<3x2x2xi32> to vector<1x1x2xi32>
1839 %c = vector.extract_strided_slice %cst
1840 {offsets = [1, 1, 0], sizes = [2, 1, 2], strides = [1, 1, 1]} : vector<3x2x2xi32> to vector<2x1x2xi32>
1841 %d = vector.extract_strided_slice %cst
1842 {offsets = [2, 1, 1], sizes = [1, 1, 1], strides = [1, 1, 1]} : vector<3x2x2xi32> to vector<1x1x1xi32>
1843 return %a, %b, %c, %d : vector<1x2x2xi32>, vector<1x1x2xi32>, vector<2x1x2xi32>, vector<1x1x1xi32>
1848 // CHECK-LABEL: extract_extract_strided
1849 // CHECK-SAME: %[[A:.*]]: vector<32x16x4xf16>
1850 // CHECK: %[[V:.*]] = vector.extract %[[A]][9, 7] : vector<4xf16> from vector<32x16x4xf16>
1851 // CHECK: return %[[V]] : vector<4xf16>
1852 func.func @extract_extract_strided(%arg0: vector<32x16x4xf16>) -> vector<4xf16> {
1853 %1 = vector.extract_strided_slice %arg0
1854 {offsets = [7, 3], sizes = [10, 8], strides = [1, 1]} :
1855 vector<32x16x4xf16> to vector<10x8x4xf16>
1856 %2 = vector.extract %1[2, 4] : vector<4xf16> from vector<10x8x4xf16>
1857 return %2 : vector<4xf16>
1862 // CHECK-LABEL: extract_insert_strided
1863 // CHECK-SAME: %[[A:.*]]: vector<6x4xf32>
1864 // CHECK: %[[V:.*]] = vector.extract %[[A]][0, 2] : f32 from vector<6x4xf32>
1865 // CHECK: return %[[V]] : f32
1866 func.func @extract_insert_strided(%a: vector<6x4xf32>, %b: vector<8x16xf32>)
1868 %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
1869 : vector<6x4xf32> into vector<8x16xf32>
1870 %2 = vector.extract %0[2, 4] : f32 from vector<8x16xf32>
1876 // CHECK-LABEL: extract_insert_rank_reduce
1877 // CHECK-SAME: %[[A:.*]]: vector<4xf32>
1878 // CHECK: %[[V:.*]] = vector.extract %[[A]][2] : f32 from vector<4xf32>
1879 // CHECK: return %[[V]] : f32
1880 func.func @extract_insert_rank_reduce(%a: vector<4xf32>, %b: vector<8x16xf32>)
1882 %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1]}
1883 : vector<4xf32> into vector<8x16xf32>
1884 %2 = vector.extract %0[2, 4] : f32 from vector<8x16xf32>
1890 // CHECK-LABEL: extract_insert_negative
1891 // CHECK: vector.insert_strided_slice
1892 // CHECK: vector.extract
1893 func.func @extract_insert_negative(%a: vector<2x15xf32>, %b: vector<12x8x16xf32>)
1895 %0 = vector.insert_strided_slice %a, %b {offsets = [4, 2, 0], strides = [1, 1]}
1896 : vector<2x15xf32> into vector<12x8x16xf32>
1897 %2 = vector.extract %0[4, 2] : vector<16xf32> from vector<12x8x16xf32>
1898 return %2 : vector<16xf32>
1903 // CHECK-LABEL: extract_insert_chain
1904 // CHECK-SAME: (%[[A:.*]]: vector<2x16xf32>, %[[B:.*]]: vector<12x8x16xf32>, %[[C:.*]]: vector<2x16xf32>)
1905 // CHECK: %[[V:.*]] = vector.extract %[[C]][0] : vector<16xf32> from vector<2x16xf32>
1906 // CHECK: return %[[V]] : vector<16xf32>
1907 func.func @extract_insert_chain(%a: vector<2x16xf32>, %b: vector<12x8x16xf32>, %c: vector<2x16xf32>)
1909 %0 = vector.insert_strided_slice %c, %b {offsets = [4, 2, 0], strides = [1, 1]}
1910 : vector<2x16xf32> into vector<12x8x16xf32>
1911 %1 = vector.insert_strided_slice %a, %0 {offsets = [0, 2, 0], strides = [1, 1]}
1912 : vector<2x16xf32> into vector<12x8x16xf32>
1913 %2 = vector.extract %1[4, 2] : vector<16xf32> from vector<12x8x16xf32>
1914 return %2 : vector<16xf32>
1919 // CHECK-LABEL: extract_from_extract_chain_should_not_fold_dynamic_extracts
1920 // CHECK-SAME: (%[[VEC:.*]]: vector<2x4xf32>, %[[IDX:.*]]: index)
1921 // CHECK: %[[A:.*]] = vector.extract %[[VEC]][%[[IDX]]] : vector<4xf32> from vector<2x4xf32>
1922 // CHECK: %[[B:.*]] = vector.extract %[[A]][1] : f32 from vector<4xf32>
1923 func.func @extract_from_extract_chain_should_not_fold_dynamic_extracts(%v: vector<2x4xf32>, %index: index) -> f32 {
1924 %0 = vector.extract %v[%index] : vector<4xf32> from vector<2x4xf32>
1925 %1 = vector.extract %0[1] : f32 from vector<4xf32>
1931 // CHECK-LABEL: extract_extract_strided2
1932 // CHECK-SAME: %[[A:.*]]: vector<2x4xf32>
1933 // CHECK: %[[V:.*]] = vector.extract %[[A]][1] : vector<4xf32> from vector<2x4xf32>
1934 // CHECK: return %[[V]] : vector<4xf32>
1935 func.func @extract_extract_strided2(%A: vector<2x4xf32>)
1936 -> (vector<4xf32>) {
1937 %0 = vector.extract_strided_slice %A {offsets = [1, 0], sizes = [1, 4], strides = [1, 1]} : vector<2x4xf32> to vector<1x4xf32>
1938 %1 = vector.extract %0[0] : vector<4xf32> from vector<1x4xf32>
1939 return %1 : vector<4xf32>
1944 // CHECK-LABEL: func @splat_fold
1945 func.func @splat_fold() -> vector<4xf32> {
1946 %c = arith.constant 1.0 : f32
1947 %v = vector.splat %c : vector<4xf32>
1948 return %v : vector<4xf32>
1950 // CHECK-NEXT: [[V:%.*]] = arith.constant dense<1.000000e+00> : vector<4xf32>
1951 // CHECK-NEXT: return [[V]] : vector<4xf32>
1956 // CHECK-LABEL: func @shuffle_1d
1957 // CHECK: %[[V:.+]] = arith.constant dense<[3, 2, 5, 1]> : vector<4xi32>
1958 // CHECK: return %[[V]]
1959 func.func @shuffle_1d() -> vector<4xi32> {
1960 %v0 = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1961 %v1 = arith.constant dense<[3, 4, 5]> : vector<3xi32>
1962 %shuffle = vector.shuffle %v0, %v1 [3, 2, 5, 1] : vector<3xi32>, vector<3xi32>
1963 return %shuffle : vector<4xi32>
1966 // CHECK-LABEL: func @shuffle_canonicalize_0d
1967 func.func @shuffle_canonicalize_0d(%v0 : vector<i32>, %v1 : vector<i32>) -> vector<1xi32> {
1968 // CHECK: vector.broadcast %{{.*}} : vector<i32> to vector<1xi32>
1969 %shuffle = vector.shuffle %v0, %v1 [0] : vector<i32>, vector<i32>
1970 return %shuffle : vector<1xi32>
1973 // CHECK-LABEL: func @shuffle_fold1
1974 // CHECK: %arg0 : vector<4xi32>
1975 func.func @shuffle_fold1(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<4xi32> {
1976 %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3] : vector<4xi32>, vector<2xi32>
1977 return %shuffle : vector<4xi32>
1980 // CHECK-LABEL: func @shuffle_fold2
1981 // CHECK: %arg1 : vector<2xi32>
1982 func.func @shuffle_fold2(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<2xi32> {
1983 %shuffle = vector.shuffle %v0, %v1 [4, 5] : vector<4xi32>, vector<2xi32>
1984 return %shuffle : vector<2xi32>
1987 // CHECK-LABEL: func @shuffle_fold3
1988 // CHECK: return %arg0 : vector<4x5x6xi32>
1989 func.func @shuffle_fold3(%v0 : vector<4x5x6xi32>, %v1 : vector<2x5x6xi32>) -> vector<4x5x6xi32> {
1990 %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3] : vector<4x5x6xi32>, vector<2x5x6xi32>
1991 return %shuffle : vector<4x5x6xi32>
1994 // CHECK-LABEL: func @shuffle_fold4
1995 // CHECK: return %arg1 : vector<2x5x6xi32>
1996 func.func @shuffle_fold4(%v0 : vector<4x5x6xi32>, %v1 : vector<2x5x6xi32>) -> vector<2x5x6xi32> {
1997 %shuffle = vector.shuffle %v0, %v1 [4, 5] : vector<4x5x6xi32>, vector<2x5x6xi32>
1998 return %shuffle : vector<2x5x6xi32>
2001 // CHECK-LABEL: func @shuffle_nofold1
2002 // CHECK: %[[V:.+]] = vector.shuffle %arg0, %arg1 [0, 1, 2, 3, 4] : vector<4xi32>, vector<2xi32>
2003 // CHECK: return %[[V]]
2004 func.func @shuffle_nofold1(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<5xi32> {
2005 %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3, 4] : vector<4xi32>, vector<2xi32>
2006 return %shuffle : vector<5xi32>
2011 // CHECK-LABEL: func @transpose_scalar_broadcast1
2012 // CHECK-SAME: (%[[ARG:.+]]: vector<1xf32>)
2013 // CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : vector<1xf32> to vector<1x8xf32>
2014 // CHECK: return %[[V]] : vector<1x8xf32>
2015 func.func @transpose_scalar_broadcast1(%value: vector<1xf32>) -> vector<1x8xf32> {
2016 %bcast = vector.broadcast %value : vector<1xf32> to vector<8x1xf32>
2017 %t = vector.transpose %bcast, [1, 0] : vector<8x1xf32> to vector<1x8xf32>
2018 return %t : vector<1x8xf32>
2023 // CHECK-LABEL: func @transpose_scalar_broadcast2
2024 // CHECK-SAME: (%[[ARG:.+]]: f32)
2025 // CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : f32 to vector<1x8xf32>
2026 // CHECK: return %[[V]] : vector<1x8xf32>
2027 func.func @transpose_scalar_broadcast2(%value: f32) -> vector<1x8xf32> {
2028 %bcast = vector.broadcast %value : f32 to vector<8x1xf32>
2029 %t = vector.transpose %bcast, [1, 0] : vector<8x1xf32> to vector<1x8xf32>
2030 return %t : vector<1x8xf32>
2035 // CHECK-LABEL: func @transpose_splat_constant
2036 // CHECK: %[[CST:.+]] = arith.constant dense<5.000000e+00> : vector<8x4xf32>
2037 // CHECK: return %[[CST]]
2038 func.func @transpose_splat_constant() -> vector<8x4xf32> {
2039 %cst = arith.constant dense<5.0> : vector<4x8xf32>
2040 %0 = vector.transpose %cst, [1, 0] : vector<4x8xf32> to vector<8x4xf32>
2041 return %0 : vector<8x4xf32>
2044 // CHECK-LABEL: func @transpose_splat2(
2045 // CHECK-SAME: %[[VAL_0:.*]]: f32) -> vector<3x4xf32> {
2046 // CHECK: %[[VAL_1:.*]] = vector.splat %[[VAL_0]] : vector<3x4xf32>
2047 // CHECK: return %[[VAL_1]] : vector<3x4xf32>
2049 func.func @transpose_splat2(%arg : f32) -> vector<3x4xf32> {
2050 %splat = vector.splat %arg : vector<4x3xf32>
2051 %0 = vector.transpose %splat, [1, 0] : vector<4x3xf32> to vector<3x4xf32>
2052 return %0 : vector<3x4xf32>
2057 // CHECK-LABEL: func.func @insert_1d_constant
2058 // CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[9, 1, 2]> : vector<3xi32>
2059 // CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[0, 9, 2]> : vector<3xi32>
2060 // CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[0, 1, 9]> : vector<3xi32>
2061 // CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<3xi32>, vector<3xi32>, vector<3xi32>
2062 func.func @insert_1d_constant() -> (vector<3xi32>, vector<3xi32>, vector<3xi32>) {
2063 %vcst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
2064 %icst = arith.constant 9 : i32
2065 %a = vector.insert %icst, %vcst[0] : i32 into vector<3xi32>
2066 %b = vector.insert %icst, %vcst[1] : i32 into vector<3xi32>
2067 %c = vector.insert %icst, %vcst[2] : i32 into vector<3xi32>
2068 return %a, %b, %c : vector<3xi32>, vector<3xi32>, vector<3xi32>
2073 // CHECK-LABEL: func.func @insert_2d_constant
2074 // CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[99, 1, 2\], \[3, 4, 5\]\]}}> : vector<2x3xi32>
2075 // CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[0, 1, 2\], \[3, 4, 99\]\]}}> : vector<2x3xi32>
2076 // CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[90, 91, 92\], \[3, 4, 5\]\]}}> : vector<2x3xi32>
2077 // CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[0, 1, 2\], \[90, 91, 92\]\]}}> : vector<2x3xi32>
2078 // CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]]
2079 func.func @insert_2d_constant() -> (vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>) {
2080 %vcst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
2081 %cst_scalar = arith.constant 99 : i32
2082 %cst_1d = arith.constant dense<[90, 91, 92]> : vector<3xi32>
2083 %a = vector.insert %cst_scalar, %vcst[0, 0] : i32 into vector<2x3xi32>
2084 %b = vector.insert %cst_scalar, %vcst[1, 2] : i32 into vector<2x3xi32>
2085 %c = vector.insert %cst_1d, %vcst[0] : vector<3xi32> into vector<2x3xi32>
2086 %d = vector.insert %cst_1d, %vcst[1] : vector<3xi32> into vector<2x3xi32>
2087 return %a, %b, %c, %d : vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>
2092 // CHECK-LABEL: func.func @insert_2d_splat_constant
2093 // CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<2x3xi32>
2094 // CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[99, 0, 0\], \[0, 0, 0\]\]}}> : vector<2x3xi32>
2095 // CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[0, 0, 0\], \[0, 99, 0\]\]}}> : vector<2x3xi32>
2096 // CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[33, 33, 33\], \[0, 0, 0\]\]}}> : vector<2x3xi32>
2097 // CHECK-DAG: %[[ECST:.*]] = arith.constant dense<{{\[\[0, 0, 0\], \[33, 33, 33\]\]}}> : vector<2x3xi32>
2098 // CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]]
2099 func.func @insert_2d_splat_constant()
2100 -> (vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>) {
2101 %vcst = arith.constant dense<0> : vector<2x3xi32>
2102 %cst_zero = arith.constant 0 : i32
2103 %cst_scalar = arith.constant 99 : i32
2104 %cst_1d = arith.constant dense<33> : vector<3xi32>
2105 %a = vector.insert %cst_zero, %vcst[0, 0] : i32 into vector<2x3xi32>
2106 %b = vector.insert %cst_scalar, %vcst[0, 0] : i32 into vector<2x3xi32>
2107 %c = vector.insert %cst_scalar, %vcst[1, 1] : i32 into vector<2x3xi32>
2108 %d = vector.insert %cst_1d, %vcst[0] : vector<3xi32> into vector<2x3xi32>
2109 %e = vector.insert %cst_1d, %vcst[1] : vector<3xi32> into vector<2x3xi32>
2110 return %a, %b, %c, %d, %e : vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>
2115 // CHECK-LABEL: func @insert_element_fold
2116 // CHECK: %[[V:.+]] = arith.constant dense<[0, 1, 7, 3]> : vector<4xi32>
2117 // CHECK: return %[[V]]
2118 func.func @insert_element_fold() -> vector<4xi32> {
2119 %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
2120 %s = arith.constant 7 : i32
2121 %i = arith.constant 2 : i32
2122 %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
2123 return %1 : vector<4xi32>
2128 // CHECK-LABEL: func @insert_element_invalid_fold
2129 func.func @insert_element_invalid_fold() -> vector<1xf32> {
2130 // Out-of-bounds index here.
2131 %c26 = arith.constant 26 : index
2132 %cst_2 = arith.constant 1.60215309E+9 : f32
2133 %cst_20 = arith.constant dense<1.60215309E+9> : vector<1xf32>
2134 // CHECK: vector.insertelement
2135 %46 = vector.insertelement %cst_2, %cst_20[%c26 : index] : vector<1xf32>
2136 return %46 : vector<1xf32>
2142 // Do not crash on poison
2143 // CHECK-LABEL: func @insert_poison_fold1
2144 // CHECK: vector.insertelement
2145 func.func @insert_poison_fold1() -> vector<4xi32> {
2146 %v = ub.poison : vector<4xi32>
2147 %s = arith.constant 7 : i32
2148 %i = arith.constant 2 : i32
2149 %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
2150 return %1 : vector<4xi32>
2155 // Do not crash on poison
2156 // CHECK-LABEL: func @insert_poison_fold2
2157 // CHECK: vector.insertelement
2158 func.func @insert_poison_fold2() -> vector<4xi32> {
2159 %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
2160 %s = ub.poison : i32
2161 %i = arith.constant 2 : i32
2162 %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
2163 return %1 : vector<4xi32>
2168 // Do not crash on poison
2169 // CHECK-LABEL: func @insert_poison_fold3
2170 // CHECK: vector.insertelement
2171 func.func @insert_poison_fold3() -> vector<4xi32> {
2172 %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
2173 %s = arith.constant 7 : i32
2174 %i = ub.poison : i32
2175 %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
2176 return %1 : vector<4xi32>
2181 // CHECK-LABEL: func @extract_element_fold
2182 // CHECK: %[[C:.+]] = arith.constant 5 : i32
2183 // CHECK: return %[[C]]
2184 func.func @extract_element_fold() -> i32 {
2185 %v = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32>
2186 %i = arith.constant 2 : i32
2187 %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
2191 // CHECK-LABEL: func @extract_element_splat_fold
2192 // CHECK-SAME: (%[[ARG:.+]]: i32)
2193 // CHECK: return %[[ARG]]
2194 func.func @extract_element_splat_fold(%a : i32) -> i32 {
2195 %v = vector.splat %a : vector<4xi32>
2196 %i = arith.constant 2 : i32
2197 %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
2203 // Do not crash on poison
2204 // CHECK-LABEL: func @extract_element_poison_fold1
2205 // CHECK: vector.extractelement
2206 func.func @extract_element_poison_fold1() -> i32 {
2207 %v = ub.poison : vector<4xi32>
2208 %i = arith.constant 2 : i32
2209 %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
2215 // Do not crash on poison
2216 // CHECK-LABEL: func @extract_element_poison_fold2
2217 // CHECK: vector.extractelement
2218 func.func @extract_element_poison_fold2() -> i32 {
2219 %v = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32>
2220 %i = ub.poison : i32
2221 %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
2227 // CHECK-LABEL: func @reduce_one_element_vector_extract
2228 // CHECK-SAME: (%[[V:.+]]: vector<1xf32>)
2229 // CHECK: %[[S:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2230 // CHECK: return %[[S]] : f32
2231 func.func @reduce_one_element_vector_extract(%a : vector<1xf32>) -> f32 {
2232 %s = vector.reduction <add>, %a : vector<1xf32> into f32
2238 // CHECK-LABEL: func @masked_reduce_one_element_vector_extract
2239 // CHECK-SAME: %[[VAL_0:.*]]: vector<1xf32>, %[[VAL_1:.*]]: vector<1xi1>)
2240 func.func @masked_reduce_one_element_vector_extract(%a : vector<1xf32>, %mask : vector<1xi1>) -> f32 {
2241 // CHECK: %[[VAL_2:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<1xf32>
2242 %s = vector.mask %mask { vector.reduction <add>, %a : vector<1xf32> into f32 }
2243 : vector<1xi1> -> f32
2249 // CHECK-LABEL: func @reduce_one_element_vector_addf
2250 // CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2251 // CHECK: %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2252 // CHECK: %[[S:.+]] = arith.addf %[[A]], %[[B]] : f32
2253 // CHECK: return %[[S]]
2254 func.func @reduce_one_element_vector_addf(%a : vector<1xf32>, %b: f32) -> f32 {
2255 %s = vector.reduction <add>, %a, %b : vector<1xf32> into f32
2261 // CHECK-LABEL: func @reduce_one_element_vector_addf_fastmath
2262 // CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2263 // CHECK: %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2264 // CHECK: %[[S:.+]] = arith.addf %[[A]], %[[B]] fastmath<nnan,ninf> : f32
2265 // CHECK: return %[[S]]
2266 func.func @reduce_one_element_vector_addf_fastmath(%a : vector<1xf32>, %b: f32) -> f32 {
2267 %s = vector.reduction <add>, %a, %b fastmath<nnan,ninf> : vector<1xf32> into f32
2273 // CHECK-LABEL: func @masked_reduce_one_element_vector_addf
2274 // CHECK-SAME: %[[VAL_0:.*]]: vector<1xf32>, %[[VAL_1:.*]]: f32,
2275 // CHECK-SAME: %[[VAL_2:.*]]: vector<1xi1>)
2276 func.func @masked_reduce_one_element_vector_addf(%a: vector<1xf32>,
2278 %mask: vector<1xi1>) -> f32 {
2279 // CHECK: %[[VAL_3:.*]] = vector.extract %[[VAL_2]][0] : i1 from vector<1xi1>
2280 // CHECK: %[[VAL_4:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<1xf32>
2281 // CHECK: %[[VAL_5:.*]] = arith.addf %[[VAL_4]], %[[VAL_1]] : f32
2282 // CHECK: %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_1]] : f32
2283 %s = vector.mask %mask { vector.reduction <add>, %a, %b : vector<1xf32> into f32 }
2284 : vector<1xi1> -> f32
2290 // CHECK-LABEL: func @reduce_one_element_vector_mulf
2291 // CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2292 // CHECK: %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2293 // CHECK: %[[S:.+]] = arith.mulf %[[A]], %[[B]] : f32
2294 // CHECK: return %[[S]]
2295 func.func @reduce_one_element_vector_mulf(%a : vector<1xf32>, %b: f32) -> f32 {
2296 %s = vector.reduction <mul>, %a, %b : vector<1xf32> into f32
2302 // CHECK-LABEL: func @dont_reduce_one_element_vector
2303 // CHECK: vector.reduction
2304 func.func @dont_reduce_one_element_vector(%a : vector<4xf32>) -> f32 {
2305 %s = vector.reduction <add>, %a : vector<4xf32> into f32
2311 // CHECK-LABEL: func @reduce_one_element_vector_maximumf
2312 // CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2313 // CHECK: %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2314 // CHECK: %[[S:.+]] = arith.maximumf %[[A]], %[[B]] : f32
2315 // CHECK: return %[[S]]
2316 func.func @reduce_one_element_vector_maximumf(%a : vector<1xf32>, %b: f32) -> f32 {
2317 %s = vector.reduction <maximumf>, %a, %b : vector<1xf32> into f32
2323 // CHECK-LABEL: func @bitcast(
2324 // CHECK-SAME: %[[ARG:.*]]: vector<4x8xf32>) -> vector<4x16xi16> {
2325 // CHECK: vector.bitcast %[[ARG]] : vector<4x8xf32> to vector<4x16xi16>
2326 func.func @bitcast(%a: vector<4x8xf32>) -> vector<4x16xi16> {
2327 %0 = vector.bitcast %a : vector<4x8xf32> to vector<4x8xi32>
2328 %1 = vector.bitcast %0 : vector<4x8xi32> to vector<4x16xi16>
2329 return %1 : vector<4x16xi16>
2334 // CHECK-LABEL: @insert_strided_slice_splat
2335 // CHECK-SAME: (%[[ARG:.*]]: f32)
2336 // CHECK-NEXT: %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<8x16xf32>
2337 // CHECK-NEXT: return %[[SPLAT]] : vector<8x16xf32>
2338 func.func @insert_strided_slice_splat(%x: f32) -> (vector<8x16xf32>) {
2339 %splat0 = vector.splat %x : vector<4x4xf32>
2340 %splat1 = vector.splat %x : vector<8x16xf32>
2341 %0 = vector.insert_strided_slice %splat0, %splat1 {offsets = [2, 2], strides = [1, 1]}
2342 : vector<4x4xf32> into vector<8x16xf32>
2343 return %0 : vector<8x16xf32>
2349 // CHECK-LABEL: @insert_extract_strided_slice
2350 // CHECK-SAME: (%[[ARG:.*]]: vector<8x16xf32>)
2351 // CHECK-NEXT: return %[[ARG]] : vector<8x16xf32>
2352 func.func @insert_extract_strided_slice(%x: vector<8x16xf32>) -> (vector<8x16xf32>) {
2353 %0 = vector.extract_strided_slice %x {offsets = [0, 8], sizes = [2, 4], strides = [1, 1]}
2354 : vector<8x16xf32> to vector<2x4xf32>
2355 %1 = vector.insert_strided_slice %0, %x {offsets = [0, 8], strides = [1, 1]}
2356 : vector<2x4xf32> into vector<8x16xf32>
2357 return %1 : vector<8x16xf32>
2362 // CHECK-LABEL: func.func @insert_strided_1d_constant
2363 // CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[4, 1, 2]> : vector<3xi32>
2364 // CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[0, 1, 4]> : vector<3xi32>
2365 // CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[5, 6, 2]> : vector<3xi32>
2366 // CHECK-DAG: %[[DCST:.*]] = arith.constant dense<[0, 5, 6]> : vector<3xi32>
2367 // CHECK-DAG: %[[ECST:.*]] = arith.constant dense<[7, 8, 9]> : vector<3xi32>
2368 // CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]]
2369 func.func @insert_strided_1d_constant() ->
2370 (vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>) {
2371 %vcst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
2372 %cst_1 = arith.constant dense<4> : vector<1xi32>
2373 %cst_2 = arith.constant dense<[5, 6]> : vector<2xi32>
2374 %cst_3 = arith.constant dense<[7, 8, 9]> : vector<3xi32>
2375 %a = vector.insert_strided_slice %cst_1, %vcst {offsets = [0], strides = [1]} : vector<1xi32> into vector<3xi32>
2376 %b = vector.insert_strided_slice %cst_1, %vcst {offsets = [2], strides = [1]} : vector<1xi32> into vector<3xi32>
2377 %c = vector.insert_strided_slice %cst_2, %vcst {offsets = [0], strides = [1]} : vector<2xi32> into vector<3xi32>
2378 %d = vector.insert_strided_slice %cst_2, %vcst {offsets = [1], strides = [1]} : vector<2xi32> into vector<3xi32>
2379 %e = vector.insert_strided_slice %cst_3, %vcst {offsets = [0], strides = [1]} : vector<3xi32> into vector<3xi32>
2380 return %a, %b, %c, %d, %e : vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>
2385 // CHECK-LABEL: func.func @insert_strided_2d_constant
2386 // CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[0, 1\], \[9, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
2387 // CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[4, 9\]\]}}> : vector<3x2xi32>
2388 // CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[18, 19\], \[2, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
2389 // CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[18, 19\], \[4, 5\]\]}}> : vector<3x2xi32>
2390 // CHECK-DAG: %[[ECST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[18, 19\]\]}}> : vector<3x2xi32>
2391 // CHECK-DAG: %[[FCST:.*]] = arith.constant dense<{{\[\[28, 29\], \[38, 39\], \[4, 5\]\]}}> : vector<3x2xi32>
2392 // CHECK-DAG: %[[GCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[28, 29\], \[38, 39\]\]}}> : vector<3x2xi32>
2393 // CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]], %[[FCST]], %[[GCST]]
2394 func.func @insert_strided_2d_constant() ->
2395 (vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>) {
2396 %vcst = arith.constant dense<[[0, 1], [2, 3], [4, 5]]> : vector<3x2xi32>
2397 %cst_1 = arith.constant dense<9> : vector<1xi32>
2398 %cst_2 = arith.constant dense<[18, 19]> : vector<2xi32>
2399 %cst_3 = arith.constant dense<[[28, 29], [38, 39]]> : vector<2x2xi32>
2400 %a = vector.insert_strided_slice %cst_1, %vcst {offsets = [1, 0], strides = [1]} : vector<1xi32> into vector<3x2xi32>
2401 %b = vector.insert_strided_slice %cst_1, %vcst {offsets = [2, 1], strides = [1]} : vector<1xi32> into vector<3x2xi32>
2402 %c = vector.insert_strided_slice %cst_2, %vcst {offsets = [0, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
2403 %d = vector.insert_strided_slice %cst_2, %vcst {offsets = [1, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
2404 %e = vector.insert_strided_slice %cst_2, %vcst {offsets = [2, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
2405 %f = vector.insert_strided_slice %cst_3, %vcst {offsets = [0, 0], strides = [1, 1]} : vector<2x2xi32> into vector<3x2xi32>
2406 %g = vector.insert_strided_slice %cst_3, %vcst {offsets = [1, 0], strides = [1, 1]} : vector<2x2xi32> into vector<3x2xi32>
2407 return %a, %b, %c, %d, %e, %f, %g :
2408 vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>
2413 // CHECK-LABEL: func @shuffle_splat
2414 // CHECK-SAME: (%[[ARG:.*]]: i32)
2415 // CHECK-NEXT: %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<4xi32>
2416 // CHECK-NEXT: return %[[SPLAT]] : vector<4xi32>
2417 func.func @shuffle_splat(%x : i32) -> vector<4xi32> {
2418 %v0 = vector.splat %x : vector<4xi32>
2419 %v1 = vector.splat %x : vector<2xi32>
2420 %shuffle = vector.shuffle %v0, %v1 [2, 3, 4, 5] : vector<4xi32>, vector<2xi32>
2421 return %shuffle : vector<4xi32>
2427 // CHECK-LABEL: func @insert_splat
2428 // CHECK-SAME: (%[[ARG:.*]]: i32)
2429 // CHECK-NEXT: %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<2x4x3xi32>
2430 // CHECK-NEXT: return %[[SPLAT]] : vector<2x4x3xi32>
2431 func.func @insert_splat(%x : i32) -> vector<2x4x3xi32> {
2432 %v0 = vector.splat %x : vector<4x3xi32>
2433 %v1 = vector.splat %x : vector<2x4x3xi32>
2434 %insert = vector.insert %v0, %v1[0] : vector<4x3xi32> into vector<2x4x3xi32>
2435 return %insert : vector<2x4x3xi32>
2440 // CHECK-LABEL: func.func @transfer_read_from_rank_reducing_extract_slice
2441 // CHECK: tensor.extract_slice
2442 // CHECK: vector.transfer_read
2443 func.func @transfer_read_from_rank_reducing_extract_slice(%src: tensor<1x8x8x8xf32>, %i1: index, %i2: index, %i3: index, %i4: index) -> vector<4xf32> {
2444 %c0 = arith.constant 0 : index
2445 %f0 = arith.constant 0.000000e+00 : f32
2446 %0 = tensor.extract_slice %src[0, %i1, %i2, %i3] [1, 4, 1, 4] [1, 1, 1, 1] : tensor<1x8x8x8xf32> to tensor<1x4x4xf32>
2447 %1 = vector.transfer_read %0[%c0, %i4, %c0], %f0 {in_bounds = [true]} : tensor<1x4x4xf32>, vector<4xf32>
2448 return %1 : vector<4xf32>
2453 // CHECK-LABEL: func.func @extract_from_broadcast
2454 func.func @extract_from_broadcast(%src: vector<1x1x1xf32>) -> vector<1xf32> {
2455 %0 = vector.broadcast %src : vector<1x1x1xf32> to vector<1x1x32x1xf32>
2457 // CHECK-NEXT: %[[RES:.*]] = vector.extract {{.*}}[0, 0] : vector<1xf32> from vector<1x1x1xf32>
2458 // CHECK-NEXT: return %[[RES]] : vector<1xf32>
2459 %1 = vector.extract %0[0, 0, 31] : vector<1xf32> from vector<1x1x32x1xf32>
2460 return %1: vector<1xf32>
2463 // CHECK-LABEL: func.func @extract_from_stretch_broadcast
2464 func.func @extract_from_stretch_broadcast(%src: vector<3x1x2xf32>) -> f32 {
2465 // CHECK-NEXT: %[[RES:.*]] = vector.extract {{.*}}[0, 0, 0] : f32 from vector<3x1x2xf32>
2466 // CHECK-NEXT: return %[[RES]] : f32
2467 %0 = vector.broadcast %src : vector<3x1x2xf32> to vector<3x4x2xf32>
2468 %1 = vector.extract %0[0, 2, 0] : f32 from vector<3x4x2xf32>
2473 // CHECK-LABEL: func.func @extract_strided_slice_of_constant_mask
2474 func.func @extract_strided_slice_of_constant_mask() -> vector<5x7xi1>{
2475 // CHECK-NEXT: %[[RES:.*]] = vector.constant_mask [5, 4] : vector<5x7xi1>
2476 // CHECK-NEXT: return %[[RES]] : vector<5x7xi1>
2477 %c4 = arith.constant 4 : index
2478 %c10 = arith.constant 10 : index
2479 %mask = vector.create_mask %c10, %c4 : vector<12x7xi1>
2480 %res = vector.extract_strided_slice %mask {offsets = [3], sizes = [5], strides = [1]} : vector<12x7xi1> to vector<5x7xi1>
2481 return %res : vector<5x7xi1>
2486 // CHECK-LABEL: func.func @fold_extractelement_of_broadcast(
2487 // CHECK-SAME: %[[f:.*]]: f32
2488 // CHECK: return %[[f]]
2489 func.func @fold_extractelement_of_broadcast(%f: f32) -> f32 {
2490 %0 = vector.broadcast %f : f32 to vector<15xf32>
2491 %c5 = arith.constant 5 : index
2492 %1 = vector.extractelement %0 [%c5 : index] : vector<15xf32>
2498 // CHECK-LABEL: func.func @fold_0d_vector_reduction
2499 func.func @fold_0d_vector_reduction(%arg0: vector<f32>) -> f32 {
2500 // CHECK-NEXT: %[[RES:.*]] = vector.extractelement %arg{{.*}}[] : vector<f32>
2501 // CHECK-NEXT: return %[[RES]] : f32
2502 %0 = vector.reduction <add>, %arg0 : vector<f32> into f32
2508 // CHECK-LABEL: func @empty_vector_mask
2509 func.func @empty_vector_mask(%mask : vector<8xi1>) {
2510 // CHECK-NOT: vector.mask
2511 vector.mask %mask { } : vector<8xi1>
2517 // CHECK-LABEL: func @empty_vector_mask_with_return
2518 // CHECK-SAME: %[[IN:.*]]: vector<8xf32>
2519 func.func @empty_vector_mask_with_return(%a : vector<8xf32>, %mask : vector<8xi1>) -> vector<8xf32> {
2520 // CHECK-NOT: vector.mask
2521 // CHECK: return %[[IN]] : vector<8xf32>
2522 %0 = vector.mask %mask { vector.yield %a : vector<8xf32> } : vector<8xi1> -> vector<8xf32>
2523 return %0 : vector<8xf32>
2528 // CHECK-LABEL: func @all_true_vector_mask
2529 // CHECK-SAME: %[[IN:.*]]: tensor<3x4xf32>
2530 func.func @all_true_vector_mask(%ta : tensor<3x4xf32>) -> vector<3x4xf32> {
2531 // CHECK-NOT: vector.mask
2532 // CHECK: %[[LD:.*]] = vector.transfer_read %[[IN]]
2533 // CHECK: return %[[LD]] : vector<3x4xf32>
2534 %c0 = arith.constant 0 : index
2535 %cf0 = arith.constant 0.0 : f32
2536 %all_true = vector.constant_mask [3, 4] : vector<3x4xi1>
2537 %0 = vector.mask %all_true { vector.transfer_read %ta[%c0, %c0], %cf0 : tensor<3x4xf32>, vector<3x4xf32> } : vector<3x4xi1> -> vector<3x4xf32>
2538 return %0 : vector<3x4xf32>
2543 // CHECK-LABEL: func @all_true_vector_mask_no_result(
2544 func.func @all_true_vector_mask_no_result(%a : vector<3x4xf32>, %m : memref<3x4xf32>) {
2545 // CHECK-NOT: vector.mask
2546 // CHECK: vector.transfer_write
2547 %c0 = arith.constant 0 : index
2548 %all_true = vector.constant_mask [3, 4] : vector<3x4xi1>
2549 vector.mask %all_true { vector.transfer_write %a, %m[%c0, %c0] : vector<3x4xf32>, memref<3x4xf32> } : vector<3x4xi1>
2555 // CHECK-LABEL: func.func @fold_shape_cast_with_mask(
2556 // CHECK-SAME: %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x4xi1> {
2557 func.func @fold_shape_cast_with_mask(%arg0: tensor<1x?xf32>) -> vector<1x4xi1> {
2558 // CHECK-NOT: vector.shape_cast
2559 // CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
2560 // CHECK: %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
2561 // CHECK: %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x4xi1>
2562 // CHECK: return %[[VAL_3]] : vector<1x4xi1>
2563 %c1 = arith.constant 1 : index
2564 %dim = tensor.dim %arg0, %c1 : tensor<1x?xf32>
2565 %1 = vector.create_mask %c1, %dim, %c1, %c1 : vector<1x4x1x1xi1>
2566 %2 = vector.shape_cast %1 : vector<1x4x1x1xi1> to vector<1x4xi1>
2567 return %2 : vector<1x4xi1>
2572 // CHECK-LABEL: func.func @fold_shape_cast_with_mask_scalable(
2573 // CHECK-SAME: %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x[4]xi1> {
2574 func.func @fold_shape_cast_with_mask_scalable(%arg0: tensor<1x?xf32>) -> vector<1x[4]xi1> {
2575 // CHECK-NOT: vector.shape_cast
2576 // CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
2577 // CHECK: %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
2578 // CHECK: %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x[4]xi1>
2579 // CHECK: return %[[VAL_3]] : vector<1x[4]xi1>
2580 %c1 = arith.constant 1 : index
2581 %dim = tensor.dim %arg0, %c1 : tensor<1x?xf32>
2582 %1 = vector.create_mask %c1, %dim, %c1, %c1 : vector<1x[4]x1x1xi1>
2583 %2 = vector.shape_cast %1 : vector<1x[4]x1x1xi1> to vector<1x[4]xi1>
2584 return %2 : vector<1x[4]xi1>
2589 // Check that a scalable unit dim (i.e. [1]) is preserved when the shape_cast is folded; only the fixed-size unit dim is dropped.
2590 // CHECK-LABEL: func.func @fold_shape_cast_with_mask_scalable_one(
2591 // CHECK-SAME: %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x[1]xi1> {
2592 func.func @fold_shape_cast_with_mask_scalable_one(%arg0: tensor<1x?xf32>) -> vector<1x[1]xi1>{
2593 // CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
2594 // CHECK: %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
2595 // CHECK: %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x[1]xi1>
2596 // CHECK: return %[[VAL_3]] : vector<1x[1]xi1>
2597 %c1 = arith.constant 1 : index
2598 %dim = tensor.dim %arg0, %c1 : tensor<1x?xf32>
2599 %1 = vector.create_mask %c1, %dim, %c1 : vector<1x[1]x1xi1>
2600 %2 = vector.shape_cast %1 : vector<1x[1]x1xi1> to vector<1x[1]xi1>
2601 return %2 : vector<1x[1]xi1>
2606 // CHECK-LABEL: func.func @fold_shape_cast_with_constant_mask() -> vector<4xi1> {
2607 func.func @fold_shape_cast_with_constant_mask() -> vector<4xi1>{
2608 // CHECK-NOT: vector.shape_cast
2609 // CHECK: %[[VAL_0:.*]] = vector.constant_mask [1] : vector<4xi1>
2610 // CHECK: return %[[VAL_0]] : vector<4xi1>
2611 %1 = vector.constant_mask [1, 1, 1] : vector<4x1x1xi1>
2612 %2 = vector.shape_cast %1 : vector<4x1x1xi1> to vector<4xi1>
2613 return %2 : vector<4xi1>
2618 // TODO: This IR could be canonicalized but the canonicalization pattern is not
2619 // smart enough. For now, just make sure that we do not crash.
2621 // CHECK-LABEL: func.func @load_store_forwarding_rank_mismatch(
2622 // CHECK: vector.transfer_write
2623 // CHECK: vector.transfer_read
2624 func.func @load_store_forwarding_rank_mismatch(%v0: vector<4x1x1xf32>, %arg0: tensor<4x4x4xf32>) -> (vector<1x100x4x5xf32>) {
2625 %c0 = arith.constant 0 : index
2626 %cf0 = arith.constant 0.0 : f32
2627 // d0 is explicitly written.
2628 %w0 = vector.transfer_write %v0, %arg0[%c0, %c0, %c0]
2629 {in_bounds = [true, true, true],
2630 permutation_map = affine_map<(d0, d1, d2) -> (d2, d1, d0)>} :
2631 vector<4x1x1xf32>, tensor<4x4x4xf32>
2632 // d0 is implicitly read (rank-reduction of unit dim).
2633 %r = vector.transfer_read %w0[%c0, %c0, %c0], %cf0
2634 {in_bounds = [true, true, true, true],
2635 permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} :
2636 tensor<4x4x4xf32>, vector<1x100x4x5xf32>
2637 return %r : vector<1x100x4x5xf32>
2642 // CHECK-LABEL: func.func @rank_0_shuffle_to_interleave(
2643 // CHECK-SAME: %[[LHS:.*]]: vector<f64>, %[[RHS:.*]]: vector<f64>)
2644 func.func @rank_0_shuffle_to_interleave(%arg0: vector<f64>, %arg1: vector<f64>) -> vector<2xf64> {
2645 // CHECK: %[[ZIP:.*]] = vector.interleave %[[LHS]], %[[RHS]] : vector<f64> -> vector<2xf64>
2646 // CHECK: return %[[ZIP]]
2647 %0 = vector.shuffle %arg0, %arg1 [0, 1] : vector<f64>, vector<f64>
2648 return %0 : vector<2xf64>
2653 // CHECK-LABEL: func.func @rank_1_shuffle_to_interleave(
2654 // CHECK-SAME: %[[LHS:.*]]: vector<6xi32>, %[[RHS:.*]]: vector<6xi32>)
2655 func.func @rank_1_shuffle_to_interleave(%arg0: vector<6xi32>, %arg1: vector<6xi32>) -> vector<12xi32> {
2656 // CHECK: %[[ZIP:.*]] = vector.interleave %[[LHS]], %[[RHS]] : vector<6xi32> -> vector<12xi32>
2657 // CHECK: return %[[ZIP]]
2658 %0 = vector.shuffle %arg0, %arg1 [0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11] : vector<6xi32>, vector<6xi32>
2659 return %0 : vector<12xi32>
2664 // CHECK-LABEL: func @extract_from_0d_splat_broadcast_regression(
2665 // CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: vector<f32>, %[[c:.*]]: vector<2xf32>)
2666 func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %b: vector<f32>, %c: vector<2xf32>) -> (f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>) {
2667 // Splat scalar to 0D and extract scalar.
2668 %0 = vector.splat %a : vector<f32>
2669 %1 = vector.extract %0[] : f32 from vector<f32>
2671 // Broadcast scalar to 0D and extract scalar.
2672 %2 = vector.broadcast %a : f32 to vector<f32>
2673 %3 = vector.extract %2[] : f32 from vector<f32>
2675 // Broadcast 0D to 3D and extract scalar.
2676 // CHECK: %[[extract1:.*]] = vector.extractelement %[[b]][] : vector<f32>
2677 %4 = vector.broadcast %b : vector<f32> to vector<1x2x4xf32>
2678 %5 = vector.extract %4[0, 0, 1] : f32 from vector<1x2x4xf32>
2680 // Splat scalar to 2D and extract scalar.
2681 %6 = vector.splat %a : vector<2x3xf32>
2682 %7 = vector.extract %6[0, 1] : f32 from vector<2x3xf32>
2684 // Broadcast scalar to 3D and extract scalar.
2685 %8 = vector.broadcast %a : f32 to vector<5x6x7xf32>
2686 %9 = vector.extract %8[2, 1, 5] : f32 from vector<5x6x7xf32>
2688 // Extract 2D from 3D that was broadcasted from a scalar.
2689 // CHECK: %[[extract2:.*]] = vector.broadcast %[[a]] : f32 to vector<6x7xf32>
2690 %10 = vector.extract %8[2] : vector<6x7xf32> from vector<5x6x7xf32>
2692 // Extract 1D from 2D that was splat'ed from a scalar.
2693 // CHECK: %[[extract3:.*]] = vector.broadcast %[[a]] : f32 to vector<3xf32>
2694 %11 = vector.extract %6[1] : vector<3xf32> from vector<2x3xf32>
2696 // CHECK: return %[[a]], %[[a]], %[[extract1]], %[[a]], %[[a]], %[[extract2]], %[[extract3]]
2697 return %1, %3, %5, %7, %9, %10, %11 : f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>
2702 // CHECK-LABEL: func @extract_scalar_from_from_elements(
2703 // CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: f32)
2704 func.func @extract_scalar_from_from_elements(%a: f32, %b: f32) -> (f32, f32, f32, f32, f32, f32, f32) {
2706 %0 = vector.from_elements %a : vector<f32>
2707 %1 = vector.extract %0[] : f32 from vector<f32>
2710 %2 = vector.from_elements %a : vector<1xf32>
2711 %3 = vector.extract %2[0] : f32 from vector<1xf32>
2712 %4 = vector.from_elements %a, %b, %a, %a, %b : vector<5xf32>
2713 %5 = vector.extract %4[4] : f32 from vector<5xf32>
2716 %6 = vector.from_elements %a, %a, %a, %b, %b, %b : vector<2x3xf32>
2717 %7 = vector.extract %6[0, 0] : f32 from vector<2x3xf32>
2718 %8 = vector.extract %6[0, 1] : f32 from vector<2x3xf32>
2719 %9 = vector.extract %6[1, 1] : f32 from vector<2x3xf32>
2720 %10 = vector.extract %6[1, 2] : f32 from vector<2x3xf32>
2722 // CHECK: return %[[a]], %[[a]], %[[b]], %[[a]], %[[a]], %[[b]], %[[b]]
2723 return %1, %3, %5, %7, %8, %9, %10 : f32, f32, f32, f32, f32, f32, f32
2728 // CHECK-LABEL: func @extract_1d_from_from_elements(
2729 // CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: f32)
2730 func.func @extract_1d_from_from_elements(%a: f32, %b: f32) -> (vector<3xf32>, vector<3xf32>) {
2731 %0 = vector.from_elements %a, %a, %a, %b, %b, %b : vector<2x3xf32>
2732 // CHECK: %[[splat1:.*]] = vector.splat %[[a]] : vector<3xf32>
2733 %1 = vector.extract %0[0] : vector<3xf32> from vector<2x3xf32>
2734 // CHECK: %[[splat2:.*]] = vector.splat %[[b]] : vector<3xf32>
2735 %2 = vector.extract %0[1] : vector<3xf32> from vector<2x3xf32>
2736 // CHECK: return %[[splat1]], %[[splat2]]
2737 return %1, %2 : vector<3xf32>, vector<3xf32>
2742 // CHECK-LABEL: func @extract_2d_from_from_elements(
2743 // CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: f32)
2744 func.func @extract_2d_from_from_elements(%a: f32, %b: f32) -> (vector<2x2xf32>, vector<2x2xf32>) {
2745 %src = vector.from_elements %a, %a, %a, %b, %b, %b, %b, %a, %b, %a, %a, %b : vector<3x2x2xf32>
2746 // CHECK: %[[from_el1:.*]] = vector.from_elements %[[a]], %[[a]], %[[a]], %[[b]] : vector<2x2xf32>
2747 %slice0 = vector.extract %src[0] : vector<2x2xf32> from vector<3x2x2xf32>
2748 // CHECK: %[[from_el2:.*]] = vector.from_elements %[[b]], %[[b]], %[[b]], %[[a]] : vector<2x2xf32>
2749 %slice1 = vector.extract %src[1] : vector<2x2xf32> from vector<3x2x2xf32>
2750 // CHECK: return %[[from_el1]], %[[from_el2]]
2751 return %slice0, %slice1 : vector<2x2xf32>, vector<2x2xf32>
2756 // CHECK-LABEL: func @from_elements_to_splat(
2757 // CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: f32)
2758 func.func @from_elements_to_splat(%a: f32, %b: f32) -> (vector<2x3xf32>, vector<2x3xf32>, vector<f32>) {
2759 // CHECK: %[[splat:.*]] = vector.splat %[[a]] : vector<2x3xf32>
2760 %0 = vector.from_elements %a, %a, %a, %a, %a, %a : vector<2x3xf32>
2761 // CHECK: %[[from_el:.*]] = vector.from_elements {{.*}} : vector<2x3xf32>
2762 %1 = vector.from_elements %a, %a, %a, %a, %b, %a : vector<2x3xf32>
2763 // CHECK: %[[splat2:.*]] = vector.splat %[[a]] : vector<f32>
2764 %2 = vector.from_elements %a : vector<f32>
2765 // CHECK: return %[[splat]], %[[from_el]], %[[splat2]]
2766 return %0, %1, %2 : vector<2x3xf32>, vector<2x3xf32>, vector<f32>
2772 // CHECK-LABEL: func @vector_insert_const_regression(
2773 // CHECK: llvm.mlir.undef
2774 // CHECK: vector.insert
2775 func.func @vector_insert_const_regression(%arg0: i8) -> vector<4xi8> {
2776 %0 = llvm.mlir.undef : vector<4xi8>
2777 %1 = vector.insert %arg0, %0 [0] : i8 into vector<4xi8>
2778 return %1 : vector<4xi8>
2783 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract
2784 // CHECK: %[[EXTRACT:.+]] = vector.extract {{.*}}[0, 0, 0, 0, 0] : vector<4xi32> from vector<8x1x2x1x1x4xi32>
2785 // CHECK-NEXT: return %[[EXTRACT]] : vector<4xi32>
2786 func.func @contiguous_extract_strided_slices_to_extract(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<4xi32> {
2787 %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1, 4], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x4xi32>
2788 %2 = vector.shape_cast %1 : vector<1x1x1x1x1x4xi32> to vector<4xi32>
2789 return %2 : vector<4xi32>
2794 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_shorter_size_list
2795 // CHECK: %[[EXTRACT:.+]] = vector.extract {{.*}}[0, 0, 0, 0] : vector<1x4xi32> from vector<8x1x2x1x1x4xi32>
2796 // CHECK-NEXT: return %[[EXTRACT]] : vector<1x4xi32>
2797 func.func @contiguous_extract_strided_slices_to_extract_shorter_size_list(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<1x4xi32> {
2798 %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1], strides = [1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x4xi32>
2799 %2 = vector.shape_cast %1 : vector<1x1x1x1x1x4xi32> to vector<1x4xi32>
2800 return %2 : vector<1x4xi32>
2805 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_unit_outer_size
2806 // CHECK-NEXT: vector.extract_strided_slice
2807 func.func @contiguous_extract_strided_slices_to_extract_failure_non_unit_outer_size(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<8x1x1x1x1x4xi32> {
2808 %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 1, 1, 1, 1, 4], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<8x1x1x1x1x4xi32>
2809 return %1 : vector<8x1x1x1x1x4xi32>
2814 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_full_size
2815 // CHECK-NEXT: vector.extract_strided_slice
2816 func.func @contiguous_extract_strided_slices_to_extract_failure_non_full_size(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<1x1x1x1x1x2xi32> {
2817 %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1, 2], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x2xi32>
2818 return %1 : vector<1x1x1x1x1x2xi32>
2823 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_full_inner_size
2824 // CHECK-NEXT: vector.extract_strided_slice
2825 func.func @contiguous_extract_strided_slices_to_extract_failure_non_full_inner_size(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<1x1x2x1x1x1xi32> {
2826 %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 2, 1, 1, 1], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x2x1x1x1xi32>
2827 return %1 : vector<1x1x2x1x1x1xi32>