1 // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(canonicalize)' -split-input-file | FileCheck %s
3 // CHECK-LABEL: func @test_subi_zero
4 func @test_subi_zero(%arg0: i32) -> i32 {
5 // CHECK-NEXT: %c0_i32 = constant 0 : i32
6 // CHECK-NEXT: return %c0
7 %y = subi %arg0, %arg0 : i32
11 // CHECK-LABEL: func @test_subi_zero_vector
12 func @test_subi_zero_vector(%arg0: vector<4xi32>) -> vector<4xi32> {
13 //CHECK-NEXT: %cst = constant dense<0> : vector<4xi32>
14 %y = subi %arg0, %arg0 : vector<4xi32>
15 // CHECK-NEXT: return %cst
16 return %y: vector<4xi32>
19 // CHECK-LABEL: func @test_subi_zero_tensor
20 func @test_subi_zero_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
21 //CHECK-NEXT: %cst = constant dense<0> : tensor<4x5xi32>
22 %y = subi %arg0, %arg0 : tensor<4x5xi32>
23 // CHECK-NEXT: return %cst
24 return %y: tensor<4x5xi32>
27 // CHECK-LABEL: func @dim
28 func @dim(%arg0: tensor<8x4xf32>) -> index {
30 // CHECK: %c4 = constant 4 : index
31 %c1 = constant 1 : index
32 %0 = dim %arg0, %c1 : tensor<8x4xf32>
34 // CHECK-NEXT: return %c4
38 // CHECK-LABEL: func @test_commutative
39 func @test_commutative(%arg0: i32) -> (i32, i32) {
40 // CHECK: %c42_i32 = constant 42 : i32
41 %c42_i32 = constant 42 : i32
42 // CHECK-NEXT: %0 = addi %arg0, %c42_i32 : i32
43 %y = addi %c42_i32, %arg0 : i32
45 // This should not be swapped.
46 // CHECK-NEXT: %1 = subi %c42_i32, %arg0 : i32
47 %z = subi %c42_i32, %arg0 : i32
49 // CHECK-NEXT: return %0, %1
50 return %y, %z: i32, i32
53 // CHECK-LABEL: func @trivial_dce
54 func @trivial_dce(%arg0: tensor<8x4xf32>) {
55 %c1 = constant 1 : index
56 %0 = dim %arg0, %c1 : tensor<8x4xf32>
61 // CHECK-LABEL: func @load_dce
62 func @load_dce(%arg0: index) {
63 %c4 = constant 4 : index
64 %a = alloc(%c4) : memref<?xf32>
65 %2 = load %a[%arg0] : memref<?xf32>
66 dealloc %a: memref<?xf32>
71 // CHECK-LABEL: func @addi_zero
72 func @addi_zero(%arg0: i32) -> i32 {
73 // CHECK-NEXT: return %arg0
74 %c0_i32 = constant 0 : i32
75 %y = addi %c0_i32, %arg0 : i32
79 // CHECK-LABEL: func @addi_zero_index
80 func @addi_zero_index(%arg0: index) -> index {
81 // CHECK-NEXT: return %arg0
82 %c0_index = constant 0 : index
83 %y = addi %c0_index, %arg0 : index
88 // CHECK-LABEL: func @addi_zero_vector
89 func @addi_zero_vector(%arg0: vector<4 x i32>) -> vector<4 x i32> {
90 // CHECK-NEXT: return %arg0
91 %c0_v4i32 = constant dense<0> : vector<4 x i32>
92 %y = addi %c0_v4i32, %arg0 : vector<4 x i32>
93 return %y: vector<4 x i32>
96 // CHECK-LABEL: func @addi_zero_tensor
97 func @addi_zero_tensor(%arg0: tensor<4 x 5 x i32>) -> tensor<4 x 5 x i32> {
98 // CHECK-NEXT: return %arg0
99 %c0_t45i32 = constant dense<0> : tensor<4 x 5 x i32>
100 %y = addi %arg0, %c0_t45i32 : tensor<4 x 5 x i32>
101 return %y: tensor<4 x 5 x i32>
104 // CHECK-LABEL: func @muli_zero
105 func @muli_zero(%arg0: i32) -> i32 {
106 // CHECK-NEXT: %c0_i32 = constant 0 : i32
107 %c0_i32 = constant 0 : i32
109 %y = muli %c0_i32, %arg0 : i32
111 // CHECK-NEXT: return %c0_i32
115 // CHECK-LABEL: func @muli_zero_index
116 func @muli_zero_index(%arg0: index) -> index {
117 // CHECK-NEXT: %[[CST:.*]] = constant 0 : index
118 %c0_index = constant 0 : index
120 %y = muli %c0_index, %arg0 : index
122 // CHECK-NEXT: return %[[CST]]
126 // CHECK-LABEL: func @muli_zero_vector
127 func @muli_zero_vector(%arg0: vector<4 x i32>) -> vector<4 x i32> {
128 // CHECK-NEXT: %cst = constant dense<0> : vector<4xi32>
129 %cst = constant dense<0> : vector<4 x i32>
131 %y = muli %cst, %arg0 : vector<4 x i32>
133 // CHECK-NEXT: return %cst
134 return %y: vector<4 x i32>
137 // CHECK-LABEL: func @muli_zero_tensor
138 func @muli_zero_tensor(%arg0: tensor<4 x 5 x i32>) -> tensor<4 x 5 x i32> {
139 // CHECK-NEXT: %cst = constant dense<0> : tensor<4x5xi32>
140 %cst = constant dense<0> : tensor<4 x 5 x i32>
142 %y = muli %arg0, %cst : tensor<4 x 5 x i32>
144 // CHECK-NEXT: return %cst
145 return %y: tensor<4 x 5 x i32>
148 // CHECK-LABEL: func @muli_one
149 func @muli_one(%arg0: i32) -> i32 {
150 // CHECK-NEXT: return %arg0
151 %c0_i32 = constant 1 : i32
152 %y = muli %c0_i32, %arg0 : i32
156 // CHECK-LABEL: func @muli_one_index
157 func @muli_one_index(%arg0: index) -> index {
158 // CHECK-NEXT: return %arg0
159 %c0_index = constant 1 : index
160 %y = muli %c0_index, %arg0 : index
164 // CHECK-LABEL: func @muli_one_vector
165 func @muli_one_vector(%arg0: vector<4 x i32>) -> vector<4 x i32> {
166 // CHECK-NEXT: return %arg0
167 %c1_v4i32 = constant dense<1> : vector<4 x i32>
168 %y = muli %c1_v4i32, %arg0 : vector<4 x i32>
169 return %y: vector<4 x i32>
172 // CHECK-LABEL: func @muli_one_tensor
173 func @muli_one_tensor(%arg0: tensor<4 x 5 x i32>) -> tensor<4 x 5 x i32> {
174 // CHECK-NEXT: return %arg0
175 %c1_t45i32 = constant dense<1> : tensor<4 x 5 x i32>
176 %y = muli %arg0, %c1_t45i32 : tensor<4 x 5 x i32>
177 return %y: tensor<4 x 5 x i32>
180 //CHECK-LABEL: func @and_self
181 func @and_self(%arg0: i32) -> i32 {
182 //CHECK-NEXT: return %arg0
183 %1 = and %arg0, %arg0 : i32
187 //CHECK-LABEL: func @and_self_vector
188 func @and_self_vector(%arg0: vector<4xi32>) -> vector<4xi32> {
189 //CHECK-NEXT: return %arg0
190 %1 = and %arg0, %arg0 : vector<4xi32>
191 return %1 : vector<4xi32>
194 //CHECK-LABEL: func @and_self_tensor
195 func @and_self_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
196 //CHECK-NEXT: return %arg0
197 %1 = and %arg0, %arg0 : tensor<4x5xi32>
198 return %1 : tensor<4x5xi32>
201 //CHECK-LABEL: func @and_zero
202 func @and_zero(%arg0: i32) -> i32 {
203 // CHECK-NEXT: %c0_i32 = constant 0 : i32
204 %c0_i32 = constant 0 : i32
205 // CHECK-NEXT: return %c0_i32
206 %1 = and %arg0, %c0_i32 : i32
210 //CHECK-LABEL: func @and_zero_index
211 func @and_zero_index(%arg0: index) -> index {
212 // CHECK-NEXT: %[[CST:.*]] = constant 0 : index
213 %c0_index = constant 0 : index
214 // CHECK-NEXT: return %[[CST]]
215 %1 = and %arg0, %c0_index : index
219 //CHECK-LABEL: func @and_zero_vector
220 func @and_zero_vector(%arg0: vector<4xi32>) -> vector<4xi32> {
221 // CHECK-NEXT: %cst = constant dense<0> : vector<4xi32>
222 %cst = constant dense<0> : vector<4xi32>
223 // CHECK-NEXT: return %cst
224 %1 = and %arg0, %cst : vector<4xi32>
225 return %1 : vector<4xi32>
228 //CHECK-LABEL: func @and_zero_tensor
229 func @and_zero_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
230 // CHECK-NEXT: %cst = constant dense<0> : tensor<4x5xi32>
231 %cst = constant dense<0> : tensor<4x5xi32>
232 // CHECK-NEXT: return %cst
233 %1 = and %arg0, %cst : tensor<4x5xi32>
234 return %1 : tensor<4x5xi32>
237 //CHECK-LABEL: func @or_self
238 func @or_self(%arg0: i32) -> i32 {
239 //CHECK-NEXT: return %arg0
240 %1 = or %arg0, %arg0 : i32
244 //CHECK-LABEL: func @or_self_vector
245 func @or_self_vector(%arg0: vector<4xi32>) -> vector<4xi32> {
246 //CHECK-NEXT: return %arg0
247 %1 = or %arg0, %arg0 : vector<4xi32>
248 return %1 : vector<4xi32>
251 //CHECK-LABEL: func @or_self_tensor
252 func @or_self_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
253 //CHECK-NEXT: return %arg0
254 %1 = or %arg0, %arg0 : tensor<4x5xi32>
255 return %1 : tensor<4x5xi32>
258 //CHECK-LABEL: func @or_zero
259 func @or_zero(%arg0: i32) -> i32 {
260 %c0_i32 = constant 0 : i32
261 // CHECK-NEXT: return %arg0
262 %1 = or %arg0, %c0_i32 : i32
266 //CHECK-LABEL: func @or_zero_index
267 func @or_zero_index(%arg0: index) -> index {
268 %c0_index = constant 0 : index
269 // CHECK-NEXT: return %arg0
270 %1 = or %arg0, %c0_index : index
274 //CHECK-LABEL: func @or_zero_vector
275 func @or_zero_vector(%arg0: vector<4xi32>) -> vector<4xi32> {
276 // CHECK-NEXT: return %arg0
277 %cst = constant dense<0> : vector<4xi32>
278 %1 = or %arg0, %cst : vector<4xi32>
279 return %1 : vector<4xi32>
282 //CHECK-LABEL: func @or_zero_tensor
283 func @or_zero_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
284 // CHECK-NEXT: return %arg0
285 %cst = constant dense<0> : tensor<4x5xi32>
286 %1 = or %arg0, %cst : tensor<4x5xi32>
287 return %1 : tensor<4x5xi32>
290 //CHECK-LABEL: func @xor_self
291 func @xor_self(%arg0: i32) -> i32 {
292 //CHECK-NEXT: %c0_i32 = constant 0
293 %1 = xor %arg0, %arg0 : i32
294 //CHECK-NEXT: return %c0_i32
298 //CHECK-LABEL: func @xor_self_vector
299 func @xor_self_vector(%arg0: vector<4xi32>) -> vector<4xi32> {
300 //CHECK-NEXT: %cst = constant dense<0> : vector<4xi32>
301 %1 = xor %arg0, %arg0 : vector<4xi32>
302 //CHECK-NEXT: return %cst
303 return %1 : vector<4xi32>
306 //CHECK-LABEL: func @xor_self_tensor
307 func @xor_self_tensor(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
308 //CHECK-NEXT: %cst = constant dense<0> : tensor<4x5xi32>
309 %1 = xor %arg0, %arg0 : tensor<4x5xi32>
310 //CHECK-NEXT: return %cst
311 return %1 : tensor<4x5xi32>
314 // CHECK-LABEL: func @memref_cast_folding
315 func @memref_cast_folding(%arg0: memref<4 x f32>, %arg1: f32) -> (f32, f32) {
316 %0 = memref_cast %arg0 : memref<4xf32> to memref<?xf32>
317 // CHECK-NEXT: %c0 = constant 0 : index
318 %c0 = constant 0 : index
319 %dim = dim %0, %c0 : memref<? x f32>
321 // CHECK-NEXT: affine.load %arg0[3]
322 %1 = affine.load %0[%dim - 1] : memref<?xf32>
324 // CHECK-NEXT: store %arg1, %arg0[%c0] : memref<4xf32>
325 store %arg1, %0[%c0] : memref<?xf32>
327 // CHECK-NEXT: %{{.*}} = load %arg0[%c0] : memref<4xf32>
328 %2 = load %0[%c0] : memref<?xf32>
330 // CHECK-NEXT: dealloc %arg0 : memref<4xf32>
331 dealloc %0: memref<?xf32>
333 // CHECK-NEXT: return %{{.*}}
334 return %1, %2 : f32, f32
337 // CHECK-LABEL: @fold_memref_cast_in_memref_cast
338 // CHECK-SAME: (%[[ARG0:.*]]: memref<42x42xf64>)
339 func @fold_memref_cast_in_memref_cast(%0: memref<42x42xf64>) {
340 // CHECK: %[[folded:.*]] = memref_cast %[[ARG0]] : memref<42x42xf64> to memref<?x?xf64>
341 %4 = memref_cast %0 : memref<42x42xf64> to memref<?x42xf64>
342 // CHECK-NOT: memref_cast
343 %5 = memref_cast %4 : memref<?x42xf64> to memref<?x?xf64>
344 // CHECK: "test.user"(%[[folded]])
345 "test.user"(%5) : (memref<?x?xf64>) -> ()
349 // CHECK-LABEL: @fold_memref_cast_chain
350 // CHECK-SAME: (%[[ARG0:.*]]: memref<42x42xf64>)
351 func @fold_memref_cast_chain(%0: memref<42x42xf64>) {
352 // CHECK-NOT: memref_cast
353 %4 = memref_cast %0 : memref<42x42xf64> to memref<?x42xf64>
354 %5 = memref_cast %4 : memref<?x42xf64> to memref<42x42xf64>
355 // CHECK: "test.user"(%[[ARG0]])
356 "test.user"(%5) : (memref<42x42xf64>) -> ()
360 // CHECK-LABEL: func @alloc_const_fold
361 func @alloc_const_fold() -> memref<?xf32> {
362 // CHECK-NEXT: %0 = alloc() : memref<4xf32>
363 %c4 = constant 4 : index
364 %a = alloc(%c4) : memref<?xf32>
366 // CHECK-NEXT: %1 = memref_cast %0 : memref<4xf32> to memref<?xf32>
367 // CHECK-NEXT: return %1 : memref<?xf32>
368 return %a : memref<?xf32>
371 // CHECK-LABEL: func @dead_alloc_fold
372 func @dead_alloc_fold() {
373 // CHECK-NEXT: return
374 %c4 = constant 4 : index
375 %a = alloc(%c4) : memref<?xf32>
379 // CHECK-LABEL: func @dead_dealloc_fold
380 func @dead_dealloc_fold() {
381 // CHECK-NEXT: return
382 %a = alloc() : memref<4xf32>
383 dealloc %a: memref<4xf32>
387 // CHECK-LABEL: func @dead_dealloc_fold_multi_use
388 func @dead_dealloc_fold_multi_use(%cond : i1) {
389 // CHECK-NEXT: return
390 %a = alloc() : memref<4xf32>
391 cond_br %cond, ^bb1, ^bb2
394 dealloc %a: memref<4xf32>
398 dealloc %a: memref<4xf32>
402 // CHECK-LABEL: func @dead_block_elim
403 func @dead_block_elim() {
417 // CHECK-LABEL: func @dyn_shape_fold(%arg0: index, %arg1: index)
418 func @dyn_shape_fold(%L : index, %M : index) -> (memref<? x ? x i32>, memref<? x ? x f32>) {
419 // CHECK: %c0 = constant 0 : index
420 %zero = constant 0 : index
421 // The constants below disappear after they propagate into shapes.
422 %nine = constant 9 : index
423 %N = constant 1024 : index
424 %K = constant 512 : index
426 // CHECK-NEXT: alloc(%arg0) : memref<?x1024xf32>
427 %a = alloc(%L, %N) : memref<? x ? x f32>
429 // CHECK-NEXT: alloc(%arg1) : memref<4x1024x8x512x?xf32>
430 %b = alloc(%N, %K, %M) : memref<4 x ? x 8 x ? x ? x f32>
432 // CHECK-NEXT: alloc() : memref<512x1024xi32>
433 %c = alloc(%K, %N) : memref<? x ? x i32>
435 // CHECK: alloc() : memref<9x9xf32>
436 %d = alloc(%nine, %nine) : memref<? x ? x f32>
438 // CHECK: alloca(%arg1) : memref<4x1024x8x512x?xf32>
439 %e = alloca(%N, %K, %M) : memref<4 x ? x 8 x ? x ? x f32>
442 affine.for %i = 0 to %L {
443 // CHECK-NEXT: affine.for
444 affine.for %j = 0 to 10 {
445 // CHECK-NEXT: load %0[%arg2, %arg3] : memref<?x1024xf32>
446 // CHECK-NEXT: store %{{.*}}, %1[%c0, %c0, %arg2, %arg3, %c0] : memref<4x1024x8x512x?xf32>
447 %v = load %a[%i, %j] : memref<?x?xf32>
448 store %v, %b[%zero, %zero, %i, %j, %zero] : memref<4x?x8x?x?xf32>
452 return %c, %d : memref<? x ? x i32>, memref<? x ? x f32>
455 #map1 = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
456 #map2 = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s2 + d1 * s1 + d2 + s0)>
457 #map3 = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
459 // CHECK-LABEL: func @dim_op_fold(
460 // CHECK-SAME: %[[ARG0:[a-z0-9]*]]: index
461 // CHECK-SAME: %[[ARG1:[a-z0-9]*]]: index
462 // CHECK-SAME: %[[ARG2:[a-z0-9]*]]: index
463 // CHECK-SAME: %[[BUF:[a-z0-9]*]]: memref<?xi8>
464 func @dim_op_fold(%arg0: index, %arg1: index, %arg2: index, %BUF: memref<?xi8>, %M : index, %N : index, %K : index) {
465 // CHECK-SAME: [[M:arg[0-9]+]]: index
466 // CHECK-SAME: [[N:arg[0-9]+]]: index
467 // CHECK-SAME: [[K:arg[0-9]+]]: index
468 %c0 = constant 0 : index
469 %c1 = constant 1 : index
470 %c2 = constant 2 : index
471 %0 = alloc(%arg0, %arg1) : memref<?x?xf32>
472 %1 = alloc(%arg1, %arg2) : memref<?x8x?xf32>
473 %2 = dim %1, %c2 : memref<?x8x?xf32>
474 affine.for %arg3 = 0 to %2 {
475 %3 = alloc(%arg0) : memref<?xi8>
476 %ub = dim %3, %c0 : memref<?xi8>
477 affine.for %arg4 = 0 to %ub {
478 %s = dim %0, %c0 : memref<?x?xf32>
479 %v = std.view %3[%c0][%arg4, %s] : memref<?xi8> to memref<?x?xf32>
480 %sv = subview %0[%c0, %c0][%s,%arg4][%c1,%c1] : memref<?x?xf32> to memref<?x?xf32, #map1>
481 %l = dim %v, %c1 : memref<?x?xf32>
482 %u = dim %sv, %c0 : memref<?x?xf32, #map1>
483 affine.for %arg5 = %l to %u {
486 %sv2 = subview %0[0, 0][17, %arg4][1, 1] : memref<?x?xf32> to memref<17x?xf32, #map3>
487 %l2 = dim %v, %c1 : memref<?x?xf32>
488 %u2 = dim %sv2, %c1 : memref<17x?xf32, #map3>
489 scf.for %arg5 = %l2 to %u2 step %c1 {
494 // CHECK: affine.for %[[I:.*]] = 0 to %[[ARG2]] {
495 // CHECK-NEXT: affine.for %[[J:.*]] = 0 to %[[ARG0]] {
496 // CHECK-NEXT: affine.for %[[K:.*]] = %[[ARG0]] to %[[ARG0]] {
497 // CHECK-NEXT: "foo"() : () -> ()
499 // CHECK-NEXT: scf.for %[[KK:.*]] = %[[ARG0]] to %[[J]] step %{{.*}} {
500 // CHECK-NEXT: "foo"() : () -> ()
505 %A = view %BUF[%c0][%M, %K] : memref<?xi8> to memref<?x?xf32>
506 %B = view %BUF[%c0][%K, %N] : memref<?xi8> to memref<?x?xf32>
507 %C = view %BUF[%c0][%M, %N] : memref<?xi8> to memref<?x?xf32>
509 %M_ = dim %A, %c0 : memref<?x?xf32>
510 %K_ = dim %A, %c1 : memref<?x?xf32>
511 %N_ = dim %C, %c1 : memref<?x?xf32>
512 scf.for %i = %c0 to %M_ step %c1 {
513 scf.for %j = %c0 to %N_ step %c1 {
514 scf.for %k = %c0 to %K_ step %c1 {
518 // CHECK-NEXT: return
522 // CHECK-LABEL: func @merge_constants
523 func @merge_constants() -> (index, index) {
524 // CHECK-NEXT: %c42 = constant 42 : index
525 %0 = constant 42 : index
526 %1 = constant 42 : index
527 // CHECK-NEXT: return %c42, %c42
528 return %0, %1: index, index
531 // CHECK-LABEL: func @hoist_constant
532 func @hoist_constant(%arg0: memref<8xi32>) {
533 // CHECK-NEXT: %c42_i32 = constant 42 : i32
534 // CHECK-NEXT: affine.for %arg1 = 0 to 8 {
535 affine.for %arg1 = 0 to 8 {
536 // CHECK-NEXT: store %c42_i32, %arg0[%arg1]
537 %c42_i32 = constant 42 : i32
538 store %c42_i32, %arg0[%arg1] : memref<8xi32>
543 // CHECK-LABEL: func @const_fold_propagate
544 func @const_fold_propagate() -> memref<?x?xf32> {
545 %VT_i = constant 512 : index
547 %VT_i_s = affine.apply affine_map<(d0) -> (d0 floordiv 8)> (%VT_i)
548 %VT_k_l = affine.apply affine_map<(d0) -> (d0 floordiv 16)> (%VT_i)
550 // CHECK: = alloc() : memref<64x32xf32>
551 %Av = alloc(%VT_i_s, %VT_k_l) : memref<?x?xf32>
552 return %Av : memref<?x?xf32>
555 // CHECK-LABEL: func @indirect_call_folding
556 func @indirect_target() {
560 func @indirect_call_folding() {
561 // CHECK-NEXT: call @indirect_target() : () -> ()
562 // CHECK-NEXT: return
563 %indirect_fn = constant @indirect_target : () -> ()
564 call_indirect %indirect_fn() : () -> ()
569 // IMPORTANT NOTE: the operations in this test are exactly those produced by
570 // lowering affine.apply affine_map<(i) -> (i mod 42)> to standard operations. Please only
571 // change these operations together with the affine lowering pass tests.
573 // CHECK-LABEL: @lowered_affine_mod
574 func @lowered_affine_mod() -> (index, index) {
575 // CHECK-NEXT: {{.*}} = constant 41 : index
576 %c-43 = constant -43 : index
577 %c42 = constant 42 : index
578 %0 = remi_signed %c-43, %c42 : index
579 %c0 = constant 0 : index
580 %1 = cmpi "slt", %0, %c0 : index
581 %2 = addi %0, %c42 : index
582 %3 = select %1, %2, %0 : index
583 // CHECK-NEXT: {{.*}} = constant 1 : index
584 %c43 = constant 43 : index
585 %c42_0 = constant 42 : index
586 %4 = remi_signed %c43, %c42_0 : index
587 %c0_1 = constant 0 : index
588 %5 = cmpi "slt", %4, %c0_1 : index
589 %6 = addi %4, %c42_0 : index
590 %7 = select %5, %6, %4 : index
591 return %3, %7 : index, index
595 // IMPORTANT NOTE: the operations in this test are exactly those produced by
596 // lowering affine.apply affine_map<(i) -> (i floordiv 42)> to standard operations. Please only
597 // change these operations together with the affine lowering pass tests.
599 // CHECK-LABEL: func @lowered_affine_floordiv
600 func @lowered_affine_floordiv() -> (index, index) {
601 // CHECK-NEXT: %c-2 = constant -2 : index
602 %c-43 = constant -43 : index
603 %c42 = constant 42 : index
604 %c0 = constant 0 : index
605 %c-1 = constant -1 : index
606 %0 = cmpi "slt", %c-43, %c0 : index
607 %1 = subi %c-1, %c-43 : index
608 %2 = select %0, %1, %c-43 : index
609 %3 = divi_signed %2, %c42 : index
610 %4 = subi %c-1, %3 : index
611 %5 = select %0, %4, %3 : index
612 // CHECK-NEXT: %c1 = constant 1 : index
613 %c43 = constant 43 : index
614 %c42_0 = constant 42 : index
615 %c0_1 = constant 0 : index
616 %c-1_2 = constant -1 : index
617 %6 = cmpi "slt", %c43, %c0_1 : index
618 %7 = subi %c-1_2, %c43 : index
619 %8 = select %6, %7, %c43 : index
620 %9 = divi_signed %8, %c42_0 : index
621 %10 = subi %c-1_2, %9 : index
622 %11 = select %6, %10, %9 : index
623 return %5, %11 : index, index
627 // IMPORTANT NOTE: the operations in this test are exactly those produced by
628 // lowering affine.apply affine_map<(i) -> (i ceildiv 42)> to standard operations. Please only
629 // change these operations together with the affine lowering pass tests.
631 // CHECK-LABEL: func @lowered_affine_ceildiv
632 func @lowered_affine_ceildiv() -> (index, index) {
633 // CHECK-NEXT: %c-1 = constant -1 : index
634 %c-43 = constant -43 : index
635 %c42 = constant 42 : index
636 %c0 = constant 0 : index
637 %c1 = constant 1 : index
638 %0 = cmpi "sle", %c-43, %c0 : index
639 %1 = subi %c0, %c-43 : index
640 %2 = subi %c-43, %c1 : index
641 %3 = select %0, %1, %2 : index
642 %4 = divi_signed %3, %c42 : index
643 %5 = subi %c0, %4 : index
644 %6 = addi %4, %c1 : index
645 %7 = select %0, %5, %6 : index
646 // CHECK-NEXT: %c2 = constant 2 : index
647 %c43 = constant 43 : index
648 %c42_0 = constant 42 : index
649 %c0_1 = constant 0 : index
650 %c1_2 = constant 1 : index
651 %8 = cmpi "sle", %c43, %c0_1 : index
652 %9 = subi %c0_1, %c43 : index
653 %10 = subi %c43, %c1_2 : index
654 %11 = select %8, %9, %10 : index
655 %12 = divi_signed %11, %c42_0 : index
656 %13 = subi %c0_1, %12 : index
657 %14 = addi %12, %c1_2 : index
658 %15 = select %8, %13, %14 : index
659 return %7, %15 : index, index
662 // Checks that NOP casts are removed.
663 // CHECK-LABEL: cast_values
664 func @cast_values(%arg0: tensor<*xi32>, %arg1: memref<?xi32>) -> (tensor<2xi32>, memref<2xi32>) {
667 %0 = tensor_cast %arg0 : tensor<*xi32> to tensor<*xi32>
668 %1 = memref_cast %arg1 : memref<?xi32> to memref<?xi32>
670 // CHECK-NEXT: %0 = tensor_cast %arg0 : tensor<*xi32> to tensor<2xi32>
671 // CHECK-NEXT: %1 = memref_cast %arg1 : memref<?xi32> to memref<2xi32>
672 %2 = tensor_cast %0 : tensor<*xi32> to tensor<2xi32>
673 %3 = memref_cast %1 : memref<?xi32> to memref<2xi32>
676 %4 = tensor_cast %2 : tensor<2xi32> to tensor<2xi32>
677 %5 = memref_cast %3 : memref<2xi32> to memref<2xi32>
679 // CHECK-NEXT: return %0, %1 : tensor<2xi32>, memref<2xi32>
680 return %4, %5 : tensor<2xi32>, memref<2xi32>
685 // CHECK-LABEL: func @view
686 func @view(%arg0 : index) -> (f32, f32, f32, f32) {
687 // CHECK: %[[C15:.*]] = constant 15 : index
688 // CHECK: %[[ALLOC_MEM:.*]] = alloc() : memref<2048xi8>
689 %0 = alloc() : memref<2048xi8>
690 %c0 = constant 0 : index
691 %c7 = constant 7 : index
692 %c11 = constant 11 : index
693 %c15 = constant 15 : index
695 // Test: fold constant sizes.
696 // CHECK: std.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<7x11xf32>
697 %1 = view %0[%c15][%c7, %c11] : memref<2048xi8> to memref<?x?xf32>
698 %r0 = load %1[%c0, %c0] : memref<?x?xf32>
700 // Test: fold one constant size.
701 // CHECK: std.view %[[ALLOC_MEM]][%[[C15]]][%arg0, %arg0] : memref<2048xi8> to memref<?x?x7xf32>
702 %2 = view %0[%c15][%arg0, %arg0, %c7] : memref<2048xi8> to memref<?x?x?xf32>
703 %r1 = load %2[%c0, %c0, %c0] : memref<?x?x?xf32>
705 // Test: preserve an existing static size.
706 // CHECK: std.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<7x4xf32>
707 %3 = view %0[%c15][%c7] : memref<2048xi8> to memref<?x4xf32>
708 %r2 = load %3[%c0, %c0] : memref<?x4xf32>
710 // Test: folding static alloc and memref_cast into a view.
711 // CHECK: std.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<15x7xf32>
712 %4 = memref_cast %0 : memref<2048xi8> to memref<?xi8>
713 %5 = view %4[%c15][%c15, %c7] : memref<?xi8> to memref<?x?xf32>
714 %r3 = load %5[%c0, %c0] : memref<?x?xf32>
715 return %r0, %r1, %r2, %r3 : f32, f32, f32, f32
720 // CHECK-DAG: #[[$BASE_MAP0:map[0-9]+]] = affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>
721 // CHECK-DAG: #[[$SUBVIEW_MAP0:map[0-9]+]] = affine_map<(d0, d1, d2)[s0] -> (d0 * 64 + s0 + d1 * 4 + d2)>
722 // CHECK-DAG: #[[$SUBVIEW_MAP1:map[0-9]+]] = affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2 + 79)>
723 // CHECK-DAG: #[[$SUBVIEW_MAP2:map[0-9]+]] = affine_map<(d0, d1, d2) -> (d0 * 128 + d1 * 28 + d2 * 11)>
724 // CHECK-DAG: #[[$SUBVIEW_MAP3:map[0-9]+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
725 // CHECK-DAG: #[[$SUBVIEW_MAP4:map[0-9]+]] = affine_map<(d0, d1, d2)[s0] -> (d0 * 128 + s0 + d1 * 28 + d2 * 11)>
726 // CHECK-DAG: #[[$SUBVIEW_MAP5:map[0-9]+]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s0 + d1 * s1 + d2 * s2 + 79)>
727 // CHECK-DAG: #[[$SUBVIEW_MAP6:map[0-9]+]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2 * 2)>
728 // CHECK-DAG: #[[$SUBVIEW_MAP7:map[0-9]+]] = affine_map<(d0, d1)[s0] -> (d0 * 4 + s0 + d1)>
729 // CHECK-DAG: #[[$SUBVIEW_MAP8:map[0-9]+]] = affine_map<(d0, d1) -> (d0 * 4 + d1 + 12)>
732 // CHECK-LABEL: func @subview
733 // CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
734 func @subview(%arg0 : index, %arg1 : index) -> (index, index) {
735 // CHECK: %[[C0:.*]] = constant 0 : index
736 %c0 = constant 0 : index
737 // CHECK-NOT: constant 1 : index
738 %c1 = constant 1 : index
739 // CHECK-NOT: constant 2 : index
740 %c2 = constant 2 : index
741 // Folded but reappears after subview folding into dim.
742 // CHECK: %[[C7:.*]] = constant 7 : index
743 %c7 = constant 7 : index
744 // Folded but reappears after subview folding into dim.
745 // CHECK: %[[C11:.*]] = constant 11 : index
746 %c11 = constant 11 : index
747 // CHECK-NOT: constant 15 : index
748 %c15 = constant 15 : index
750 // CHECK: %[[ALLOC0:.*]] = alloc()
751 %0 = alloc() : memref<8x16x4xf32, offset : 0, strides : [64, 4, 1]>
753 // Test: subview with constant base memref and constant operands is folded.
754 // Note that the subview uses the base memref's layout map because it used
755 // zero offset and unit stride arguments.
756 // CHECK: subview %[[ALLOC0]][0, 0, 0] [7, 11, 2] [1, 1, 1] :
757 // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]>
758 // CHECK-SAME: to memref<7x11x2xf32, #[[$BASE_MAP0]]>
759 %1 = subview %0[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1]
760 : memref<8x16x4xf32, offset : 0, strides : [64, 4, 1]> to
761 memref<?x?x?xf32, offset : ?, strides : [?, ?, ?]>
762 %v0 = load %1[%c0, %c0, %c0] : memref<?x?x?xf32, offset : ?, strides : [?, ?, ?]>
764 // Test: subview with one dynamic operand can also be folded.
765 // CHECK: subview %[[ALLOC0]][0, %[[ARG0]], 0] [7, 11, 15] [1, 1, 1] :
766 // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]>
767 // CHECK-SAME: to memref<7x11x15xf32, #[[$SUBVIEW_MAP0]]>
768 %2 = subview %0[%c0, %arg0, %c0] [%c7, %c11, %c15] [%c1, %c1, %c1]
769 : memref<8x16x4xf32, offset : 0, strides : [64, 4, 1]> to
770 memref<?x?x?xf32, offset : ?, strides : [?, ?, ?]>
771 store %v0, %2[%c0, %c0, %c0] : memref<?x?x?xf32, offset : ?, strides : [?, ?, ?]>
773 // CHECK: %[[ALLOC1:.*]] = alloc(%[[ARG0]])
774 %3 = alloc(%arg0) : memref<?x16x4xf32, offset : 0, strides : [64, 4, 1]>
775 // Test: subview with constant operands but dynamic base memref is folded as long as the strides and offset of the base memref are static.
776 // CHECK: subview %[[ALLOC1]][0, 0, 0] [7, 11, 15] [1, 1, 1] :
777 // CHECK-SAME: memref<?x16x4xf32, #[[$BASE_MAP0]]>
778 // CHECK-SAME: to memref<7x11x15xf32, #[[$BASE_MAP0]]>
779 %4 = subview %3[%c0, %c0, %c0] [%c7, %c11, %c15] [%c1, %c1, %c1]
780 : memref<?x16x4xf32, offset : 0, strides : [64, 4, 1]> to
781 memref<?x?x?xf32, offset : ?, strides : [?, ?, ?]>
782 store %v0, %4[%c0, %c0, %c0] : memref<?x?x?xf32, offset : ?, strides : [?, ?, ?]>
784 // Test: subview offset operands are folded correctly w.r.t. base strides.
785 // CHECK: subview %[[ALLOC0]][1, 2, 7] [7, 11, 2] [1, 1, 1] :
786 // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]> to
787 // CHECK-SAME: memref<7x11x2xf32, #[[$SUBVIEW_MAP1]]>
788 %5 = subview %0[%c1, %c2, %c7] [%c7, %c11, %c2] [%c1, %c1, %c1]
789 : memref<8x16x4xf32, offset : 0, strides : [64, 4, 1]> to
790 memref<?x?x?xf32, offset : ?, strides : [?, ?, ?]>
791 store %v0, %5[%c0, %c0, %c0] : memref<?x?x?xf32, offset : ?, strides : [?, ?, ?]>
793 // Test: subview stride operands are folded correctly w.r.t. base strides.
794 // CHECK: subview %[[ALLOC0]][0, 0, 0] [7, 11, 2] [2, 7, 11] :
795 // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]>
796 // CHECK-SAME: to memref<7x11x2xf32, #[[$SUBVIEW_MAP2]]>
797 %6 = subview %0[%c0, %c0, %c0] [%c7, %c11, %c2] [%c2, %c7, %c11]
798 : memref<8x16x4xf32, offset : 0, strides : [64, 4, 1]> to
799 memref<?x?x?xf32, offset : ?, strides : [?, ?, ?]>
800 store %v0, %6[%c0, %c0, %c0] : memref<?x?x?xf32, offset : ?, strides : [?, ?, ?]>
802 // Test: subview shapes are folded, but offsets and strides are not, even if the base memref is static
803 // CHECK: subview %[[ALLOC0]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [7, 11, 2] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] :
804 // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]> to
805 // CHECK-SAME: memref<7x11x2xf32, #[[$SUBVIEW_MAP3]]>
806 %10 = subview %0[%arg0, %arg0, %arg0] [%c7, %c11, %c2] [%arg1, %arg1, %arg1] :
807 memref<8x16x4xf32, offset:0, strides:[64, 4, 1]> to
808 memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
809 store %v0, %10[%arg1, %arg1, %arg1] :
810 memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
812 // Test: subview strides are folded, but offsets and shape are not even if base memref is static
813 // CHECK: subview %[[ALLOC0]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [2, 7, 11] :
814 // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]> to
815 // CHECK-SAME: memref<?x?x?xf32, #[[$SUBVIEW_MAP4]]
816 %11 = subview %0[%arg0, %arg0, %arg0] [%arg1, %arg1, %arg1] [%c2, %c7, %c11] :
817 memref<8x16x4xf32, offset:0, strides:[64, 4, 1]> to
818 memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
819 store %v0, %11[%arg0, %arg0, %arg0] :
820 memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
822 // Test: subview offsets are folded, but strides and shape are not even if base memref is static
823 // CHECK: subview %[[ALLOC0]][1, 2, 7] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [%[[ARG0]], %[[ARG0]], %[[ARG0]]] :
824 // CHECK-SAME: memref<8x16x4xf32, #[[$BASE_MAP0]]> to
825 // CHECK-SAME: memref<?x?x?xf32, #[[$SUBVIEW_MAP5]]
826 %13 = subview %0[%c1, %c2, %c7] [%arg1, %arg1, %arg1] [%arg0, %arg0, %arg0] :
827 memref<8x16x4xf32, offset:0, strides:[64, 4, 1]> to
828 memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
829 store %v0, %13[%arg1, %arg1, %arg1] :
830 memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
832 // CHECK: %[[ALLOC2:.*]] = alloc(%[[ARG0]], %[[ARG0]], %[[ARG1]])
833 %14 = alloc(%arg0, %arg0, %arg1) : memref<?x?x?xf32>
834 // Test: subview shapes are folded, even if the base memref is not static
835 // CHECK: subview %[[ALLOC2]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [7, 11, 2] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] :
836 // CHECK-SAME: memref<?x?x?xf32> to
837 // CHECK-SAME: memref<7x11x2xf32, #[[$SUBVIEW_MAP3]]>
838 %15 = subview %14[%arg0, %arg0, %arg0] [%c7, %c11, %c2] [%arg1, %arg1, %arg1] :
840 memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
841 store %v0, %15[%arg1, %arg1, %arg1] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
843 // Test: subview strides are folded; in the type, only the most minor stride is folded.
844 // CHECK: subview %[[ALLOC2]][%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] [2, 2, 2] :
845 // CHECK-SAME: memref<?x?x?xf32> to
846 // CHECK-SAME: memref<?x?x?xf32, #[[$SUBVIEW_MAP6]]
847 %16 = subview %14[%arg0, %arg0, %arg0] [%arg1, %arg1, %arg1] [%c2, %c2, %c2] :
849 memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
850 store %v0, %16[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
852 // TEST: subview offsets are folded but the type offset remains dynamic, when the base memref is not static
853 // CHECK: subview %[[ALLOC2]][1, 1, 1] [%[[ARG0]], %[[ARG0]], %[[ARG0]]] [%[[ARG1]], %[[ARG1]], %[[ARG1]]] :
854 // CHECK-SAME: memref<?x?x?xf32> to
855 // CHECK-SAME: memref<?x?x?xf32, #[[$SUBVIEW_MAP3]]
856 %17 = subview %14[%c1, %c1, %c1] [%arg0, %arg0, %arg0] [%arg1, %arg1, %arg1] :
858 memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
859 store %v0, %17[%arg0, %arg0, %arg0] : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>
861 // CHECK: %[[ALLOC3:.*]] = alloc() : memref<12x4xf32>
862 %18 = alloc() : memref<12x4xf32>
863 %c4 = constant 4 : index
865 // TEST: subview strides are maintained when sizes are folded
866 // CHECK: subview %[[ALLOC3]][%arg1, %arg1] [2, 4] [1, 1] :
867 // CHECK-SAME: memref<12x4xf32> to
868 // CHECK-SAME: memref<2x4xf32, #[[$SUBVIEW_MAP7]]>
869 %19 = subview %18[%arg1, %arg1] [%c2, %c4] [1, 1] :
871 memref<?x?xf32, offset: ?, strides:[4, 1]>
872 store %v0, %19[%arg1, %arg1] : memref<?x?xf32, offset: ?, strides:[4, 1]>
874 // TEST: subview strides and sizes are maintained when offsets are folded
875 // CHECK: subview %[[ALLOC3]][2, 4] [12, 4] [1, 1] :
876 // CHECK-SAME: memref<12x4xf32> to
877 // CHECK-SAME: memref<12x4xf32, #[[$SUBVIEW_MAP8]]>
878 %20 = subview %18[%c2, %c4] [12, 4] [1, 1] :
880 memref<12x4xf32, offset: ?, strides:[4, 1]>
881 store %v0, %20[%arg1, %arg1] : memref<12x4xf32, offset: ?, strides:[4, 1]>
883 // Test: dim on subview is rewritten to size operand.
884 %7 = dim %4, %c0 : memref<?x?x?xf32, offset : ?, strides : [?, ?, ?]>
885 %8 = dim %4, %c1 : memref<?x?x?xf32, offset : ?, strides : [?, ?, ?]>
887 // CHECK: return %[[C7]], %[[C11]]
888 return %7, %8 : index, index
// Canonicalization test: an i16 -> index -> i16 index_cast round trip is
// expected to fold away, so the CHECK line expects the function to return its
// own argument (captured as ARG_0 from the signature).
891 // CHECK-LABEL: func @index_cast
892 // CHECK-SAME: %[[ARG_0:arg[0-9]+]]: i16
893 func @index_cast(%arg0: i16) -> (i16) {
894 %11 = index_cast %arg0 : i16 to index
895 %12 = index_cast %11 : index to i16
896 // CHECK: return %[[ARG_0]] : i16
// Canonicalization test: index_cast applied to a constant is expected to fold
// into a constant of the destination type, in both directions
// (index -> i16 and i16 -> index). The CHECK lines expect only the two
// constants and the return to be matched.
900 // CHECK-LABEL: func @index_cast_fold
901 func @index_cast_fold() -> (i16, index) {
902 %c4 = constant 4 : index
903 %1 = index_cast %c4 : index to i16
904 %c4_i16 = constant 4 : i16
905 %2 = index_cast %c4_i16 : i16 to index
906 // CHECK: %[[C4_I16:.*]] = constant 4 : i16
907 // CHECK: %[[C4:.*]] = constant 4 : index
908 // CHECK: return %[[C4_I16]], %[[C4]] : i16, index
909 return %1, %2 : i16, index
// Canonicalization test built around an affine.if that uses the induction
// variable of an enclosing affine.for. The visible CHECK-NEXT lines expect the
// inner affine.for, its affine.load and the "prevent.dce" user to be kept.
// NOTE(review): going by the test name, this presumably checks that an empty
// else region of the affine.if is removed; the else region and the leading
// CHECK line are not visible in this excerpt -- confirm against the full file.
912 // CHECK-LABEL: func @remove_dead_else
913 func @remove_dead_else(%M : memref<100 x i32>) {
914 affine.for %i = 0 to 100 {
915 affine.load %M[%i] : memref<100xi32>
916 affine.if affine_set<(d0) : (d0 - 2 >= 0)>(%i) {
917 affine.for %j = 0 to 100 {
918 %1 = affine.load %M[%j] : memref<100xi32>
// The unregistered "prevent.dce" op consumes the loaded value, so the load
// has a user (presumably to keep it from being erased as dead code).
919 "prevent.dce"(%1) : (i32) -> ()
924 affine.load %M[%i] : memref<100xi32>
929 // CHECK-NEXT: affine.for
930 // CHECK-NEXT: affine.load
931 // CHECK-NEXT: "prevent.dce"
// Canonicalization test: scalar divi_signed by the constant 1 is expected to
// fold to the dividend, so the CHECK line expects the argument to be returned.
937 // CHECK-LABEL: func @divi_signed_by_one
938 // CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
939 func @divi_signed_by_one(%arg0: i32) -> (i32) {
940 %c1 = constant 1 : i32
941 %res = divi_signed %arg0, %c1 : i32
942 // CHECK: return %[[ARG]]
// Canonicalization test: scalar divi_unsigned by the constant 1 is expected to
// fold to the dividend, so the CHECK line expects the argument to be returned.
946 // CHECK-LABEL: func @divi_unsigned_by_one
947 // CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
948 func @divi_unsigned_by_one(%arg0: i32) -> (i32) {
949 %c1 = constant 1 : i32
950 %res = divi_unsigned %arg0, %c1 : i32
951 // CHECK: return %[[ARG]]
// Canonicalization test: divi_signed on tensor<4x5xi32> by a splat constant of
// 1 (dense<1>) is expected to fold to the dividend; the CHECK line expects the
// argument to be returned.
955 // CHECK-LABEL: func @tensor_divi_signed_by_one
956 // CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
957 func @tensor_divi_signed_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
958 %c1 = constant dense<1> : tensor<4x5xi32>
959 %res = divi_signed %arg0, %c1 : tensor<4x5xi32>
960 // CHECK: return %[[ARG]]
961 return %res : tensor<4x5xi32>
// Canonicalization test: divi_unsigned on tensor<4x5xi32> by a splat constant
// of 1 (dense<1>) is expected to fold to the dividend; the CHECK line expects
// the argument to be returned.
964 // CHECK-LABEL: func @tensor_divi_unsigned_by_one
965 // CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
966 func @tensor_divi_unsigned_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
967 %c1 = constant dense<1> : tensor<4x5xi32>
968 %res = divi_unsigned %arg0, %c1 : tensor<4x5xi32>
969 // CHECK: return %[[ARG]]
970 return %res : tensor<4x5xi32>
// Canonicalization test: scalar floordivi_signed by the constant 1 is expected
// to fold to the dividend, so the CHECK line expects the argument to be
// returned.
975 // CHECK-LABEL: func @floordivi_signed_by_one
976 // CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
977 func @floordivi_signed_by_one(%arg0: i32) -> (i32) {
978 %c1 = constant 1 : i32
979 %res = floordivi_signed %arg0, %c1 : i32
980 // CHECK: return %[[ARG]]
// Canonicalization test: floordivi_signed on tensor<4x5xi32> by a splat
// constant of 1 (dense<1>) is expected to fold to the dividend; the CHECK line
// expects the argument to be returned.
984 // CHECK-LABEL: func @tensor_floordivi_signed_by_one
985 // CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
986 func @tensor_floordivi_signed_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
987 %c1 = constant dense<1> : tensor<4x5xi32>
988 %res = floordivi_signed %arg0, %c1 : tensor<4x5xi32>
989 // CHECK: return %[[ARG]]
990 return %res : tensor<4x5xi32>
// Canonicalization test: scalar ceildivi_signed by the constant 1 is expected
// to fold to the dividend, so the CHECK line expects the argument to be
// returned.
995 // CHECK-LABEL: func @ceildivi_signed_by_one
996 // CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
997 func @ceildivi_signed_by_one(%arg0: i32) -> (i32) {
998 %c1 = constant 1 : i32
999 %res = ceildivi_signed %arg0, %c1 : i32
1000 // CHECK: return %[[ARG]]
// Canonicalization test: ceildivi_signed on tensor<4x5xi32> by a splat
// constant of 1 (dense<1>) is expected to fold to the dividend; the CHECK line
// expects the argument to be returned.
1004 // CHECK-LABEL: func @tensor_ceildivi_signed_by_one
1005 // CHECK-SAME: %[[ARG:[a-zA-Z0-9]+]]
1006 func @tensor_ceildivi_signed_by_one(%arg0: tensor<4x5xi32>) -> tensor<4x5xi32> {
1007 %c1 = constant dense<1> : tensor<4x5xi32>
1008 %res = ceildivi_signed %arg0, %c1 : tensor<4x5xi32>
1009 // CHECK: return %[[ARG]]
1010 return %res : tensor<4x5xi32>
// Canonicalization test: a memref_cast that erases static shape information
// (memref<4x5xf32> -> memref<?x?xf32>) feeds a subview. The CHECK-NEXT line
// expects the subview to be the first op in the function and to take a
// memref<4x5xf32> operand, i.e. the cast is folded into the subview.
1015 // CHECK-LABEL: func @memref_cast_folding_subview
1016 func @memref_cast_folding_subview(%arg0: memref<4x5xf32>, %i: index) -> (memref<?x?xf32, offset:? , strides: [?, ?]>) {
1017 %0 = memref_cast %arg0 : memref<4x5xf32> to memref<?x?xf32>
1018 // CHECK-NEXT: subview %{{.*}}: memref<4x5xf32>
1019 %1 = subview %0[%i, %i][%i, %i][%i, %i]: memref<?x?xf32> to memref<?x?xf32, offset:? , strides: [?, ?]>
1020 // CHECK-NEXT: return %{{.*}}
1021 return %1: memref<?x?xf32, offset:? , strides: [?, ?]>
// Canonicalization test: a subview with fully static offsets/sizes/strides is
// taken from a memref_cast that erased the static shape of memref<16x16xf32>.
// The CHECK lines expect the subview to operate on the original
// memref<16x16xf32> and produce a result with the static layout map0, followed
// by a memref_cast from that type to the same shape with layout map1. The two
// CHECK-DAG lines capture those affine maps.
1026 // CHECK-DAG: #[[$map0:.*]] = affine_map<(d0, d1) -> (d0 * 16 + d1)>
1027 // CHECK-DAG: #[[$map1:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
1029 // CHECK-LABEL: func @memref_cast_folding_subview_static(
1030 func @memref_cast_folding_subview_static(%V: memref<16x16xf32>, %a: index, %b: index)
1031 -> memref<3x4xf32, offset:?, strides:[?, 1]>
1033 %0 = memref_cast %V : memref<16x16xf32> to memref<?x?xf32>
1034 %1 = subview %0[0, 0][3, 4][1, 1] : memref<?x?xf32> to memref<3x4xf32, offset:?, strides:[?, 1]>
1036 // CHECK: subview{{.*}}: memref<16x16xf32> to memref<3x4xf32, #[[$map0]]>
1037 // CHECK: memref_cast{{.*}}: memref<3x4xf32, #[[$map0]]> to memref<3x4xf32, #[[$map1]]>
1038 return %1: memref<3x4xf32, offset:?, strides:[?, 1]>
// Canonicalization test: tensor.extract at constant index 0 from a
// single-element tensor_from_elements. The CHECK lines capture the function
// argument and expect it to appear in a later line as `<arg> : index`.
// NOTE(review): that later line is presumably the return, i.e. the extract is
// expected to fold to the element itself -- confirm against the full file.
1043 // CHECK-LABEL: func @extract_from_tensor_from_elements
1044 func @extract_from_tensor_from_elements(%element : index) -> index {
1045 // CHECK-SAME: ([[ARG:%.*]]: index)
1046 %c0 = constant 0 : index
1047 %tensor = tensor_from_elements %element : tensor<1xindex>
1048 %extracted_element = tensor.extract %tensor[%c0] : tensor<1xindex>
1049 // CHECK: [[ARG]] : index
1050 return %extracted_element : index
// Canonicalization test: tensor.extract from a 1-D
// dynamic_tensor_from_elements whose body computes `dim %tensor, <index>`.
// The CHECK-NEXT lines expect the first op in the function to be
// `dim %tensor, %idx` and its result to be returned directly, i.e. the extract
// is replaced by the generator body evaluated at the extracted index.
// NOTE(review): the block-argument header, yield and closing lines of the
// generator region are not visible in this excerpt.
1055 // CHECK-LABEL: func @extract_from_dynamic_tensor_from_elements
1056 // CHECK-SAME: %[[IDX:.*]]: index, %[[TENSOR:.*]]: tensor<*xf32>
1057 func @extract_from_dynamic_tensor_from_elements(%idx: index, %tensor: tensor<*xf32>) -> index {
1058 %size = rank %tensor : tensor<*xf32>
1059 // CHECK-NEXT: %[[RES:.*]] = dim %[[TENSOR]], %[[IDX]]
1060 %0 = dynamic_tensor_from_elements %size {
1062 %1 = dim %tensor, %arg0 : tensor<*xf32>
1065 %1 = tensor.extract %0[%idx] : tensor<?xindex>
1066 // CHECK-NEXT: return %[[RES]]
// Canonicalization test: two-dimensional variant of the extract-from-generator
// fold. The generator body adds `dim %tensor, %arg0` and `dim %tensor, %arg1`;
// the CHECK-NEXT lines expect the function to start with the same computation
// on %idx0 and %idx1 and the addi result to be returned directly, with no
// dynamic_tensor_from_elements or tensor.extract in between.
// NOTE(review): the yield line of the generator region is not visible in this
// excerpt.
1072 // CHECK-LABEL: func @extract_from_dynamic_tensor_from_elements_2d
1073 // CHECK-SAME: %[[IDX0:.*]]: index, %[[IDX1:.*]]: index, %[[TENSOR:.*]]: tensor<*xf32>
1074 func @extract_from_dynamic_tensor_from_elements_2d(%idx0: index, %idx1: index, %tensor: tensor<*xf32>) -> index {
1075 %size = rank %tensor : tensor<*xf32>
1076 // CHECK-NEXT: %[[DIM0:.*]] = dim %[[TENSOR]], %[[IDX0]]
1077 // CHECK-NEXT: %[[DIM1:.*]] = dim %[[TENSOR]], %[[IDX1]]
1078 // CHECK-NEXT: %[[RES:.*]] = addi %[[DIM0]], %[[DIM1]]
1079 %0 = dynamic_tensor_from_elements %size, %size {
1080 ^bb0(%arg0: index, %arg1: index):
1081 %1 = dim %tensor, %arg0 : tensor<*xf32>
1082 %2 = dim %tensor, %arg1 : tensor<*xf32>
1083 %3 = addi %1, %2 : index
1085 } : tensor<?x?xindex>
1086 %4 = tensor.extract %0[%idx0, %idx1] : tensor<?x?xindex>
1087 // CHECK-NEXT: return %[[RES]]
// Negative canonicalization test: the generator body of
// dynamic_tensor_from_elements contains a store into %mem, so the CHECK lines
// expect both the dynamic_tensor_from_elements op and the tensor.extract that
// reads from it to remain, with the extract result being returned.
// NOTE(review): the block-argument header, yield and closing lines of the
// generator region are not visible in this excerpt.
1093 // CHECK-LABEL: func @extract_from_dynamic_tensor_from_elements_sideeffects
1094 // CHECK-SAME: %[[IDX:.*]]: index
1095 func @extract_from_dynamic_tensor_from_elements_sideeffects(%idx: index, %tensor: tensor<*xf32>) -> index {
1096 %size = rank %tensor : tensor<*xf32>
1097 %mem = alloc(%size) : memref<?xindex>
1098 // CHECK: %[[DTENSOR:.*]] = dynamic_tensor_from_elements
1099 %0 = dynamic_tensor_from_elements %size {
1101 %1 = dim %tensor, %arg0 : tensor<*xf32>
// This store is the side effect the test name refers to.
1102 store %1, %mem[%arg0] : memref<?xindex>
1105 // CHECK: %[[RES:.*]] = tensor.extract %[[DTENSOR]][%[[IDX]]]
1106 %1 = tensor.extract %0[%idx] : tensor<?xindex>
1107 // CHECK-NEXT: return %[[RES]]
// Canonicalization test: one of the three dynamic extent operands of
// dynamic_tensor_from_elements is the constant 5. The CHECK lines expect the
// op to keep only the two non-constant operands (%size1, %size4), to produce
// the more static type tensor<3x?x5x7x?xindex>, and to be followed by a
// tensor_cast back to the original result type tensor<3x?x?x7x?xindex>.
// NOTE(review): the yield line of the generator region is not visible in this
// excerpt.
1113 // CHECK-LABEL: @static_dynamic_tensor_from_elements
1114 // CHECK-SAME: %[[SIZE1:.*]]: index, %[[SIZE4:.*]]: index)
1115 func @static_dynamic_tensor_from_elements(%size1: index, %size4: index) -> tensor<3x?x?x7x?xindex> {
1116 %c5 = constant 5 : index
1117 // CHECK: dynamic_tensor_from_elements %[[SIZE1]], %[[SIZE4]]
1118 %0 = dynamic_tensor_from_elements %size1, %c5, %size4 {
1119 ^bb0(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index):
1120 %1 = constant 32 : index
1122 // CHECK: : tensor<3x?x5x7x?xindex>
1123 } : tensor<3x?x?x7x?xindex>
1124 // CHECK: tensor_cast %{{.*}} : tensor<3x?x5x7x?xindex> to tensor<3x?x?x7x?xindex>
1125 return %0 : tensor<3x?x?x7x?xindex>
// Canonicalization test: a chain of two tensor_cast ops
// (tensor<*xi32> -> tensor<4x?xi32> -> tensor<4x8xi32>) is expected to
// collapse into a single tensor_cast from the input type to the final type,
// whose result is returned.
1130 // CHECK-LABEL: @tensor_cast_chain_ok
1131 // CHECK-SAME: %[[IN:.*]]: tensor<*xi32>
1132 func @tensor_cast_chain_ok(%input: tensor<*xi32>) -> tensor<4x8xi32> {
1133 // CHECK-NEXT: %[[RES:.*]] = tensor_cast %[[IN]] : tensor<*xi32> to tensor<4x8xi32>
1134 %0 = tensor_cast %input : tensor<*xi32> to tensor<4x?xi32>
1135 %1 = tensor_cast %0 : tensor<4x?xi32> to tensor<4x8xi32>
1136 // CHECK-NEXT: return %[[RES]]
1137 return %1 : tensor<4x8xi32>
// Canonicalization test: casting tensor<4xi32> to tensor<?xi32> and back to
// tensor<4xi32> is expected to fold away entirely; the CHECK-NEXT line expects
// the return of the input to be the first op in the function.
1142 // CHECK-LABEL: @tensor_cast_chain_regain
1143 // CHECK-SAME: %[[IN:.*]]: tensor<4xi32>
1144 func @tensor_cast_chain_regain(%input: tensor<4xi32>) -> tensor<4xi32> {
1145 %0 = tensor_cast %input : tensor<4xi32> to tensor<?xi32>
1146 %1 = tensor_cast %0 : tensor<?xi32> to tensor<4xi32>
1147 // CHECK-NEXT: return %[[IN]]
1148 return %1 : tensor<4xi32>
// Negative canonicalization test: both tensor_cast ops are expected to remain
// (the CHECK-NEXT lines match two chained casts feeding the return). The
// intermediate type tensor<4x?xi32> fixes dimension 0 to 4, which neither the
// input type tensor<?x?xi32> nor the result type tensor<?x8xi32> does.
// NOTE(review): presumably that is why the chain must not be collapsed --
// confirm against the tensor_cast folding rules.
1153 // CHECK-LABEL: @tensor_cast_chain_keep
1154 // CHECK-SAME: %[[IN:.*]]: tensor<?x?xi32>
1155 func @tensor_cast_chain_keep(%input: tensor<?x?xi32>) -> tensor<?x8xi32> {
1156 // CHECK-NEXT: %[[C1:.*]] = tensor_cast %[[IN]]
1157 %0 = tensor_cast %input : tensor<?x?xi32> to tensor<4x?xi32>
1158 // CHECK-NEXT: %[[C2:.*]] = tensor_cast %[[C1]]
1159 %1 = tensor_cast %0 : tensor<4x?xi32> to tensor<?x8xi32>
1160 // CHECK-NEXT: return %[[C2]]
1161 return %1 : tensor<?x8xi32>
// Negative canonicalization test: both tensor_cast ops are expected to remain
// (the CHECK-NEXT lines match two chained casts feeding the return). The
// chain goes tensor<4x8xi32> -> tensor<?x?xi32> -> tensor<8x4xi32>, so the
// source and final static shapes differ.
// NOTE(review): presumably the chain is left alone because a direct cast
// between the two static types would not be valid -- confirm.
1166 // CHECK-LABEL: @tensor_cast_chain_invalid
1167 // CHECK-SAME: %[[IN:.*]]: tensor<4x8xi32>
1168 func @tensor_cast_chain_invalid(%input: tensor<4x8xi32>) -> tensor<8x4xi32> {
1169 // CHECK-NEXT: %[[C1:.*]] = tensor_cast %[[IN]]
1170 %0 = tensor_cast %input : tensor<4x8xi32> to tensor<?x?xi32>
1171 // CHECK-NEXT: %[[C2:.*]] = tensor_cast %[[C1]]
1172 %1 = tensor_cast %0 : tensor<?x?xi32> to tensor<8x4xi32>
1173 // CHECK-NEXT: return %[[C2]]
1174 return %1 : tensor<8x4xi32>
// Canonicalization test for subtensor: offset/size/stride operands that are
// constants are expected to be folded into the static form of the op, giving
// a more static result type, with a tensor_cast restoring the original
// tensor<?x?x?xf32> type. The first subtensor has only constant operands; the
// second keeps one dynamic size (%arg0).
1179 // CHECK-LABEL: func @subtensor
1180 // CHECK-SAME: %[[ARG0:[0-9a-z]*]]: index, %[[ARG1:[0-9a-z]*]]: index
1181 func @subtensor(%t: tensor<8x16x4xf32>, %arg0 : index, %arg1 : index)
1182 -> tensor<?x?x?xf32>
1184 %c0 = constant 0 : index
1185 %c1 = constant 1 : index
1186 %c2 = constant 2 : index
1187 %c7 = constant 7 : index
1188 %c11 = constant 11 : index
1190 // CHECK: subtensor %{{.*}}[0, 0, 0] [7, 11, 2] [1, 1, 1] :
1191 // CHECK-SAME: tensor<8x16x4xf32> to tensor<7x11x2xf32>
1192 // CHECK: tensor_cast %{{.*}} : tensor<7x11x2xf32> to tensor<?x?x?xf32>
1193 %1 = subtensor %t[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1]
1194 : tensor<8x16x4xf32> to tensor<?x?x?xf32>
1196 // Test: subtensor with one dynamic operand can also be folded.
1197 // CHECK: subtensor %{{.*}}[0, 0, 0] [2, %[[ARG0]], 2] [1, 1, 1] :
1198 // CHECK-SAME: tensor<?x?x?xf32> to tensor<2x?x2xf32>
1199 // CHECK: tensor_cast %{{.*}} : tensor<2x?x2xf32> to tensor<?x?x?xf32>
1200 %2 = subtensor %1[%c0, %c0, %c0] [%c2, %arg0, %c2] [%c1, %c1, %c1]
1201 : tensor<?x?x?xf32> to tensor<?x?x?xf32>
1203 return %2 : tensor<?x?x?xf32>
// Canonicalization test: tensor.extract reading through a tensor_cast is
// expected to read from the cast's source instead. The CHECK-NEXT lines expect
// the constant index followed immediately by a tensor.extract on the original
// function argument, and the CHECK-NOT line rejects any tensor_cast in between.
1208 // CHECK-LABEL: func @extract_from_tensor_cast
1209 // CHECK-SAME: %[[TENSOR:.*]]: tensor<*xf32>
1210 func @extract_from_tensor_cast(%tensor: tensor<*xf32>) -> f32 {
1211 // CHECK-NEXT: %[[C0:.*]] = constant 0 : index
1212 %c0 = constant 0 : index
1213 // CHECK-NOT: tensor_cast
1214 %casted = tensor_cast %tensor : tensor<*xf32> to tensor<?xf32>
1215 // CHECK-NEXT: tensor.extract %[[TENSOR]][%[[C0]]]
1216 %result = tensor.extract %casted[%c0] : tensor<?xf32>
1217 return %result : f32