mlir/test/Dialect/Linalg/canonicalize.mlir

   1 // RUN: mlir-opt %s -canonicalize -split-input-file | FileCheck %s
   2 // The dynamic-shape memref.cast below is expected to fold into the operands of linalg.matmul.
   3 // CHECK-LABEL: func @memref_cast(
   4 func @memref_cast(%a: index, %b: index) -> memref<?x?xf32> {
   5   %c0 = constant 0 : index
   6   %c1 = constant 1 : index
   7   %c8 = constant 8 : index
   8   %c16 = constant 16 : index
   9   %1 = memref.alloc (%b) : memref<?xi8>
  10   %2 = memref.view %1[%c0][] : memref<?xi8> to memref<16x16xf32>
  11   %3 = memref.cast %2 : memref<16x16xf32> to memref<?x?xf32>
  12
  13   // CHECK:  linalg.matmul ins({{.*}}memref<16x16xf32>, memref<16x16xf32>) outs({{.*}}memref<16x16xf32>)
  14   linalg.matmul ins(%3, %3: memref<?x?xf32>, memref<?x?xf32>)
  15                outs(%3: memref<?x?xf32>)
  16   return %3: memref<?x?xf32>
  17 }
  18
  19 // -----
  20 // The memref.cast feeding the tiled_loop output should fold, so the loop takes memref<192xf32> directly.
  21 #map = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
  22
  23 // CHECK-LABEL: func @memref_cast_into_tiled_loop(
  24 func @memref_cast_into_tiled_loop(%arg0: memref<192xf32>)  {
  25   %0 = memref.cast %arg0
  26     : memref<192xf32> to memref<192xf32, #map>
  27   %cst = constant 0.000000e+00 : f32
  28   %c24 = constant 24 : index
  29   %c0 = constant 0 : index
  30   %c192 = constant 192 : index
  31   // CHECK: linalg.tiled_loop
  32   // CHECK-SAME: outs (%{{.*}} = %{{.*}}: memref<192xf32>)
  33   linalg.tiled_loop (%arg3) = (%c0) to (%c192) step (%c24)
  34     outs (%out = %0: memref<192xf32, #map>) {
  35     %14 = affine.min affine_map<(d0) -> (-d0 + 192, 24)>(%arg3)
  36     %16 = memref.subview %out[%arg3] [%14] [1]
  37       : memref<192xf32, #map> to memref<?xf32, #map>
  38     linalg.fill(%cst, %16) : f32, memref<?xf32, #map>
  39     linalg.yield
  40   }
  41   return
  42 }
  43
  44 // -----
  45 // An expand/expand/collapse chain from tensor<f32> back to tensor<f32> should fold to returning %arg0.
  46 // CHECK-LABEL: zero_rank_reshape_multi
  47 func @zero_rank_reshape_multi(%arg0: tensor<f32>) -> tensor<f32> {
  48   // CHECK: return %arg0
  49   %0 = linalg.tensor_expand_shape %arg0 [] : tensor<f32> into tensor<1xf32>
  50   %1 = linalg.tensor_expand_shape %0 [[0, 1]] : tensor<1xf32> into tensor<1x1xf32>
  51   %2 = linalg.tensor_collapse_shape %1 [] : tensor<1x1xf32> into tensor<f32>
  52   return %2 : tensor<f32>
  53 }
  54
  55 // -----
  56 // Two chained collapse_shape ops should merge into one with reassociation [[0, 1, 2], [3, 4]].
  57 func @collapsing_tensor_reshapes(%arg0 : tensor<?x?x?x?x?xf32>) -> tensor<?x?xf32>
  58 {
  59   %0 = linalg.tensor_collapse_shape %arg0 [[0, 1], [2], [3, 4]]
  60       : tensor<?x?x?x?x?xf32> into tensor<?x?x?xf32>
  61   %1 = linalg.tensor_collapse_shape %0 [[0, 1], [2]]
  62       : tensor<?x?x?xf32> into tensor<?x?xf32>
  63   return %1 : tensor<?x?xf32>
  64 }
  65 // CHECK-LABEL: collapsing_tensor_reshapes
  66 //       CHECK:   linalg.tensor_collapse_shape %{{.*}} {{\[}}[0, 1, 2], [3, 4]]
  67 //   CHECK-NOT:   linalg.tensor_collapse_shape
  68
  69 // -----
  70 // Chained collapses ending in a rank-0 tensor should merge into one collapse with an empty reassociation.
  71 func @collapsing_tensor_reshapes_to_zero_dim(%arg0 : tensor<1x1x1xf32>)
  72                                              -> tensor<f32> {
  73   %0 = linalg.tensor_collapse_shape %arg0 [[0, 1, 2]]
  74       : tensor<1x1x1xf32> into tensor<1xf32>
  75   %1 = linalg.tensor_collapse_shape %0 [] : tensor<1xf32> into tensor<f32>
  76   return %1 : tensor<f32>
  77 }
  78 // CHECK-LABEL: collapsing_tensor_reshapes_to_zero
  79 //       CHECK:   linalg.tensor_collapse_shape %{{.*}} []
  80 //  CHECK-SAME:     tensor<1x1x1xf32> into tensor<f32>
  81
  82 // -----
  83 // Two chained expand_shape ops should merge into one with reassociation [[0, 1, 2], [3, 4]].
  84 func @expanding_tensor_reshapes(%arg0 : tensor<?x?xf32>) -> tensor<?x6x4x?x5xf32>
  85 {
  86   %0 = linalg.tensor_expand_shape %arg0 [[0, 1], [2]]
  87       : tensor<?x?xf32> into tensor<?x4x?xf32>
  88   %1 = linalg.tensor_expand_shape %0 [[0, 1], [2], [3, 4]]
  89       : tensor<?x4x?xf32> into tensor<?x6x4x?x5xf32>
  90   return %1 : tensor<?x6x4x?x5xf32>
  91 }
  92 // CHECK-LABEL: expanding_tensor_reshapes
  93 //       CHECK:   linalg.tensor_expand_shape %{{.*}} {{\[}}[0, 1, 2], [3, 4]]
  94 //   CHECK-NOT:   linalg.tensor_expand_shape
  95
  96 // -----
  97 // Chained expands starting from a rank-0 tensor should merge into one expand with an empty reassociation.
  98 func @expanding_tensor_reshapes_to_zero_dim(%arg0 : tensor<f32>)
  99                                              -> tensor<1x1x1xf32> {
 100   %0 = linalg.tensor_expand_shape %arg0 [] : tensor<f32> into tensor<1xf32>
 101   %1 = linalg.tensor_expand_shape %0 [[0, 1, 2]]
 102       : tensor<1xf32> into tensor<1x1x1xf32>
 103   return %1 : tensor<1x1x1xf32>
 104 }
 105 // CHECK-LABEL: expanding_tensor_reshapes_to_zero
 106 //       CHECK:   linalg.tensor_expand_shape %{{.*}} []
 107 //  CHECK-SAME:     tensor<f32> into tensor<1x1x1xf32>
 108
 109 // -----
 110 // A static expand_shape undone by the matching collapse_shape should leave no reshape op behind.
 111 func @fold_tensor_reshape(%arg0 : tensor<12x4xf32>) -> tensor<12x4xf32>
 112 {
 113   %0 = linalg.tensor_expand_shape %arg0 [[0, 1], [2]]
 114       : tensor<12x4xf32> into tensor<3x4x4xf32>
 115   %1 = linalg.tensor_collapse_shape %0 [[0, 1], [2]]
 116       : tensor<3x4x4xf32> into tensor<12x4xf32>
 117   return %1 : tensor<12x4xf32>
 118 }
 119 // CHECK-LABEL: @fold_tensor_reshape
 120 //   CHECK-NOT:   linalg.{{.*}}shape
 121
 122 // -----
 123 // Same round trip as the static case but with dynamic dims; no reshape op should remain.
 124 func @fold_tensor_reshape_dynamic(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32>
 125 {
 126   %0 = linalg.tensor_expand_shape %arg0 [[0, 1], [2]]
 127       : tensor<?x?xf32> into tensor<?x4x?xf32>
 128   %1 = linalg.tensor_collapse_shape %0 [[0, 1], [2]]
 129       : tensor<?x4x?xf32> into tensor<?x?xf32>
 130   return %1 : tensor<?x?xf32>
 131 }
 132 // CHECK-LABEL: @fold_tensor_reshape_dynamic
 133 //   CHECK-NOT:   linalg.{{.*}}_shape
 134
 135 // -----
 136 // A full collapse to 1-D followed by a partial expand should become one collapse_shape of the argument.
 137 func @reshape_collapse(%arg0 : tensor<2x3x4x5x6x7x8xf32>) -> tensor<24x5x42x8xf32>
 138 {
 139   %0 = linalg.tensor_collapse_shape %arg0 [[0, 1, 2, 3, 4, 5, 6]]
 140       : tensor<2x3x4x5x6x7x8xf32> into tensor<40320xf32>
 141   %1 = linalg.tensor_expand_shape %0 [[0, 1, 2, 3]]
 142       : tensor<40320xf32> into tensor<24x5x42x8xf32>
 143   return %1 : tensor<24x5x42x8xf32>
 144 }
 145 //      CHECK: func @reshape_collapse
 146 // CHECK-SAME:   %[[ARG0:.+]]: tensor<2x3x4x5x6x7x8xf32>
 147 //      CHECK:   %[[RESULT:.+]] = linalg.tensor_collapse_shape %[[ARG0]]
 148 // CHECK-SAME:     [0, 1, 2], [3], [4, 5], [6]
 149 //      CHECK:   return %[[RESULT]]
 150
 151 // -----
 152 // A partial collapse to 1-D followed by a full expand should become one expand_shape of the argument.
 153 func @reshape_expand(%arg0 : tensor<24x5x42x8xf32>) -> tensor<2x3x4x5x6x7x8xf32>
 154 {
 155   %0 = linalg.tensor_collapse_shape %arg0 [[0, 1, 2, 3]]
 156       : tensor<24x5x42x8xf32> into tensor<40320xf32>
 157   %1 = linalg.tensor_expand_shape %0 [[0, 1, 2, 3, 4, 5, 6]]
 158       : tensor<40320xf32> into tensor<2x3x4x5x6x7x8xf32>
 159   return %1 : tensor<2x3x4x5x6x7x8xf32>
 160 }
 161 //      CHECK: func @reshape_expand
 162 // CHECK-SAME:   %[[ARG0:.+]]: tensor<24x5x42x8xf32>
 163 //      CHECK:   %[[RESULT:.+]] = linalg.tensor_expand_shape %[[ARG0]]
 164 // CHECK-SAME:     [0, 1, 2], [3], [4, 5], [6]
 165 //      CHECK:   return %[[RESULT]]
 166
 167 // -----
 168 // Expanding into extra unit dims and collapsing them again should reduce to one expand_shape [[0, 1]].
 169 func @expand_reshape_1D(%arg0 : tensor<2048xf32>) -> tensor<4x512xf32>
 170 {
 171   %0 = linalg.tensor_expand_shape %arg0 [[0, 1, 2, 3]]
 172     : tensor<2048xf32> into tensor<1x4x1x512xf32>
 173   %1 = linalg.tensor_collapse_shape %0 [[0, 1, 2], [3]]
 174     : tensor<1x4x1x512xf32> into tensor<4x512xf32>
 175   return %1 : tensor<4x512xf32>
 176 }
 177 //       CHECK: func @expand_reshape_1D
 178 //       CHECK: linalg.tensor_expand_shape %{{.*}} {{\[}}[0, 1]]
 179 //  CHECK-SAME:   tensor<2048xf32> into tensor<4x512xf32>
 180
 181 // -----
 182 // Expanding with unit dims and then collapsing to 1-D should reduce to one collapse_shape [[0, 1]].
 183 func @fold_reshape_1D(%arg0 : tensor<4x512xf32>) -> tensor<2048xf32>
 184 {
 185   %0 = linalg.tensor_expand_shape %arg0 [[0, 1, 2], [3]]
 186     : tensor<4x512xf32> into tensor<1x4x1x512xf32>
 187   %1 = linalg.tensor_collapse_shape %0 [[0, 1, 2, 3]]
 188     : tensor<1x4x1x512xf32> into tensor<2048xf32>
 189   return %1 : tensor<2048xf32>
 190 }
 191 //       CHECK: func @fold_reshape_1D
 192 //       CHECK: linalg.tensor_collapse_shape %{{.*}} {{\[}}[0, 1]]
 193 //  CHECK-SAME:   tensor<4x512xf32> into tensor<2048xf32>
 194
 195 // -----
 196 // With trailing unit dims in the source, the expand/collapse pair should reduce to one expand_shape.
 197 func @fold_reshape_unit_dims(%arg0 : tensor<2048x1x1xf32>) -> tensor<4x512x1x1xf32>
 198 {
 199   %0 = linalg.tensor_expand_shape %arg0 [[0, 1, 2, 3], [4], [5]]
 200     : tensor<2048x1x1xf32> into tensor<1x4x1x512x1x1xf32>
 201   %1 = linalg.tensor_collapse_shape %0 [[0, 1, 2], [3], [4], [5]]
 202     : tensor<1x4x1x512x1x1xf32> into tensor<4x512x1x1xf32>
 203   return %1 : tensor<4x512x1x1xf32>
 204 }
 205 //       CHECK: func @fold_reshape_unit_dims
 206 //       CHECK: linalg.tensor_expand_shape %{{.*}} {{\[}}[0, 1], [2], [3]]
 207 //  CHECK-SAME:   tensor<2048x1x1xf32> into tensor<4x512x1x1xf32>
 208
 209 // -----
 210 // Unit dims introduced on both sides of the expansion should drop out, leaving one expand_shape.
 211 func @expand_reshape_unit_dims(%arg0 : tensor<2048x1x2048xf32>) -> tensor<4x512x1x512x4xf32>
 212 {
 213   %0 = linalg.tensor_expand_shape %arg0 [[0, 1, 2, 3, 4], [5], [6, 7, 8]]
 214     : tensor<2048x1x2048xf32> into tensor<1x4x1x512x1x1x512x1x4xf32>
 215   %1 = linalg.tensor_collapse_shape %0 [[0, 1, 2], [3, 4], [5], [6, 7], [8]]
 216     : tensor<1x4x1x512x1x1x512x1x4xf32> into tensor<4x512x1x512x4xf32>
 217   return %1 : tensor<4x512x1x512x4xf32>
 218 }
 219 //       CHECK: func @expand_reshape_unit_dims
 220 //       CHECK: linalg.tensor_expand_shape %{{.*}} {{\[}}[0, 1], [2], [3, 4]]
 221 //  CHECK-SAME:   tensor<2048x1x2048xf32> into tensor<4x512x1x512x4xf32>
 222
 223 // -----
 224 // Expanding to two trailing unit dims and collapsing them into one should reduce to one expand_shape.
 225 func @fold_reshape_trailing_unit_dims(%arg0: tensor<2xf32>) -> tensor<2x1xf32>
 226 {
 227   %0 = linalg.tensor_expand_shape %arg0 [[0, 1, 2]]
 228       : tensor<2xf32> into tensor<2x1x1xf32>
 229   %1 = linalg.tensor_collapse_shape %0 [[0], [1, 2]]
 230       : tensor<2x1x1xf32> into tensor<2x1xf32>
 231   return %1 : tensor<2x1xf32>
 232 }
 233 //       CHECK: func @fold_reshape_trailing_unit_dims
 234 //       CHECK: linalg.tensor_expand_shape %{{.*}} {{\[}}[0, 1]]
 235 //  CHECK-SAME:   tensor<2xf32> into tensor<2x1xf32>
 236
 237 // -----
 238 // Two chained collapses mixing dynamic and unit dims should merge into one collapse_shape.
 239 func @collapse_reshape_unit_dims_dynamic(%arg0 : tensor<?x1x?x1x1x?x?x1x1xf32>) -> tensor<?x?x?x?xf32>
 240 {
 241   %0 = linalg.tensor_collapse_shape %arg0 [[0], [1, 2], [3], [4], [5], [6, 7, 8]]
 242     : tensor<?x1x?x1x1x?x?x1x1xf32> into tensor<?x?x1x1x?x?xf32>
 243   %1 = linalg.tensor_collapse_shape %0 [[0], [1], [2, 3, 4], [5]]
 244     : tensor<?x?x1x1x?x?xf32> into tensor<?x?x?x?xf32>
 245   return %1 : tensor<?x?x?x?xf32>
 246 }
 247 //       CHECK: func @collapse_reshape_unit_dims_dynamic
 248 //       CHECK: linalg.tensor_collapse_shape
 249 //  CHECK-SAME:   [0], [1, 2], [3, 4, 5], [6, 7, 8]
 250 //  CHECK-SAME:   tensor<?x1x?x1x1x?x?x1x1xf32> into tensor<?x?x?x?xf32>
 251
 252 // -----
 253 // NOTE(review): verbatim repeat of the earlier @fold_reshape_trailing_unit_dims case in this file; consider removing.
 254 func @fold_reshape_trailing_unit_dims(%arg0: tensor<2xf32>) -> tensor<2x1xf32>
 255 {
 256   %0 = linalg.tensor_expand_shape %arg0 [[0, 1, 2]]
 257       : tensor<2xf32> into tensor<2x1x1xf32>
 258   %1 = linalg.tensor_collapse_shape %0 [[0], [1, 2]]
 259       : tensor<2x1x1xf32> into tensor<2x1xf32>
 260   return %1 : tensor<2x1xf32>
 261 }
 262 //       CHECK: func @fold_reshape_trailing_unit_dims
 263 //       CHECK: linalg.tensor_expand_shape %{{.*}} {{\[}}[0, 1]]
 264 //  CHECK-SAME:   tensor<2xf32> into tensor<2x1xf32>
 265
 266 // -----
 267 // Two chained collapses around a single dynamic dim should merge into one collapse over all six dims.
 268 func @fold_reshape_trailing_unit_dims_dynamic(%arg0: tensor<1x1x?x1x1x1xf32>) -> tensor<?xf32>
 269 {
 270   %0 = linalg.tensor_collapse_shape %arg0 [[0, 1, 2], [3], [4], [5]]
 271       : tensor<1x1x?x1x1x1xf32> into tensor<?x1x1x1xf32>
 272   %1 = linalg.tensor_collapse_shape %0 [[0, 1, 2, 3]]
 273       : tensor<?x1x1x1xf32> into tensor<?xf32>
 274   return %1 : tensor<?xf32>
 275 }
 276 //       CHECK: func @fold_reshape_trailing_unit_dims_dynamic
 277 //       CHECK: linalg.tensor_collapse_shape %{{.*}} {{\[}}[0, 1, 2, 3, 4, 5]]
 278 //  CHECK-SAME:   tensor<1x1x?x1x1x1xf32> into tensor<?xf32>
 279
 280 // -----
 281 // Negative test: this dynamic expand/collapse pair is expected to survive as two separate ops.
 282 func @no_fold_reshapes(%arg0 : tensor<?x?x?xf32>) -> tensor<?x?xf32>
 283 {
 284   %0 = linalg.tensor_expand_shape %arg0 [[0], [1], [2, 3]]
 285       : tensor<?x?x?xf32> into tensor<?x?x1x?xf32>
 286   %1 = linalg.tensor_collapse_shape %0 [[0], [1, 2, 3]]
 287       : tensor<?x?x1x?xf32> into tensor<?x?xf32>
 288   return %1 : tensor<?x?xf32>
 289 }
 290 // CHECK-LABEL: func @no_fold_reshapes
 291 //       CHECK:   linalg.tensor_expand_shape
 292 //       CHECK:   linalg.tensor_collapse_shape
 293
 294 // -----
 295 // Negative test: the expand and collapse groupings below do not line up, so both ops are expected to remain.
 296 func @no_fold_reshape_incompatible(%arg0 : tensor<4x6x8xf32>) -> tensor<2x6x16xf32>
 297 {
 298   %0 = linalg.tensor_expand_shape %arg0 [[0, 1], [2, 3], [4]]
 299       : tensor<4x6x8xf32> into tensor<2x2x3x2x8xf32>
 300   %1 = linalg.tensor_collapse_shape %0 [[0], [1, 2], [3, 4]]
 301       : tensor<2x2x3x2x8xf32> into tensor<2x6x16xf32>
 302   return %1 : tensor<2x6x16xf32>
 303 }
 304 // CHECK-LABEL: func @no_fold_reshape_incompatible
 305 //       CHECK:   linalg.tensor_expand_shape
 306 //       CHECK:   linalg.tensor_collapse_shape
 307
 308 // -----
 309 // Negative test: the expand/collapse pair must stay as written, and the collapse result must be what is returned.
 310 func @no_fold_reshape_empty_expr(%arg0: tensor<3x2x2xf32>) -> tensor<12x1xf32> {
 311   %0 = linalg.tensor_expand_shape %arg0 [[0], [1], [2, 3]]
 312       : tensor<3x2x2xf32> into tensor<3x2x2x1xf32>
 313   %1 = linalg.tensor_collapse_shape %0 [[0, 1, 2], [3]]
 314       : tensor<3x2x2x1xf32> into tensor<12x1xf32>
 315   return %1 : tensor<12x1xf32>
 316 }
 317 //      CHECK: func @no_fold_reshape_empty_expr
 318 // CHECK-SAME:    %[[ARG0:.+]]: tensor<3x2x2xf32>
 319 //      CHECK:    %[[RARG0:.+]] = linalg.tensor_expand_shape %[[ARG0]]
 320 // CHECK-SAME:      [0], [1], [2, 3]
 321 //      CHECK:    %[[RES:.+]] = linalg.tensor_collapse_shape %[[RARG0]]
 322 // CHECK-SAME:      [0, 1, 2], [3]
 323 //      CHECK:    return %[[RES]] : tensor<12x1xf32>
 324
 325 // -----
 326 // Expects the linalg.copy on memref<0xf32> to be erased and the function to return %arg1 directly.
 327 #accesses = [
 328   affine_map<(i) -> (i)>
 329 ]
 330
 331 #trait = {
 332   indexing_maps = #accesses,
 333   iterator_types = ["parallel"]
 334 }
 335
 336 func @dce_zero_memref(%arg0 : memref<0xf32>, %arg1: tensor<0xf32>) -> tensor<0xf32> {
 337   // memref<0xf32> is expected to be dce'ed
 338   linalg.copy(%arg0, %arg0): memref<0xf32>, memref<0xf32>
 339
 340   // tensor<0xf32> cannot be dce'ed
 341   %1 = linalg.generic #trait outs(%arg1 : tensor<0xf32>) {
 342   ^bb(%0: f32) :
 343     linalg.yield %0 : f32
 344   } -> tensor<0xf32>
 345
 346   return %1: tensor<0xf32>
 347 }
 348 // CHECK-LABEL: @dce_zero_memref
 349 //  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: memref<0xf32>
 350 //  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<0xf32>
 351 //   CHECK-NOT:   linalg.copy
 352 //  CHECK-NEXT:   return %[[ARG1]]
 353
 354 // -----
 355 // expand_shape of a splat i32 constant should fold into a constant of the expanded type.
 356 func @reshape_splat_constant_int32() -> tensor<2x4x2xi32>
 357 {
 358   %c0 = constant dense<42> : tensor<2x8xi32>
 359   %0 = linalg.tensor_expand_shape %c0 [[0], [1, 2]]
 360       : tensor<2x8xi32> into tensor<2x4x2xi32>
 361   return %0 : tensor<2x4x2xi32>
 362 }
 363 // CHECK-LABEL: @reshape_splat_constant_int32
 364 //       CHECK:   %[[CST:.*]] = constant dense<{{.*}}> : tensor<2x4x2xi32>
 365 //   CHECK-NOT:   linalg.tensor_expand_shape
 366 //       CHECK:   return %[[CST]]
 367 // expand_shape of a splat i16 constant should fold into a constant of the expanded type.
 368 func @reshape_splat_constant_int16() -> tensor<2x4x2xi16>
 369 {
 370   %c0 = constant dense<42> : tensor<2x8xi16>
 371   %0 = linalg.tensor_expand_shape %c0 [[0], [1, 2]]
 372       : tensor<2x8xi16> into tensor<2x4x2xi16>
 373   return %0 : tensor<2x4x2xi16>
 374 }
 375 // CHECK-LABEL: @reshape_splat_constant_int16
 376 //       CHECK:   %[[CST:.*]] = constant dense<{{.*}}> : tensor<2x4x2xi16>
 377 //   CHECK-NOT:   linalg.tensor_expand_shape
 378 //       CHECK:   return %[[CST]]
 379 // expand_shape of a splat f32 constant should fold into a constant of the expanded type.
 380 func @reshape_splat_constant_float32() -> tensor<2x4x2xf32>
 381 {
 382   %c0 = constant dense<42.0> : tensor<2x8xf32>
 383   %0 = linalg.tensor_expand_shape %c0 [[0], [1, 2]]
 384       : tensor<2x8xf32> into tensor<2x4x2xf32>
 385   return %0 : tensor<2x4x2xf32>
 386 }
 387 // CHECK-LABEL: @reshape_splat_constant_float32
 388 //       CHECK:   %[[CST:.*]] = constant dense<{{.*}}> : tensor<2x4x2xf32>
 389 //   CHECK-NOT:   linalg.tensor_expand_shape
 390 //       CHECK:   return %[[CST]]
 391 // expand_shape of a splat f64 constant should fold into a constant of the expanded type.
 392 func @reshape_splat_constant_float64() -> tensor<2x4x2xf64>
 393 {
 394   %c0 = constant dense<42.0> : tensor<2x8xf64>
 395   %0 = linalg.tensor_expand_shape %c0 [[0], [1, 2]]
 396       : tensor<2x8xf64> into tensor<2x4x2xf64>
 397   return %0 : tensor<2x4x2xf64>
 398 }
 399 // CHECK-LABEL: @reshape_splat_constant_float64
 400 //       CHECK:   %[[CST:.*]] = constant dense<{{.*}}> : tensor<2x4x2xf64>
 401 //   CHECK-NOT:   linalg.tensor_expand_shape
 402 //       CHECK:   return %[[CST]]
 403
 404 // -----
 405 // tensor.cast ops that only drop static sizes should fold into the matmul, which then uses the more static types.
 406 // CHECK-LABEL: func @tensor.cast(
 407 func @tensor.cast(%a : tensor<3x4xf32>, %b : tensor<4x?xf32>, %c : tensor<3x?xf32>)
 408   -> tensor<3x?xf32>
 409 {
 410   %ta = tensor.cast %a : tensor<3x4xf32> to tensor<?x?xf32>
 411   %tb = tensor.cast %b : tensor<4x?xf32> to tensor<?x?xf32>
 412   %tc = tensor.cast %c : tensor<3x?xf32> to tensor<?x?xf32>
 413
 414   //      CHECK:  linalg.matmul ins({{.*}}tensor<3x4xf32>, tensor<4x?xf32>)
 415   // CHECK-SAME:    outs({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32>
 416   %0 = linalg.matmul ins(%ta, %tb: tensor<?x?xf32>, tensor<?x?xf32>)
 417                     outs(%tc: tensor<?x?xf32>) -> tensor<?x?xf32>
 418
 419   %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<3x?xf32>
 420
 421   return %1: tensor<3x?xf32>
 422 }
 423
 424 // -----
 425 // The matmul with an unused tensor result should be removed; the one writing into a memref must be kept.
 426 // CHECK-LABEL: func @linalg_effects(
 427 //  CHECK-SAME:     %[[A:[a-z0-9]*]]: tensor<?x?xf32>
 428 //  CHECK-SAME:     %[[B:[a-z0-9]*]]: memref<?x?xf32>
 429 //  CHECK-SAME:     %[[C:[a-z0-9]*]]: tensor<?x?xf32>
 430 func @linalg_effects(%a : tensor<?x?xf32>, %b : memref<?x?xf32>, %c : tensor<?x?xf32>) {
 431   // CHECK-NOT:   %{{.*}} = linalg.matmul
 432   %t = linalg.matmul ins(%a, %b : tensor<?x?xf32>, memref<?x?xf32>)
 433                     outs(%c : tensor<?x?xf32>) -> tensor<?x?xf32>
 434
 435   // CHECK:   linalg.matmul
 436   linalg.matmul ins(%a, %c : tensor<?x?xf32>, tensor<?x?xf32>)
 437                outs(%b : memref<?x?xf32>)
 438   return
 439 }
 440
 441 // -----
 442 // A constant dynamic size of init_tensor should become static, with a tensor.cast back to the original type.
 443 func @init_tensor_canonicalize() -> (tensor<4x5x?xf32>) {
 444   %c6 = constant 6 : index
 445   %0 = linalg.init_tensor [4, 5, %c6] : tensor<4x5x?xf32>
 446   return %0 : tensor<4x5x?xf32>
 447 }
 448 // CHECK: func @init_tensor_canonicalize
 449 // CHECK:   %[[T0:.+]] = linalg.init_tensor [4, 5, 6] : tensor<4x5x6xf32>
 450 // CHECK:   %[[T1:.+]] = tensor.cast %[[T0]] : tensor<4x5x6xf32> to tensor<4x5x?xf32>
 451 // CHECK:   return %[[T1]]
 452
 453 // -----
 454 // expand_shape of init_tensor should fold to an init_tensor of the expanded shape (dynamic size floordiv 28).
 455 func @init_tensor_reshape_expansion(%arg0 : index) -> tensor<2x3x5x4x?x7xf32> {
 456   %0 = linalg.init_tensor [6, 5, %arg0] : tensor<6x5x?xf32>
 457   %1 = linalg.tensor_expand_shape %0 [[0, 1], [2], [3, 4, 5]]
 458       : tensor<6x5x?xf32> into tensor<2x3x5x4x?x7xf32>
 459   return %1 : tensor<2x3x5x4x?x7xf32>
 460 }
 461 //      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 floordiv 28)>
 462 //      CHECK: func @init_tensor_reshape_expansion
 463 // CHECK-SAME:     %[[ARG0:.+]]: index
 464 // CHECK-NEXT:   %[[D:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
 465 // CHECK-NEXT:   %[[INIT:.+]] = linalg.init_tensor [2, 3, 5, 4, %[[D]], 7]
 466 // CHECK-NEXT:   return %[[INIT]]
 467
 468 // -----
 469 // collapse_shape of init_tensor should fold to an init_tensor of the collapsed shape (dynamic size times 28).
 470 func @init_tensor_reshape_collapse(%arg0 : index) -> tensor<6x5x?xf32> {
 471   %0 = linalg.init_tensor [2, 3, 5, 4, %arg0, 7] : tensor<2x3x5x4x?x7xf32>
 472   %1 = linalg.tensor_collapse_shape %0 [[0, 1], [2], [3, 4, 5]]
 473       : tensor<2x3x5x4x?x7xf32> into tensor<6x5x?xf32>
 474   return %1 : tensor<6x5x?xf32>
 475 }
 476 //      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 * 28)>
 477 //      CHECK: func @init_tensor_reshape_collapse
 478 // CHECK-SAME:     %[[ARG0:.+]]: index
 479 // CHECK-NEXT:   %[[D:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
 480 // CHECK-NEXT:   %[[INIT:.+]] = linalg.init_tensor [6, 5, %[[D]]]
 481 // CHECK-NEXT:   return %[[INIT]]
 482
 483 // -----
 484 // A generic that only yields its input block arguments (swapped) should be replaced by the inputs themselves.
 485 #map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
 486 func @remove_no_op(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>)
 487   -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
 488   %c0 = constant 0 : index
 489   %c1 = constant 1 : index
 490   %c2 = constant 2 : index
 491   %0 = tensor.dim %arg0, %c0 : tensor<?x?x?xf32>
 492   %1 = tensor.dim %arg0, %c1 : tensor<?x?x?xf32>
 493   %2 = tensor.dim %arg0, %c2 : tensor<?x?x?xf32>
 494   %3 = linalg.init_tensor [%0, %1, %2] : tensor<?x?x?xf32>
 495   %4, %5 = linalg.generic {
 496     indexing_maps = [#map, #map, #map, #map],
 497     iterator_types = ["parallel", "parallel", "parallel"]
 498   } ins(%arg0, %arg1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>)
 499     outs(%3, %3 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
 500   ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32, %arg5 : f32):
 501     linalg.yield %arg3, %arg2 : f32, f32
 502   } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
 503   return %4, %5 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
 504 }
 505 // CHECK-LABEL: func @remove_no_op
 506 //  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
 507 //  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
 508 //       CHECK:     return %[[ARG1]], %[[ARG0]]
 509
 510 // -----
 511 // The generic yields %arg1, a block argument of ^bb1 rather than of its own body, so it is expected to be kept.
 512 #map = affine_map<(d0, d1) -> (d0, d1)>
 513 func @keep_not_noop(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32> {
 514   %c0 = constant 0 : index
 515   %c1 = constant 1 : index
 516   %cst = constant 1.000000e+00 : f32
 517   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
 518   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
 519   %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
 520   br ^bb1(%cst : f32)
 521
 522 ^bb1(%arg1 : f32):
 523   %3 = linalg.generic
 524     {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]}
 525     ins(%arg0 : tensor<?x?xf32>) outs(%2 : tensor<?x?xf32>) {
 526     ^bb0(%arg2: f32, %arg3 : f32):
 527       linalg.yield %arg1 : f32
 528     } -> tensor<?x?xf32>
 529   return %3 : tensor<?x?xf32>
 530 }
 531 // CHECK-LABEL: func @keep_not_noop
 532 //       CHECK:   %[[RESULT:.+]] = linalg.generic
 533 //       CHECK:   return %[[RESULT]]
 534
 535 // -----
 536 // Two-result variant: one yielded value (%arg2) is a block argument of ^bb1, so the generic is expected to be kept.
 537 #map = affine_map<(d0, d1) -> (d0, d1)>
 538 func @keep_not_noop(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>)
 539   -> (tensor<?x?xf32>, tensor<?x?xf32>) {
 540   %c0 = constant 0 : index
 541   %c1 = constant 1 : index
 542   %cst = constant 1.000000e+00 : f32
 543   %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
 544   %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
 545   %2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
 546   br ^bb1(%cst : f32)
 547
 548 ^bb1(%arg2 : f32):
 549   %3:2 = linalg.generic
 550     {indexing_maps = [#map, #map, #map, #map],
 551      iterator_types = ["parallel", "parallel"]}
 552     ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
 553     outs(%2, %2 : tensor<?x?xf32>, tensor<?x?xf32>) {
 554     ^bb0(%arg3: f32, %arg4 : f32, %arg5 : f32, %arg6 : f32):
 555       linalg.yield %arg2, %arg4 : f32, f32
 556     } -> (tensor<?x?xf32>, tensor<?x?xf32>)
 557   return %3#0, %3#1 : tensor<?x?xf32>, tensor<?x?xf32>
 558 }
 559 // CHECK-LABEL: func @keep_not_noop
 560 //       CHECK:   %[[RESULT:.+]]:2 = linalg.generic
 561 //       CHECK:   return %[[RESULT]]#0, %[[RESULT]]#1
 562
 563 // -----
 564 // extract_slice of an init_tensor should fold to an init_tensor that has the slice's sizes.
 565 func @fold_init_tensor_with_slice
 566   (%arg0 : index, %arg1 : index) -> tensor<5x?x20xf32>
 567 {
 568   %0 = linalg.init_tensor[%arg0, 10, 40] : tensor<?x10x40xf32>
 569   %1 = tensor.extract_slice %0[0, 0, 0] [5, %arg1, 20] [1, 1, 1]
 570     : tensor<?x10x40xf32> to tensor<5x?x20xf32>
 571   return %1 : tensor<5x?x20xf32>
 572 }
 573 //      CHECK: func @fold_init_tensor_with_slice
 574 // CHECK-SAME:   %[[ARG0:[a-zA-Z0-9_]+]]: index
 575 // CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: index
 576 //      CHECK:   %[[T0:.+]] = linalg.init_tensor [5, %[[ARG1]], 20]
 577 //      CHECK:   return %[[T0]]
 578
 579 // -----
 580 // Tensor-producing linalg ops whose results are never used should all be removed, leaving only the return.
 581 #accesses = [
 582   affine_map<(i, j) -> (i, j)>
 583 ]
 584
 585 #trait = {
 586   indexing_maps = #accesses,
 587   iterator_types = ["parallel", "parallel"]
 588 }
 589
 590 // CHECK-LABEL: func @dead_linalg_tensor
 591 //   CHECK-NOT:   linalg.fill
 592 //   CHECK-NOT:   linalg.matmul
 593 //   CHECK-NOT:   linalg.generic
 594 //   CHECK-NOT:   linalg.pad_tensor
 595 //       CHECK:   return
 596 func @dead_linalg_tensor(%arg0 : tensor<7x7xi32>, %arg1 : tensor<7x7xf32>,
 597                          %arg2: tensor<?x?xf32>, %high : index) {
 598   %c0_i32 = constant 0 : i32
 599   %c0 = constant 0 : index
 600   %cst = constant 0.000000e+00 : f32
 601   %0 = linalg.fill(%c0_i32, %arg0) : i32, tensor<7x7xi32> -> tensor<7x7xi32>
 602   %1 = linalg.matmul ins(%arg1, %arg1: tensor<7x7xf32>, tensor<7x7xf32>)
 603                      outs(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32>
 604   %2 = linalg.generic #trait outs(%arg0 : tensor<7x7xi32>) {
 605   ^bb(%3: i32) :
 606     linalg.yield %3 : i32
 607   } -> tensor<7x7xi32>
 608   %3 = linalg.pad_tensor %arg2 low[%c0, %c0] high[%high, %high] {
 609         ^bb0(%arg9: index, %arg10: index):  // no predecessors
 610           linalg.yield %cst : f32
 611   } : tensor<?x?xf32> to tensor<2x4xf32>
 612   return
 613 }
 614
 615 // -----
 616 // A pad_tensor whose result type equals its static source type should fold to the source value.
 617 // CHECK-LABEL: func @pad_tensor_same_static_shape(
 618 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<5x6xf32>
 619 //   CHECK-NOT:   linalg.pad_tensor
 620 //       CHECK:   return %[[ARG0]]
 621 func @pad_tensor_same_static_shape(%arg0: tensor<5x6xf32>, %a: index)
 622     -> tensor<5x6xf32> {
 623   %cst = constant 0.000000e+00 : f32
 624   %0 = linalg.pad_tensor %arg0 low[%a, 0] high[0, %a] {
 625         ^bb0(%arg1: index, %arg2: index):
 626           linalg.yield %cst : f32
 627   } : tensor<5x6xf32> to tensor<5x6xf32>
 628   return %0 : tensor<5x6xf32>
 629 }
 630
 631 // -----
 632 // CHECK-LABEL:   func @pad_tensor_after_cast_differnt_shape(
 633 // CHECK-SAME:      %[[INPUT:.*]]: tensor<?x64x?x?xf32>) -> tensor<?x?x?x?xf32> {
 634 // CHECK:           %[[CST:.*]] = constant 0.000000e+00 : f32
 635 // CHECK:           %[[PADDED:.*]] = linalg.pad_tensor %[[INPUT]]
 636 // CHECK-SAME:        low[0, 0, 1, 1] high[0, 0, 1, 1]  {
 637 // CHECK:           ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index):
 638 // CHECK:             linalg.yield %[[CST]] : f32
 639 // CHECK:           } : tensor<?x64x?x?xf32> to tensor<?x64x?x?xf32>
 640 // CHECK:           %[[DYNAMIC:.*]] = tensor.cast %[[PADDED]] :
 641 // CHECK-SAME:         tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
 642 // CHECK:           return %[[DYNAMIC]] : tensor<?x?x?x?xf32>
 643 // CHECK:         }
 644 func @pad_tensor_after_cast_differnt_shape(%arg0: tensor<?x64x?x?xf32>)
 645     -> tensor<?x?x?x?xf32> {  // the cast should fold into the pad; a new cast of the padded value restores the type
 646   %cst = constant 0.000000e+00 : f32
 647   %dynamic = tensor.cast %arg0 : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
 648   %padded = linalg.pad_tensor %dynamic low[0, 0, 1, 1] high[0, 0, 1, 1]  {
 649     ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):  // no predecessors
 650     linalg.yield %cst: f32
 651   } : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
 652   return %padded: tensor<?x?x?x?xf32>
 653 }
 654
 655 // -----
 656 // CHECK-LABEL:   func @pad_tensor_after_cast_same_shape(
 657 // CHECK-SAME:      %[[INPUT:.*]]: tensor<?x64x?x?xf32>,
 658 // CHECK-SAME:      %[[PADDING:.*]]: index) -> tensor<?x?x?x?xf32> {
 659 // CHECK:           %[[CST:.*]] = constant 0.000000e+00 : f32
 660 // CHECK:           %[[PADDED:.*]] = linalg.pad_tensor %[[INPUT]]
 661 // CHECK-SAME:        low[0, %[[PADDING]], 1, 1] high[0, %[[PADDING]], 1, 1]  {
 662 // CHECK:           ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index):
 663 // CHECK:             linalg.yield %[[CST]] : f32
 664 // CHECK:           } : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
 665 // CHECK:           return %[[PADDED]] : tensor<?x?x?x?xf32>
 666 // CHECK:         }
 667 func @pad_tensor_after_cast_same_shape(%arg0: tensor<?x64x?x?xf32>, %padding : index)
 668     -> tensor<?x?x?x?xf32> {  // the cast should fold into the pad and the padded value is returned directly
 669   %cst = constant 0.000000e+00 : f32
 670   %dynamic = tensor.cast %arg0 : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
 671   %padded = linalg.pad_tensor %dynamic low[0, %padding, 1, 1] high[0, %padding, 1, 1]  {
 672     ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):  // no predecessors
 673     linalg.yield %cst: f32
 674   } : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
 675   return %padded: tensor<?x?x?x?xf32>
 676 }
 677
 678 // -----
 679 func @propogate_casts(%arg0 : tensor<?x?xf32>, %arg1 : f32, %arg2 : index,
 680     %arg3 : index) -> tensor<?x?xf32> {  // expects static init_tensor sizes and a tensor.cast of the insert_slice result
 681   %c0 = constant 0 : index
 682   %c1 = constant 1 : index
 683   %c21 = constant 21 : index
 684   %c42 = constant 42 : index
 685   %0 = linalg.init_tensor [%c21, %c42] : tensor<?x?xf32>
 686   %1 = linalg.fill(%arg1, %0) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
 687   %2 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
 688   %3 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
 689   %4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
 690   return %4 : tensor<?x?xf32>
 691 }
 692 // CHECK-LABEL: func @propogate_casts
 693 //       CHECK:   %[[INIT:.+]] = linalg.init_tensor [21, 42]
 694 //       CHECK:   %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]])
 695 //       CHECK:   %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]]
 696 //       CHECK:   %[[RESULT:.+]] = tensor.cast %[[INSERTED]]
 697 //       CHECK:   return %[[RESULT]]
 698
 699 // -----
 700 // A linalg.copy whose source and destination are the same memref should be erased.
 701 // CHECK-LABEL: @self_copy
 702 func @self_copy(%arg0 : memref<2x3x?x4xf32>) {
 703
 704 //   CHECK-NOT: linalg.copy
 705   linalg.copy(%arg0, %arg0): memref<2x3x?x4xf32>, memref<2x3x?x4xf32>
 706
 707 //   CHECK: return
 708   return
 709 }
 710
 711 // -----
 712 // A self-copy whose input and output permutations differ is not a no-op, so the copy is expected to be kept.
 713 // CHECK-LABEL: @self_copy_with_permutation
 714 func @self_copy_with_permutation(%arg0 : memref<2x3x?x4xf32>) {
 715
 716 //   CHECK: linalg.copy
 717   linalg.copy(%arg0, %arg0)
 718     {inputPermutation = affine_map<(i, j, k, l) -> (j, k, i, l)>,
 719      outputPermutation = affine_map<(i, j, k, l) -> (i, j, k, l)>} : memref<2x3x?x4xf32>, memref<2x3x?x4xf32>
 720
 721 //   CHECK: return
 722   return
 723 }
 724
 725 // -----
 726 // collapse_shape of a fill on a static init_tensor should become a fill on an init_tensor of the collapsed shape.
 727 // CHECK-LABEL: func @fold_fill_reshape()
 728 func @fold_fill_reshape() -> tensor<6x4xf32> {
 729   %zero = constant 0.0 : f32
 730   // CHECK: %[[INIT:.+]] = linalg.init_tensor [6, 4] : tensor<6x4xf32>
 731   %init = linalg.init_tensor [1, 2, 3, 4] : tensor<1x2x3x4xf32>
 732   // CHECK: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[INIT]]) : f32, tensor<6x4xf32> -> tensor<6x4xf32>
 733   %fill = linalg.fill(%zero, %init) : f32, tensor<1x2x3x4xf32> -> tensor<1x2x3x4xf32>
 734   %reshape = linalg.tensor_collapse_shape %fill [[0, 1, 2], [3]]
 735       : tensor<1x2x3x4xf32> into tensor<6x4xf32>
 736   // CHECK: return %[[FILL]] : tensor<6x4xf32>
 737   return %reshape : tensor<6x4xf32>
 738 }
 739
 740 // -----
 741 // collapse_shape of a fill on a dynamic tensor should be rewritten as a fill of the collapsed tensor.
 742 //       CHECK: func @fold_fill_reshape_dynamic
 743 //  CHECK-SAME:   %[[ARG0:.+]]: tensor<?x?x?x?x?xf32>
 744 func @fold_fill_reshape_dynamic(%arg0 : tensor<?x?x?x?x?xf32>) -> tensor<?x?xf32> {
 745   %zero = constant 0.0 : f32
 746   // CHECK: %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %[[ARG0]]
 747   %0 = linalg.fill(%zero, %arg0) : f32, tensor<?x?x?x?x?xf32> -> tensor<?x?x?x?x?xf32>
 748   // CHECK: %[[RESULT:.+]] = linalg.fill(%{{.+}}, %[[RESHAPE]])
 749   %1 = linalg.tensor_collapse_shape %0 [[0, 1, 2], [3, 4]]
 750       : tensor<?x?x?x?x?xf32> into tensor<?x?xf32>
 751   // CHECK: return %[[RESULT]]
 752   return %1 : tensor<?x?xf32>
 753 }
 754
 755
 756 // -----
 757 // The unused loop result that just yields its own output argument %CT_ should be dropped together with that output.
 758 func private @foo(%A: memref<48xf32>, %B: tensor<48xf32>,
 759                   %C: memref<48xf32>) -> (tensor<48xf32>)
 760
 761 func @fold_tiled_loop_results(%A: memref<48xf32>, %B: tensor<48xf32>,
 762     %C: memref<48xf32>, %C_tensor: tensor<48xf32>) -> tensor<48xf32> {
 763   %c0 = constant 0 : index
 764   %c24 = constant 24 : index
 765   %c48 = constant 48 : index
 766   %useful, %useless = linalg.tiled_loop (%i) = (%c0) to (%c48) step (%c24)
 767       ins (%A_ = %A: memref<48xf32>)
 768       outs (%B_ = %B: tensor<48xf32>,
 769             %CT_ = %C_tensor: tensor<48xf32>,
 770             %C_ = %C: memref<48xf32>) {
 771         %result = call @foo(%A_, %B_, %C_)
 772           : (memref<48xf32>, tensor<48xf32>, memref<48xf32>)-> (tensor<48xf32>)
 773     linalg.yield %result, %CT_ : tensor<48xf32>, tensor<48xf32>
 774   }
 775   return %useful : tensor<48xf32>
 776 }
 777
 778 // CHECK-LABEL: func @fold_tiled_loop_results(
 779 // CHECK-SAME:   %[[A:.*]]: [[BUF_TY:memref<48xf32>]], %[[B:.*]]: [[TY:tensor<48xf32>]],
 780 // CHECK-SAME:   %[[C:.*]]: [[BUF_TY]],  %[[C_TENSOR:.*]]: [[TY]]) -> [[TY]] {
 781
 782 // CHECK-DAG:  %[[C0:.*]] = constant 0 : index
 783 // CHECK-DAG:  %[[C24:.*]] = constant 24 : index
 784 // CHECK-DAG:  %[[C48:.*]] = constant 48 : index
 785
 786 // CHECK-NOT: %{{.*}} = linalg.tiled_loop
 787 // CHECK:  %[[RESULT:.*]] = linalg.tiled_loop (%{{.*}}) = (%[[C0]])
 788 // CHECK-SAME: to (%[[C48]]) step (%[[C24]])
 789 // CHECK-SAME: ins (%[[A_:.*]] = %[[A]]: [[BUF_TY]])
 790 // CHECK-SAME: outs (%[[B_:.*]] = %[[B]]: [[TY]], %[[C_:.*]] = %[[C]]: [[BUF_TY]]) {
 791 // CHECK-NEXT:   %[[RES:.*]] = call @foo(%[[A_]], %[[B_]], %[[C_]])
 792 // CHECK-NEXT:   linalg.yield %[[RES]] :
 793
 794 // CHECK: return %[[RESULT]]
 795
 796 // -----
 797
 798 func private @foo(%A: memref<192xf32>, %B: tensor<192xf32>) -> tensor<192xf32>
 799
     // The loop takes two inputs, but its body only reads the memref one; the
     // %A_tensor input has no uses inside the region. The expected output is a
     // loop whose ins list holds just the memref operand.
 800 func @fold_tiled_loop_inputs(%A: memref<192xf32>, %A_tensor: tensor<192xf32>,
 801                              %B_tensor: tensor<192xf32>) -> tensor<192xf32> {
 802   %lb = constant 0 : index
 803   %tile = constant 24 : index
 804   %ub = constant 192 : index
 805   %loop = linalg.tiled_loop (%iv) = (%lb) to (%ub) step (%tile)
 806       ins (%a_in = %A: memref<192xf32>, %at_in = %A_tensor: tensor<192xf32>)
 807       outs (%bt_out = %B_tensor: tensor<192xf32>) {
 808     %call = call @foo(%a_in, %bt_out) : (memref<192xf32>, tensor<192xf32>) -> tensor<192xf32>
 809     linalg.yield %call : tensor<192xf32>
 810   }
 811   return %loop : tensor<192xf32>
 812 }
 813
 814 // CHECK-LABEL: func @fold_tiled_loop_inputs
 815 // CHECK: %[[LOOP:.*]] = linalg.tiled_loop
 816 // CHECK-SAME: ins (%{{.*}} = %{{.*}}: memref<192xf32>)
 817
 818 // CHECK: return %[[LOOP]]
 819
 820 // -----
 821
 822 func @tensor_pad_cast_fold(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
 823   %c0 = constant 0 : index
 824   %cst = constant 0.0 : f32
 825   %0 = tensor.cast %arg0 : tensor<4x4xf32> to tensor<?x?xf32>
 826   %1 = linalg.pad_tensor %0 low[%c0, %c0] high[%c0, %c0]  {
 827     ^bb0(%arg1: index, %arg2: index):  // no predecessors
 828       linalg.yield %cst : f32
 829   } : tensor<?x?xf32> to tensor<4x4xf32>
 830   return %1 : tensor<4x4xf32>
 831 }
     // A cast to a dynamic shape followed by a pad whose low and high amounts
     // are all the constant zero, padding back to the original static type.
     // The function is expected to return its argument directly. The label
     // spells out the full function name and opening paren so it cannot match
     // any other symbol that merely starts with the same prefix.
 832 // CHECK-LABEL: func @tensor_pad_cast_fold(
 833 // CHECK-SAME: %[[ARG0:.+]]: tensor<4x4xf32>
 834 // CHECK: return %[[ARG0]]
 835
 836 // -----
 837
 838 // CHECK-LABEL: func @fold_pad_tensor_source_cast(
 839 //  CHECK-SAME:                  %[[SRC:.*]]: tensor<4x?xf32>
 840 //   CHECK-NOT:   tensor.cast
 841 //       CHECK:   %[[PADDED:.*]] = linalg.pad_tensor %[[SRC]]
     // The pad source is a cast that erases the static leading dimension of
     // the argument. The expected output pads the function argument directly,
     // with no tensor.cast appearing before the pad.
 842 func @fold_pad_tensor_source_cast(%arg0: tensor<4x?xf32>) -> tensor<4x4xf32> {
 843   %pad_value = constant 0.0 : f32
 844   %dynamic = tensor.cast %arg0 : tensor<4x?xf32> to tensor<?x?xf32>
 845   %padded = linalg.pad_tensor %dynamic low[0, 0] high[0, 1]  {
 846     ^bb0(%row: index, %col: index):
 847       linalg.yield %pad_value : f32
 848   } : tensor<?x?xf32> to tensor<4x4xf32>
 849   return %padded : tensor<4x4xf32>
 850 }
 851
 852 // -----
 853
 854 // CHECK-LABEL: func @pad_static_zero_cast(
 855 //  CHECK-SAME:                  %[[SRC:.*]]: tensor<?x?x?xf32>
 856 //   CHECK-NOT:   linalg.pad_tensor
 857 //       CHECK:   %[[CAST:.*]] = tensor.cast %[[SRC]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
 858 //       CHECK:   return %[[CAST]]
     // Every low/high pad amount is zero, either as a literal or through the
     // zero constant. The expected output has no pad op before the cast; the
     // argument is cast to the static result type and returned.
 859 func @pad_static_zero_cast(%arg0: tensor<?x?x?xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
 860   %zero = constant 0 : index
 861   %padded = linalg.pad_tensor %arg0 low[0, %zero, 0] high[0, 0, %zero] {
 862     ^bb0(%i: index, %j: index, %k: index):
 863       linalg.yield %pad_value : f32
 864     } : tensor<?x?x?xf32> to tensor<2x3x4xf32>
 865
 866   return %padded : tensor<2x3x4xf32>
 867 }
 868
 869 // -----
 870
 871 func private @some_use(%i : index, %j : index)
 872
 873 // CHECK-LABEL: func @init_canonicalize
 874 //  CHECK-SAME:   %[[I:.*]]: index
     // Queries both dimensions of an init_tensor whose first extent is the
     // function argument and whose second extent is the literal 42. The call is
     // expected to receive the argument itself and a constant 42, with no
     // init_tensor or tensor.dim appearing ahead of it.
 875 func @init_canonicalize(%i : index) {
 876   %dim0 = constant 0 : index
 877   %dim1 = constant 1 : index
 878
 879   // CHECK-NOT: init_tensor
 880   %empty = linalg.init_tensor [%i, 42] : tensor<?x42xf32>
 881
 882   // CHECK-NOT: tensor.dim
 883   %rows = tensor.dim %empty, %dim0: tensor<?x42xf32>
 884   %cols = tensor.dim %empty, %dim1: tensor<?x42xf32>
 885
 886   // CHECK: %[[c42:.*]] = constant 42 : index
 887   // CHECK: call @some_use(%[[I]], %[[c42]])
 888   call @some_use(%rows, %cols) : (index, index) -> ()
 889
 890   return
 891 }
 892
 893 // -----
 894
 895 // CHECK-LABEL: func @rank_reducing_init_extract
     // Takes a rank-reducing 1x2 slice out of a ?x2 init_tensor. The expected
     // output creates the 1-D init_tensor of size 2 directly, with no extract
     // op following it.
 896 func @rank_reducing_init_extract(%sz : index, %idx : index) -> tensor<2xf32> {
 897   // CHECK: linalg.init_tensor [2] : tensor<2xf32>
 898   %empty = linalg.init_tensor [%sz, 2] : tensor<?x2xf32>
 899
 900   // CHECK-NOT: extract
 901   %row = tensor.extract_slice %empty[%idx, 0] [1, 2] [1, 1] : tensor<?x2xf32> to tensor<2xf32>
 902   return %row: tensor<2xf32>
 903 }
 904
 905 // -----
 906
 907 // CHECK-LABEL: func @dim_of_tiled_loop_input_no_canonicalize(
 908 //  CHECK-SAME:     %[[arg0:.*]]: tensor<?x?xf32>, %[[arg1:.*]]: tensor<?x?xf32>, %[[arg2:.*]]: tensor<?x?xf32>
 909 //       CHECK:   %[[c0:.*]] = constant 0 : index
 910 //       CHECK:   linalg.tiled_loop {{.*}} outs (%[[o:.*]] =
 911 //       CHECK:     %[[dim:.*]] = tensor.dim %[[o]], %[[c0]]
 912 //       CHECK:     index_cast %[[dim]]
     // Negative case: the body takes tensor.dim of the loop's output block
     // argument, and the loop yields an extract_slice of the filled output
     // rather than the output itself. The expected output still applies
     // tensor.dim to the block argument, not to the outer operand.
     // NOTE(review): presumably the slice may change the shape between
     // iterations, which is why the rewrite must not fire - confirm against
     // the canonicalization pattern.
 913 func @dim_of_tiled_loop_input_no_canonicalize(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>, %s: index)
 914     -> tensor<?x?xf32> {
 915   %zero = constant 0 : index
 916   %one = constant 1 : index
 917   %rows = tensor.dim %arg0, %zero : tensor<?x?xf32>
 918   %cols = tensor.dim %arg0, %one : tensor<?x?xf32>
 919   %loop = linalg.tiled_loop (%i, %j) = (%zero, %zero)
 920       to (%rows, %cols) step (%one, %one)
 921       ins (%a = %arg0 : tensor<?x?xf32>, %b = %arg1 : tensor<?x?xf32>)
 922       outs (%acc = %arg2 : tensor<?x?xf32>) {
 923     %extent = tensor.dim %acc, %zero : tensor<?x?xf32>
 924     %extent_i32 = std.index_cast %extent : index to i32
 925     %extent_f32 = std.sitofp %extent_i32 : i32 to f32
 926     %filled = linalg.fill(%extent_f32, %acc) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
 927     %window = tensor.extract_slice %filled[0, 0][%s, %s][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
 928     linalg.yield %window : tensor<?x?xf32>
 929   }
 930   return %loop : tensor<?x?xf32>
 931 }
 932
 933 // -----
 934
 935 // CHECK-LABEL: func @dim_of_tiled_loop_input(
 936 //  CHECK-SAME:     %[[arg0:.*]]: tensor<?x?xf32>, %[[arg1:.*]]: tensor<?x?xf32>, %[[arg2:.*]]: tensor<?x?xf32>
 937 //       CHECK:   %[[c0:.*]] = constant 0 : index
 938 //       CHECK:   linalg.tiled_loop
 939 //       CHECK:     %[[dim:.*]] = tensor.dim %[[arg1]], %[[c0]]
 940 //       CHECK:     index_cast %[[dim]]
     // Positive case: the body takes tensor.dim of an input block argument.
     // The expected output applies tensor.dim to the corresponding function
     // argument (the second one) instead of the block argument.
 941 func @dim_of_tiled_loop_input(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>)
 942     -> tensor<?x?xf32> {
 943   %zero = constant 0 : index
 944   %one = constant 1 : index
 945   %rows = tensor.dim %arg0, %zero : tensor<?x?xf32>
 946   %cols = tensor.dim %arg0, %one : tensor<?x?xf32>
 947   %loop = linalg.tiled_loop (%i, %j) = (%zero, %zero)
 948       to (%rows, %cols) step (%one, %one)
 949       ins (%a = %arg0 : tensor<?x?xf32>, %b = %arg1 : tensor<?x?xf32>)
 950       outs (%acc = %arg2 : tensor<?x?xf32>) {
 951     %extent = tensor.dim %b, %zero : tensor<?x?xf32>
 952     %extent_i32 = std.index_cast %extent : index to i32
 953     %extent_f32 = std.sitofp %extent_i32 : i32 to f32
 954     %filled = linalg.fill(%extent_f32, %acc) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
 955     linalg.yield %filled : tensor<?x?xf32>
 956   }
 957   return %loop : tensor<?x?xf32>
 958 }