mlir/test/Dialect/Vector/canonicalize.mlir

   1 // RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file -allow-unregistered-dialect | FileCheck %s
   2
   3 // CHECK-LABEL: create_vector_mask_to_constant_mask
     // A create_mask whose operands are all constants within the vector bounds
     // is expected to be canonicalized to a constant_mask with the same sizes.
   4 func.func @create_vector_mask_to_constant_mask() -> (vector<4x3xi1>) {
   5   %c2 = arith.constant 2 : index
   6   %c3 = arith.constant 3 : index
   7   // CHECK: vector.constant_mask [3, 2] : vector<4x3xi1>
   8   %0 = vector.create_mask %c3, %c2 : vector<4x3xi1>
   9   return %0 : vector<4x3xi1>
  10 }
  11 // -----
  12
  13 // CHECK-LABEL: create_scalable_vector_mask_to_constant_mask
     // A negative constant operand on a scalable mask is expected to fold to an
     // all-false constant_mask (size 0).
  14 func.func @create_scalable_vector_mask_to_constant_mask() -> (vector<[8]xi1>) {
  15   %c-1 = arith.constant -1 : index
  16   // CHECK: vector.constant_mask [0] : vector<[8]xi1>
  17   %0 = vector.create_mask %c-1 : vector<[8]xi1>
  18   return %0 : vector<[8]xi1>
  19 }
  20
  21 // -----
  22
  23 // CHECK-LABEL: create_vector_mask_to_constant_mask_truncation
     // An operand larger than its dimension (5 > 4) is expected to be clamped to
     // the dimension size in the resulting constant_mask.
  24 func.func @create_vector_mask_to_constant_mask_truncation() -> (vector<4x3xi1>) {
  25   %c2 = arith.constant 2 : index
  26   %c5 = arith.constant 5 : index
  27   // CHECK: vector.constant_mask [4, 2] : vector<4x3xi1>
  28   %0 = vector.create_mask %c5, %c2 : vector<4x3xi1>
  29   return %0 : vector<4x3xi1>
  30 }
  31
  32 // -----
  33
  34 // CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_neg
     // One negative operand (-2) is expected to make the whole mask all-false
     // ([0, 0]), even though the other operand (5) exceeds its dimension.
  35 func.func @create_vector_mask_to_constant_mask_truncation_neg() -> (vector<4x3xi1>) {
  36   %cneg2 = arith.constant -2 : index
  37   %c5 = arith.constant 5 : index
  38   // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1>
  39   %0 = vector.create_mask %c5, %cneg2 : vector<4x3xi1>
  40   return %0 : vector<4x3xi1>
  41 }
  42
  43 // -----
  44
  45 // CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_zero
     // A zero operand in one dimension is expected to make the whole mask
     // all-false ([0, 0]), regardless of the other operand.
  46 func.func @create_vector_mask_to_constant_mask_truncation_zero() -> (vector<4x3xi1>) {
  47   %c2 = arith.constant 2 : index
  48   %c0 = arith.constant 0 : index
  49   // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1>
  50   %0 = vector.create_mask %c0, %c2 : vector<4x3xi1>
  51   return %0 : vector<4x3xi1>
  52 }
  53
  54 // -----
  55
  56 // CHECK-LABEL: create_vector_mask_to_constant_mask_scalable_all_true
     // The fixed dimension is masked with its full size (8) and the scalable
     // dimension with vscale * 16; the expected result is constant_mask [8, 16].
  57 func.func @create_vector_mask_to_constant_mask_scalable_all_true() -> (vector<8x[16]xi1>) {
  58   %c8 = arith.constant 8 : index
  59   %c16 = arith.constant 16 : index
  60   %0 = vector.vscale
  61   %1 = arith.muli %0, %c16 : index
  62   // CHECK: vector.constant_mask [8, 16] : vector<8x[16]xi1>
  63   %10 = vector.create_mask %c8, %1 : vector<8x[16]xi1>
  64   return %10 : vector<8x[16]xi1>
  65 }
  66
  67 // -----
  68
  69 // CHECK-LABEL: create_mask_transpose_to_transposed_create_mask
  70 //  CHECK-SAME: %[[DIM0:.*]]: index, %[[DIM1:.*]]: index, %[[DIM2:.*]]: index
     // A transpose of a create_mask is expected to be replaced by a create_mask
     // whose operands are permuted the same way. The original create_mask stays
     // because it is also returned.
  71 func.func @create_mask_transpose_to_transposed_create_mask(
  72   %dim0: index, %dim1: index, %dim2: index) -> (vector<2x3x4xi1>, vector<4x2x3xi1>) {
  73   //     CHECK: vector.create_mask %[[DIM0]], %[[DIM1]], %[[DIM2]] : vector<2x3x4xi1>
  74   //     CHECK: vector.create_mask %[[DIM2]], %[[DIM0]], %[[DIM1]] : vector<4x2x3xi1>
  75   // CHECK-NOT: vector.transpose
  76   %0 = vector.create_mask %dim0, %dim1, %dim2 : vector<2x3x4xi1>
  77   %1 = vector.transpose %0, [2, 0, 1] : vector<2x3x4xi1> to vector<4x2x3xi1>
  78   return %0, %1 : vector<2x3x4xi1>, vector<4x2x3xi1>
  79 }
  80
  81 // -----
  82
  83 // CHECK-LABEL: extract_from_create_mask
  84 //  CHECK-SAME: %[[DIM0:.*]]: index, %[[DIM1:.*]]: index
     // Extracting at index 1, which is below the constant leading bound 2, is
     // expected to yield a create_mask over the remaining two dimensions.
  85 func.func @extract_from_create_mask(%dim0: index, %dim1: index) -> vector<[4]x[4]xi1> {
  86   %c2 = arith.constant 2 : index
  87   %mask = vector.create_mask %c2, %dim0, %dim1 : vector<4x[4]x[4]xi1>
  88   // CHECK: vector.create_mask %[[DIM0]], %[[DIM1]] : vector<[4]x[4]xi1>
  89   // CHECK-NOT: vector.extract
  90   %extract = vector.extract %mask[1] : vector<[4]x[4]xi1> from vector<4x[4]x[4]xi1>
  91   return %extract : vector<[4]x[4]xi1>
  92 }
  93
  94 // -----
  95
  96 // CHECK-LABEL: extract_from_create_mask_all_false
     // Extracting at index 2, which is not below the constant leading bound 2,
     // is expected to fold to an all-false constant.
  97 func.func @extract_from_create_mask_all_false(%dim0: index, %dim1: index) -> vector<[4]x[4]xi1> {
  98   %c2 = arith.constant 2 : index
  99   %mask = vector.create_mask %c2, %dim0, %dim1 : vector<4x[4]x[4]xi1>
 100   // CHECK: arith.constant dense<false> : vector<[4]x[4]xi1>
 101   // CHECK-NOT: vector.extract
 102   %extract = vector.extract %mask[2] : vector<[4]x[4]xi1> from vector<4x[4]x[4]xi1>
 103   return %extract : vector<[4]x[4]xi1>
 104 }
 105
 106 // -----
 107
 108 // CHECK-LABEL: extract_from_create_mask_leading_scalable
 109 //  CHECK-SAME: %[[DIM0:.*]]: index
     // Same fold with a scalable leading dimension: index 1 is below the constant
     // bound 3, so a create_mask over the trailing fixed dimension is expected.
 110 func.func @extract_from_create_mask_leading_scalable(%dim0: index) -> vector<8xi1> {
 111   %c3 = arith.constant 3 : index
 112   %mask = vector.create_mask %c3, %dim0 : vector<[4]x8xi1>
 113   // CHECK: vector.create_mask %[[DIM0]] : vector<8xi1>
 114   // CHECK-NOT: vector.extract
 115   %extract = vector.extract %mask[1] : vector<8xi1> from vector<[4]x8xi1>
 116   return %extract : vector<8xi1>
 117 }
 118
 119 // -----
 120
 121 // CHECK-LABEL: extract_from_create_mask_dynamic_position
 122 //  CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
     // The dynamic index addresses a dimension whose mask bound (4) equals the
     // dimension size, and the static index 2 is below its bound 3, so the
     // extract is expected to fold to a create_mask over the last dimension.
 123 func.func @extract_from_create_mask_dynamic_position(%dim0: index, %index: index) -> vector<6xi1> {
 124   %c4 = arith.constant 4 : index
 125   %c3 = arith.constant 3 : index
 126   %mask = vector.create_mask %c3, %c4, %dim0 : vector<4x4x6xi1>
 127   // CHECK: vector.create_mask %[[DIM0]] : vector<6xi1>
 128   // CHECK-NOT: vector.extract
 129   %extract = vector.extract %mask[2, %index] : vector<6xi1> from vector<4x4x6xi1>
 130   return %extract : vector<6xi1>
 131 }
 132
 133 // -----
 134
 135 // CHECK-LABEL: extract_from_create_mask_dynamic_position_all_false
 136 //  CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
     // The dynamic index addresses a dimension whose mask bound is 0, so the
     // result is expected to be all-false whatever the index value is.
 137 func.func @extract_from_create_mask_dynamic_position_all_false(%dim0: index, %index: index) -> vector<6xi1> {
 138   %c0 = arith.constant 0 : index
 139   %c1 = arith.constant 1 : index
 140   %mask = vector.create_mask %c1, %c0, %dim0 : vector<1x4x6xi1>
 141   // CHECK: arith.constant dense<false> : vector<6xi1>
 142   // CHECK-NOT: vector.extract
 143   %extract = vector.extract %mask[0, %index] : vector<6xi1> from vector<1x4x6xi1>
 144   return %extract : vector<6xi1>
 145 }
 146
 147 // -----
 148
 149 // CHECK-LABEL: extract_from_create_mask_dynamic_position_unknown
 150 //  CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
     // Negative test: the dynamic index addresses a dimension that is only
     // partially masked (bound 2 of 4), so the IR is expected to stay unchanged.
 151 func.func @extract_from_create_mask_dynamic_position_unknown(%dim0: index, %index: index) -> vector<6xi1> {
 152   %c2 = arith.constant 2 : index
 153   %mask = vector.create_mask %c2, %dim0 : vector<4x6xi1>
 154   // CHECK: %[[C2:.*]] = arith.constant 2 : index
 155   // CHECK-NEXT: %[[MASK:.*]] = vector.create_mask %[[C2]], %[[DIM0]] : vector<4x6xi1>
 156   // CHECK-NEXT: vector.extract %[[MASK]][%[[INDEX]]] : vector<6xi1> from vector<4x6xi1>
 157   %extract = vector.extract %mask[%index] : vector<6xi1> from vector<4x6xi1>
 158   return %extract : vector<6xi1>
 159 }
 160
 161 // -----
 162
 163 // CHECK-LABEL: extract_from_create_mask_mixed_position_unknown
 164 //  CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
     // Negative test with a mixed static/dynamic position: the dynamic index
     // addresses a partially masked dimension (bound 2 of 4), so the IR is
     // expected to stay unchanged.
 165 func.func @extract_from_create_mask_mixed_position_unknown(%dim0: index, %index0: index) -> vector<4xi1> {
 166   %c2 = arith.constant 2 : index
 167   %mask = vector.create_mask %c2, %c2, %dim0 : vector<2x4x4xi1>
 168   // CHECK: %[[C2:.*]] = arith.constant 2 : index
 169   // CHECK-NEXT: %[[MASK:.*]] = vector.create_mask %[[C2]], %[[C2]], %[[DIM0]] : vector<2x4x4xi1>
 170   // CHECK-NEXT: vector.extract %[[MASK]][1, %[[INDEX]]] : vector<4xi1> from vector<2x4x4xi1>
 171   %extract = vector.extract %mask[1, %index0] : vector<4xi1> from vector<2x4x4xi1>
 172   return %extract : vector<4xi1>
 173 }
 174
 175 // -----
 176
 177 // CHECK-LABEL: extract_from_non_constant_create_mask
 178 //  CHECK-SAME: %[[DIM0:.*]]: index
     // Negative test: the mask bound of the extracted dimension is not a
     // constant, so the extract is expected to stay unchanged.
 179 func.func @extract_from_non_constant_create_mask(%dim0: index) -> vector<[2]xi1> {
 180   %mask = vector.create_mask %dim0, %dim0 : vector<[2]x[2]xi1>
 181   // CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM0]], %[[DIM0]] : vector<[2]x[2]xi1>
 182   // CHECK-NEXT: vector.extract %[[MASK]][0] : vector<[2]xi1> from vector<[2]x[2]xi1>
 183   %extract = vector.extract %mask[0] : vector<[2]xi1> from vector<[2]x[2]xi1>
 184   return %extract : vector<[2]xi1>
 185 }
 186
 187 // -----
 188
 189 // CHECK-LABEL: constant_mask_transpose_to_transposed_constant_mask
     // A transpose of a constant_mask is expected to be replaced by a
     // constant_mask whose sizes are permuted the same way ([1, 2, 3] -> [3, 1, 2]).
 190 func.func @constant_mask_transpose_to_transposed_constant_mask() -> (vector<2x3x4xi1>, vector<4x2x3xi1>) {
 191   //     CHECK: vector.constant_mask [1, 2, 3] : vector<2x3x4xi1>
 192   //     CHECK: vector.constant_mask [3, 1, 2] : vector<4x2x3xi1>
 193   // CHECK-NOT: vector.transpose
 194   %0 = vector.constant_mask [1, 2, 3] : vector<2x3x4xi1>
 195   %1 = vector.transpose %0, [2, 0, 1] : vector<2x3x4xi1> to vector<4x2x3xi1>
 196   return %0, %1 : vector<2x3x4xi1>, vector<4x2x3xi1>
 197 }
 198
 199 // -----
 200
     // The seven cases below slice a [2, 2] constant_mask of vector<4x3xi1> at
     // different offsets. Each case carries its own label so that its expected
     // output is only matched inside the output of its own function. The
     // functions share one name, so the labels match them in file order.

     // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // Slice rows [0, 2) x cols [0, 2): entirely inside the masked region.
 201 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
 202   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 203   %1 = vector.extract_strided_slice %0
 204     {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]}
 205       : vector<4x3xi1> to vector<2x2xi1>
 206   // CHECK: vector.constant_mask [2, 2] : vector<2x2xi1>
 207   return %1 : vector<2x2xi1>
 208 }
 209
 210 // -----
 211
     // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // Slice rows [1, 3) x cols [0, 2): only one row is still masked.
 212 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
 213   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 214   %1 = vector.extract_strided_slice %0
 215     {offsets = [1, 0], sizes = [2, 2], strides = [1, 1]}
 216       : vector<4x3xi1> to vector<2x2xi1>
 217   // CHECK: vector.constant_mask [1, 2] : vector<2x2xi1>
 218   return %1 : vector<2x2xi1>
 219 }
 220
 221 // -----
 222
     // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // Slice rows [0, 2) x cols [1, 3): only one column is still masked.
 223 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
 224   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 225   %1 = vector.extract_strided_slice %0
 226     {offsets = [0, 1], sizes = [2, 2], strides = [1, 1]}
 227       : vector<4x3xi1> to vector<2x2xi1>
 228   // CHECK: vector.constant_mask [2, 1] : vector<2x2xi1>
 229   return %1 : vector<2x2xi1>
 230 }
 231
 232 // -----
 233
     // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // Slice rows [2, 4): no masked row remains, so the result is all-false.
 234 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
 235   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 236   %1 = vector.extract_strided_slice %0
 237     {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]}
 238       : vector<4x3xi1> to vector<2x2xi1>
 239   // CHECK: vector.constant_mask [0, 0] : vector<2x2xi1>
 240   return %1 : vector<2x2xi1>
 241 }
 242
 243 // -----
 244
     // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // Slice column 2 only: no masked column remains, so the result is all-false.
 245 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
 246   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 247   %1 = vector.extract_strided_slice %0
 248     {offsets = [0, 2], sizes = [2, 1], strides = [1, 1]}
 249       : vector<4x3xi1> to vector<2x1xi1>
 250   // CHECK: vector.constant_mask [0, 0] : vector<2x1xi1>
 251   return %1 : vector<2x1xi1>
 252 }
 253
 254 // -----
 255
     // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // Slice rows [0, 2) x column 1: both rows and the single column are masked.
 256 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
 257   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 258   %1 = vector.extract_strided_slice %0
 259     {offsets = [0, 1], sizes = [2, 1], strides = [1, 1]}
 260       : vector<4x3xi1> to vector<2x1xi1>
 261   // CHECK: vector.constant_mask [2, 1] : vector<2x1xi1>
 262   return %1 : vector<2x1xi1>
 263 }
 264
 265 // -----
 266
     // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // Slice rows [1, 3) x column 1: one row and the single column are masked.
 267 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
 268   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 269   %1 = vector.extract_strided_slice %0
 270     {offsets = [1, 1], sizes = [2, 1], strides = [1, 1]}
 271       : vector<4x3xi1> to vector<2x1xi1>
 272   // CHECK: vector.constant_mask [1, 1] : vector<2x1xi1>
 273   return %1 : vector<2x1xi1>
 274 }
 275
 276 // -----
 277
 278 // CHECK-LABEL: extract_strided_fold
 279 //  CHECK-SAME: (%[[ARG:.*]]: vector<4x3xi1>)
 280 //  CHECK-NEXT:   return %[[ARG]] : vector<4x3xi1>
     // A slice that covers the whole source (zero offsets, full sizes, unit
     // strides) is expected to fold to the source value itself.
 281 func.func @extract_strided_fold(%arg : vector<4x3xi1>) -> (vector<4x3xi1>) {
 282   %0 = vector.extract_strided_slice %arg
 283     {offsets = [0, 0], sizes = [4, 3], strides = [1, 1]}
 284       : vector<4x3xi1> to vector<4x3xi1>
 285   return %0 : vector<4x3xi1>
 286 }
 287
 288 // -----
 289
 290 // CHECK-LABEL: extract_strided_fold_insert
 291 //  CHECK-SAME: (%[[ARG:.*]]: vector<4x4xf32>
 292 //  CHECK-NEXT:   return %[[ARG]] : vector<4x4xf32>
     // Extracting exactly the region that was just inserted (same offsets, same
     // 4x4 size) is expected to fold to the inserted vector.
 293 func.func @extract_strided_fold_insert(%a: vector<4x4xf32>, %b: vector<8x16xf32>)
 294   -> (vector<4x4xf32>) {
 295   %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
 296     : vector<4x4xf32> into vector<8x16xf32>
 297   %1 = vector.extract_strided_slice %0
 298     {offsets = [2, 2], sizes = [4, 4], strides = [1, 1]}
 299       : vector<8x16xf32> to vector<4x4xf32>
 300   return %1 : vector<4x4xf32>
 301 }
 302
 303 // -----
 304
 305 // Case where the vector extracted (4x4) is a subset of the vector inserted
     // (6x4): the extract is expected to read directly from the inserted vector
     // at offsets relative to it.
 306 // CHECK-LABEL: extract_strided_fold_insert
 307 //  CHECK-SAME: (%[[ARG0:.*]]: vector<6x4xf32>
 308 //  CHECK-NEXT:   %[[EXT:.*]] = vector.extract_strided_slice %[[ARG0]]
 309 //  CHECK-SAME:     {offsets = [0, 0], sizes = [4, 4], strides = [1, 1]}
 310 //  CHECK-SAME:       : vector<6x4xf32> to vector<4x4xf32>
 311 //  CHECK-NEXT:   return %[[EXT]] : vector<4x4xf32>
 312 func.func @extract_strided_fold_insert(%a: vector<6x4xf32>, %b: vector<8x16xf32>)
 313   -> (vector<4x4xf32>) {
 314   %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
 315     : vector<6x4xf32> into vector<8x16xf32>
 316   %1 = vector.extract_strided_slice %0
 317     {offsets = [2, 2], sizes = [4, 4], strides = [1, 1]}
 318       : vector<8x16xf32> to vector<4x4xf32>
 319   return %1 : vector<4x4xf32>
 320 }
 321
 322 // -----
 323
 324 // Negative test where the extracted slice (6x4) is not a subset of the
     // inserted vector (4x4), so both ops are expected to stay unchanged.
 325 // CHECK-LABEL: extract_strided_fold_negative
 326 //  CHECK-SAME: (%[[ARG0:.*]]: vector<4x4xf32>, %[[ARG1:.*]]: vector<8x16xf32>
 327 //       CHECK:   %[[INS:.*]] = vector.insert_strided_slice %[[ARG0]], %[[ARG1]]
 328 //  CHECK-SAME:     {offsets = [2, 2], strides = [1, 1]}
 329 //  CHECK-SAME:       : vector<4x4xf32> into vector<8x16xf32>
 330 //       CHECK:   %[[EXT:.*]] = vector.extract_strided_slice %[[INS]]
 331 //  CHECK-SAME:     {offsets = [2, 2], sizes = [6, 4], strides = [1, 1]}
 332 //  CHECK-SAME:       : vector<8x16xf32> to vector<6x4xf32>
 333 //  CHECK-NEXT:   return %[[EXT]] : vector<6x4xf32>
 334 func.func @extract_strided_fold_negative(%a: vector<4x4xf32>, %b: vector<8x16xf32>)
 335   -> (vector<6x4xf32>) {
 336   %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
 337     : vector<4x4xf32> into vector<8x16xf32>
 338   %1 = vector.extract_strided_slice %0
 339     {offsets = [2, 2], sizes = [6, 4], strides = [1, 1]}
 340       : vector<8x16xf32> to vector<6x4xf32>
 341   return %1 : vector<6x4xf32>
 342 }
 343
 344 // -----
 345
 346 // Case where we need to go through 2 levels of insert_strided_slice: the
     // extracted element [0, 1] is disjoint from the second insert (row 1) and
     // lies inside the first one, so it is expected to be read directly from %b.
 347 // CHECK-LABEL: extract_strided_fold_insert
 348 //  CHECK-SAME: (%[[ARG0:.*]]: vector<2x8xf32>, %[[ARG1:.*]]: vector<1x4xf32>,
 349 //  CHECK-NEXT:   %[[EXT:.*]] = vector.extract_strided_slice %[[ARG1]]
 350 //  CHECK-SAME:     {offsets = [0, 0], sizes = [1, 1], strides = [1, 1]}
 351 //  CHECK-SAME:       : vector<1x4xf32> to vector<1x1xf32>
 352 //  CHECK-NEXT:   return %[[EXT]] : vector<1x1xf32>
 353 func.func @extract_strided_fold_insert(%a: vector<2x8xf32>, %b: vector<1x4xf32>,
 354                                   %c : vector<1x4xf32>) -> (vector<1x1xf32>) {
 355   %0 = vector.insert_strided_slice %b, %a {offsets = [0, 1], strides = [1, 1]}
 356     : vector<1x4xf32> into vector<2x8xf32>
 357   %1 = vector.insert_strided_slice %c, %0 {offsets = [1, 0], strides = [1, 1]}
 358     : vector<1x4xf32> into vector<2x8xf32>
 359   %2 = vector.extract_strided_slice %1
 360       {offsets = [0, 1], sizes = [1, 1], strides = [1, 1]}
 361         : vector<2x8xf32> to vector<1x1xf32>
 362   return %2 : vector<1x1xf32>
 363 }
 364
 365 // -----
 366
 367 // CHECK-LABEL: transpose_1D_identity
 368 // CHECK-SAME: ([[ARG:%.*]]: vector<4xf32>)
     // A 1-D transpose with the identity permutation [0] is expected to fold to
     // its operand.
 369 func.func @transpose_1D_identity(%arg : vector<4xf32>) -> vector<4xf32> {
 370   // CHECK-NOT: transpose
 371   %0 = vector.transpose %arg, [0] : vector<4xf32> to vector<4xf32>
 372   // CHECK-NEXT: return [[ARG]]
 373   return %0 : vector<4xf32>
 374 }
 375
 376 // -----
 377
 378 // CHECK-LABEL: transpose_2D_identity
 379 // CHECK-SAME: ([[ARG:%.*]]: vector<4x3xf32>)
     // A 2-D transpose with the identity permutation [0, 1] is expected to fold
     // to its operand.
 380 func.func @transpose_2D_identity(%arg : vector<4x3xf32>) -> vector<4x3xf32> {
 381   // CHECK-NOT: transpose
 382   %0 = vector.transpose %arg, [0, 1] : vector<4x3xf32> to vector<4x3xf32>
 383   // CHECK-NEXT: return [[ARG]]
 384   return %0 : vector<4x3xf32>
 385 }
 386
 387 // -----
 388
 389 // CHECK-LABEL: transpose_3D_identity
 390 // CHECK-SAME: ([[ARG:%.*]]: vector<4x3x2xf32>)
     // A 3-D transpose with the identity permutation [0, 1, 2] is expected to
     // fold to its operand.
 391 func.func @transpose_3D_identity(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> {
 392   // CHECK-NOT: transpose
 393   %0 = vector.transpose %arg, [0, 1, 2] : vector<4x3x2xf32> to vector<4x3x2xf32>
 394   // CHECK-NEXT: return [[ARG]]
 395   return %0 : vector<4x3x2xf32>
 396 }
 397
 398 // -----
 399
 400 // CHECK-LABEL: transpose_2D_sequence
 401 // CHECK-SAME: ([[ARG:%.*]]: vector<4x3xf32>)
     // A chain of 2-D transposes that composes to the identity: both %2 and %3
     // are expected to fold back to the function argument, leaving only the add.
 402 func.func @transpose_2D_sequence(%arg : vector<4x3xf32>) -> vector<4x3xf32> {
 403   // CHECK-NOT: transpose
 404   %0 = vector.transpose %arg, [1, 0] : vector<4x3xf32> to vector<3x4xf32>
 405   %1 = vector.transpose %0, [0, 1] : vector<3x4xf32> to vector<3x4xf32>
 406   %2 = vector.transpose %1, [1, 0] : vector<3x4xf32> to vector<4x3xf32>
 407   %3 = vector.transpose %2, [0, 1] : vector<4x3xf32> to vector<4x3xf32>
 408   // CHECK: [[ADD:%.*]] = arith.addf [[ARG]], [[ARG]]
 409   %4 = arith.addf %2, %3 : vector<4x3xf32>
 410   // CHECK-NEXT: return [[ADD]]
 411   return %4 : vector<4x3xf32>
 412 }
 413
 414 // -----
 415
 416 // CHECK-LABEL: transpose_3D_sequence
 417 // CHECK-SAME: ([[ARG:%.*]]: vector<4x3x2xf32>)
     // Chains of 3-D transposes are expected to be composed into single
     // transposes of the argument, and chains that compose to the identity are
     // expected to fold away entirely.
 418 func.func @transpose_3D_sequence(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> {
       // %0 followed by %1 composes to the single permutation [2, 1, 0].
 419   // CHECK: [[T0:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
 420   %0 = vector.transpose %arg, [1, 2, 0] : vector<4x3x2xf32> to vector<3x2x4xf32>
 421   %1 = vector.transpose %0, [1, 0, 2] : vector<3x2x4xf32> to vector<2x3x4xf32>
       // %2 undoes the composed permutation, so %3 is again a [2, 1, 0] transpose
       // of the argument. The captured argument name is used here instead of the
       // printed name so the expectation does not depend on SSA value naming.
 422   // CHECK: [[T1:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
 423   %2 = vector.transpose %1, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
 424   %3 = vector.transpose %2, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
 425   // CHECK: [[MUL:%.*]] = arith.mulf [[T0]], [[T1]]
 426   %4 = arith.mulf %1, %3 : vector<2x3x4xf32>
 427   // CHECK: [[T5:%.*]] = vector.transpose [[MUL]], [2, 1, 0]
 428   %5 = vector.transpose %4, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
       // %6 transposes %3 back, which folds to the argument itself.
 429   // CHECK-NOT: transpose
 430   %6 = vector.transpose %3, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
 431   // CHECK: [[ADD:%.*]] = arith.addf [[T5]], [[ARG]]
 432   %7 = arith.addf %5, %6 : vector<4x3x2xf32>
 433   // CHECK-NEXT: return [[ADD]]
 434   return %7 : vector<4x3x2xf32>
 435 }
 436
 437 // -----
 438
 439 // CHECK-LABEL: cast_transfers
     // The memref.cast to a dynamic shape is expected to be folded into the
     // transfer ops, which then use the static memref<4x8xf32> and are marked
     // in-bounds in both dimensions.
 440 func.func @cast_transfers(%A: memref<4x8xf32>) -> (vector<4x8xf32>) {
 441   %c0 = arith.constant 0 : index
 442   %f0 = arith.constant 0.0 : f32
 443   %0 = memref.cast %A : memref<4x8xf32> to memref<?x?xf32>
 444
 445   // CHECK: vector.transfer_read %{{.*}} {in_bounds = [true, true]} : memref<4x8xf32>, vector<4x8xf32>
 446   %1 = vector.transfer_read %0[%c0, %c0], %f0 : memref<?x?xf32>, vector<4x8xf32>
 447
 448   // CHECK: vector.transfer_write %{{.*}} {in_bounds = [true, true]} : vector<4x8xf32>, memref<4x8xf32>
 449   vector.transfer_write %1, %0[%c0, %c0] : vector<4x8xf32>, memref<?x?xf32>
 450   return %1 : vector<4x8xf32>
 451 }
 452
 453 // -----
 454
 455 // CHECK-LABEL: cast_transfers
     // Tensor variant: the tensor.cast to a dynamic shape is expected to be
     // folded into the transfer_read, which then uses the static
     // tensor<4x8xf32> and is marked in-bounds in both dimensions.
 456 func.func @cast_transfers(%A: tensor<4x8xf32>) -> (vector<4x8xf32>) {
 457   %c0 = arith.constant 0 : index
 458   %f0 = arith.constant 0.0 : f32
 459   %0 = tensor.cast %A : tensor<4x8xf32> to tensor<?x?xf32>
 460
 461   // CHECK: vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor<4x8xf32>, vector<4x8xf32>
 462   %1 = vector.transfer_read %0[%c0, %c0], %f0 : tensor<?x?xf32>, vector<4x8xf32>
 463
 464   return %1 : vector<4x8xf32>
 465 }
 466
 467 // -----
 468
 469 // CHECK-LABEL: func @insert_extract_transpose_2d(
 470 //  CHECK-SAME: %[[V:[a-zA-Z0-9]*]]: vector<2x3xf32>,
 471 //  CHECK-SAME: %[[F0:[a-zA-Z0-9]*]]: f32,
 472 //  CHECK-SAME: %[[F1:[a-zA-Z0-9]*]]: f32,
 473 //  CHECK-SAME: %[[F2:[a-zA-Z0-9]*]]: f32,
 474 //  CHECK-SAME: %[[F3:[a-zA-Z0-9]*]]: f32
     // Scalar extracts are expected to be traced back through chains of scalar
     // inserts and 2-D transposes to the scalar originally inserted, so the
     // whole body folds down to a single return of function arguments.
 475 func.func @insert_extract_transpose_2d(
 476     %v: vector<2x3xf32>, %f0: f32, %f1: f32, %f2: f32, %f3: f32)
 477 -> (f32, f32, f32)
 478 {
 479   %0 = vector.insert %f0, %v[0, 0] : f32 into vector<2x3xf32>
 480   %1 = vector.insert %f1, %0[0, 1] : f32 into vector<2x3xf32>
 481   %2 = vector.insert %f2, %1[1, 0] : f32 into vector<2x3xf32>
 482   %3 = vector.insert %f3, %2[1, 1] : f32 into vector<2x3xf32>
 483   %4 = vector.transpose %3, [1, 0] : vector<2x3xf32> to vector<3x2xf32>
 484   %5 = vector.insert %f3, %4[1, 0] : f32 into vector<3x2xf32>
 485   %6 = vector.transpose %5, [1, 0] : vector<3x2xf32> to vector<2x3xf32>
 486
 487   // Expected %f2 from %2 = vector.insert %f2, %1[1, 0].
 488   %r1 = vector.extract %3[1, 0] : f32 from vector<2x3xf32>
 489
 490   // Expected %f1 from %1 = vector.insert %f1, %0[0, 1] followed by
 491   // transpose [1, 0].
 492   %r2 = vector.extract %4[1, 0] : f32 from vector<3x2xf32>
 493
 494   // Expected %f2 from %2 = vector.insert %f2, %1[1, 0] followed by double
 495   // transpose [1, 0].
 496   %r3 = vector.extract %6[1, 0] : f32 from vector<2x3xf32>
 497
 498   // CHECK-NEXT: return %[[F2]], %[[F1]], %[[F2]] : f32, f32, f32
 499   return %r1, %r2, %r3 : f32, f32, f32
 500 }
 501
 502 // -----
 503
 504 // CHECK-LABEL: insert_extract_chain
 505 //  CHECK-SAME: %[[V234:[a-zA-Z0-9]*]]: vector<2x3x4xf32>
 506 //  CHECK-SAME: %[[V34:[a-zA-Z0-9]*]]: vector<3x4xf32>
 507 //  CHECK-SAME: %[[V4:[a-zA-Z0-9]*]]: vector<4xf32>
     // Exercises extract-from-insert-chain folding for the different relations
     // between the extract position and the insert position (equal, insert
     // position is a prefix, extract position is a prefix, disjoint).
 508 func.func @insert_extract_chain(%v234: vector<2x3x4xf32>, %v34: vector<3x4xf32>, %v4: vector<4xf32>)
 509     -> (vector<4xf32>, vector<4xf32>, vector<3x4xf32>, vector<3x4xf32>) {
 510   // CHECK-NEXT: %[[A34:.*]] = vector.insert
 511   %A34 = vector.insert %v34, %v234[0]: vector<3x4xf32> into vector<2x3x4xf32>
 512   // CHECK-NEXT: %[[B34:.*]] = vector.insert
 513   %B34 = vector.insert %v34, %A34[1]: vector<3x4xf32> into vector<2x3x4xf32>
 514   // CHECK-NEXT: %[[A4:.*]] = vector.insert
 515   %A4 = vector.insert %v4, %B34[1, 0]: vector<4xf32> into vector<2x3x4xf32>
 516   // CHECK-NEXT: %[[B4:.*]] = vector.insert
 517   %B4 = vector.insert %v4, %A4[1, 1]: vector<4xf32> into vector<2x3x4xf32>
 518
 519   // Case 2.a. [1, 1] == insertpos ([1, 1])
 520   // Match %B4 insertionpos and fold to its source(i.e. %V4).
 521    %r0 = vector.extract %B4[1, 1]: vector<4xf32> from vector<2x3x4xf32>
 522
 523   // Case 3.a. insertpos ([1]) is a prefix of [1, 0].
 524   // Traverse %B34 to its source(i.e. %V34@[*0*]).
 525   // CHECK-NEXT: %[[R1:.*]] = vector.extract %[[V34]][0]
 526    %r1 = vector.extract %B34[1, 0]: vector<4xf32> from vector<2x3x4xf32>
 527
 528   // Case 4. [1] is a prefix of insertpos ([1, 1]).
 529   // Cannot traverse %B4.
 530   // CHECK-NEXT: %[[R2:.*]] = vector.extract %[[B4]][1]
 531    %r2 = vector.extract %B4[1]: vector<3x4xf32> from vector<2x3x4xf32>
 532
 533   // Case 5. [0] is disjoint from insertpos ([1, 1]).
 534   // Traverse %B4 to its dest(i.e. %A4@[0]).
 535   // Traverse %A4 to its dest(i.e. %B34@[0]).
 536   // Traverse %B34 to its dest(i.e. %A34@[0]).
 537   // Match %A34 insertionpos and fold to its source(i.e. %V34).
 538    %r3 = vector.extract %B4[0]: vector<3x4xf32> from vector<2x3x4xf32>
 539
 540   // CHECK: return %[[V4]], %[[R1]], %[[R2]], %[[V34]]
 541   return %r0, %r1, %r2, %r3:
 542     vector<4xf32>, vector<4xf32>, vector<3x4xf32>, vector<3x4xf32>
 543 }
 544
 545 // -----
 546
 547 // CHECK-LABEL: func @insert_extract_transpose_3d(
 548 //  CHECK-SAME: %[[V234:[a-zA-Z0-9]*]]: vector<2x3x4xf32>
     // Exercises extract folding through transpose/insert/transpose chains on
     // 3-D vectors: one case that folds to a direct extract from the argument
     // and three cases where the chain is expected to stay in place.
 549 func.func @insert_extract_transpose_3d(
 550   %v234: vector<2x3x4xf32>, %v43: vector<4x3xf32>, %f0: f32)
 551     -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<3x4xf32>) {
 552
 553   %a432 = vector.transpose %v234, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
 554   %b432 = vector.insert %f0, %a432[0, 0, 1] : f32 into vector<4x3x2xf32>
 555   %c234 = vector.transpose %b432, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
 556   // Case 1. %c234 = transpose [2,1,0] posWithSentinels [1,2,-1] -> [-1,2,1]
 557   // Case 5. %b432 = insert [0,0,1] (inter([.,2,1], [.,0,1]) == 0) prop to %a432
 558   // Case 1. %a432 = transpose [2,1,0] posWithSentinels [-1,2,1] -> [1,2,-1]
 559   // can extract directly from %v234, the rest folds.
 560   // CHECK: %[[R0:.*]] = vector.extract %[[V234]][1, 2]
 561   %r0 = vector.extract %c234[1, 2] : vector<4xf32> from vector<2x3x4xf32>
 562
 563   // CHECK-NEXT: vector.transpose
 564   // CHECK-NEXT: vector.insert
 565   // CHECK-NEXT: %[[F234:.*]] = vector.transpose
 566   %d432 = vector.transpose %v234, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
 567   %e432 = vector.insert %f0, %d432[0, 2, 1] : f32 into vector<4x3x2xf32>
 568   %f234 = vector.transpose %e432, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
 569   // Case 1. %f234 = transpose [2,1,0] posWithSentinels [1,2,-1] -> [-1,2,1]
 570   // Case 4. %e432 = insert [0,2,1] (inter([.,2,1], [.,2,1]) != 0)
 571   // Bail, cannot do better than the current.
 572   // CHECK: %[[R1:.*]] = vector.extract %[[F234]]
 573   %r1 = vector.extract %f234[1, 2] : vector<4xf32> from vector<2x3x4xf32>
 574
 575   // CHECK-NEXT: vector.transpose
 576   // CHECK-NEXT: vector.insert
 577   // CHECK-NEXT: %[[H234:.*]] = vector.transpose
 578   %g243 = vector.transpose %v234, [0, 2, 1] : vector<2x3x4xf32> to vector<2x4x3xf32>
 579   %h243 = vector.insert %v43, %g243[0] : vector<4x3xf32> into vector<2x4x3xf32>
 580   %i234 = vector.transpose %h243, [0, 2, 1] : vector<2x4x3xf32> to vector<2x3x4xf32>
 581   // Case 1. %i234 = transpose [0,2,1] posWithSentinels [0,-1,-2] -> [0,-2,-1]
 582   // Case 3.b. %h243 = insert [0] is prefix of [0,.,.] but internal transpose.
 583   // Bail, cannot do better than the current.
 584   // CHECK: %[[R2:.*]] = vector.extract %[[H234]][0, 1]
 585   %r2 = vector.extract %i234[0, 1] : vector<4xf32> from vector<2x3x4xf32>
 586
 587   // CHECK-NEXT: vector.transpose
 588   // CHECK-NEXT: vector.insert
 589   // CHECK-NEXT: %[[K234:.*]] = vector.transpose
 590   %j243 = vector.transpose %v234, [0, 2, 1] : vector<2x3x4xf32> to vector<2x4x3xf32>
 591   %k243 = vector.insert %v43, %j243[0] : vector<4x3xf32> into vector<2x4x3xf32>
 592   %l234 = vector.transpose %k243, [0, 2, 1] : vector<2x4x3xf32> to vector<2x3x4xf32>
 593   // Case 1. %l234 = transpose [0,2,1] posWithSentinels [0,-1,-2] -> [0,-2,-1]
 594   // Case 2.b. %k243 = insert [0] == [0,.,.] but internal transpose.
 595   // Bail, cannot do better than the current.
 596   // CHECK: %[[R3:.*]] = vector.extract %[[K234]][0]
 597   %r3 = vector.extract %l234[0] : vector<3x4xf32> from vector<2x3x4xf32>
 598
 599   // CHECK-NEXT: return %[[R0]], %[[R1]], %[[R2]], %[[R3]]
 600   return %r0, %r1, %r2, %r3: vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<3x4xf32>
 601 }
 602
 603 // -----
 604
 605 // CHECK-LABEL: fold_extracts
 606 //  CHECK-SAME:   %[[A:[a-zA-Z0-9]*]]: vector<3x4x5x6xf32>
     // A chain of extracts is expected to collapse into one extract from the
     // original source with the concatenated position [0, 1, 2, 3]; a lone
     // extract is expected to stay as it is.
 607 func.func @fold_extracts(%a : vector<3x4x5x6xf32>) -> (f32, vector<4x5x6xf32>) {
 608   %b = vector.extract %a[0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
 609   %c = vector.extract %b[1, 2] : vector<6xf32> from vector<4x5x6xf32>
 610   //  CHECK-NEXT: vector.extract %[[A]][0, 1, 2, 3] : f32 from vector<3x4x5x6xf32>
 611   %d = vector.extract %c[3] : f32 from vector<6xf32>
 612
 613   //  CHECK-NEXT: vector.extract %[[A]][0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
 614   %e = vector.extract %a[0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
 615
 616   //  CHECK-NEXT: return
 617   return %d, %e : f32, vector<4x5x6xf32>
 618 }
 619
 620 // -----
 621
 622 // CHECK-LABEL: fold_extract_transpose
 623 //  CHECK-SAME:   %[[A:[a-zA-Z0-9]*]]: vector<3x4x5x6xf32>
 624 //  CHECK-SAME:   %[[B:[a-zA-Z0-9]*]]: vector<3x6x5x6xf32>
     // An extract of a transpose is expected to become an extract from the
     // transpose source with a permuted position when the transpose leaves the
     // extracted (innermost) dimension in place; otherwise nothing folds.
 625 func.func @fold_extract_transpose(
 626     %a : vector<3x4x5x6xf32>, %b : vector<3x6x5x6xf32>) -> (
 627       vector<6xf32>, vector<6xf32>, vector<6xf32>) {
 628   // [3] is a proper most minor identity map in transpose.
 629   // Permutation is a self inverse and we have.
 630   // [0, 2, 1] ^ -1 o [0, 1, 2] = [0, 2, 1] o [0, 1, 2]
 631   //                            = [0, 2, 1]
 632   //  CHECK-NEXT: vector.extract %[[A]][0, 2, 1] : vector<6xf32> from vector<3x4x5x6xf32>
 633   %0 = vector.transpose %a, [0, 2, 1, 3] : vector<3x4x5x6xf32> to vector<3x5x4x6xf32>
 634   %1 = vector.extract %0[0, 1, 2] : vector<6xf32> from vector<3x5x4x6xf32>
 635
 636   // [3] is a proper most minor identity map in transpose.
 637   // Permutation is not a self inverse and we have.
 638   // [1, 2, 0] ^ -1 o [0, 1, 2] = [2, 0, 1] o [0, 1, 2]
 639   //                            = [2, 0, 1]
 640   //  CHECK-NEXT: vector.extract %[[A]][2, 0, 1] : vector<6xf32> from vector<3x4x5x6xf32>
 641   %2 = vector.transpose %a, [1, 2, 0, 3] : vector<3x4x5x6xf32> to vector<4x5x3x6xf32>
 642   %3 = vector.extract %2[0, 1, 2] : vector<6xf32> from vector<4x5x3x6xf32>
 643
 644   // Not a minor identity map so intra-vector level has been permuted
 645   //  CHECK-NEXT: vector.transpose %[[B]], [0, 2, 3, 1]
 646   //  CHECK-NEXT: vector.extract %{{.*}}[0, 1, 2]
 647   %4 = vector.transpose %b, [0, 2, 3, 1] : vector<3x6x5x6xf32> to vector<3x5x6x6xf32>
 648   %5 = vector.extract %4[0, 1, 2] : vector<6xf32> from vector<3x5x6x6xf32>
 649
 650   return %1, %3, %5 : vector<6xf32>, vector<6xf32>, vector<6xf32>
 651 }
 652
 653 // -----
 654
 655 // CHECK-LABEL: fold_extract_broadcast
 656 //  CHECK-SAME:   %[[A:.*]]: f32
 657 //       CHECK:   return %[[A]] : f32
     // Extracting a scalar from the broadcast of a scalar is expected to fold to
     // the broadcast source.
 658 func.func @fold_extract_broadcast(%a : f32) -> f32 {
 659   %b = vector.broadcast %a : f32 to vector<1x2x4xf32>
 660   %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
 661   return %r : f32
 662 }
 663
 664 // -----
 665
 666 // CHECK-LABEL: fold_extract_broadcast_0dvec
 667 //  CHECK-SAME:   %[[A:.*]]: vector<f32>
 668 //       CHECK:   %[[B:.+]] = vector.extractelement %[[A]][] : vector<f32>
 669 //       CHECK:   return %[[B]] : f32
     // When the broadcast source is a 0-D vector, the scalar extract is expected
     // to become a vector.extractelement on that 0-D vector.
 670 func.func @fold_extract_broadcast_0dvec(%a : vector<f32>) -> f32 {
 671   %b = vector.broadcast %a : vector<f32> to vector<1x2x4xf32>
 672   %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
 673   return %r : f32
 674 }
 675
 676 // -----
 677
 678 // CHECK-LABEL: fold_extract_broadcast_negative
 679 //       CHECK:   vector.broadcast %{{.*}} : vector<1x1xf32> to vector<1x1x4xf32>
 680 //       CHECK:   vector.extract %{{.*}}[0, 0] : vector<4xf32> from vector<1x1x4xf32>
     // Negative test: the source is vector<1x1xf32> and the extract yields
     // vector<4xf32>; both ops are expected to stay unchanged.
 681 func.func @fold_extract_broadcast_negative(%a : vector<1x1xf32>) -> vector<4xf32> {
 682   %b = vector.broadcast %a : vector<1x1xf32> to vector<1x1x4xf32>
 683   %r = vector.extract %b[0, 0] : vector<4xf32> from vector<1x1x4xf32>
 684   return %r : vector<4xf32>
 685 }
 686
 687 // -----
 688
 689 // CHECK-LABEL: fold_extract_splat
 690 //  CHECK-SAME:   %[[A:.*]]: f32
 691 //       CHECK:   return %[[A]] : f32
     // Extracting a scalar from a splat is expected to fold to the splatted
     // scalar.
 692 func.func @fold_extract_splat(%a : f32) -> f32 {
 693   %b = vector.splat %a : vector<1x2x4xf32>
 694   %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
 695   return %r : f32
 696 }
 697
 698 // -----
 699
 700 // CHECK-LABEL: fold_extract_broadcast_vector
 701 //  CHECK-SAME:   %[[A:.*]]: vector<4xf32>
 702 //       CHECK:   return %[[A]] : vector<4xf32>
     // The extract indexes only the two leading dimensions added by the
     // broadcast, so the extracted vector<4xf32> is expected to be the
     // broadcast source itself.
 703 func.func @fold_extract_broadcast_vector(%a : vector<4xf32>) -> vector<4xf32> {
 704   %b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32>
 705   %r = vector.extract %b[0, 1] : vector<4xf32> from vector<1x2x4xf32>
 706   return %r : vector<4xf32>
 707 }
 708
 709 // -----
 710
 711 // CHECK-LABEL: fold_extract_broadcast
 712 //  CHECK-SAME:   %[[A:.*]]: vector<4xf32>
 713 //       CHECK:   %[[R:.*]] = vector.extract %[[A]][2] : f32 from vector<4xf32>
 714 //       CHECK:   return %[[R]] : f32
     // Vector source, scalar result: the expected output extracts directly
     // from the broadcast source, keeping only the innermost index (2) and
     // dropping the indices of the broadcast-added dimensions.
 715 func.func @fold_extract_broadcast(%a : vector<4xf32>) -> f32 {
 716   %b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32>
 717   %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
 718   return %r : f32
 719 }
 720
 721 // -----
 722
 723 // CHECK-LABEL: fold_extract_broadcast
 724 //       CHECK:   %[[B:.*]] = vector.broadcast %{{.*}} : f32 to vector<4xf32>
 725 //       CHECK:   return %[[B]] : vector<4xf32>
     // Scalar source, vector result: the broadcast + extract pair is expected
     // to become a single, smaller broadcast of the scalar to vector<4xf32>.
 726 func.func @fold_extract_broadcast(%a : f32) -> vector<4xf32> {
 727   %b = vector.broadcast %a : f32 to vector<1x2x4xf32>
 728   %r = vector.extract %b[0, 1] : vector<4xf32> from vector<1x2x4xf32>
 729   return %r : vector<4xf32>
 730 }
 731
 732 // -----
 733
 734 // CHECK-LABEL: fold_extract_broadcast
 735 //  CHECK-SAME:   %[[A:.*]]: vector<1xf32>
 736 //       CHECK:   %[[R:.*]] = vector.broadcast %[[A]] : vector<1xf32> to vector<8xf32>
 737 //       CHECK:   return %[[R]] : vector<8xf32>
     // vector<1xf32> source stretched to 8 lanes: the expected output is a
     // single broadcast from the source to the extracted type vector<8xf32>.
 738 func.func @fold_extract_broadcast(%a : vector<1xf32>) -> vector<8xf32> {
 739   %b = vector.broadcast %a : vector<1xf32> to vector<1x8xf32>
 740   %r = vector.extract %b[0] : vector<8xf32> from vector<1x8xf32>
 741   return %r : vector<8xf32>
 742 }
 743 // -----
 744
 745 // CHECK-LABEL: @fold_extract_shuffle
 746 //  CHECK-SAME:   %[[A:.*]]: vector<8xf32>, %[[B:.*]]: vector<8xf32>
 747 //   CHECK-NOT:   vector.shuffle
 748 //       CHECK:   vector.extract %[[A]][0] : f32 from vector<8xf32>
 749 //       CHECK:   vector.extract %[[B]][0] : f32 from vector<8xf32>
 750 //       CHECK:   vector.extract %[[A]][7] : f32 from vector<8xf32>
 751 //       CHECK:   vector.extract %[[B]][7] : f32 from vector<8xf32>
     // Each extract from the shuffle result is expected to be rewritten into
     // an extract from the shuffle operand that supplies that lane, leaving no
     // shuffle behind. With the mask [0, 8, 7, 15], entries 0 and 7 select
     // lanes 0 and 7 of %a, and entries 8 and 15 select lanes 0 and 7 of %b.
 752 func.func @fold_extract_shuffle(%a : vector<8xf32>, %b : vector<8xf32>)
 753                                 -> (f32, f32, f32, f32) {
 754   %shuffle = vector.shuffle %a, %b [0, 8, 7, 15] : vector<8xf32>, vector<8xf32>
 755   %e0 = vector.extract %shuffle[0] : f32 from vector<4xf32>
 756   %e1 = vector.extract %shuffle[1] : f32 from vector<4xf32>
 757   %e2 = vector.extract %shuffle[2] : f32 from vector<4xf32>
 758   %e3 = vector.extract %shuffle[3] : f32 from vector<4xf32>
 759   return %e0, %e1, %e2, %e3 : f32, f32, f32, f32
 760 }
 761
 762 // -----
 763
 764 // CHECK-LABEL: func @fold_extract_shapecast
 765 //  CHECK-SAME: (%[[A0:.*]]: vector<5x1x3x2xf32>, %[[A1:.*]]: vector<8x4x2xf32>
 766 //       CHECK:   %[[R0:.*]] = vector.extract %[[A0]][1, 0, 1, 1] : f32 from vector<5x1x3x2xf32>
 767 //       CHECK:   %[[R1:.*]] = vector.extract %[[A0]][1, 0, 2] : vector<2xf32> from vector<5x1x3x2xf32>
 768 //       CHECK:   %[[R2:.*]] = vector.extract %[[A1]][7] : vector<4x2xf32> from vector<8x4x2xf32>
 769 //       CHECK:   return %[[R0]], %[[R1]], %[[R2]], %[[A1]] : f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>
     // Extracts from shape-cast vectors are expected to be rewritten as
     // extracts from the shape_cast source, with positions re-expressed in the
     // source shape:
     //   [4, 1] of 15x2    -> [1, 0, 1, 1] of 5x1x3x2
     //   [5]    of 15x2    -> [1, 0, 2]    of 5x1x3x2
     //   [3, 1] of 4x2x4x2 -> [7]          of 8x4x2
     // The last extract only strips the leading unit dimension added by the
     // shape_cast, so the expected output returns %arg1 directly.
 770 func.func @fold_extract_shapecast(%arg0 : vector<5x1x3x2xf32>,
 771                              %arg1 : vector<8x4x2xf32>)
 772   -> (f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>) {
 773   %0 = vector.shape_cast %arg0 : vector<5x1x3x2xf32> to vector<15x2xf32>
 774   %1 = vector.shape_cast %arg1 : vector<8x4x2xf32> to vector<4x2x4x2xf32>
 775   %2 = vector.shape_cast %arg1 : vector<8x4x2xf32> to vector<1x8x4x2xf32>
 776   %r1 = vector.extract %0[4, 1] : f32 from vector<15x2xf32>
 777   %r2 = vector.extract %0[5] : vector<2xf32> from vector<15x2xf32>
 778   %r3 = vector.extract %1[3, 1] : vector<4x2xf32> from vector<4x2x4x2xf32>
 779   %r4 = vector.extract %2[0] : vector<8x4x2xf32> from vector<1x8x4x2xf32>
 780   return %r1, %r2, %r3, %r4 : f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>
 781 }
 782
 783 // -----
 784
 785 // CHECK-LABEL: fold_extract_shapecast_negative
 786 //       CHECK:   %[[V:.*]] = vector.shape_cast %{{.*}} : vector<16xf32> to vector<2x4x2xf32>
 787 //       CHECK:   %[[R:.*]] = vector.extract %[[V]][1] : vector<4x2xf32> from vector<2x4x2xf32>
 788 //       CHECK:   return %[[R]] : vector<4x2xf32>
     // Negative test: the shape_cast and the extract are expected to remain.
     // NOTE(review): presumably because the extracted vector<4x2xf32> cannot
     // be expressed as an extract from the 1-D vector<16xf32> source - confirm.
 789 func.func @fold_extract_shapecast_negative(%arg0 : vector<16xf32>) -> vector<4x2xf32> {
 790   %0 = vector.shape_cast %arg0 : vector<16xf32> to vector<2x4x2xf32>
 791   %r = vector.extract %0[1] : vector<4x2xf32> from vector<2x4x2xf32>
 792   return %r : vector<4x2xf32>
 793 }
 794
 795 // -----
 796
 797 // CHECK-LABEL: dont_fold_0d_extract_shapecast
 798 //       CHECK:   %[[V:.*]] = vector.shape_cast %{{.*}} : vector<f32> to vector<1xf32>
 799 //       CHECK:   %[[R:.*]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
 800 //       CHECK:   return %[[R]] : f32
     // Negative test: when the shape_cast source is a 0-D vector, both the
     // shape_cast and the extract are expected to be left in place.
 801 func.func @dont_fold_0d_extract_shapecast(%arg0 : vector<f32>) -> f32 {
 802   %0 = vector.shape_cast %arg0 : vector<f32> to vector<1xf32>
 803   %r = vector.extract %0[0] : f32 from vector<1xf32>
 804   return %r : f32
 805 }
 806
 807 // -----
 808
 809 // CHECK-LABEL: fold_extract_shapecast_to_shapecast
 810 //  CHECK-SAME: (%[[ARG:.+]]: vector<3x4xf32>)
 811 //       CHECK:   %[[R:.+]] = vector.shape_cast %[[ARG]] : vector<3x4xf32> to vector<12xf32>
 812 //       CHECK:   return %[[R]]
     // The extract only removes the leading unit dimension of the shape_cast
     // result, so the pair is expected to collapse into one shape_cast from
     // the original source to vector<12xf32>.
 813 func.func @fold_extract_shapecast_to_shapecast(%arg0 : vector<3x4xf32>) -> vector<12xf32> {
 814   %0 = vector.shape_cast %arg0 : vector<3x4xf32> to vector<1x12xf32>
 815   %r = vector.extract %0[0] : vector<12xf32> from vector<1x12xf32>
 816   return %r : vector<12xf32>
 817 }
 818
 819 // -----
 820
 821 // CHECK-LABEL: func @extract_no_fold_scalar_to_0d(
 822 //  CHECK-SAME:     %[[v:.*]]: vector<f32>)
 823 //       CHECK:   %[[extract:.*]] = vector.extract %[[v]][] : f32 from vector<f32>
 824 //       CHECK:   return %[[extract]]
     // Negative test: an extract with an empty position list that reads the
     // scalar out of a 0-D vector changes the type (vector<f32> -> f32), so it
     // is expected to be preserved.
 825 func.func @extract_no_fold_scalar_to_0d(%v: vector<f32>) -> f32 {
 826   %0 = vector.extract %v[] : f32 from vector<f32>
 827   return %0 : f32
 828 }
 829
 830 // -----
 831
 832 // CHECK-LABEL: func @insert_fold_same_rank(
 833 //  CHECK-SAME:     %[[v:.*]]: vector<2x2xf32>)
 834 //       CHECK:      %[[CST:.+]] = arith.constant
 835 //  CHECK-SAME:                    : vector<2x2xf32>
 836 //       CHECK-NOT:  vector.insert
 837 //       CHECK:   return %[[CST]]
     // Inserting a value with an empty position list into a destination of
     // the same type replaces the whole destination, so the expected output
     // returns the inserted constant and contains no vector.insert.
 838 func.func @insert_fold_same_rank(%v: vector<2x2xf32>) -> vector<2x2xf32> {
 839   %cst = arith.constant dense<0.000000e+00> : vector<2x2xf32>
 840   %0 = vector.insert %cst, %v [] : vector<2x2xf32> into vector<2x2xf32>
 841   return %0 : vector<2x2xf32>
 842 }
 843
 844 // -----
 845
 846 // CHECK-LABEL: func @insert_no_fold_scalar_to_0d(
 847 //  CHECK-SAME:     %[[v:.*]]: vector<f32>)
 848 //       CHECK:   %[[insert:.*]] = vector.insert %{{.*}}, %[[v]] [] : f32 into vector<f32>
 849 //       CHECK:   return %[[insert]]
     // Negative test: inserting a scalar into a 0-D vector with an empty
     // position list changes the type (f32 -> vector<f32>), so the insert is
     // expected to be preserved.
 850 func.func @insert_no_fold_scalar_to_0d(%v: vector<f32>) -> vector<f32> {
 851   %cst = arith.constant 0.000000e+00 : f32
 852   %0 = vector.insert %cst, %v [] : f32 into vector<f32>
 853   return %0 : vector<f32>
 854 }
 855
 856 // -----
 857
 858 // CHECK-LABEL: dont_fold_expand_collapse
 859 //       CHECK:   %[[A:.*]] = vector.shape_cast %{{.*}} : vector<1x1x64xf32> to vector<1x1x8x8xf32>
 860 //       CHECK:   %[[B:.*]] = vector.shape_cast %{{.*}} : vector<1x1x8x8xf32> to vector<8x8xf32>
 861 //       CHECK:   return %[[B]] : vector<8x8xf32>
     // Negative test: a rank-expanding shape_cast followed by a
     // rank-collapsing one; both casts are expected to be kept rather than
     // merged into a single 1x1x64 -> 8x8 cast.
 862 func.func @dont_fold_expand_collapse(%arg0: vector<1x1x64xf32>) -> vector<8x8xf32> {
 863     %0 = vector.shape_cast %arg0 : vector<1x1x64xf32> to vector<1x1x8x8xf32>
 864     %1 = vector.shape_cast %0 : vector<1x1x8x8xf32> to vector<8x8xf32>
 865     return %1 : vector<8x8xf32>
 866 }
 867
 868 // -----
 869
 870 // CHECK-LABEL: func @fold_broadcast_shapecast
 871 //  CHECK-SAME: (%[[V:.+]]: vector<4xf32>)
 872 //       CHECK:   return %[[V]]
     // The shape_cast result type equals the broadcast source type, so the
     // broadcast + shape_cast pair is expected to fold away to the source.
 873 func.func @fold_broadcast_shapecast(%arg0: vector<4xf32>) -> vector<4xf32> {
 874     %0 = vector.broadcast %arg0 : vector<4xf32> to vector<1x1x4xf32>
 875     %1 = vector.shape_cast %0 : vector<1x1x4xf32> to vector<4xf32>
 876     return %1 : vector<4xf32>
 877 }
 878
 879 // -----
 880
 881 // CHECK-LABEL: func @canonicalize_broadcast_shapecast_scalar
 882 //       CHECK:   vector.broadcast
 883 //   CHECK-NOT:   vector.shape_cast
     // Scalar broadcast followed by a shape_cast: the expected output keeps a
     // broadcast and contains no shape_cast after it.
 884 func.func @canonicalize_broadcast_shapecast_scalar(%arg0: f32) -> vector<1xf32> {
 885     %0 = vector.broadcast %arg0 : f32 to vector<1x1x1xf32>
 886     %1 = vector.shape_cast %0 : vector<1x1x1xf32> to vector<1xf32>
 887     return %1 : vector<1xf32>
 888 }
 889
 890 // -----
 891
 892 // CHECK-LABEL: func @dont_fold_broadcast_shapecast_diff_shape
 893 //       CHECK:   vector.broadcast
 894 //       CHECK:   vector.shape_cast
     // Negative test: the shape_cast result (vector<8xf32>) differs from the
     // broadcast source (vector<4xf32>), and both ops are expected to remain.
 895 func.func @dont_fold_broadcast_shapecast_diff_shape(%arg0: vector<4xf32>) -> vector<8xf32> {
 896     %0 = vector.broadcast %arg0 : vector<4xf32> to vector<1x2x4xf32>
 897     %1 = vector.shape_cast %0 : vector<1x2x4xf32> to vector<8xf32>
 898     return %1 : vector<8xf32>
 899 }
 900
 901 // -----
 902
 903 // CHECK-LABEL: func @canonicalize_broadcast_shapecast_to_broadcast
 904 //       CHECK:   vector.broadcast
 905 //   CHECK-NOT:   vector.shape_cast
     // The shape_cast only merges the leading broadcast dimensions (2x4 -> 8);
     // the expected output keeps a broadcast and has no shape_cast after it.
 906 func.func @canonicalize_broadcast_shapecast_to_broadcast(%arg0: vector<3xf32>) -> vector<8x3xf32> {
 907     %0 = vector.broadcast %arg0 : vector<3xf32> to vector<2x4x3xf32>
 908     %1 = vector.shape_cast %0 : vector<2x4x3xf32> to vector<8x3xf32>
 909     return %1 : vector<8x3xf32>
 910 }
 911
 912 // -----
 913
 914 // CHECK-LABEL: func @canonicalize_broadcast_shapecast_to_shapecast
 915 //   CHECK-NOT:   vector.broadcast
 916 //       CHECK:   vector.shape_cast {{.+}} : vector<3x4xf32> to vector<1x12xf32>
     // The broadcast only prepends unit dimensions, so the pair is expected
     // to become one shape_cast from the broadcast source (3x4) to the final
     // type (1x12), with no broadcast left.
 917 func.func @canonicalize_broadcast_shapecast_to_shapecast(%arg0: vector<3x4xf32>) -> vector<1x12xf32> {
 918     %0 = vector.broadcast %arg0 : vector<3x4xf32> to vector<1x1x3x4xf32>
 919     %1 = vector.shape_cast %0 : vector<1x1x3x4xf32> to vector<1x12xf32>
 920     return %1 : vector<1x12xf32>
 921 }
 922
 923 // -----
 924
 925 // CHECK-LABEL: fold_vector_transfer_masks
     // Transfer ops guarded by all-true masks: in the expected output every
     // transfer_read / transfer_write appears without a mask operand.
 926 func.func @fold_vector_transfer_masks(%A: memref<?x?xf32>) -> (vector<4x8xf32>, vector<4x[4]xf32>) {
 927   // CHECK: %[[C0:.+]] = arith.constant 0 : index
 928   %c0 = arith.constant 0 : index
 929   // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
 930   %f0 = arith.constant 0.0 : f32
 931
       // The mask sizes [8, 4] equal the mask shape 8x4, i.e. every lane is set.
 932   %mask = vector.constant_mask [8, 4] : vector<8x4xi1>
 933
       // All-true mask for a scalable vector, spelled as a dense constant.
 934   %arith_all_true_mask = arith.constant dense<true> : vector<4x[4]xi1>
 935
 936   // CHECK: vector.transfer_read %{{.*}}, %[[F0]] {permutation_map
 937   %1 = vector.transfer_read %A[%c0, %c0], %f0, %mask
 938       {permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : memref<?x?xf32>, vector<4x8xf32>
 939
 940   // CHECK: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] {permutation_map
 941   vector.transfer_write %1, %A[%c0, %c0], %mask
 942       {permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<4x8xf32>, memref<?x?xf32>
 943
 944   // CHECK: vector.transfer_read %{{.*}}, %[[F0]] :
 945   %2 = vector.transfer_read %A[%c0, %c0], %f0, %arith_all_true_mask : memref<?x?xf32>, vector<4x[4]xf32>
 946
 947   // CHECK: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] :
 948   vector.transfer_write %2, %A[%c0, %c0], %arith_all_true_mask : vector<4x[4]xf32>, memref<?x?xf32>
 949
 950   // CHECK: return
 951   return %1, %2 : vector<4x8xf32>, vector<4x[4]xf32>
 952 }
 953
 954 // -----
 955
 956 // CHECK-LABEL: fold_vector_transfers
     // Checks which in_bounds attribute is expected on transfers whose
     // attribute is not written in the input.
 957 func.func @fold_vector_transfers(%A: memref<?x8xf32>) -> (vector<4x8xf32>, vector<4x9xf32>) {
 958   %c0 = arith.constant 0 : index
 959   %f0 = arith.constant 0.0 : f32
 960
       // The trailing vector dim (8) matches the static trailing memref dim
       // (8) at offset 0 and is expected to be marked in-bounds; the leading
       // memref dim is dynamic and stays potentially out-of-bounds.
 961   // CHECK: vector.transfer_read %{{.*}} {in_bounds = [false, true]}
 962   %1 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x8xf32>, vector<4x8xf32>
 963
 964   // CHECK: vector.transfer_write %{{.*}} {in_bounds = [false, true]}
 965   vector.transfer_write %1, %A[%c0, %c0] : vector<4x8xf32>, memref<?x8xf32>
 966
 967   // Both dims may be out-of-bounds (the trailing vector dim 9 exceeds the
 967   // memref dim 8), so the in_bounds attribute is elided.
 968   // CHECK: vector.transfer_read %{{.*}}
 969   // CHECK-NOT: in_bounds
 970   %2 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x8xf32>, vector<4x9xf32>
 971
 972   // Both dims may be out-of-bounds, so the in_bounds attribute is elided.
 973   // CHECK: vector.transfer_write %{{.*}}
 974   // CHECK-NOT: in_bounds
 975   vector.transfer_write %2, %A[%c0, %c0] : vector<4x9xf32>, memref<?x8xf32>
 976
 977   // CHECK: return
 978   return %1, %2 : vector<4x8xf32>, vector<4x9xf32>
 979 }
 980
 981 // -----
 982
 983 // CHECK-LABEL: bitcast_folding
 984 //  CHECK-SAME:   %[[A:.*]]: vector<4x8xf32>
 985 //  CHECK-SAME:   %[[B:.*]]: vector<2xi32>
 986 //  CHECK:        return %[[A]], %[[B]] : vector<4x8xf32>, vector<2xi32>
     // Two foldable bitcast shapes: a bitcast to the identical type, and a
     // round trip (2xi32 -> 4xi16 -> 2xi32). The expected output returns the
     // original arguments.
 987 func.func @bitcast_folding(%I1: vector<4x8xf32>, %I2: vector<2xi32>) -> (vector<4x8xf32>, vector<2xi32>) {
 988   %0 = vector.bitcast %I1 : vector<4x8xf32> to vector<4x8xf32>
 989   %1 = vector.bitcast %I2 : vector<2xi32> to vector<4xi16>
 990   %2 = vector.bitcast %1 : vector<4xi16> to vector<2xi32>
 991   return %0, %2 : vector<4x8xf32>, vector<2xi32>
 992 }
 993
 994 // CHECK-LABEL: func @bitcast_f16_to_f32
 995 //              bit pattern: 0x40004000
 996 //       CHECK-DAG: %[[CST1:.+]] = arith.constant dense<2.00390625> : vector<4xf32>
 997 //              bit pattern: 0x00000000
 998 //       CHECK-DAG: %[[CST0:.+]] = arith.constant dense<0.000000e+00> : vector<4xf32>
 999 //       CHECK: return %[[CST0]], %[[CST1]]
     // Constant folding of bitcasts on splat f16 constants: two adjacent f16
     // bit patterns are concatenated into one f32, so a splat of 2.0
     // (0x4000) is expected to become a splat of 0x40004000 = 2.00390625.
1000 func.func @bitcast_f16_to_f32() -> (vector<4xf32>, vector<4xf32>) {
1001   %cst0 = arith.constant dense<0.0> : vector<8xf16> // bit pattern: 0x0000
1002   %cst1 = arith.constant dense<2.0> : vector<8xf16> // bit pattern: 0x4000
1003   %cast0 = vector.bitcast %cst0: vector<8xf16> to vector<4xf32>
1004   %cast1 = vector.bitcast %cst1: vector<8xf16> to vector<4xf32>
1005   return %cast0, %cast1: vector<4xf32>, vector<4xf32>
1006 }
1007
1008 // CHECK-LABEL: func @bitcast_i8_to_i32
1009 //              bit pattern: 0xA0A0A0A0
1010 //       CHECK-DAG: %[[CST1:.+]] = arith.constant dense<-1600085856> : vector<4xi32>
1011 //              bit pattern: 0x00000000
1012 //       CHECK-DAG: %[[CST0:.+]] = arith.constant dense<0> : vector<4xi32>
1013 //       CHECK: return %[[CST0]], %[[CST1]]
     // Constant folding of bitcasts on splat i8 constants: four adjacent i8
     // bit patterns are concatenated into one i32, so a splat of 0xA0 is
     // expected to become a splat of 0xA0A0A0A0, printed as the signed value
     // -1600085856.
1014 func.func @bitcast_i8_to_i32() -> (vector<4xi32>, vector<4xi32>) {
1015   %cst0 = arith.constant dense<0> : vector<16xi8> // bit pattern: 0x00
1016   %cst1 = arith.constant dense<160> : vector<16xi8> // bit pattern: 0xA0
1017   %cast0 = vector.bitcast %cst0: vector<16xi8> to vector<4xi32>
1018   %cast1 = vector.bitcast %cst1: vector<16xi8> to vector<4xi32>
1019   return %cast0, %cast1: vector<4xi32>, vector<4xi32>
1020 }
1021
1022 // -----
1023
1024 // CHECK-LABEL: broadcast_folding1
1025 //       CHECK: %[[CST:.*]] = arith.constant dense<42> : vector<4xi32>
1026 //   CHECK-NOT: vector.broadcast
1027 //       CHECK: return %[[CST]]
     // Broadcasting a constant scalar is expected to fold to a splat vector
     // constant, leaving no broadcast.
1028 func.func @broadcast_folding1() -> vector<4xi32> {
1029   %0 = arith.constant 42 : i32
1030   %1 = vector.broadcast %0 : i32 to vector<4xi32>
1031   return %1 : vector<4xi32>
1032 }
1033
1034 // -----
1035
1036 // CHECK-LABEL: @broadcast_folding2
1037 //       CHECK: %[[CST:.*]] = arith.constant dense<42> : vector<4x16xi32>
1038 //   CHECK-NOT: vector.broadcast
1039 //       CHECK: return %[[CST]]
     // A chain of two broadcasts starting from a constant scalar is expected
     // to fold all the way to one splat constant of the final type.
1040 func.func @broadcast_folding2() -> vector<4x16xi32> {
1041   %0 = arith.constant 42 : i32
1042   %1 = vector.broadcast %0 : i32 to vector<16xi32>
1043   %2 = vector.broadcast %1 : vector<16xi32> to vector<4x16xi32>
1044   return %2 : vector<4x16xi32>
1045 }
1046
1047 // -----
1048
1049 // CHECK-LABEL: @fold_consecutive_broadcasts(
1050 //  CHECK-SAME:                              %[[ARG0:.*]]: i32
1051 //       CHECK: %[[RESULT:.*]] = vector.broadcast %[[ARG0]] : i32 to vector<4x16xi32>
1052 //       CHECK: return %[[RESULT]]
     // Two chained broadcasts of a non-constant scalar are expected to merge
     // into a single broadcast from the scalar to the final type.
1053 func.func @fold_consecutive_broadcasts(%a : i32) -> vector<4x16xi32> {
1054   %1 = vector.broadcast %a : i32 to vector<16xi32>
1055   %2 = vector.broadcast %1 : vector<16xi32> to vector<4x16xi32>
1056   return %2 : vector<4x16xi32>
1057 }
1058
1059 // -----
1060
1061 // CHECK-LABEL: shape_cast_constant
1062 //       CHECK-DAG: %[[CST1:.*]] = arith.constant dense<1> : vector<3x4x2xi32>
1063 //       CHECK-DAG: %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<20x2xf32>
1064 //       CHECK: return %[[CST0]], %[[CST1]] : vector<20x2xf32>, vector<3x4x2xi32>
     // A shape_cast of a splat constant is expected to fold to a splat
     // constant of the result type, for both a rank-reducing (f32) and a
     // rank-increasing (i32) cast.
1065 func.func @shape_cast_constant() -> (vector<20x2xf32>, vector<3x4x2xi32>) {
1066   %cst = arith.constant dense<2.000000e+00> : vector<5x4x2xf32>
1067   %cst_1 = arith.constant dense<1> : vector<12x2xi32>
1068   %0 = vector.shape_cast %cst : vector<5x4x2xf32> to vector<20x2xf32>
1069   %1 = vector.shape_cast %cst_1 : vector<12x2xi32> to vector<3x4x2xi32>
1070   return %0, %1 : vector<20x2xf32>, vector<3x4x2xi32>
1071 }
1072
1073 // -----
1074
1075 // CHECK-LABEL: extract_strided_constant
1076 //       CHECK-DAG: %[[CST1:.*]] = arith.constant dense<1> : vector<2x13x3xi32>
1077 //       CHECK-DAG: %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<12x2xf32>
1078 //       CHECK: return %[[CST0]], %[[CST1]] : vector<12x2xf32>, vector<2x13x3xi32>
     // An extract_strided_slice of a splat constant is expected to fold to a
     // splat constant of the slice type, regardless of the offsets used.
1079 func.func @extract_strided_constant() -> (vector<12x2xf32>, vector<2x13x3xi32>) {
1080   %cst = arith.constant dense<2.000000e+00> : vector<29x7xf32>
1081   %cst_1 = arith.constant dense<1> : vector<4x37x9xi32>
1082   %0 = vector.extract_strided_slice %cst
1083     {offsets = [2, 3], sizes = [12, 2], strides = [1, 1]}
1084       : vector<29x7xf32> to vector<12x2xf32>
1085   %1 = vector.extract_strided_slice %cst_1
1086     {offsets = [1, 2, 5], sizes = [2, 13, 3], strides = [1, 1, 1]}
1087       : vector<4x37x9xi32> to vector<2x13x3xi32>
1088   return %0, %1 : vector<12x2xf32>, vector<2x13x3xi32>
1089 }
1090
1091 // -----
1092
1093 // CHECK-LABEL: extract_strided_broadcast
1094 //       CHECK:   %[[B:.*]] = vector.broadcast %{{.*}} : vector<4xf16> to vector<2x4xf16>
1095 //  CHECK-NEXT:   return %[[B]] : vector<2x4xf16>
     // The slice keeps the full inner dimension (4) and only narrows the
     // broadcast-added outer dimension (16 -> 2), so the expected output is a
     // single broadcast of the source to the slice type.
1096 func.func @extract_strided_broadcast(%arg0: vector<4xf16>) -> vector<2x4xf16> {
1097  %0 = vector.broadcast %arg0 : vector<4xf16> to vector<16x4xf16>
1098  %1 = vector.extract_strided_slice %0
1099   {offsets = [0, 0], sizes = [2, 4], strides = [1, 1]} :
1100   vector<16x4xf16> to vector<2x4xf16>
1101   return %1 : vector<2x4xf16>
1102 }
1103
1104 // -----
1105
1106 // CHECK-LABEL: extract_strided_broadcast2
1107 //       CHECK:   %[[E:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0], sizes = [2], strides = [1]} : vector<4xf16> to vector<2xf16>
1108 //  CHECK-NEXT:   %[[B:.*]] = vector.broadcast %[[E]] : vector<2xf16> to vector<2x2xf16>
1109 //  CHECK-NEXT:   return %[[B]] : vector<2x2xf16>
     // The slice also narrows the inner (source) dimension (4 -> 2), so the
     // expected output first slices the 1-D source and then broadcasts the
     // smaller vector to the slice type.
1110 func.func @extract_strided_broadcast2(%arg0: vector<4xf16>) -> vector<2x2xf16> {
1111  %0 = vector.broadcast %arg0 : vector<4xf16> to vector<16x4xf16>
1112  %1 = vector.extract_strided_slice %0
1113   {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} :
1114   vector<16x4xf16> to vector<2x2xf16>
1115   return %1 : vector<2x2xf16>
1116 }
1117
1118 // -----
1119
1120 // CHECK-LABEL: func @extract_strided_broadcast3
1121 //  CHECK-SAME: (%[[ARG:.+]]: vector<1xf32>)
1122 //       CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : vector<1xf32> to vector<1x4xf32>
1123 //       CHECK: return %[[V]]
     // The sliced dimension comes from stretching a unit source dimension
     // (1 -> 8), so a non-zero offset (4) does not matter: the expected
     // output broadcasts the source directly to the slice type.
1124 func.func @extract_strided_broadcast3(%arg0: vector<1xf32>) -> vector<1x4xf32> {
1125  %0 = vector.broadcast %arg0 : vector<1xf32> to vector<1x8xf32>
1126  %1 = vector.extract_strided_slice %0
1127       {offsets = [0, 4], sizes = [1, 4], strides = [1, 1]}
1128       : vector<1x8xf32> to vector<1x4xf32>
1129   return %1 : vector<1x4xf32>
1130 }
1131
1132 // -----
1133
1134 // CHECK-LABEL: func @extract_strided_broadcast4
1135 //  CHECK-SAME: (%[[ARG:.+]]: f32)
1136 //       CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : f32 to vector<1x4xf32>
1137 //       CHECK: return %[[V]]
     // Slice of a scalar broadcast: every lane holds the same value, so the
     // expected output broadcasts the scalar directly to the slice type.
1138 func.func @extract_strided_broadcast4(%arg0: f32) -> vector<1x4xf32> {
1139  %0 = vector.broadcast %arg0 : f32 to vector<1x8xf32>
1140  %1 = vector.extract_strided_slice %0
1141       {offsets = [0, 4], sizes = [1, 4], strides = [1, 1]}
1142       : vector<1x8xf32> to vector<1x4xf32>
1143   return %1 : vector<1x4xf32>
1144 }
1145
1146 // -----
1147
1148 // CHECK-LABEL: consecutive_shape_cast
1149 //       CHECK:   %[[C:.*]] = vector.shape_cast %{{.*}} : vector<16xf16> to vector<4x4xf16>
1150 //  CHECK-NEXT:   return %[[C]] : vector<4x4xf16>
     // Two chained shape_casts are expected to merge into one cast from the
     // original source type to the final result type.
1151 func.func @consecutive_shape_cast(%arg0: vector<16xf16>) -> vector<4x4xf16> {
1152   %0 = vector.shape_cast %arg0 : vector<16xf16> to vector<2x8xf16>
1153   %1 = vector.shape_cast %0 : vector<2x8xf16> to vector<4x4xf16>
1154   return %1 : vector<4x4xf16>
1155 }
1156
1157 // -----
1158
1159 // CHECK-LABEL: func @dead_transfer_op
1160 //   CHECK-NOT:   vector.transfer_read
1161 //   CHECK-NOT:   vector.transfer_write
1162 //       CHECK:   return
     // Neither result is used: %r is an unused read and %w is the unused
     // tensor result of a transfer_write on a tensor. Both ops are expected
     // to be erased.
1163 func.func @dead_transfer_op(%arg0 : tensor<4x4xf32>, %arg1 : memref<4x4xf32>,
1164                        %v0 : vector<1x4xf32>) {
1165   %c0 = arith.constant 0 : index
1166   %cf0 = arith.constant 0.0 : f32
1167   %r = vector.transfer_read %arg1[%c0, %c0], %cf0 :
1168     memref<4x4xf32>, vector<1x4xf32>
1169   %w = vector.transfer_write %v0, %arg0[%c0, %c0] :
1170     vector<1x4xf32>, tensor<4x4xf32>
1171   return
1172 }
1173
1174 // -----
1175
1176 // CHECK-LABEL: func @dead_load
1177 //   CHECK-NOT:   vector.maskedload
1178 //   CHECK-NOT:   vector.gather
1179 //   CHECK-NOT:   vector.expandload
1180 //       CHECK:   return
     // The results of the three memory-reading ops (maskedload, gather,
     // expandload) are all unused, and each op is expected to be erased.
1181 func.func @dead_load(%base: memref<?xf32>, %indices: vector<16xi32>,
1182                           %mask: vector<16xi1>, %passthru: vector<16xf32>) {
1183   %c0 = arith.constant 0 : index
1184   %0 = vector.maskedload %base[%c0], %mask, %passthru :
1185     memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1186   %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
1187     memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1188   %2 = vector.expandload %base[%c0], %mask, %passthru :
1189     memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1190   return
1191 }
1192
1193 // -----
1194
1195 #contraction_accesses0 = [
1196   affine_map<(i, j, k) -> (i, k)>,
1197   affine_map<(i, j, k) -> (k, j)>,
1198   affine_map<(i, j, k) -> (i, j)>
1199 ]
     // Matmul-style contraction: lhs indexed (i, k), rhs (k, j), accumulator
     // (i, j), with k as the reduction dimension.
1200 #contraction_trait0 = {
1201   indexing_maps = #contraction_accesses0,
1202   iterator_types = ["parallel", "parallel", "reduction"]
1203 }
1204
1205 // CHECK-LABEL: func @contractions
1206 //  CHECK-SAME:   %[[A:[0-9a-zA-Z]+]]: vector<2x3xf32>
1207 //  CHECK-SAME:   %[[B:[0-9a-zA-Z]+]]: vector<3x4xf32>
1208 //  CHECK-SAME:   %[[C:[0-9a-zA-Z]+]]: vector<2x4xf32>
1209 //  CHECK-SAME:   %[[A_I8:[0-9a-zA-Z]+]]: vector<2x3xi8>
1210 //  CHECK-SAME:   %[[B_I8:[0-9a-zA-Z]+]]: vector<3x4xi8>
1211 //  CHECK-SAME:   %[[C_I8:[0-9a-zA-Z]+]]: vector<2x4xi8>
     // A contraction with a zero accumulator followed by an add of %c is
     // expected to become a single contraction that uses %c as its
     // accumulator, with the zero constant and the add removed. Exercised
     // once for f32 (arith.addf) and once for i8 (arith.addi).
1212 func.func @contractions(%a: vector<2x3xf32>, %b: vector<3x4xf32>, %c: vector<2x4xf32>,
1213                    %a_i8: vector<2x3xi8>, %b_i8: vector<3x4xi8>, %c_i8: vector<2x4xi8>)
1214   -> (vector<2x4xf32>, vector<2x4xi8>)
1215 {
1216   // CHECK-NOT: arith.constant
1217   %vf_0 = arith.constant dense <0.0>: vector<2x4xf32>
1218   // CHECK-NOT: arith.addf
1219   //     CHECK: %[[D:.*]] = vector.contract {{.*}} %[[A]], %[[B]], %[[C]]
1220   %0 = vector.contract #contraction_trait0 %a, %b, %vf_0:
1221     vector<2x3xf32>, vector<3x4xf32> into vector<2x4xf32>
1222   // CHECK-NOT: arith.addf
1223   %1 = arith.addf %0, %c: vector<2x4xf32>
1224
1225   // CHECK-NOT: arith.constant
1226   %vi8_0 = arith.constant dense <0>: vector<2x4xi8>
1227   // CHECK-NOT: arith.addi
1228   //     CHECK: %[[D_I8:.*]] = vector.contract {{.*}} %[[A_I8]], %[[B_I8]], %[[C_I8]]
1229   %i8_0 = vector.contract #contraction_trait0 %a_i8, %b_i8, %vi8_0:
1230     vector<2x3xi8>, vector<3x4xi8> into vector<2x4xi8>
1231   // CHECK-NOT: arith.addi
1232   %i8_1 = arith.addi %i8_0, %c_i8: vector<2x4xi8>
1233
1234   // CHECK: return %[[D]], %[[D_I8]]
1235   return %1, %i8_1: vector<2x4xf32>, vector<2x4xi8>
1236 }
1237
1238 // -----
1239
1240 // CHECK-LABEL: func @transfer_folding_1
1241 //  CHECK-SAME:   %[[T0:[0-9a-zA-Z]+]]: tensor<2x3x4xf32>
1242 //  CHECK-SAME:   %[[T1:[0-9a-zA-Z]+]]: tensor<2x3x4xf32>
     // %v is an in-bounds read of the whole of %t0. Writing %v over an entire
     // tensor of the same shape yields a tensor equal to %t0, whatever the
     // destination was, so all three write results are expected to be
     // replaced by %t0. Only the opaque side-effecting op is expected to
     // remain before the return.
1243 func.func @transfer_folding_1(%t0: tensor<2x3x4xf32>, %t1: tensor<2x3x4xf32>)
1244   -> (tensor<2x3x4xf32>, tensor<2x3x4xf32>, tensor<2x3x4xf32>)
1245 {
1246   %c0 = arith.constant 0 : index
1247   %pad = arith.constant 0.0 : f32
1248   %v = vector.transfer_read %t0[%c0, %c0, %c0], %pad {in_bounds = [true, true, true]} :
1249     tensor<2x3x4xf32>, vector<2x3x4xf32>
1250
       // Destination: another function argument.
1251   %r0 = vector.transfer_write %v, %t1[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
1252     vector<2x3x4xf32>, tensor<2x3x4xf32>
1253
       // Destination: the result of an unregistered constant-like op.
1254   %t2 = "test.constant"() { value = dense<6.0> : tensor<2x3x4xf32>} : () -> (tensor<2x3x4xf32>)
1255   %r1 = vector.transfer_write %v, %t2[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
1256     vector<2x3x4xf32>, tensor<2x3x4xf32>
1257
1258
       // Destination: the tensor the vector was read from; %t3 itself is unused.
1259   // CHECK-NEXT: some_op_that_may_have_side_effects
1260   %t3 = "some_op_that_may_have_side_effects"() : () -> (tensor<2x3x4xf32>)
1261   %r2 = vector.transfer_write %v, %t0[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
1262     vector<2x3x4xf32>, tensor<2x3x4xf32>
1263
1264   // CHECK-NEXT: return %[[T0]], %[[T0]], %[[T0]]
1265   return %r0, %r1, %r2: tensor<2x3x4xf32>, tensor<2x3x4xf32>, tensor<2x3x4xf32>
1266 }
1267
1268 // -----
1269
1270 // CHECK-LABEL: func @store_after_load_tensor
1271 //  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>)
1272 //   CHECK-NOT:   vector.transfer_read
1273 //   CHECK-NOT:   vector.transfer_write
1274 //       CHECK:   return %[[ARG]] : tensor<4x4xf32>
     // Writing back the vector that was just read from the same tensor at the
     // same indices does not change the tensor: the read/write pair is
     // expected to fold to the original tensor argument.
1275 func.func @store_after_load_tensor(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
1276   %c1 = arith.constant 1 : index
1277   %c0 = arith.constant 0 : index
1278   %cf0 = arith.constant 0.0 : f32
1279   %0 = vector.transfer_read %arg0[%c1, %c0], %cf0 :
1280     tensor<4x4xf32>, vector<1x4xf32>
1281   %w0 = vector.transfer_write %0, %arg0[%c1, %c0] :
1282     vector<1x4xf32>, tensor<4x4xf32>
1283   return %w0 : tensor<4x4xf32>
1284 }
1285
1286 // -----
1287
1288 // CHECK-LABEL: func @store_after_load_tensor_negative
1289 //       CHECK:   vector.transfer_read
1290 //       CHECK:   vector.transfer_write
1291 //       CHECK:   return
     // Negative test: the read is at row %c1 but the write is at row %c0, so
     // the write changes the tensor and both ops are expected to remain.
1292 func.func @store_after_load_tensor_negative(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
1293   %c1 = arith.constant 1 : index
1294   %c0 = arith.constant 0 : index
1295   %cf0 = arith.constant 0.0 : f32
1296   %0 = vector.transfer_read %arg0[%c1, %c0], %cf0 :
1297     tensor<4x4xf32>, vector<1x4xf32>
1298   %w0 = vector.transfer_write %0, %arg0[%c0, %c0] :
1299     vector<1x4xf32>, tensor<4x4xf32>
1300   return %w0 : tensor<4x4xf32>
1301 }
1302
1303 // -----
1304
1305 // CHECK-LABEL: func @store_to_load_tensor
1306 //  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>, %[[V0:.*]]: vector<1x4xf32>, %[[V1:.*]]: vector<1x4xf32>)
1307 //   CHECK-NOT:   vector.transfer_write
1308 //   CHECK-NOT:   vector.transfer_read
1309 //       CHECK:   return %[[V0]] : vector<1x4xf32>
     // The read at [%c1, %c0] sees the value stored by %w0 at the same
     // indices; the intervening write %w1 targets a different constant row
     // (%c2). The read is expected to be forwarded to %v0, after which the
     // writes are unused and disappear.
1310 func.func @store_to_load_tensor(%arg0 : tensor<4x4xf32>,
1311   %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>) -> vector<1x4xf32> {
1312   %c1 = arith.constant 1 : index
1313   %c2 = arith.constant 2 : index
1314   %c0 = arith.constant 0 : index
1315   %cf0 = arith.constant 0.0 : f32
1316   %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
1317     vector<1x4xf32>, tensor<4x4xf32>
1318   %w1 = vector.transfer_write %v1, %w0[%c2, %c0] {in_bounds = [true, true]} :
1319     vector<1x4xf32>, tensor<4x4xf32>
1320   %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
1321     tensor<4x4xf32>, vector<1x4xf32>
1322   return %0 : vector<1x4xf32>
1323 }
1324
1325 // -----
1326
1327 // CHECK-LABEL: func @store_to_load_negative_tensor
1328 //       CHECK:   vector.transfer_write
1329 //       CHECK:   vector.transfer_write
1330 //       CHECK:   %[[V:.*]] = vector.transfer_read
1331 //       CHECK:   return %[[V]] : vector<1x4xf32>
     // Negative test: the intervening write %w1 uses the dynamic index %i,
     // which may alias the location being read, so no forwarding is expected
     // and both writes and the read remain.
1332 func.func @store_to_load_negative_tensor(%arg0 : tensor<4x4xf32>,
1333   %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> vector<1x4xf32> {
1334   %c1 = arith.constant 1 : index
1335   %c2 = arith.constant 2 : index
1336   %c0 = arith.constant 0 : index
1337   %cf0 = arith.constant 0.0 : f32
1338   %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
1339     vector<1x4xf32>, tensor<4x4xf32>
1340   %w1 = vector.transfer_write %v0, %w0[%i, %i] {in_bounds = [true, true]} :
1341     vector<1x4xf32>, tensor<4x4xf32>
1342   %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
1343     tensor<4x4xf32>, vector<1x4xf32>
1344   return %0 : vector<1x4xf32>
1345 }
1346
1347 // -----
1348
1349 // CHECK-LABEL: func @store_to_load_tensor_broadcast
1350 //  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>, %[[V0:.*]]: vector<4x2xf32>)
1351 //       CHECK:   %[[B:.*]] = vector.broadcast %[[V0]] : vector<4x2xf32> to vector<6x4x2xf32>
1352 //       CHECK:   %[[T:.*]] = vector.transpose %[[B]], [1, 2, 0] : vector<6x4x2xf32> to vector<4x2x6xf32>
1353 //       CHECK:   return %[[T]] : vector<4x2x6xf32>
     // Store-to-load forwarding where the read's permutation map adds a
     // trailing broadcast dimension (the constant 0 result). The expected
     // output rebuilds the read value from the stored vector: a broadcast
     // that puts the new dimension in front, then a transpose that moves it
     // to the trailing position.
1354 func.func @store_to_load_tensor_broadcast(%arg0 : tensor<4x4xf32>,
1355   %v0 : vector<4x2xf32>) -> vector<4x2x6xf32> {
1356   %c0 = arith.constant 0 : index
1357   %cf0 = arith.constant 0.0 : f32
1358   %w0 = vector.transfer_write %v0, %arg0[%c0, %c0] {in_bounds = [true, true]} :
1359     vector<4x2xf32>, tensor<4x4xf32>
1360   %0 = vector.transfer_read %w0[%c0, %c0], %cf0 {in_bounds = [true, true, true],
1361   permutation_map = affine_map<(d0, d1) -> (d0, d1, 0)>} :
1362     tensor<4x4xf32>, vector<4x2x6xf32>
1363   return %0 : vector<4x2x6xf32>
1364 }
1365
1366 // -----
1367
1368 // CHECK-LABEL: func @store_to_load_tensor_broadcast_scalable
1369 //  CHECK-SAME: (%[[ARG:.*]]: tensor<?xf32>, %[[V0:.*]]: vector<[4]xf32>)
1370 //       CHECK:   %[[B:.*]] = vector.broadcast %[[V0]] : vector<[4]xf32> to vector<6x[4]xf32>
1371 //       CHECK:   return %[[B]] : vector<6x[4]xf32>
     // Scalable-vector variant: the read's permutation map adds a leading
     // broadcast dimension, so the expected output is just a broadcast of the
     // stored vector (the new dimension is already in front, so no transpose
     // appears).
1372 func.func @store_to_load_tensor_broadcast_scalable(%arg0 : tensor<?xf32>,
1373   %v0 : vector<[4]xf32>) -> vector<6x[4]xf32> {
1374   %c0 = arith.constant 0 : index
1375   %cf0 = arith.constant 0.0 : f32
1376   %w0 = vector.transfer_write %v0, %arg0[%c0] {in_bounds = [true]} :
1377     vector<[4]xf32>, tensor<?xf32>
1378   %0 = vector.transfer_read %w0[%c0], %cf0 {in_bounds = [true, true],
1379   permutation_map = affine_map<(d0) -> (0, d0)>} :
1380     tensor<?xf32>, vector<6x[4]xf32>
1381   return %0 : vector<6x[4]xf32>
1382 }
1383
1384 // -----
1385
1386 // CHECK-LABEL: func @store_to_load_tensor_perm_broadcast
1387 //  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4x4xf32>, %[[V0:.*]]: vector<4x1xf32>)
1388 //       CHECK:   %[[B:.*]] = vector.broadcast %[[V0]] : vector<4x1xf32> to vector<100x5x4x1xf32>
1389 //       CHECK:   %[[T:.*]] = vector.transpose %[[B]], [3, 0, 2, 1] : vector<100x5x4x1xf32> to vector<1x100x4x5xf32>
1390 //       CHECK:   return %[[T]] : vector<1x100x4x5xf32>
     // Store-to-load forwarding where the write uses a permuted map and the
     // read uses a map mixing permuted and broadcast (constant 0) results.
     // The expected output broadcasts the stored vector with the two new
     // dimensions (100 and 5) in front and then transposes them into the
     // positions required by the read's result type.
1391 func.func @store_to_load_tensor_perm_broadcast(%arg0 : tensor<4x4x4xf32>,
1392   %v0 : vector<4x1xf32>) -> vector<1x100x4x5xf32> {
1393   %c0 = arith.constant 0 : index
1394   %cf0 = arith.constant 0.0 : f32
1395   %w0 = vector.transfer_write %v0, %arg0[%c0, %c0, %c0] {in_bounds = [true, true],
1396   permutation_map = affine_map<(d0, d1, d2) -> (d2, d1)>} :
1397     vector<4x1xf32>, tensor<4x4x4xf32>
1398   %0 = vector.transfer_read %w0[%c0, %c0, %c0], %cf0 {in_bounds = [true, true, true, true],
1399   permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} :
1400     tensor<4x4x4xf32>, vector<1x100x4x5xf32>
1401   return %0 : vector<1x100x4x5xf32>
1402 }
1403
1404 // -----
1405
1406
1407 // CHECK-LABEL: func @dead_store_tensor
1408 //   CHECK-DAG:      %[[C0:.*]] = arith.constant 0 : index
1409 //   CHECK-DAG:      %[[C1:.*]] = arith.constant 1 : index
1410 //   CHECK-DAG:      %[[C2:.*]] = arith.constant 2 : index
1411 //   CHECK-NOT:   vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]
1412 //       CHECK:   vector.transfer_write {{.*}}, {{.*}}[%[[C2]], %[[C0]]
1413 //       CHECK:   %[[VTW:.*]] = vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]
1414 //       CHECK:   return %[[VTW]] : tensor<4x4xf32>
     // Three chained tensor writes: %w0 at [1, 0], %w1 at [2, 0] and %w2 at [1, 0]
     // again. The expected output contains no write at [1, 0] ahead of the write
     // at [2, 0]; the returned value is the (later) write at [1, 0].
1415 func.func @dead_store_tensor(%arg0 : tensor<4x4xf32>,
1416   %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> tensor<4x4xf32> {
1417   %c1 = arith.constant 1 : index
1418   %c2 = arith.constant 2 : index
1419   %c0 = arith.constant 0 : index
1420   %cf0 = arith.constant 0.0 : f32
1421   %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
1422     vector<1x4xf32>, tensor<4x4xf32>
1423   %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = [true, true]} :
1424     vector<1x4xf32>, tensor<4x4xf32>
1425   %w2 = vector.transfer_write %v1, %w1[%c1, %c0] {in_bounds = [true, true]} :
1426     vector<1x4xf32>, tensor<4x4xf32>
1427   return %w2 : tensor<4x4xf32>
1428 }
1429
1430 // -----
1431
1432 // CHECK-LABEL: func @dead_store_tensor_negative
1433 //   CHECK-DAG:      %[[C0:.*]] = arith.constant 0 : index
1434 //   CHECK-DAG:      %[[C1:.*]] = arith.constant 1 : index
1435 //       CHECK:   vector.transfer_write
1436 //       CHECK:   vector.transfer_write
1437 //       CHECK:   vector.transfer_read
1438 //       CHECK:   %[[VTW:.*]] = vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]]
1439 //       CHECK:   return %[[VTW]] : tensor<4x4xf32>
     // Negative case: the chain %w0 -> %w1 is read at the dynamic indices
     // [%i, %i] before the last write, and the last write targets %w0 rather than
     // %w1. The expected output keeps two transfer_writes, the transfer_read and
     // the final write at [1, 0].
1440 func.func @dead_store_tensor_negative(%arg0 : tensor<4x4xf32>,
1441   %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> tensor<4x4xf32> {
1442   %c1 = arith.constant 1 : index
1443   %c2 = arith.constant 2 : index
1444   %c0 = arith.constant 0 : index
1445   %cf0 = arith.constant 0.0 : f32
1446   %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
1447     vector<1x4xf32>, tensor<4x4xf32>
1448   %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = [true, true]} :
1449     vector<1x4xf32>, tensor<4x4xf32>
1450   %0 = vector.transfer_read %w1[%i, %i], %cf0 {in_bounds = [true, true]} :
1451     tensor<4x4xf32>, vector<1x4xf32>
1452   %x = arith.addf %0, %0 : vector<1x4xf32>
1453   %w2 = vector.transfer_write %x, %w0[%c1, %c0] {in_bounds = [true, true]} :
1454     vector<1x4xf32>, tensor<4x4xf32>
1455   return %w2 : tensor<4x4xf32>
1456 }
1457
1458 // -----
1459
1460 //       CHECK: #[[$MAP:[0-9a-z]+]] = affine_map<(d0, d1) -> (d1, d0)>
1461
1462 // CHECK-LABEL: func @swap_extract_slice_transfer_write
1463 //  CHECK-SAME:   %[[VEC:.*]]: vector<8x4xf32>
1464 //  CHECK-SAME:   %[[INIT_TENSOR:.*]]: tensor<4x8xf32>,
1465 //  CHECK-SAME:   %[[ITER_ARG:.*]]: tensor<64x64xf32>,
1466 //  CHECK-SAME:   %[[IV:.*]]: index, %[[SZ:.*]]: index)
     // Input order: transfer_write into %arg1, extract_slice of the written tensor,
     // insert_slice of that slice into %arg2. Expected output order: extract_slice
     // taken from %arg2, transfer_write into that slice (with the second dimension
     // no longer marked in-bounds), then insert_slice back into %arg2.
1467 func.func @swap_extract_slice_transfer_write(%arg0 : vector<8x4xf32>,
1468                                              %arg1 : tensor<4x8xf32>,
1469                                              %arg2 : tensor<64x64xf32>,
1470                                              %iv : index, %sz : index) -> tensor<64x64xf32> {
1471   //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
1472   %c0 = arith.constant 0 : index
1473
1474   //       CHECK:   %[[T0:.*]] = tensor.extract_slice %[[ITER_ARG]]
1475   //  CHECK-SAME:                 [%[[IV]], 16] [%[[SZ]], 8]
1476   //       CHECK:   %[[T1:.*]] = vector.transfer_write %[[VEC]]
1477   //  CHECK-SAME:                 %[[T0]][%[[C0]], %[[C0]]]
1478   //  CHECK-SAME:                 in_bounds = [true, false]
1479   //  CHECK-SAME:                 permutation_map = #[[$MAP]]
1480   //       CHECK:   %[[T2:.*]] = tensor.insert_slice %[[T1]] into %[[ITER_ARG]]
1481   //  CHECK-SAME:                 [%[[IV]], 16] [%[[SZ]], 8]
1482   %0 = vector.transfer_write %arg0, %arg1[%c0, %c0] {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<8x4xf32>, tensor<4x8xf32>
1483   %1 = tensor.extract_slice %0[0, 0] [%sz, 8] [1, 1] : tensor<4x8xf32> to tensor<?x8xf32>
1484   %2 = tensor.insert_slice %1 into %arg2[%iv, 16] [%sz, 8] [1, 1] : tensor<?x8xf32> into tensor<64x64xf32>
1485
1486   //       CHECK:   return %[[T2]]
1487   func.return %2 : tensor<64x64xf32>
1488 }
1489
1490 // -----
1491
1492 // CHECK-LABEL: func @do_not_swap_extract_slice_transfer_write
1493 //  CHECK-SAME:   %[[VEC:.*]]: vector<8xf32>,
1494 //  CHECK-SAME:   %[[VEC_SMALL:.*]]: vector<4xf32>,
1495 //  CHECK-SAME:   %[[INIT_TENSOR:.*]]: tensor<8xf32>,
1496 //  CHECK-SAME:   %[[ITER_ARG:.*]]: tensor<64xf32>,
1497 //  CHECK-SAME:   %[[IV:.*]]: index, %[[SZ:.*]]: index)
     // Three independent write / extract_slice / insert_slice chains; each one is
     // expected to stay in its original order (write first, then extract, then
     // insert). The reason for each case is given next to it in the body.
1498 func.func @do_not_swap_extract_slice_transfer_write(%arg0 : vector<8xf32>,
1499                                                     %arg1 : vector<4xf32>,
1500                                                     %arg2 : tensor<8xf32>,
1501                                                     %arg3 : tensor<64xf32>,
1502                                                     %iv : index, %sz : index) -> (tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) {
1503   //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
1504   %c0 = arith.constant 0 : index
1505
1506   // Don't swap if the extracted and inserted slices do not match.
1507   //       CHECK:   %[[T0:.*]] = vector.transfer_write %[[VEC]]
1508   //       CHECK:   %[[T1:.*]] = tensor.extract_slice %[[T0]]
1509   //       CHECK:   %[[T2:.*]] = tensor.insert_slice %[[T1]]
1510   %0 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = [true]} : vector<8xf32>, tensor<8xf32>
1511   %1 = tensor.extract_slice %0[0] [%iv] [1] : tensor<8xf32> to tensor<?xf32>
1512   %2 = tensor.insert_slice %1 into %arg3[%iv] [%sz] [1] : tensor<?xf32> into tensor<64xf32>
1513
1514   // Don't swap if the TransferWriteOp takes a small vector.
1515   //       CHECK:   %[[T3:.*]] = vector.transfer_write %[[VEC_SMALL]]
1516   //       CHECK:   %[[T4:.*]] = tensor.extract_slice %[[T3]]
1517   //       CHECK:   %[[T5:.*]] = tensor.insert_slice %[[T4]]
1518   %3 = vector.transfer_write %arg1, %arg2[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<8xf32>
1519   %4 = tensor.extract_slice %3[0] [%sz] [1] : tensor<8xf32> to tensor<?xf32>
1520   %5 = tensor.insert_slice %4 into %arg3[%iv] [%sz] [1] : tensor<?xf32> into tensor<64xf32>
1521
1522   // Don't swap if one of the operations is rank-reducing.
1523   //       CHECK:   %[[T6:.*]] = vector.transfer_write %[[VEC]]
1524   //       CHECK:   %[[T7:.*]] = tensor.extract_slice %[[T6]]
1525   //       CHECK:   %[[T8:.*]] = tensor.insert_slice %[[T7]]
1526   %6 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = [true]} : vector<8xf32>, tensor<8xf32>
1527   %7 = tensor.extract_slice %6[0] [1] [1] : tensor<8xf32> to tensor<f32>
1528   %8 = tensor.insert_slice %7 into %arg3[%iv] [1] [1] : tensor<f32> into tensor<64xf32>
1529
1530   //       CHECK:   return %[[T2]], %[[T5]], %[[T8]]
1531   func.return %2, %5, %8 : tensor<64xf32>, tensor<64xf32>, tensor<64xf32>
1532 }
1533
1534 // -----
1535
1536 // CHECK-LABEL: func @vector_multi_reduction_single_parallel(
1537 //  CHECK-SAME:     %[[v:.*]]: vector<2xf32>,
     // multi_reduction with an empty list of reduction dimensions; the expected
     // output returns the source operand %arg0 directly.
1538 func.func @vector_multi_reduction_single_parallel(%arg0: vector<2xf32>, %acc: vector<2xf32>) -> vector<2xf32> {
1539     %0 = vector.multi_reduction <mul>, %arg0, %acc [] : vector<2xf32> to vector<2xf32>
1540
1541 //       CHECK:     return %[[v]] : vector<2xf32>
1542     return %0 : vector<2xf32>
1543 }
1544
1545 // -----
1546
1547 // CHECK-LABEL: func @masked_vector_multi_reduction_single_parallel(
1548 //  CHECK-SAME:     %[[VAL_0:.*]]: vector<2xf32>, %{{.*}}: vector<2xf32>,
     // Masked form of a multi_reduction with no reduction dimensions; the expected
     // output returns the source operand %arg0 directly.
1549 func.func @masked_vector_multi_reduction_single_parallel(%arg0: vector<2xf32>, %acc: vector<2xf32>, %mask: vector<2xi1>) -> vector<2xf32> {
1550     %0 = vector.mask %mask { vector.multi_reduction <mul>, %arg0, %acc [] : vector<2xf32> to vector<2xf32> } : vector<2xi1> -> vector<2xf32>
1551 //       CHECK:   return %[[VAL_0]] : vector<2xf32>
1552     return %0 : vector<2xf32>
1553 }
1554
1555 // -----
1556
1557 // CHECK-LABEL: func @vector_multi_reduction_unit_dimensions(
1558 //  CHECK-SAME: %[[SOURCE:.+]]: vector<5x1x4x1x20xf32>, %[[ACC:.+]]: vector<5x4x20xf32>
     // The reduced dimensions (1 and 3) both have size 1. The expected output is a
     // shape_cast of the source to the result type followed by an arith.mulf with
     // the accumulator.
1559 func.func @vector_multi_reduction_unit_dimensions(%source: vector<5x1x4x1x20xf32>, %acc: vector<5x4x20xf32>) -> vector<5x4x20xf32> {
1560 //       CHECK:   %[[CAST:.+]] = vector.shape_cast  %[[SOURCE]] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
1561 //       CHECK:   %[[RESULT:.+]] = arith.mulf  %[[ACC]], %[[CAST]] : vector<5x4x20xf32>
1562     %0 = vector.multi_reduction <mul>, %source, %acc [1, 3] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
1563
1564 //       CHECK:     return %[[RESULT]] : vector<5x4x20xf32>
1565     return %0 : vector<5x4x20xf32>
1566 }
1567
1568 // -----
1569 // CHECK-LABEL:   func.func @vector_multi_reduction_scalable(
1570 // CHECK-SAME:     %[[VAL_0:.*]]: vector<1x[4]x1xf32>,
1571 // CHECK-SAME:     %[[VAL_1:.*]]: vector<1x[4]xf32>,
1572 // CHECK-SAME:     %[[VAL_2:.*]]: vector<1x[4]x1xi1>)
     // Masked <add> reduction over the trailing size-1 dimension of a vector with a
     // scalable middle dimension. Expected output: shape_casts of the mask and the
     // source, an arith.addf with the accumulator, and an arith.select driven by
     // the cast mask.
1573 func.func @vector_multi_reduction_scalable(%source: vector<1x[4]x1xf32>,
1574                                            %acc: vector<1x[4]xf32>,
1575                                            %mask: vector<1x[4]x1xi1>) -> vector<1x[4]xf32> {
1576 // CHECK:           %[[VAL_3:.*]] = vector.shape_cast %[[VAL_2]] : vector<1x[4]x1xi1> to vector<1x[4]xi1>
1577 // CHECK:           %[[VAL_4:.*]] = vector.shape_cast %[[VAL_0]] : vector<1x[4]x1xf32> to vector<1x[4]xf32>
1578 // CHECK:           %[[VAL_5:.*]] = arith.addf %[[VAL_1]], %[[VAL_4]] : vector<1x[4]xf32>
1579 // CHECK:           %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : vector<1x[4]xi1>, vector<1x[4]xf32>
1580     %0 = vector.mask %mask { vector.multi_reduction <add>, %source, %acc [2] : vector<1x[4]x1xf32> to vector<1x[4]xf32> } :
1581           vector<1x[4]x1xi1> -> vector<1x[4]xf32>
1582
1583     return %0 : vector<1x[4]xf32>
1584 }
1585
1586 // -----
1587
1588 // CHECK-LABEL: func @masked_vector_multi_reduction_unit_dimensions
1589 //  CHECK-SAME: %[[VAL_0:.*]]: vector<5x1x4x1x20xf32>, %[[VAL_1:.*]]: vector<5x4x20xf32>,
1590 //  CHECK-SAME: %[[VAL_2:.*]]: vector<5x1x4x1x20xi1>)
     // Masked <mul> reduction over the two size-1 dimensions (1 and 3). Expected
     // output: shape_casts of the mask and the source, an arith.mulf with the
     // accumulator, and an arith.select driven by the cast mask.
1591 func.func @masked_vector_multi_reduction_unit_dimensions(%source: vector<5x1x4x1x20xf32>,
1592                                                          %acc: vector<5x4x20xf32>,
1593                                                          %mask: vector<5x1x4x1x20xi1>) -> vector<5x4x20xf32> {
1594 //       CHECK:   %[[VAL_3:.*]] = vector.shape_cast %[[VAL_2]] : vector<5x1x4x1x20xi1> to vector<5x4x20xi1>
1595 //       CHECK:   %[[VAL_4:.*]] = vector.shape_cast %[[VAL_0]] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
1596 //       CHECK:   %[[VAL_5:.*]] = arith.mulf %[[VAL_1]], %[[VAL_4]] : vector<5x4x20xf32>
1597 //       CHECK:   %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : vector<5x4x20xi1>, vector<5x4x20xf32>
1598 %0 = vector.mask %mask { vector.multi_reduction <mul>, %source, %acc [1, 3] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32> } :
1599            vector<5x1x4x1x20xi1> -> vector<5x4x20xf32>
1600     return %0 : vector<5x4x20xf32>
1601 }
1602
1603 // -----
1604
1605 // CHECK-LABEL: func @vector_multi_reduction_unit_dimensions_fail(
1606 //  CHECK-SAME: %[[SRC:.+]]: vector<5x1x4x1x20xf32>, %[[ACCUM:.+]]: vector<5x1x20xf32>
     // Negative case: the reduced dimensions are 1 and 2, and dimension 2 has
     // size 4 (not a unit dimension). The expected output keeps the
     // vector.multi_reduction unchanged.
1607 func.func @vector_multi_reduction_unit_dimensions_fail(%source: vector<5x1x4x1x20xf32>, %acc: vector<5x1x20xf32>) -> vector<5x1x20xf32> {
1608 //       CHECK:   %[[RES:.+]] = vector.multi_reduction  <mul>, %[[SRC]], %[[ACCUM]] [1, 2] : vector<5x1x4x1x20xf32> to vector<5x1x20xf32>
1609     %0 = vector.multi_reduction <mul>, %source, %acc [1, 2] : vector<5x1x4x1x20xf32> to vector<5x1x20xf32>
1610
1611 //       CHECK:     return %[[RES]] : vector<5x1x20xf32>
1612     return %0 : vector<5x1x20xf32>
1613 }
1614
1615 // -----
1616
1617 // CHECK-LABEL: func @vector_multi_reduction_unit_dimensions_single_elem(
1618 //  CHECK-SAME: %[[SOURCE:.+]]: vector<1x1x1xf32>, %[[ACC:.+]]: f32
     // All three size-1 dimensions are reduced, giving a scalar f32 result. The
     // expected output is a vector.extract at [0, 0, 0] followed by an arith.mulf
     // with the scalar accumulator.
1619 func.func @vector_multi_reduction_unit_dimensions_single_elem(%source: vector<1x1x1xf32>, %acc: f32) -> f32 {
1620 //       CHECK:   %[[CAST:.+]] = vector.extract  %[[SOURCE]][0, 0, 0] : f32 from vector<1x1x1xf32>
1621 //       CHECK:   %[[RESULT:.+]] = arith.mulf  %[[ACC]], %[[CAST]] : f32
1622     %0 = vector.multi_reduction <mul>, %source, %acc [0,1,2] : vector<1x1x1xf32> to f32
1623
1624 //       CHECK:     return %[[RESULT]] : f32
1625     return %0 : f32
1626 }
1627
1628 // -----
1629
1630 // CHECK-LABEL: func @masked_vector_multi_reduction_unit_dimensions_single_elem(
1631 //  CHECK-SAME: %[[VAL_0:.*]]: vector<1x1x1xf32>, %[[VAL_1:.*]]: f32,
1632 //  CHECK-SAME: %[[VAL_2:.*]]: vector<1x1x1xi1>)
     // Masked reduction of a single-element vector to a scalar. Expected output:
     // scalar extracts of the mask and of the source at [0, 0, 0], an arith.mulf
     // with the accumulator, and an arith.select on the extracted mask bit.
1633 func.func @masked_vector_multi_reduction_unit_dimensions_single_elem(%source: vector<1x1x1xf32>, %acc: f32, %mask: vector<1x1x1xi1>) -> f32 {
1634       // CHECK:           %[[VAL_3:.*]] = vector.extract %[[VAL_2]][0, 0, 0] : i1 from vector<1x1x1xi1>
1635       // CHECK:           %[[VAL_4:.*]] = vector.extract %[[VAL_0]][0, 0, 0] : f32 from vector<1x1x1xf32>
1636       // CHECK:           %[[VAL_5:.*]] = arith.mulf %[[VAL_1]], %[[VAL_4]] : f32
1637       // CHECK:           %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : f32
1638   %0 = vector.mask %mask { vector.multi_reduction <mul>, %source, %acc [0,1,2] : vector<1x1x1xf32> to f32 } : vector<1x1x1xi1> -> f32
1639     return %0 : f32
1640 }
1641
1642 // -----
1643
1644 // CHECK-LABEL: func @insert_strided_slice_full_range
1645 //  CHECK-SAME: %[[SOURCE:.+]]: vector<16x16xf16>, %{{.+}}: vector<16x16xf16>
     // Source and destination have the same type and the insertion offsets are all
     // zero, so the inserted slice spans the whole destination. The expected output
     // returns %source directly.
1646 func.func @insert_strided_slice_full_range(%source: vector<16x16xf16>, %dest: vector<16x16xf16>) -> vector<16x16xf16> {
1647   %0 = vector.insert_strided_slice %source, %dest {offsets = [0, 0], strides = [1, 1]} : vector<16x16xf16> into vector<16x16xf16>
1648   // CHECK: return %[[SOURCE]]
1649   return %0: vector<16x16xf16>
1650 }
1651
1652 // -----
1653
1654 // CHECK-LABEL: extract_strided_splat
1655 //       CHECK:   %[[B:.*]] = vector.splat %{{.*}} : vector<2x4xf16>
1656 //  CHECK-NEXT:   return %[[B]] : vector<2x4xf16>
     // A strided slice is taken out of a splat vector. The expected output is a
     // vector.splat created directly with the slice's result type.
1657 func.func @extract_strided_splat(%arg0: f16) -> vector<2x4xf16> {
1658  %0 = vector.splat %arg0 : vector<16x4xf16>
1659  %1 = vector.extract_strided_slice %0
1660   {offsets = [1, 0], sizes = [2, 4], strides = [1, 1]} :
1661   vector<16x4xf16> to vector<2x4xf16>
1662   return %1 : vector<2x4xf16>
1663 }
1664
1665 // -----
1666
1667 // CHECK-LABEL: func @insert_extract_to_broadcast
1668 //  CHECK-SAME: (%[[ARG0:.*]]: vector<1x1x4xf32>, %[[ARG1:.*]]: vector<4xf32>)
1669 //       CHECK:   %[[V0:.*]] = vector.extract %[[ARG0]][0, 0] : vector<4xf32> from vector<1x1x4xf32>
1670 //       CHECK:   %[[V1:.*]] = vector.broadcast %[[ARG1]] : vector<4xf32> to vector<1x1x4xf32>
1671 //       CHECK:   return %[[V0]], %[[V1]] : vector<4xf32>, vector<1x1x4xf32>
     // %arg0 has two leading size-1 dimensions. The extract at [0, 0] is expected to
     // stay as is, while the insert of %arg1 at [0, 0] (which overwrites the whole
     // vector) is expected to become a vector.broadcast of %arg1.
1672 func.func @insert_extract_to_broadcast(%arg0 : vector<1x1x4xf32>,
1673   %arg1 : vector<4xf32>) -> (vector<4xf32>, vector<1x1x4xf32>) {
1674   %0 = vector.extract %arg0[0, 0] : vector<4xf32> from vector<1x1x4xf32>
1675   %1 = vector.insert %arg1, %arg0 [0, 0] : vector<4xf32> into vector<1x1x4xf32>
1676   return %0, %1 : vector<4xf32>, vector<1x1x4xf32>
1677 }
1678
1679 // -----
1680
1681 // CHECK-LABEL: func.func @extract_splat_constant
1682 //   CHECK-DAG:   %[[CST1:.*]] = arith.constant 1 : i32
1683 //   CHECK-DAG:   %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<7xf32>
1684 //  CHECK-NEXT:   return %[[CST0]], %[[CST1]] : vector<7xf32>, i32
     // Extracts from two splat constants: a sub-vector from a 2-D float splat and a
     // scalar from a 3-D integer splat. The expected output contains only the
     // resulting smaller splat constant and the scalar constant.
1685 func.func @extract_splat_constant() -> (vector<7xf32>, i32) {
1686   %cst = arith.constant dense<2.000000e+00> : vector<29x7xf32>
1687   %cst_1 = arith.constant dense<1> : vector<4x37x9xi32>
1688   %0 = vector.extract %cst[2] : vector<7xf32> from vector<29x7xf32>
1689   %1 = vector.extract %cst_1[1, 4, 5] : i32 from vector<4x37x9xi32>
1690   return %0, %1 : vector<7xf32>, i32
1691 }
1692
1693 // -----
1694
1695 // CHECK-LABEL: func.func @extract_1d_constant
1696 //   CHECK-DAG: %[[I32CST:.*]] = arith.constant 3 : i32
1697 //   CHECK-DAG: %[[IDXCST:.*]] = arith.constant 1 : index
1698 //   CHECK-DAG: %[[F32CST:.*]] = arith.constant 2.000000e+00 : f32
1699 //  CHECK-NEXT: return %[[I32CST]], %[[IDXCST]], %[[F32CST]] : i32, index, f32
     // Scalar extracts from non-splat 1-D constants of three element types (i32,
     // index, f32). Each extract is expected to become the scalar constant found
     // at the extracted position.
1700 func.func @extract_1d_constant() -> (i32, index, f32) {
1701   %icst = arith.constant dense<[1, 2, 3, 4]> : vector<4xi32>
1702   %e = vector.extract %icst[2] : i32 from vector<4xi32>
1703   %idx_cst = arith.constant dense<[0, 1, 2]> : vector<3xindex>
1704   %f = vector.extract %idx_cst[1] : index from vector<3xindex>
1705   %fcst = arith.constant dense<[2.000000e+00, 3.000000e+00, 4.000000e+00]> : vector<3xf32>
1706   %g = vector.extract %fcst[0] : f32 from vector<3xf32>
1707   return %e, %f, %g : i32, index, f32
1708 }
1709
1710 // -----
1711
1712 // CHECK-LABEL: func.func @extract_2d_constant
1713 //   CHECK-DAG: %[[ACST:.*]] = arith.constant 0 : i32
1714 //   CHECK-DAG: %[[BCST:.*]] = arith.constant 2 : i32
1715 //   CHECK-DAG: %[[CCST:.*]] = arith.constant 3 : i32
1716 //   CHECK-DAG: %[[DCST:.*]] = arith.constant 5 : i32
1717 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : i32, i32, i32, i32
     // Scalar extracts from the four corner positions of a 2x3 constant whose
     // elements are 0..5 in row-major order; each is expected to become the
     // corresponding scalar constant.
1718 func.func @extract_2d_constant() -> (i32, i32, i32, i32) {
1719   %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
1720   %a = vector.extract %cst[0, 0] : i32 from vector<2x3xi32>
1721   %b = vector.extract %cst[0, 2] : i32 from vector<2x3xi32>
1722   %c = vector.extract %cst[1, 0] : i32 from vector<2x3xi32>
1723   %d = vector.extract %cst[1, 2] : i32 from vector<2x3xi32>
1724   return %a, %b, %c, %d : i32, i32, i32, i32
1725 }
1726
1727 // -----
1728
1729 // CHECK-LABEL: func.func @extract_vector_2d_constant
1730 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1731 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[3, 4, 5]> : vector<3xi32>
1732 //  CHECK-NEXT: return %[[ACST]], %[[BCST]] : vector<3xi32>, vector<3xi32>
     // Row (sub-vector) extracts from a 2x3 constant; each row is expected to become
     // its own 1-D dense constant.
1733 func.func @extract_vector_2d_constant() -> (vector<3xi32>, vector<3xi32>) {
1734   %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
1735   %a = vector.extract %cst[0] : vector<3xi32> from vector<2x3xi32>
1736   %b = vector.extract %cst[1] : vector<3xi32> from vector<2x3xi32>
1737   return %a, %b : vector<3xi32>, vector<3xi32>
1738 }
1739
1740 // -----
1741
1742 // CHECK-LABEL: func.func @extract_3d_constant
1743 //   CHECK-DAG: %[[ACST:.*]] = arith.constant 0 : i32
1744 //   CHECK-DAG: %[[BCST:.*]] = arith.constant 1 : i32
1745 //   CHECK-DAG: %[[CCST:.*]] = arith.constant 9 : i32
1746 //   CHECK-DAG: %[[DCST:.*]] = arith.constant 10 : i32
1747 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : i32, i32, i32, i32
     // Scalar extracts from a 2x3x2 constant whose elements are 0..11 in row-major
     // order; each extract is expected to become the scalar constant at that
     // position.
1748 func.func @extract_3d_constant() -> (i32, i32, i32, i32) {
1749   %cst = arith.constant dense<[[[0, 1], [2, 3], [4, 5]], [[6, 7], [8, 9], [10, 11]]]> : vector<2x3x2xi32>
1750   %a = vector.extract %cst[0, 0, 0] : i32 from vector<2x3x2xi32>
1751   %b = vector.extract %cst[0, 0, 1] : i32 from vector<2x3x2xi32>
1752   %c = vector.extract %cst[1, 1, 1] : i32 from vector<2x3x2xi32>
1753   %d = vector.extract %cst[1, 2, 0] : i32 from vector<2x3x2xi32>
1754   return %a, %b, %c, %d : i32, i32, i32, i32
1755 }
1756
1757 // -----
1758
1759 // CHECK-LABEL: func.func @extract_vector_3d_constant
1760 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
1761 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[6, 7\], \[8, 9\], \[10, 11\]\]}}> : vector<3x2xi32>
1762 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[8, 9]> : vector<2xi32>
1763 //   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<[10, 11]> : vector<2xi32>
1764 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>
     // Sub-vector extracts (2-D and 1-D results) from a 2x3x2 constant; each is
     // expected to become a dense constant holding the extracted elements.
1765 func.func @extract_vector_3d_constant() -> (vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>) {
1766   %cst = arith.constant dense<[[[0, 1], [2, 3], [4, 5]], [[6, 7], [8, 9], [10, 11]]]> : vector<2x3x2xi32>
1767   %a = vector.extract %cst[0] : vector<3x2xi32> from vector<2x3x2xi32>
1768   %b = vector.extract %cst[1] : vector<3x2xi32> from vector<2x3x2xi32>
1769   %c = vector.extract %cst[1, 1] : vector<2xi32> from vector<2x3x2xi32>
1770   %d = vector.extract %cst[1, 2] : vector<2xi32> from vector<2x3x2xi32>
1771   return %a, %b, %c, %d : vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>
1772 }
1773
1774 // -----
1775
1776 // CHECK-LABEL: func.func @extract_splat_vector_3d_constant
1777 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<2xi32>
1778 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<4> : vector<2xi32>
1779 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<5> : vector<2xi32>
1780 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<2xi32>, vector<2xi32>, vector<2xi32>
     // The source constant is not a splat, but every innermost row holds two equal
     // elements, so each extracted 1-D row is expected to print as a splat
     // constant (dense<N>).
1781 func.func @extract_splat_vector_3d_constant() -> (vector<2xi32>, vector<2xi32>, vector<2xi32>) {
1782   %cst = arith.constant dense<[[[0, 0], [1, 1], [2, 2]], [[3, 3], [4, 4], [5, 5]]]> : vector<2x3x2xi32>
1783   %a = vector.extract %cst[0, 0] : vector<2xi32> from vector<2x3x2xi32>
1784   %b = vector.extract %cst[1, 1] : vector<2xi32> from vector<2x3x2xi32>
1785   %c = vector.extract %cst[1, 2] : vector<2xi32> from vector<2x3x2xi32>
1786   return %a, %b, %c : vector<2xi32>, vector<2xi32>, vector<2xi32>
1787 }
1788
1789 // -----
1790
1791 // CHECK-LABEL: func.func @extract_strided_slice_1d_constant
1792 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1793 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[1, 2]> : vector<2xi32>
1794 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<2> : vector<1xi32>
1795 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<3xi32>, vector<2xi32>, vector<1xi32>
     // Unit-stride slices of a 1-D constant: the full range, the last two elements
     // and the last element. Each slice is expected to become a dense constant.
1796 func.func @extract_strided_slice_1d_constant() -> (vector<3xi32>, vector<2xi32>, vector<1xi32>) {
1797   %cst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1798   %a = vector.extract_strided_slice %cst
1799    {offsets = [0], sizes = [3], strides = [1]} : vector<3xi32> to vector<3xi32>
1800   %b = vector.extract_strided_slice %cst
1801    {offsets = [1], sizes = [2], strides = [1]} : vector<3xi32> to vector<2xi32>
1802   %c = vector.extract_strided_slice %cst
1803    {offsets = [2], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32>
1804   return %a, %b, %c : vector<3xi32>, vector<2xi32>, vector<1xi32>
1805 }
1806
1807 // -----
1808
1809 // CHECK-LABEL: func.func @extract_strided_slice_2d_constant
1810 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<1x1xi32>
1811 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[4, 5\]\]}}> : vector<1x2xi32>
1812 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[1, 2\], \[4, 5\]\]}}> : vector<2x2xi32>
1813 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>
     // Unit-stride 2-D slices of a 2x3 constant (a single element, part of one row,
     // and a 2x2 window); each slice is expected to become a dense constant.
1814 func.func @extract_strided_slice_2d_constant() -> (vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>) {
1815   %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
1816   %a = vector.extract_strided_slice %cst
1817    {offsets = [0, 0], sizes = [1, 1], strides = [1, 1]} : vector<2x3xi32> to vector<1x1xi32>
1818   %b = vector.extract_strided_slice %cst
1819    {offsets = [1, 1], sizes = [1, 2], strides = [1, 1]} : vector<2x3xi32> to vector<1x2xi32>
1820   %c = vector.extract_strided_slice %cst
1821    {offsets = [0, 1], sizes = [2, 2], strides = [1, 1]} : vector<2x3xi32> to vector<2x2xi32>
1822   return %a, %b, %c : vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>
1823 }
1824
1825 // -----
1826
1827 // CHECK-LABEL: func.func @extract_strided_slice_3d_constant
1828 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[\[8, 9\], \[10, 11\]\]\]}}> : vector<1x2x2xi32>
1829 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[\[2, 3\]\]\]}}> : vector<1x1x2xi32>
1830 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[\[6, 7\]\], \[\[10, 11\]\]\]}}> : vector<2x1x2xi32>
1831 //   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<11> : vector<1x1x1xi32>
1832 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]]
     // Unit-stride slices of a 3x2x2 constant. The offsets/sizes/strides lists have
     // 1, 2, 3 and 3 entries respectively, so the first two slices leave trailing
     // dimensions untouched. Each slice is expected to become a dense constant.
1833 func.func @extract_strided_slice_3d_constant() -> (vector<1x2x2xi32>, vector<1x1x2xi32>, vector<2x1x2xi32>, vector<1x1x1xi32>) {
1834   %cst = arith.constant dense<[[[0, 1], [2, 3]], [[4, 5], [6, 7]], [[8, 9], [10, 11]]]> : vector<3x2x2xi32>
1835   %a = vector.extract_strided_slice %cst
1836    {offsets = [2], sizes = [1], strides = [1]} : vector<3x2x2xi32> to vector<1x2x2xi32>
1837   %b = vector.extract_strided_slice %cst
1838    {offsets = [0, 1], sizes = [1, 1], strides = [1, 1]} : vector<3x2x2xi32> to vector<1x1x2xi32>
1839   %c = vector.extract_strided_slice %cst
1840    {offsets = [1, 1, 0], sizes = [2, 1, 2], strides = [1, 1, 1]} : vector<3x2x2xi32> to vector<2x1x2xi32>
1841   %d = vector.extract_strided_slice %cst
1842    {offsets = [2, 1, 1], sizes = [1, 1, 1], strides = [1, 1, 1]} : vector<3x2x2xi32> to vector<1x1x1xi32>
1843   return %a, %b, %c, %d : vector<1x2x2xi32>, vector<1x1x2xi32>, vector<2x1x2xi32>, vector<1x1x1xi32>
1844 }
1845
1846 // -----
1847
1848 // CHECK-LABEL: extract_extract_strided
1849 //  CHECK-SAME: %[[A:.*]]: vector<32x16x4xf16>
1850 //       CHECK: %[[V:.*]] = vector.extract %[[A]][9, 7] : vector<4xf16> from vector<32x16x4xf16>
1851 //       CHECK: return %[[V]] : vector<4xf16>
     // An extract at [2, 4] from a strided slice taken at offsets [7, 3]. The
     // expected output is a single extract from %arg0 at [9, 7], i.e. the slice
     // offsets added to the extract position.
1852 func.func @extract_extract_strided(%arg0: vector<32x16x4xf16>) -> vector<4xf16> {
1853  %1 = vector.extract_strided_slice %arg0
1854   {offsets = [7, 3], sizes = [10, 8], strides = [1, 1]} :
1855   vector<32x16x4xf16> to vector<10x8x4xf16>
1856   %2 = vector.extract %1[2, 4] : vector<4xf16> from vector<10x8x4xf16>
1857   return %2 : vector<4xf16>
1858 }
1859
1860 // -----
1861
1862 // CHECK-LABEL: extract_insert_strided
1863 //  CHECK-SAME: %[[A:.*]]: vector<6x4xf32>
1864 //       CHECK: %[[V:.*]] = vector.extract %[[A]][0, 2] : f32 from vector<6x4xf32>
1865 //       CHECK: return %[[V]] : f32
     // %a is inserted into %b at offsets [2, 2] and element [2, 4] of the result is
     // extracted. The expected output reads the element straight from %a at
     // [0, 2], i.e. the extract position minus the insertion offsets.
1866 func.func @extract_insert_strided(%a: vector<6x4xf32>, %b: vector<8x16xf32>)
1867   -> f32 {
1868   %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
1869     : vector<6x4xf32> into vector<8x16xf32>
1870   %2 = vector.extract %0[2, 4] : f32 from vector<8x16xf32>
1871   return %2 : f32
1872 }
1873
1874 // -----
1875
1876 // CHECK-LABEL: extract_insert_rank_reduce
1877 //  CHECK-SAME: %[[A:.*]]: vector<4xf32>
1878 //       CHECK: %[[V:.*]] = vector.extract %[[A]][2] : f32 from vector<4xf32>
1879 //       CHECK: return %[[V]] : f32
     // A 1-D vector %a is inserted into the 2-D vector %b at offsets [2, 2] and
     // element [2, 4] of the result is extracted. The expected output reads the
     // element from %a at [2].
1880 func.func @extract_insert_rank_reduce(%a: vector<4xf32>, %b: vector<8x16xf32>)
1881   -> f32 {
1882   %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1]}
1883     : vector<4xf32> into vector<8x16xf32>
1884   %2 = vector.extract %0[2, 4] : f32 from vector<8x16xf32>
1885   return %2 : f32
1886 }
1887
1888 // -----
1889
1890 // CHECK-LABEL: extract_insert_negative
1891 //       CHECK: vector.insert_strided_slice
1892 //       CHECK: vector.extract
     // Negative case: both ops are expected to remain. The inserted slice is
     // 2x15 while the extracted row [4, 2] has 16 elements, so the insert covers
     // only part of the extracted row (presumably the reason no rewrite applies).
1893 func.func @extract_insert_negative(%a: vector<2x15xf32>, %b: vector<12x8x16xf32>)
1894   -> vector<16xf32> {
1895   %0 = vector.insert_strided_slice %a, %b {offsets = [4, 2, 0], strides = [1, 1]}
1896     : vector<2x15xf32> into vector<12x8x16xf32>
1897   %2 = vector.extract %0[4, 2] : vector<16xf32> from vector<12x8x16xf32>
1898   return %2 : vector<16xf32>
1899 }
1900
1901 // -----
1902
1903 // CHECK-LABEL: extract_insert_chain
1904 //  CHECK-SAME: (%[[A:.*]]: vector<2x16xf32>, %[[B:.*]]: vector<12x8x16xf32>, %[[C:.*]]: vector<2x16xf32>)
1905 //       CHECK: %[[V:.*]] = vector.extract %[[C]][0] : vector<16xf32> from vector<2x16xf32>
1906 //       CHECK: return %[[V]] : vector<16xf32>
     // Two chained inserts: %c at offsets [4, 2, 0], then %a at [0, 2, 0]. The
     // extracted row [4, 2] lies in the slice written from %c, so the expected
     // output is an extract of row 0 of %c.
1907 func.func @extract_insert_chain(%a: vector<2x16xf32>, %b: vector<12x8x16xf32>, %c: vector<2x16xf32>)
1908   -> vector<16xf32> {
1909   %0 = vector.insert_strided_slice %c, %b {offsets = [4, 2, 0], strides = [1, 1]}
1910     : vector<2x16xf32> into vector<12x8x16xf32>
1911   %1 = vector.insert_strided_slice %a, %0 {offsets = [0, 2, 0], strides = [1, 1]}
1912     : vector<2x16xf32> into vector<12x8x16xf32>
1913   %2 = vector.extract %1[4, 2] : vector<16xf32> from vector<12x8x16xf32>
1914   return %2 : vector<16xf32>
1915 }
1916
1917 // -----
1918
1919 // CHECK-LABEL: extract_from_extract_chain_should_not_fold_dynamic_extracts
1920 //  CHECK-SAME: (%[[VEC:.*]]: vector<2x4xf32>, %[[IDX:.*]]: index)
1921 //       CHECK: %[[A:.*]] = vector.extract %[[VEC]][%[[IDX]]] : vector<4xf32> from vector<2x4xf32>
1922 //       CHECK: %[[B:.*]] = vector.extract %[[A]][1] : f32 from vector<4xf32>
     // Negative case: the first extract uses a dynamic (SSA value) index. The two
     // chained extracts are expected to stay as two separate operations.
1923 func.func @extract_from_extract_chain_should_not_fold_dynamic_extracts(%v: vector<2x4xf32>, %index: index) -> f32 {
1924   %0 = vector.extract %v[%index] : vector<4xf32> from vector<2x4xf32>
1925   %1 = vector.extract %0[1] : f32 from vector<4xf32>
1926   return %1 : f32
1927 }
1928
1929 // -----
1930
1931 // CHECK-LABEL: extract_extract_strided2
1932 //  CHECK-SAME: %[[A:.*]]: vector<2x4xf32>
1933 //       CHECK: %[[V:.*]] = vector.extract %[[A]][1] : vector<4xf32> from vector<2x4xf32>
1934 //       CHECK: return %[[V]] : vector<4xf32>
     // A 1x4 slice is taken at offsets [1, 0] and its only row is then extracted.
     // The expected output is a single extract of row 1 from %A.
1935 func.func @extract_extract_strided2(%A: vector<2x4xf32>)
1936   -> (vector<4xf32>) {
1937  %0 = vector.extract_strided_slice %A {offsets = [1, 0], sizes = [1, 4], strides = [1, 1]} : vector<2x4xf32> to vector<1x4xf32>
1938  %1 = vector.extract %0[0] : vector<4xf32> from vector<1x4xf32>
1939  return %1 : vector<4xf32>
1940 }
1941
1942 // -----
1943
1944 // CHECK-LABEL: func @splat_fold
     // A splat of the scalar constant 1.0; the expected output is a single dense
     // vector constant that is returned directly.
1945 func.func @splat_fold() -> vector<4xf32> {
1946   %c = arith.constant 1.0 : f32
1947   %v = vector.splat %c : vector<4xf32>
1948   return %v : vector<4xf32>
1949
1950   // CHECK-NEXT: [[V:%.*]] = arith.constant dense<1.000000e+00> : vector<4xf32>
1951   // CHECK-NEXT: return [[V]] : vector<4xf32>
1952 }
1953
1954 // -----
1955
1956 // CHECK-LABEL: func @shuffle_1d
1957 //       CHECK:   %[[V:.+]] = arith.constant dense<[3, 2, 5, 1]> : vector<4xi32>
1958 //       CHECK:   return %[[V]]
     // Shuffle of two constant vectors. Mask entries 0..2 select from %v0 and 3..5
     // from %v1; because the constants hold 0..2 and 3..5, the expected folded
     // constant has the same values as the mask.
1959 func.func @shuffle_1d() -> vector<4xi32> {
1960   %v0 = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1961   %v1 = arith.constant dense<[3, 4, 5]> : vector<3xi32>
1962   %shuffle = vector.shuffle %v0, %v1 [3, 2, 5, 1] : vector<3xi32>, vector<3xi32>
1963   return %shuffle : vector<4xi32>
1964 }
1965
1966 // CHECK-LABEL: func @shuffle_canonicalize_0d
     // Shuffle of two 0-D vectors with the single mask entry 0; the expected output
     // is a vector.broadcast from vector<i32> to vector<1xi32>.
1967 func.func @shuffle_canonicalize_0d(%v0 : vector<i32>, %v1 : vector<i32>) -> vector<1xi32> {
1968   // CHECK: vector.broadcast %{{.*}} : vector<i32> to vector<1xi32>
1969   %shuffle = vector.shuffle %v0, %v1 [0] : vector<i32>, vector<i32>
1970   return %shuffle : vector<1xi32>
1971 }
1972
1973 // CHECK-LABEL: func @shuffle_fold1
1974 //       CHECK:   return %arg0 : vector<4xi32>
     // The mask [0, 1, 2, 3] selects every element of %v0 in order, so the shuffle
     // is expected to fold away and %arg0 to be returned directly. The expectation
     // is anchored on `return`, matching the sibling shuffle_fold3/shuffle_fold4
     // tests.
1975 func.func @shuffle_fold1(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<4xi32> {
1976   %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3] : vector<4xi32>, vector<2xi32>
1977   return %shuffle : vector<4xi32>
1978 }
1979
1980 // CHECK-LABEL: func @shuffle_fold2
1981 //       CHECK:   return %arg1 : vector<2xi32>
     // Input: vector.shuffle whose mask [4, 5] selects all of %v1 in order.
     // The expected output returns the second function argument directly; the
     // pattern is anchored on `return`, like the shuffle_fold3/4 tests.
1982 func.func @shuffle_fold2(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<2xi32> {
1983   %shuffle = vector.shuffle %v0, %v1 [4, 5] : vector<4xi32>, vector<2xi32>
1984   return %shuffle : vector<2xi32>
1985 }
1986
1987 // CHECK-LABEL: func @shuffle_fold3
1988 //       CHECK:   return %arg0 : vector<4x5x6xi32>
     // Same as shuffle_fold1 but with 3-D operands: mask [0, 1, 2, 3] covers the
     // whole leading dimension of %v0, so the first argument is returned.
1989 func.func @shuffle_fold3(%v0 : vector<4x5x6xi32>, %v1 : vector<2x5x6xi32>) -> vector<4x5x6xi32> {
1990   %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3] : vector<4x5x6xi32>, vector<2x5x6xi32>
1991   return %shuffle : vector<4x5x6xi32>
1992 }
1993
1994 // CHECK-LABEL: func @shuffle_fold4
1995 //       CHECK:   return %arg1 : vector<2x5x6xi32>
     // Same as shuffle_fold2 but with 3-D operands: mask [4, 5] covers the whole
     // leading dimension of %v1, so the second argument is returned.
1996 func.func @shuffle_fold4(%v0 : vector<4x5x6xi32>, %v1 : vector<2x5x6xi32>) -> vector<2x5x6xi32> {
1997   %shuffle = vector.shuffle %v0, %v1 [4, 5] : vector<4x5x6xi32>, vector<2x5x6xi32>
1998   return %shuffle : vector<2x5x6xi32>
1999 }
2000
2001 // CHECK-LABEL: func @shuffle_nofold1
2002 //       CHECK:   %[[V:.+]] = vector.shuffle %arg0, %arg1 [0, 1, 2, 3, 4] : vector<4xi32>, vector<2xi32>
2003 //       CHECK:   return %[[V]]
     // Negative test: mask [0, 1, 2, 3, 4] takes elements from both operands, so
     // the expected output still contains the vector.shuffle.
2004 func.func @shuffle_nofold1(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<5xi32> {
2005   %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3, 4] : vector<4xi32>, vector<2xi32>
2006   return %shuffle : vector<5xi32>
2007 }
2008
2009 // -----
2010
2011 // CHECK-LABEL: func @transpose_scalar_broadcast1
2012 //  CHECK-SAME: (%[[ARG:.+]]: vector<1xf32>)
2013 //       CHECK:   %[[V:.+]] = vector.broadcast %[[ARG]] : vector<1xf32> to vector<1x8xf32>
2014 //       CHECK:   return %[[V]] : vector<1x8xf32>
     // Input: broadcast of a vector<1xf32> to 8x1 followed by a [1, 0] transpose.
     // The expected output is a single broadcast straight to vector<1x8xf32>.
2015 func.func @transpose_scalar_broadcast1(%value: vector<1xf32>) -> vector<1x8xf32> {
2016   %bcast = vector.broadcast %value : vector<1xf32> to vector<8x1xf32>
2017   %t = vector.transpose %bcast, [1, 0] : vector<8x1xf32> to vector<1x8xf32>
2018   return %t : vector<1x8xf32>
2019 }
2020
2021 // -----
2022
2023 // CHECK-LABEL: func @transpose_scalar_broadcast2
2024 //  CHECK-SAME: (%[[ARG:.+]]: f32)
2025 //       CHECK:   %[[V:.+]] = vector.broadcast %[[ARG]] : f32 to vector<1x8xf32>
2026 //       CHECK:   return %[[V]] : vector<1x8xf32>
     // Input: broadcast of an f32 scalar to 8x1 followed by a [1, 0] transpose.
     // The expected output is a single broadcast straight to vector<1x8xf32>.
2027 func.func @transpose_scalar_broadcast2(%value: f32) -> vector<1x8xf32> {
2028   %bcast = vector.broadcast %value : f32 to vector<8x1xf32>
2029   %t = vector.transpose %bcast, [1, 0] : vector<8x1xf32> to vector<1x8xf32>
2030   return %t : vector<1x8xf32>
2031 }
2032
2033 // -----
2034
2035 // CHECK-LABEL: func @transpose_splat_constant
2036 //       CHECK:   %[[CST:.+]] = arith.constant dense<5.000000e+00> : vector<8x4xf32>
2037 //       CHECK:   return %[[CST]]
     // Input: [1, 0] transpose of the splat constant dense<5.0> : vector<4x8xf32>.
     // The expected output is the same splat constant typed vector<8x4xf32>.
2038 func.func @transpose_splat_constant() -> vector<8x4xf32> {
2039   %cst = arith.constant dense<5.0> : vector<4x8xf32>
2040   %0 = vector.transpose %cst, [1, 0] : vector<4x8xf32> to vector<8x4xf32>
2041   return %0 : vector<8x4xf32>
2042 }
2043
2044 // CHECK-LABEL:   func @transpose_splat2(
2045 // CHECK-SAME:                           %[[VAL_0:.*]]: f32) -> vector<3x4xf32> {
2046 // CHECK:           %[[VAL_1:.*]] = vector.splat %[[VAL_0]] : vector<3x4xf32>
2047 // CHECK:           return %[[VAL_1]] : vector<3x4xf32>
2048 // CHECK:         }
     // Input: [1, 0] transpose of a vector.splat of %arg to vector<4x3xf32>.
     // The expected output is a vector.splat of %arg straight to vector<3x4xf32>.
2049 func.func @transpose_splat2(%arg : f32) -> vector<3x4xf32> {
2050   %splat = vector.splat %arg : vector<4x3xf32>
2051   %0 = vector.transpose %splat, [1, 0] : vector<4x3xf32> to vector<3x4xf32>
2052   return %0 : vector<3x4xf32>
2053 }
2054
2055 // -----
2056
2057 // CHECK-LABEL: func.func @insert_1d_constant
2058 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[9, 1, 2]> : vector<3xi32>
2059 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[0, 9, 2]> : vector<3xi32>
2060 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[0, 1, 9]> : vector<3xi32>
2061 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<3xi32>, vector<3xi32>, vector<3xi32>
     // Input: vector.insert of the constant 9 into the constant [0, 1, 2] at
     // positions 0, 1 and 2. The expected output is three folded constants.
2062 func.func @insert_1d_constant() -> (vector<3xi32>, vector<3xi32>, vector<3xi32>) {
2063   %vcst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
2064   %icst = arith.constant 9 : i32
2065   %a = vector.insert %icst, %vcst[0] : i32 into vector<3xi32>
2066   %b = vector.insert %icst, %vcst[1] : i32 into vector<3xi32>
2067   %c = vector.insert %icst, %vcst[2] : i32 into vector<3xi32>
2068   return %a, %b, %c : vector<3xi32>, vector<3xi32>, vector<3xi32>
2069 }
2070
2071 // -----
2072
2073 // CHECK-LABEL: func.func @insert_2d_constant
2074 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[99, 1, 2\], \[3, 4, 5\]\]}}> : vector<2x3xi32>
2075 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[0, 1, 2\], \[3, 4, 99\]\]}}> : vector<2x3xi32>
2076 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[90, 91, 92\], \[3, 4, 5\]\]}}> : vector<2x3xi32>
2077 //   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[0, 1, 2\], \[90, 91, 92\]\]}}> : vector<2x3xi32>
2078 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]]
     // Input: vector.insert of a constant scalar (at [0, 0] and [1, 2]) and of a
     // constant 1-D row (at [0] and [1]) into a constant 2x3 vector. The expected
     // output is four folded 2-D constants.
2079 func.func @insert_2d_constant() -> (vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>) {
2080   %vcst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
2081   %cst_scalar = arith.constant 99 : i32
2082   %cst_1d = arith.constant dense<[90, 91, 92]> : vector<3xi32>
2083   %a = vector.insert %cst_scalar, %vcst[0, 0] : i32 into vector<2x3xi32>
2084   %b = vector.insert %cst_scalar, %vcst[1, 2] : i32 into vector<2x3xi32>
2085   %c = vector.insert %cst_1d, %vcst[0] : vector<3xi32> into vector<2x3xi32>
2086   %d = vector.insert %cst_1d, %vcst[1] : vector<3xi32> into vector<2x3xi32>
2087   return %a, %b, %c, %d : vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>
2088 }
2089
2090 // -----
2091
2092 // CHECK-LABEL: func.func @insert_2d_splat_constant
2093 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<2x3xi32>
2094 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[99, 0, 0\], \[0, 0, 0\]\]}}> : vector<2x3xi32>
2095 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[0, 0, 0\], \[0, 99, 0\]\]}}> : vector<2x3xi32>
2096 //   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[33, 33, 33\], \[0, 0, 0\]\]}}> : vector<2x3xi32>
2097 //   CHECK-DAG: %[[ECST:.*]] = arith.constant dense<{{\[\[0, 0, 0\], \[33, 33, 33\]\]}}> : vector<2x3xi32>
2098 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]]
     // Input: vector.insert of constant scalars and of a splat 1-D constant into
     // the splat constant dense<0> : vector<2x3xi32>. %a inserts 0 into the zero
     // splat, so its expected result is still dense<0>; the other results are
     // expected to fold to non-splat 2-D constants.
2099 func.func @insert_2d_splat_constant()
2100   -> (vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>) {
2101   %vcst = arith.constant dense<0> : vector<2x3xi32>
2102   %cst_zero = arith.constant 0 : i32
2103   %cst_scalar = arith.constant 99 : i32
2104   %cst_1d = arith.constant dense<33> : vector<3xi32>
2105   %a = vector.insert %cst_zero, %vcst[0, 0] : i32 into vector<2x3xi32>
2106   %b = vector.insert %cst_scalar, %vcst[0, 0] : i32 into vector<2x3xi32>
2107   %c = vector.insert %cst_scalar, %vcst[1, 1] : i32 into vector<2x3xi32>
2108   %d = vector.insert %cst_1d, %vcst[0] : vector<3xi32> into vector<2x3xi32>
2109   %e = vector.insert %cst_1d, %vcst[1] : vector<3xi32> into vector<2x3xi32>
2110   return %a, %b, %c, %d, %e : vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>
2111 }
2112
2113 // -----
2114
2115 // CHECK-LABEL: func @insert_element_fold
2116 //       CHECK:   %[[V:.+]] = arith.constant dense<[0, 1, 7, 3]> : vector<4xi32>
2117 //       CHECK:   return %[[V]]
     // Input: vector.insertelement of the constant 7 at constant index 2 into the
     // constant [0, 1, 2, 3]. The expected output is the constant [0, 1, 7, 3].
2118 func.func @insert_element_fold() -> vector<4xi32> {
2119   %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
2120   %s = arith.constant 7 : i32
2121   %i = arith.constant 2 : i32
2122   %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
2123   return %1 : vector<4xi32>
2124 }
2125
2126 // -----
2127
2128 // CHECK-LABEL: func @insert_element_invalid_fold
     // Negative test: all operands are constants, but index 26 is out of bounds
     // for vector<1xf32>, so the expected output keeps the vector.insertelement.
2129 func.func @insert_element_invalid_fold() -> vector<1xf32> {
2130   // Out-of-bound index here.
2131   %c26 = arith.constant 26 : index
2132   %cst_2 = arith.constant 1.60215309E+9 : f32
2133   %cst_20 = arith.constant dense<1.60215309E+9> : vector<1xf32>
2134 // CHECK: vector.insertelement
2135   %46 = vector.insertelement %cst_2, %cst_20[%c26 : index] : vector<1xf32>
2136   return %46 : vector<1xf32>
2137 }
2138
2139
2140 // -----
2141
2142 // Do not crash on poison
2143 // CHECK-LABEL: func @insert_poison_fold1
2144 //       CHECK:   vector.insertelement
     // Here the destination vector is ub.poison; the expected output keeps the
     // vector.insertelement.
2145 func.func @insert_poison_fold1() -> vector<4xi32> {
2146   %v = ub.poison : vector<4xi32>
2147   %s = arith.constant 7 : i32
2148   %i = arith.constant 2 : i32
2149   %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
2150   return %1 : vector<4xi32>
2151 }
2152
2153 // -----
2154
2155 // Do not crash on poison
2156 // CHECK-LABEL: func @insert_poison_fold2
2157 //       CHECK:   vector.insertelement
     // Here the inserted scalar is ub.poison; the expected output keeps the
     // vector.insertelement.
2158 func.func @insert_poison_fold2() -> vector<4xi32> {
2159   %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
2160   %s = ub.poison : i32
2161   %i = arith.constant 2 : i32
2162   %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
2163   return %1 : vector<4xi32>
2164 }
2165
2166 // -----
2167
2168 // Do not crash on poison
2169 // CHECK-LABEL: func @insert_poison_fold3
2170 //       CHECK:   vector.insertelement
     // Here the insertion index is ub.poison; the expected output keeps the
     // vector.insertelement.
2171 func.func @insert_poison_fold3() -> vector<4xi32> {
2172   %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
2173   %s = arith.constant 7 : i32
2174   %i = ub.poison : i32
2175   %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
2176   return %1 : vector<4xi32>
2177 }
2178
2179 // -----
2180
2181 // CHECK-LABEL: func @extract_element_fold
2182 //       CHECK:   %[[C:.+]] = arith.constant 5 : i32
2183 //       CHECK:   return %[[C]]
     // Input: vector.extractelement at constant index 2 of the constant
     // [1, 3, 5, 7]. The expected output is the scalar constant 5.
2184 func.func @extract_element_fold() -> i32 {
2185   %v = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32>
2186   %i = arith.constant 2 : i32
2187   %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
2188   return %1 : i32
2189 }
2190
2191 // CHECK-LABEL: func @extract_element_splat_fold
2192 //  CHECK-SAME: (%[[ARG:.+]]: i32)
2193 //       CHECK:   return %[[ARG]]
     // Input: vector.extractelement from a vector.splat of %a. The expected output
     // returns the function argument directly.
2194 func.func @extract_element_splat_fold(%a : i32) -> i32 {
2195   %v = vector.splat %a : vector<4xi32>
2196   %i = arith.constant 2 : i32
2197   %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
2198   return %1 : i32
2199 }
2200
2201 // -----
2202
2203 // Do not crash on poison
2204 // CHECK-LABEL: func @extract_element_poison_fold1
2205 //       CHECK:   vector.extractelement
     // Here the source vector is ub.poison; the expected output keeps the
     // vector.extractelement.
2206 func.func @extract_element_poison_fold1() -> i32 {
2207   %v = ub.poison : vector<4xi32>
2208   %i = arith.constant 2 : i32
2209   %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
2210   return %1 : i32
2211 }
2212
2213 // -----
2214
2215 // Do not crash on poison
2216 // CHECK-LABEL: func @extract_element_poison_fold2
2217 //       CHECK:   vector.extractelement
     // Here the extraction index is ub.poison; the expected output keeps the
     // vector.extractelement.
2218 func.func @extract_element_poison_fold2() -> i32 {
2219   %v = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32>
2220   %i = ub.poison : i32
2221   %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
2222   return %1 : i32
2223 }
2224
2225 // -----
2226
2227 // CHECK-LABEL: func @reduce_one_element_vector_extract
2228 //  CHECK-SAME: (%[[V:.+]]: vector<1xf32>)
2229 //       CHECK:   %[[S:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2230 //       CHECK:   return %[[S]] : f32
     // Input: an <add> vector.reduction over a vector<1xf32> with no accumulator.
     // The expected output is a plain vector.extract of element 0.
2231 func.func @reduce_one_element_vector_extract(%a : vector<1xf32>) -> f32 {
2232   %s = vector.reduction <add>, %a : vector<1xf32> into f32
2233   return %s : f32
2234 }
2235
2236 // -----
2237
2238 // CHECK-LABEL: func @masked_reduce_one_element_vector_extract
2239 //  CHECK-SAME: %[[VAL_0:.*]]: vector<1xf32>, %[[VAL_1:.*]]: vector<1xi1>)
     // Input: the same one-element <add> reduction wrapped in a vector.mask. The
     // expected output contains a vector.extract of element 0 of %a.
2240 func.func @masked_reduce_one_element_vector_extract(%a : vector<1xf32>, %mask : vector<1xi1>) -> f32 {
2241 //       CHECK:   %[[VAL_2:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<1xf32>
2242   %s = vector.mask %mask { vector.reduction <add>, %a : vector<1xf32> into f32 }
2243          : vector<1xi1> -> f32
2244   return %s : f32
2245 }
2246
2247 // -----
2248
2249 // CHECK-LABEL: func @reduce_one_element_vector_addf
2250 //  CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2251 //       CHECK:   %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2252 //       CHECK:   %[[S:.+]] = arith.addf %[[A]], %[[B]] : f32
2253 //       CHECK:   return %[[S]]
     // Input: an <add> vector.reduction over a vector<1xf32> with accumulator %b.
     // The expected output extracts element 0 and adds the accumulator with
     // arith.addf. The accumulator is matched through the captured argument
     // rather than a hard-coded SSA name.
2254 func.func @reduce_one_element_vector_addf(%a : vector<1xf32>, %b: f32) -> f32 {
2255   %s = vector.reduction <add>, %a, %b : vector<1xf32> into f32
2256   return %s : f32
2257 }
2258
2259 // -----
2260
2261 // CHECK-LABEL: func @reduce_one_element_vector_addf_fastmath
2262 //  CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2263 //       CHECK:   %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2264 //       CHECK:   %[[S:.+]] = arith.addf %[[A]], %[[B]] fastmath<nnan,ninf> : f32
2265 //       CHECK:   return %[[S]]
     // Same as reduce_one_element_vector_addf, but the reduction carries
     // fastmath<nnan,ninf>; the expected arith.addf carries the same flags. The
     // accumulator is matched through the captured argument rather than a
     // hard-coded SSA name.
2266 func.func @reduce_one_element_vector_addf_fastmath(%a : vector<1xf32>, %b: f32) -> f32 {
2267   %s = vector.reduction <add>, %a, %b fastmath<nnan,ninf> : vector<1xf32> into f32
2268   return %s : f32
2269 }
2270
2271 // -----
2272
2273 // CHECK-LABEL: func @masked_reduce_one_element_vector_addf
2274 //  CHECK-SAME: %[[VAL_0:.*]]: vector<1xf32>, %[[VAL_1:.*]]: f32,
2275 //  CHECK-SAME: %[[VAL_2:.*]]: vector<1xi1>)
     // Input: a masked one-element <add> reduction with accumulator %b. The
     // expected output extracts the mask bit and the element, adds the element to
     // the accumulator, and uses arith.select to pick between that sum and the
     // accumulator based on the mask bit.
2276 func.func @masked_reduce_one_element_vector_addf(%a: vector<1xf32>,
2277                                                  %b: f32,
2278                                                  %mask: vector<1xi1>) -> f32 {
2279 //       CHECK:   %[[VAL_3:.*]] = vector.extract %[[VAL_2]][0] : i1 from vector<1xi1>
2280 //       CHECK:   %[[VAL_4:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<1xf32>
2281 //       CHECK:   %[[VAL_5:.*]] = arith.addf %[[VAL_4]], %[[VAL_1]] : f32
2282 //       CHECK:   %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_1]] : f32
2283   %s = vector.mask %mask { vector.reduction <add>, %a, %b : vector<1xf32> into f32 }
2284          : vector<1xi1> -> f32
2285   return %s : f32
2286 }
2287
2288 // -----
2289
2290 // CHECK-LABEL: func @reduce_one_element_vector_mulf
2291 //  CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2292 //       CHECK:   %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2293 //       CHECK:   %[[S:.+]] = arith.mulf %[[A]], %[[B]] : f32
2294 //       CHECK:   return %[[S]]
     // Input: a <mul> vector.reduction over a vector<1xf32> with accumulator %b.
     // The expected output extracts element 0 and multiplies it with the
     // accumulator via arith.mulf. The accumulator is matched through the
     // captured argument rather than a hard-coded SSA name.
2295 func.func @reduce_one_element_vector_mulf(%a : vector<1xf32>, %b: f32) -> f32 {
2296   %s = vector.reduction <mul>, %a, %b : vector<1xf32> into f32
2297   return %s : f32
2298 }
2299
2300 // -----
2301
2302 // CHECK-LABEL: func @dont_reduce_one_element_vector
2303 //       CHECK: vector.reduction
     // Negative test: the reduced vector has four elements, so the expected output
     // keeps the vector.reduction.
2304 func.func @dont_reduce_one_element_vector(%a : vector<4xf32>) -> f32 {
2305   %s = vector.reduction <add>, %a : vector<4xf32> into f32
2306   return %s : f32
2307 }
2308
2309 // -----
2310
2311 // CHECK-LABEL: func @reduce_one_element_vector_maximumf
2312 //  CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2313 //       CHECK:   %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2314 //       CHECK:   %[[S:.+]] = arith.maximumf %[[A]], %[[B]] : f32
2315 //       CHECK:   return %[[S]]
     // Input: a <maximumf> vector.reduction over a vector<1xf32> with accumulator
     // %b. The expected output extracts element 0 and combines it with the
     // accumulator via arith.maximumf.
2316 func.func @reduce_one_element_vector_maximumf(%a : vector<1xf32>, %b: f32) -> f32 {
2317   %s = vector.reduction <maximumf>, %a, %b : vector<1xf32> into f32
2318   return %s : f32
2319 }
2320
2321 // -----
2322
2323 // CHECK-LABEL: func @bitcast(
2324 //  CHECK-SAME:               %[[ARG:.*]]: vector<4x8xf32>) -> vector<4x16xi16> {
2325 //       CHECK: vector.bitcast %[[ARG]] : vector<4x8xf32> to vector<4x16xi16>
     // Input: two chained vector.bitcast ops (f32 -> i32 -> i16). The expected
     // output is a single bitcast from the function argument to the final type.
     // The operand pattern uses the captured argument so that it is actually
     // verified instead of being re-captured.
2326 func.func @bitcast(%a: vector<4x8xf32>) -> vector<4x16xi16> {
2327   %0 = vector.bitcast %a : vector<4x8xf32> to vector<4x8xi32>
2328   %1 = vector.bitcast %0 : vector<4x8xi32> to vector<4x16xi16>
2329   return %1 : vector<4x16xi16>
2330 }
2331
2332 // -----
2333
2334 // CHECK-LABEL: @insert_strided_slice_splat
2335 //  CHECK-SAME: (%[[ARG:.*]]: f32)
2336 //  CHECK-NEXT:   %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<8x16xf32>
2337 //  CHECK-NEXT:   return %[[SPLAT]] : vector<8x16xf32>
     // Input: vector.insert_strided_slice where both the inserted slice and the
     // destination are splats of the same scalar %x. The expected output is just
     // the 8x16 splat.
2338 func.func @insert_strided_slice_splat(%x: f32) -> (vector<8x16xf32>) {
2339   %splat0 = vector.splat %x : vector<4x4xf32>
2340   %splat1 = vector.splat %x : vector<8x16xf32>
2341   %0 = vector.insert_strided_slice %splat0, %splat1 {offsets = [2, 2], strides = [1, 1]}
2342     : vector<4x4xf32> into vector<8x16xf32>
2343   return %0 : vector<8x16xf32>
2344 }
2345
2346
2347 // -----
2348
2349 // CHECK-LABEL: @insert_extract_strided_slice
2350 //  CHECK-SAME: (%[[ARG:.*]]: vector<8x16xf32>)
2351 //  CHECK-NEXT:   return %[[ARG]] : vector<8x16xf32>
     // Input: a slice extracted from %x at offsets [0, 8] is inserted back into %x
     // at the same offsets. The expected output returns %x unchanged.
2352 func.func @insert_extract_strided_slice(%x: vector<8x16xf32>) -> (vector<8x16xf32>) {
2353   %0 = vector.extract_strided_slice %x {offsets = [0, 8], sizes = [2, 4], strides = [1, 1]}
2354         : vector<8x16xf32> to vector<2x4xf32>
2355   %1 = vector.insert_strided_slice %0, %x {offsets = [0, 8], strides = [1, 1]}
2356         : vector<2x4xf32> into vector<8x16xf32>
2357   return %1 : vector<8x16xf32>
2358 }
2359
2360 // -----
2361
2362 // CHECK-LABEL: func.func @insert_strided_1d_constant
2363 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[4, 1, 2]> : vector<3xi32>
2364 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[0, 1, 4]> : vector<3xi32>
2365 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[5, 6, 2]> : vector<3xi32>
2366 //   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<[0, 5, 6]> : vector<3xi32>
2367 //   CHECK-DAG: %[[ECST:.*]] = arith.constant dense<[7, 8, 9]> : vector<3xi32>
2368 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]]
     // Input: vector.insert_strided_slice of constant 1-, 2- and 3-element vectors
     // into the constant [0, 1, 2] at various offsets. The expected output is five
     // folded constants.
2369 func.func @insert_strided_1d_constant() ->
2370   (vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>) {
2371   %vcst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
2372   %cst_1 = arith.constant dense<4> : vector<1xi32>
2373   %cst_2 = arith.constant dense<[5, 6]> : vector<2xi32>
2374   %cst_3 = arith.constant dense<[7, 8, 9]> : vector<3xi32>
2375   %a = vector.insert_strided_slice %cst_1, %vcst {offsets = [0], strides = [1]} : vector<1xi32> into vector<3xi32>
2376   %b = vector.insert_strided_slice %cst_1, %vcst {offsets = [2], strides = [1]} : vector<1xi32> into vector<3xi32>
2377   %c = vector.insert_strided_slice %cst_2, %vcst {offsets = [0], strides = [1]} : vector<2xi32> into vector<3xi32>
2378   %d = vector.insert_strided_slice %cst_2, %vcst {offsets = [1], strides = [1]} : vector<2xi32> into vector<3xi32>
2379   %e = vector.insert_strided_slice %cst_3, %vcst {offsets = [0], strides = [1]} : vector<3xi32> into vector<3xi32>
2380   return %a, %b, %c, %d, %e : vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>
2381 }
2382
2383 // -----
2384
2385 // CHECK-LABEL: func.func @insert_strided_2d_constant
2386 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[0, 1\], \[9, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
2387 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[4, 9\]\]}}> : vector<3x2xi32>
2388 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[18, 19\], \[2, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
2389 //   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[18, 19\], \[4, 5\]\]}}> : vector<3x2xi32>
2390 //   CHECK-DAG: %[[ECST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[18, 19\]\]}}> : vector<3x2xi32>
2391 //   CHECK-DAG: %[[FCST:.*]] = arith.constant dense<{{\[\[28, 29\], \[38, 39\]\, \[4, 5\]\]}}> : vector<3x2xi32>
2392 //   CHECK-DAG: %[[GCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[28, 29\], \[38, 39\]\]}}> : vector<3x2xi32>
2393 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]], %[[FCST]], %[[GCST]]
     // Input: vector.insert_strided_slice of constant 1-D (1 and 2 elements) and
     // 2-D (2x2) vectors into a constant 3x2 vector at various offsets. The
     // expected output is seven folded 2-D constants.
2394 func.func @insert_strided_2d_constant() ->
2395   (vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>) {
2396   %vcst = arith.constant dense<[[0, 1], [2, 3], [4, 5]]> : vector<3x2xi32>
2397   %cst_1 = arith.constant dense<9> : vector<1xi32>
2398   %cst_2 = arith.constant dense<[18, 19]> : vector<2xi32>
2399   %cst_3 = arith.constant dense<[[28, 29], [38, 39]]> : vector<2x2xi32>
2400   %a = vector.insert_strided_slice %cst_1, %vcst {offsets = [1, 0], strides = [1]} : vector<1xi32> into vector<3x2xi32>
2401   %b = vector.insert_strided_slice %cst_1, %vcst {offsets = [2, 1], strides = [1]} : vector<1xi32> into vector<3x2xi32>
2402   %c = vector.insert_strided_slice %cst_2, %vcst {offsets = [0, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
2403   %d = vector.insert_strided_slice %cst_2, %vcst {offsets = [1, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
2404   %e = vector.insert_strided_slice %cst_2, %vcst {offsets = [2, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
2405   %f = vector.insert_strided_slice %cst_3, %vcst {offsets = [0, 0], strides = [1, 1]} : vector<2x2xi32> into vector<3x2xi32>
2406   %g = vector.insert_strided_slice %cst_3, %vcst {offsets = [1, 0], strides = [1, 1]} : vector<2x2xi32> into vector<3x2xi32>
2407   return %a, %b, %c, %d, %e, %f, %g :
2408     vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>
2409 }
2410
2411 // -----
2412
2413 // CHECK-LABEL: func @shuffle_splat
2414 //  CHECK-SAME:   (%[[ARG:.*]]: i32)
2415 //  CHECK-NEXT:   %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<4xi32>
2416 //  CHECK-NEXT:   return %[[SPLAT]] : vector<4xi32>
     // Input: vector.shuffle of two splats of the same scalar %x. The expected
     // output is a single vector.splat of %x with the result type.
2417 func.func @shuffle_splat(%x : i32) -> vector<4xi32> {
2418   %v0 = vector.splat %x : vector<4xi32>
2419   %v1 = vector.splat %x : vector<2xi32>
2420   %shuffle = vector.shuffle %v0, %v1 [2, 3, 4, 5] : vector<4xi32>, vector<2xi32>
2421   return %shuffle : vector<4xi32>
2422 }
2423
2424
2425 // -----
2426
2427 // CHECK-LABEL: func @insert_splat
2428 //  CHECK-SAME:   (%[[ARG:.*]]: i32)
2429 //  CHECK-NEXT:   %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<2x4x3xi32>
2430 //  CHECK-NEXT:   return %[[SPLAT]] : vector<2x4x3xi32>
     // Input: vector.insert of a splat of %x into a larger splat of the same %x.
     // The expected output is a single vector.splat with the destination type.
2431 func.func @insert_splat(%x : i32) -> vector<2x4x3xi32> {
2432   %v0 = vector.splat %x : vector<4x3xi32>
2433   %v1 = vector.splat %x : vector<2x4x3xi32>
2434   %insert = vector.insert %v0, %v1[0] : vector<4x3xi32> into vector<2x4x3xi32>
2435   return %insert : vector<2x4x3xi32>
2436 }
2437
2438 // -----
2439
2440 // CHECK-LABEL: func.func @transfer_read_from_rank_reducing_extract_slice
2441 //       CHECK:   tensor.extract_slice
2442 //       CHECK:   vector.transfer_read
     // Input: a vector.transfer_read from a rank-reducing tensor.extract_slice
     // (4-D source, 3-D slice). The expected output still contains both the
     // tensor.extract_slice and the vector.transfer_read, in that order.
2443 func.func @transfer_read_from_rank_reducing_extract_slice(%src: tensor<1x8x8x8xf32>, %i1: index, %i2: index, %i3: index, %i4: index) -> vector<4xf32> {
2444   %c0 = arith.constant 0 : index
2445   %f0 = arith.constant 0.000000e+00 : f32
2446   %0 = tensor.extract_slice %src[0, %i1, %i2, %i3] [1, 4, 1, 4] [1, 1, 1, 1] : tensor<1x8x8x8xf32> to tensor<1x4x4xf32>
2447   %1 = vector.transfer_read %0[%c0, %i4, %c0], %f0 {in_bounds = [true]} : tensor<1x4x4xf32>, vector<4xf32>
2448   return %1 : vector<4xf32>
2449 }
2450
2451 // -----
2452
2453 // CHECK-LABEL: func.func @extract_from_broadcast
     // Input: vector.extract at [0, 0, 31] of a broadcast from vector<1x1x1xf32>
     // to vector<1x1x32x1xf32>. The expected output extracts [0, 0] directly from
     // a vector<1x1x1xf32>. The result is captured in a FileCheck variable
     // instead of relying on the printed SSA name.
2454 func.func @extract_from_broadcast(%src: vector<1x1x1xf32>) -> vector<1xf32> {
2455   %0 = vector.broadcast %src : vector<1x1x1xf32> to vector<1x1x32x1xf32>
2456
2457   //  CHECK-NEXT:   %[[RES:.*]] = vector.extract {{.*}}[0, 0] : vector<1xf32> from vector<1x1x1xf32>
2458   //  CHECK-NEXT:   return %[[RES]] : vector<1xf32>
2459   %1 = vector.extract %0[0, 0, 31] : vector<1xf32> from vector<1x1x32x1xf32>
2460   return %1: vector<1xf32>
2461 }
2462
2463 // CHECK-LABEL: func.func @extract_from_stretch_broadcast
     // Input: vector.extract at [0, 2, 0] of a broadcast that stretches the unit
     // middle dimension of vector<3x1x2xf32> to 4. The expected output extracts
     // [0, 0, 0] directly from a vector<3x1x2xf32>. The result is captured in a
     // FileCheck variable instead of relying on the printed SSA name.
2464 func.func @extract_from_stretch_broadcast(%src: vector<3x1x2xf32>) -> f32 {
2465   //  CHECK-NEXT:  %[[RES:.*]] = vector.extract {{.*}}[0, 0, 0] : f32 from vector<3x1x2xf32>
2466   //  CHECK-NEXT:  return %[[RES]] : f32
2467   %0 = vector.broadcast %src : vector<3x1x2xf32> to vector<3x4x2xf32>
2468   %1 = vector.extract %0[0, 2, 0] : f32 from vector<3x4x2xf32>
2469   return %1: f32
2470 }
2471
2472 // -----
2473 // CHECK-LABEL: func.func @extract_strided_slice_of_constant_mask
     // Input: a vector.create_mask with constant bounds (10, 4) on a 12x7 mask,
     // sliced along dim 0 with offset 3 and size 5. The expected output is a
     // single vector.constant_mask [5, 4] of type vector<5x7xi1>.
2474 func.func @extract_strided_slice_of_constant_mask() -> vector<5x7xi1>{
2475   //  CHECK-NEXT:   %[[RES:.*]] = vector.constant_mask [5, 4] : vector<5x7xi1>
2476   //  CHECK-NEXT:   return %[[RES]] : vector<5x7xi1>
2477   %c4 = arith.constant 4 : index
2478   %c10 = arith.constant 10 : index
2479   %mask = vector.create_mask %c10, %c4 : vector<12x7xi1>
2480   %res = vector.extract_strided_slice %mask {offsets = [3], sizes = [5], strides = [1]} : vector<12x7xi1> to vector<5x7xi1>
2481   return %res : vector<5x7xi1>
2482 }
2483
2484 // -----
2485
2486 // CHECK-LABEL: func.func @fold_extractelement_of_broadcast(
2487 //  CHECK-SAME:     %[[f:.*]]: f32
2488 //       CHECK:   return %[[f]]
     // Input: vector.extractelement at constant index 5 of a broadcast of the
     // scalar %f. The expected output returns %f directly.
2489 func.func @fold_extractelement_of_broadcast(%f: f32) -> f32 {
2490   %0 = vector.broadcast %f : f32 to vector<15xf32>
2491   %c5 = arith.constant 5 : index
2492   %1 = vector.extractelement %0 [%c5 : index] : vector<15xf32>
2493   return %1 : f32
2494 }
2495
2496 // -----
2497
2498 // CHECK-LABEL: func.func @fold_0d_vector_reduction
     // Input: an <add> vector.reduction over a 0-D vector<f32>. The expected
     // output is a vector.extractelement with an empty position.
2499 func.func @fold_0d_vector_reduction(%arg0: vector<f32>) -> f32 {
2500   // CHECK-NEXT: %[[RES:.*]] = vector.extractelement %arg{{.*}}[] : vector<f32>
2501   // CHECK-NEXT: return %[[RES]] : f32
2502   %0 = vector.reduction <add>, %arg0 : vector<f32> into f32
2503   return %0 : f32
2504 }
2505
2506 // -----
2507
2508 // CHECK-LABEL: func @empty_vector_mask
     // Input: a vector.mask with an empty region and no results. The expected
     // output contains no vector.mask.
2509 func.func @empty_vector_mask(%mask : vector<8xi1>) {
2510 //   CHECK-NOT:   vector.mask
2511   vector.mask %mask { } : vector<8xi1>
2512   return
2513 }
2514
2515 // -----
2516
2517 // CHECK-LABEL: func @empty_vector_mask_with_return
2518 //  CHECK-SAME:     %[[IN:.*]]: vector<8xf32>
     // Input: a vector.mask whose region only yields %a. The expected output has
     // no vector.mask and returns %a directly.
2519 func.func @empty_vector_mask_with_return(%a : vector<8xf32>, %mask : vector<8xi1>) -> vector<8xf32> {
2520 //   CHECK-NOT:   vector.mask
2521 //       CHECK:   return %[[IN]] : vector<8xf32>
2522   %0 = vector.mask %mask { vector.yield %a : vector<8xf32> } : vector<8xi1> -> vector<8xf32>
2523   return %0 : vector<8xf32>
2524 }
2525
2526 // -----
2527
2528 // CHECK-LABEL: func @all_true_vector_mask
2529 //  CHECK-SAME:     %[[IN:.*]]: tensor<3x4xf32>
     // Input: a vector.transfer_read wrapped in a vector.mask whose mask is
     // vector.constant_mask [3, 4] on a 3x4 mask type. The expected output has no
     // vector.mask and returns the result of the transfer_read.
2530 func.func @all_true_vector_mask(%ta : tensor<3x4xf32>) -> vector<3x4xf32> {
2531 //   CHECK-NOT:   vector.mask
2532 //       CHECK:   %[[LD:.*]] = vector.transfer_read %[[IN]]
2533 //       CHECK:   return %[[LD]] : vector<3x4xf32>
2534   %c0 = arith.constant 0 : index
2535   %cf0 = arith.constant 0.0 : f32
2536   %all_true = vector.constant_mask [3, 4] : vector<3x4xi1>
2537   %0 = vector.mask %all_true { vector.transfer_read %ta[%c0, %c0], %cf0 : tensor<3x4xf32>, vector<3x4xf32> } : vector<3x4xi1> -> vector<3x4xf32>
2538   return %0 : vector<3x4xf32>
2539 }
2540
2541 // -----
2542
2543 // CHECK-LABEL: func @all_true_vector_mask_no_result(
     // Same as all_true_vector_mask, but the masked op is a vector.transfer_write
     // to a memref and the vector.mask has no result. The expected output has no
     // vector.mask and keeps the transfer_write.
2544 func.func @all_true_vector_mask_no_result(%a : vector<3x4xf32>, %m : memref<3x4xf32>) {
2545 //   CHECK-NOT:   vector.mask
2546 //       CHECK:   vector.transfer_write
2547   %c0 = arith.constant 0 : index
2548   %all_true = vector.constant_mask [3, 4] : vector<3x4xi1>
2549   vector.mask %all_true { vector.transfer_write %a, %m[%c0, %c0] : vector<3x4xf32>, memref<3x4xf32> } : vector<3x4xi1>
2550   return
2551 }
2552
2553 // -----
2554
2555 // CHECK-LABEL:   func.func @fold_shape_cast_with_mask(
2556 // CHECK-SAME:     %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x4xi1> {
     // Input: a vector.shape_cast that drops the two trailing unit dims of a
     // 1x4x1x1 vector.create_mask. The expected output has no shape_cast and
     // builds the mask directly as vector<1x4xi1> from the first two operands.
2557 func.func @fold_shape_cast_with_mask(%arg0: tensor<1x?xf32>) -> vector<1x4xi1> {
2558 // CHECK-NOT: vector.shape_cast
2559 // CHECK:     %[[VAL_1:.*]] = arith.constant 1 : index
2560 // CHECK:     %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
2561 // CHECK:     %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x4xi1>
2562 // CHECK:     return %[[VAL_3]] : vector<1x4xi1>
2563   %c1 = arith.constant 1 : index
2564   %dim = tensor.dim %arg0, %c1 : tensor<1x?xf32>
2565   %1 = vector.create_mask %c1, %dim, %c1, %c1 : vector<1x4x1x1xi1>
2566   %2 = vector.shape_cast %1 : vector<1x4x1x1xi1> to vector<1x4xi1>
2567   return %2 : vector<1x4xi1>
2568 }
2569
2570 // -----
2571
2572 // CHECK-LABEL:   func.func @fold_shape_cast_with_mask_scalable(
2573 // CHECK-SAME:    %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x[4]xi1> {
     // Same as fold_shape_cast_with_mask, but the second dimension is scalable
     // ([4]). The expected output has no shape_cast and builds the mask directly
     // as vector<1x[4]xi1>.
2574 func.func @fold_shape_cast_with_mask_scalable(%arg0: tensor<1x?xf32>) -> vector<1x[4]xi1> {
2575 // CHECK-NOT: vector.shape_cast
2576 // CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
2577 // CHECK:           %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
2578 // CHECK:           %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x[4]xi1>
2579 // CHECK:           return %[[VAL_3]] : vector<1x[4]xi1>
2580   %c1 = arith.constant 1 : index
2581   %dim = tensor.dim %arg0, %c1 : tensor<1x?xf32>
2582   %1 = vector.create_mask %c1, %dim, %c1, %c1 : vector<1x[4]x1x1xi1>
2583   %2 = vector.shape_cast %1 : vector<1x[4]x1x1xi1> to vector<1x[4]xi1>
2584   return %2 : vector<1x[4]xi1>
2585 }
2586
2587 // -----
2588
2589 // Check that a scalable unit dim (i.e. [1]) is kept in the folded mask while the trailing fixed unit dim is dropped
2590 // CHECK-LABEL:   func.func @fold_shape_cast_with_mask_scalable_one(
2591 // CHECK-SAME:    %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x[1]xi1> {
     // Input: a vector.shape_cast from vector<1x[1]x1xi1> to vector<1x[1]xi1>
     // applied to a vector.create_mask. The expected output builds the mask
     // directly as vector<1x[1]xi1> from the first two operands.
2592 func.func @fold_shape_cast_with_mask_scalable_one(%arg0: tensor<1x?xf32>) -> vector<1x[1]xi1>{
2593 // CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
2594 // CHECK:           %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
2595 // CHECK:           %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x[1]xi1>
2596 // CHECK:           return %[[VAL_3]] : vector<1x[1]xi1>
2597   %c1 = arith.constant 1 : index
2598   %dim = tensor.dim %arg0, %c1 : tensor<1x?xf32>
2599   %1 = vector.create_mask %c1, %dim, %c1 : vector<1x[1]x1xi1>
2600   %2 = vector.shape_cast %1 : vector<1x[1]x1xi1> to vector<1x[1]xi1>
2601   return %2 : vector<1x[1]xi1>
2602 }
2603
2604 // -----
2605
2606 // CHECK-LABEL:   func.func @fold_shape_cast_with_constant_mask() -> vector<4xi1> {
     // Input: a vector.shape_cast that drops the two trailing unit dims of a
     // vector.constant_mask [1, 1, 1] : vector<4x1x1xi1>. The expected output has
     // no shape_cast and is vector.constant_mask [1] : vector<4xi1>.
2607 func.func @fold_shape_cast_with_constant_mask() -> vector<4xi1>{
2608 // CHECK-NOT: vector.shape_cast
2609 // CHECK:           %[[VAL_0:.*]] = vector.constant_mask [1] : vector<4xi1>
2610 // CHECK:           return %[[VAL_0]] : vector<4xi1>
2611   %1 = vector.constant_mask [1, 1, 1] : vector<4x1x1xi1>
2612   %2 = vector.shape_cast %1 : vector<4x1x1xi1> to vector<4xi1>
2613   return %2 : vector<4xi1>
2614 }
2615
2616 // -----
2617
2618 // TODO: This IR could be canonicalized but the canonicalization pattern is not
2619 // smart enough. For now, just make sure that we do not crash.
2620
2621 // CHECK-LABEL: func.func @load_store_forwarding_rank_mismatch(
2622 //       CHECK:   vector.transfer_write
2623 //       CHECK:   vector.transfer_read
     // Input: a vector.transfer_read of the tensor produced by a
     // vector.transfer_write, where the written vector is 3-D and the read vector
     // is 4-D with a different permutation map. The expected output keeps both
     // transfer ops.
2624 func.func @load_store_forwarding_rank_mismatch(%v0: vector<4x1x1xf32>, %arg0: tensor<4x4x4xf32>) -> (vector<1x100x4x5xf32>) {
2625   %c0 = arith.constant 0 : index
2626   %cf0 = arith.constant 0.0 : f32
2627   // d0 is explicitly written.
2628   %w0 = vector.transfer_write %v0, %arg0[%c0, %c0, %c0]
2629       {in_bounds = [true, true, true],
2630       permutation_map = affine_map<(d0, d1, d2) -> (d2, d1, d0)>} :
2631       vector<4x1x1xf32>, tensor<4x4x4xf32>
2632   // d0 is implicitly read (rank-reduction of unit dim).
2633   %r = vector.transfer_read %w0[%c0, %c0, %c0], %cf0
2634       {in_bounds = [true, true, true, true],
2635       permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} :
2636       tensor<4x4x4xf32>, vector<1x100x4x5xf32>
2637   return %r : vector<1x100x4x5xf32>
2638 }
2639
2640 // -----
2641
2642 // CHECK-LABEL: func.func @rank_0_shuffle_to_interleave(
2643 //  CHECK-SAME:     %[[LHS:.*]]: vector<f64>, %[[RHS:.*]]: vector<f64>)
     // Input: vector.shuffle of two 0-D vectors with mask [0, 1]. The expected
     // output is a vector.interleave of the two arguments.
2644 func.func @rank_0_shuffle_to_interleave(%arg0: vector<f64>, %arg1: vector<f64>) -> vector<2xf64> {
2645   // CHECK: %[[ZIP:.*]] = vector.interleave %[[LHS]], %[[RHS]] : vector<f64> -> vector<2xf64>
2646   // CHECK: return %[[ZIP]]
2647   %0 = vector.shuffle %arg0, %arg1 [0, 1] : vector<f64>, vector<f64>
2648   return %0 : vector<2xf64>
2649 }
2650
2651 // -----
2652
2653 // CHECK-LABEL: func.func @rank_1_shuffle_to_interleave(
2654 //  CHECK-SAME:     %[[A:.*]]: vector<6xi32>, %[[B:.*]]: vector<6xi32>)
     // The shuffle mask alternates element i of the first operand with element i
     // of the second (0, 6, 1, 7, ...); the expected output is one
     // vector.interleave producing the 12-element result.
2655 func.func @rank_1_shuffle_to_interleave(%lhs: vector<6xi32>, %rhs: vector<6xi32>) -> vector<12xi32> {
2656   // CHECK: %[[INTERLEAVED:.*]] = vector.interleave %[[A]], %[[B]] : vector<6xi32> -> vector<12xi32>
2657   // CHECK: return %[[INTERLEAVED]]
2658   %shuffled = vector.shuffle %lhs, %rhs [0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11] : vector<6xi32>, vector<6xi32>
2659   return %shuffled : vector<12xi32>
2660 }
2661
2662 // -----
2663
2664 // CHECK-LABEL: func @extract_from_0d_splat_broadcast_regression(
2665 //  CHECK-SAME:     %[[a:.*]]: f32, %[[b:.*]]: vector<f32>, %[[c:.*]]: vector<2xf32>)
     // Exercises vector.extract applied to the result of vector.splat and
     // vector.broadcast, for 0-D and n-D sources. The final return expectation
     // lists, in order, what each of the seven results should have become.
     // Note: %c is declared but not used by the body.
2666 func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %b: vector<f32>, %c: vector<2xf32>) -> (f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>) {
2667   // Splat scalar to 0D and extract scalar.
       // Expected to fold to %a (1st returned value).
2668   %0 = vector.splat %a : vector<f32>
2669   %1 = vector.extract %0[] : f32 from vector<f32>
2670
2671   // Broadcast scalar to 0D and extract scalar.
       // Expected to fold to %a (2nd returned value).
2672   %2 = vector.broadcast %a : f32 to vector<f32>
2673   %3 = vector.extract %2[] : f32 from vector<f32>
2674
2675   // Broadcast 0D to 3D and extract scalar.
       // Expected to become an extractelement directly on %b (3rd returned value).
2676   // CHECK: %[[extract1:.*]] = vector.extractelement %[[b]][] : vector<f32>
2677   %4 = vector.broadcast %b : vector<f32> to vector<1x2x4xf32>
2678   %5 = vector.extract %4[0, 0, 1] : f32 from vector<1x2x4xf32>
2679
2680   // Splat scalar to 2D and extract scalar.
       // Expected to fold to %a (4th returned value).
2681   %6 = vector.splat %a : vector<2x3xf32>
2682   %7 = vector.extract %6[0, 1] : f32 from vector<2x3xf32>
2683
2684   // Broadcast scalar to 3D and extract scalar.
       // Expected to fold to %a (5th returned value).
2685   %8 = vector.broadcast %a : f32 to vector<5x6x7xf32>
2686   %9 = vector.extract %8[2, 1, 5] : f32 from vector<5x6x7xf32>
2687
2688   // Extract 2D from 3D that was broadcasted from a scalar.
       // Expected to become a broadcast of %a to the 2-D slice type.
2689   // CHECK: %[[extract2:.*]] = vector.broadcast %[[a]] : f32 to vector<6x7xf32>
2690   %10 = vector.extract %8[2] : vector<6x7xf32> from vector<5x6x7xf32>
2691
2692   // Extract 1D from 2D that was splat'ed from a scalar.
       // Expected to become a broadcast (not a splat) of %a to the 1-D slice type.
2693   // CHECK: %[[extract3:.*]] = vector.broadcast %[[a]] : f32 to vector<3xf32>
2694   %11 = vector.extract %6[1] : vector<3xf32> from vector<2x3xf32>
2695
2696   // CHECK:   return %[[a]], %[[a]], %[[extract1]], %[[a]], %[[a]], %[[extract2]], %[[extract3]]
2697   return %1, %3, %5, %7, %9, %10, %11 : f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>
2698 }
2699
2700 // -----
2701
2702 // CHECK-LABEL: func @extract_scalar_from_from_elements(
2703 //  CHECK-SAME:     %[[A:.*]]: f32, %[[B:.*]]: f32)
     // Every scalar extract below reads one position of a vector.from_elements;
     // each is expected to fold to the operand stored at that position, so the
     // function ends up returning only its two arguments.
2704 func.func @extract_scalar_from_from_elements(%a: f32, %b: f32) -> (f32, f32, f32, f32, f32, f32, f32) {
2705   // 0-D source: the single element.
2706   %vec0d = vector.from_elements %a : vector<f32>
2707   %s0 = vector.extract %vec0d[] : f32 from vector<f32>
2708
2709   // 1-D sources: a one-element vector, then the last of five elements.
2710   %vec1 = vector.from_elements %a : vector<1xf32>
2711   %s1 = vector.extract %vec1[0] : f32 from vector<1xf32>
2712   %vec5 = vector.from_elements %a, %b, %a, %a, %b : vector<5xf32>
2713   %s2 = vector.extract %vec5[4] : f32 from vector<5xf32>
2714
2715   // 2-D source: first row holds %a three times, second row holds %b.
2716   %vec2x3 = vector.from_elements %a, %a, %a, %b, %b, %b : vector<2x3xf32>
2717   %s3 = vector.extract %vec2x3[0, 0] : f32 from vector<2x3xf32>
2718   %s4 = vector.extract %vec2x3[0, 1] : f32 from vector<2x3xf32>
2719   %s5 = vector.extract %vec2x3[1, 1] : f32 from vector<2x3xf32>
2720   %s6 = vector.extract %vec2x3[1, 2] : f32 from vector<2x3xf32>
2721
2722   // CHECK: return %[[A]], %[[A]], %[[B]], %[[A]], %[[A]], %[[B]], %[[B]]
2723   return %s0, %s1, %s2, %s3, %s4, %s5, %s6 : f32, f32, f32, f32, f32, f32, f32
2724 }
2725
2726 // -----
2727
2728 // CHECK-LABEL: func @extract_1d_from_from_elements(
2729 //  CHECK-SAME:     %[[A:.*]]: f32, %[[B:.*]]: f32)
     // Each row of the 2x3 from_elements holds one repeated value, so extracting
     // a row is expected to yield a splat of that value.
2730 func.func @extract_1d_from_from_elements(%a: f32, %b: f32) -> (vector<3xf32>, vector<3xf32>) {
2731   %matrix = vector.from_elements %a, %a, %a, %b, %b, %b : vector<2x3xf32>
2732   // CHECK: %[[ROW0:.*]] = vector.splat %[[A]] : vector<3xf32>
2733   %row0 = vector.extract %matrix[0] : vector<3xf32> from vector<2x3xf32>
2734   // CHECK: %[[ROW1:.*]] = vector.splat %[[B]] : vector<3xf32>
2735   %row1 = vector.extract %matrix[1] : vector<3xf32> from vector<2x3xf32>
2736   // CHECK: return %[[ROW0]], %[[ROW1]]
2737   return %row0, %row1 : vector<3xf32>, vector<3xf32>
2738 }
2739
2740 // -----
2741
2742 // CHECK-LABEL: func @extract_2d_from_from_elements(
2743 //  CHECK-SAME:     %[[A:.*]]: f32, %[[B:.*]]: f32)
     // Extracting a 2x2 slice from a 3x2x2 from_elements is expected to produce
     // a smaller from_elements built from the four operands of that slice.
2744 func.func @extract_2d_from_from_elements(%a: f32, %b: f32) -> (vector<2x2xf32>, vector<2x2xf32>) {
2745   %cube = vector.from_elements %a, %a, %a, %b, %b, %b, %b, %a, %b, %a, %a, %b : vector<3x2x2xf32>
2746   // CHECK: %[[SLICE0:.*]] = vector.from_elements %[[A]], %[[A]], %[[A]], %[[B]] : vector<2x2xf32>
2747   %slice0 = vector.extract %cube[0] : vector<2x2xf32> from vector<3x2x2xf32>
2748   // CHECK: %[[SLICE1:.*]] = vector.from_elements %[[B]], %[[B]], %[[B]], %[[A]] : vector<2x2xf32>
2749   %slice1 = vector.extract %cube[1] : vector<2x2xf32> from vector<3x2x2xf32>
2750   // CHECK: return %[[SLICE0]], %[[SLICE1]]
2751   return %slice0, %slice1 : vector<2x2xf32>, vector<2x2xf32>
2752 }
2753
2754 // -----
2755
2756 // CHECK-LABEL: func @from_elements_to_splat(
2757 //  CHECK-SAME:     %[[A:.*]]: f32, %[[B:.*]]: f32)
     // A from_elements whose operands are all the same value is expected to turn
     // into a splat (2-D and 0-D cases); one with mixed operands is expected to
     // stay a from_elements.
2758 func.func @from_elements_to_splat(%a: f32, %b: f32) -> (vector<2x3xf32>, vector<2x3xf32>, vector<f32>) {
2759   // CHECK: %[[UNIFORM:.*]] = vector.splat %[[A]] : vector<2x3xf32>
2760   %uniform = vector.from_elements %a, %a, %a, %a, %a, %a : vector<2x3xf32>
2761   // CHECK: %[[MIXED:.*]] = vector.from_elements {{.*}} : vector<2x3xf32>
2762   %mixed = vector.from_elements %a, %a, %a, %a, %b, %a : vector<2x3xf32>
2763   // CHECK: %[[UNIFORM_0D:.*]] = vector.splat %[[A]] : vector<f32>
2764   %uniform_0d = vector.from_elements %a : vector<f32>
2765   // CHECK: return %[[UNIFORM]], %[[MIXED]], %[[UNIFORM_0D]]
2766   return %uniform, %mixed, %uniform_0d : vector<2x3xf32>, vector<2x3xf32>, vector<f32>
2767 }
2768
2769
2770 // -----
2771
     // Inserts a scalar function argument into a vector produced by
     // llvm.mlir.undef. The expectations require that both the undef op and the
     // vector.insert are still present in the output.
     // NOTE(review): presumably guards against a past crash or mis-fold when the
     // insert destination is not a vector-dialect constant; confirm against
     // the commit that added this case.
2772 // CHECK-LABEL: func @vector_insert_const_regression(
2773 //       CHECK:   llvm.mlir.undef
2774 //       CHECK:   vector.insert
2775 func.func @vector_insert_const_regression(%arg0: i8) -> vector<4xi8> {
2776   %0 = llvm.mlir.undef : vector<4xi8>
2777   %1 = vector.insert %arg0, %0 [0] : i8 into vector<4xi8>
2778   return %1 : vector<4xi8>
2779 }
2780
2781 // -----
2782
2783 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract
     // The slice takes a single position in every outer dim and the full
     // innermost dim, then is shape_cast to 1-D. The expected output is one
     // vector.extract at [0, 0, 0, 0, 0] whose result is returned directly.
2784 // CHECK:        %[[ROW:.+]] = vector.extract {{.*}}[0, 0, 0, 0, 0] : vector<4xi32> from vector<8x1x2x1x1x4xi32>
2785 // CHECK-NEXT:   return %[[ROW]] :  vector<4xi32>
2786 func.func @contiguous_extract_strided_slices_to_extract(%src : vector<8x1x2x1x1x4xi32>) -> vector<4xi32> {
2787   %slice = vector.extract_strided_slice %src {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1, 4], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x4xi32>
2788   %flat = vector.shape_cast %slice : vector<1x1x1x1x1x4xi32> to vector<4xi32>
2789   return %flat : vector<4xi32>
2790 }
2791
2792 // -----
2793
2794 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_shorter_size_list
     // Same shape of test as the full-length case, but the offsets/sizes/strides
     // lists have only five entries for a rank-6 source. The expected output is
     // one vector.extract at [0, 0, 0, 0] yielding the trailing 1x4 slice.
2795 // CHECK:        %[[TAIL:.+]] = vector.extract {{.*}}[0, 0, 0, 0] : vector<1x4xi32> from vector<8x1x2x1x1x4xi32>
2796 // CHECK-NEXT:   return %[[TAIL]] :  vector<1x4xi32>
2797 func.func @contiguous_extract_strided_slices_to_extract_shorter_size_list(%src : vector<8x1x2x1x1x4xi32>) -> vector<1x4xi32> {
2798   %slice = vector.extract_strided_slice %src {offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1], strides = [1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x4xi32>
2799   %tail = vector.shape_cast %slice : vector<1x1x1x1x1x4xi32> to vector<1x4xi32>
2800   return %tail : vector<1x4xi32>
2801 }
2802
2803 // -----
2804
2805 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_unit_outer_size
     // Negative case: the outermost slice size is 8 rather than 1, and the
     // extract_strided_slice is expected to remain in the output.
2806 // CHECK-NEXT:   vector.extract_strided_slice
2807 func.func @contiguous_extract_strided_slices_to_extract_failure_non_unit_outer_size(%src : vector<8x1x2x1x1x4xi32>) -> vector<8x1x1x1x1x4xi32> {
2808   %slice = vector.extract_strided_slice %src {offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 1, 1, 1, 1, 4], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<8x1x1x1x1x4xi32>
2809   return %slice : vector<8x1x1x1x1x4xi32>
2810 }
2811
2812 // -----
2813
2814 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_full_size
     // Negative case: the innermost slice size is 2 while the source's innermost
     // dim is 4, and the extract_strided_slice is expected to remain in the
     // output.
2815 // CHECK-NEXT:   vector.extract_strided_slice
2816 func.func @contiguous_extract_strided_slices_to_extract_failure_non_full_size(%src : vector<8x1x2x1x1x4xi32>) -> vector<1x1x1x1x1x2xi32> {
2817   %slice = vector.extract_strided_slice %src {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1, 2], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x2xi32>
2818   return %slice : vector<1x1x1x1x1x2xi32>
2819 }
2820
2821 // -----
2822
2823 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_full_inner_size
     // Negative case: a middle dim is taken in full (size 2) while the innermost
     // dim is only partially taken (size 1 of 4), and the extract_strided_slice
     // is expected to remain in the output.
2824 // CHECK-NEXT:    vector.extract_strided_slice
2825 func.func @contiguous_extract_strided_slices_to_extract_failure_non_full_inner_size(%src : vector<8x1x2x1x1x4xi32>) -> vector<1x1x2x1x1x1xi32> {
2826   %slice = vector.extract_strided_slice %src {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 2, 1, 1, 1], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x2x1x1x1xi32>
2827   return %slice : vector<1x1x2x1x1x1xi32>
2828 }