mlir/test/Dialect/Vector/canonicalize.mlir

   1 // RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file -allow-unregistered-dialect | FileCheck %s
   2
   3 // CHECK-LABEL: create_vector_mask_to_constant_mask
     // A create_mask whose operands are all constants is expected to be rewritten
     // into a constant_mask carrying the same sizes.
   4 func.func @create_vector_mask_to_constant_mask() -> (vector<4x3xi1>) {
   5   %c2 = arith.constant 2 : index
   6   %c3 = arith.constant 3 : index
   7   // CHECK: vector.constant_mask [3, 2] : vector<4x3xi1>
   8   %0 = vector.create_mask %c3, %c2 : vector<4x3xi1>
   9   return %0 : vector<4x3xi1>
  10 }
  11 // -----
  12
  13 // CHECK-LABEL: create_scalable_vector_mask_to_constant_mask
     // A negative constant size on a scalable dim is expected to become the
     // all-false constant_mask [0].
  14 func.func @create_scalable_vector_mask_to_constant_mask() -> (vector<[8]xi1>) {
  15   %c-1 = arith.constant -1 : index
  16   // CHECK: vector.constant_mask [0] : vector<[8]xi1>
  17   %0 = vector.create_mask %c-1 : vector<[8]xi1>
  18   return %0 : vector<[8]xi1>
  19 }
  20
  21 // -----
  22
  23 // CHECK-LABEL: create_vector_mask_to_constant_mask_truncation
     // A constant size larger than its dim (5 on a dim of 4) is expected to be
     // clamped to the dim size in the resulting constant_mask.
  24 func.func @create_vector_mask_to_constant_mask_truncation() -> (vector<4x3xi1>) {
  25   %c2 = arith.constant 2 : index
  26   %c5 = arith.constant 5 : index
  27   // CHECK: vector.constant_mask [4, 2] : vector<4x3xi1>
  28   %0 = vector.create_mask %c5, %c2 : vector<4x3xi1>
  29   return %0 : vector<4x3xi1>
  30 }
  31
  32 // -----
  33
  34 // CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_neg
     // A negative constant size in one dim is expected to yield an entirely empty
     // constant_mask ([0, 0]), even though the other size (5) exceeds its dim.
  35 func.func @create_vector_mask_to_constant_mask_truncation_neg() -> (vector<4x3xi1>) {
  36   %cneg2 = arith.constant -2 : index
  37   %c5 = arith.constant 5 : index
  38   // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1>
  39   %0 = vector.create_mask %c5, %cneg2 : vector<4x3xi1>
  40   return %0 : vector<4x3xi1>
  41 }
  42
  43 // -----
  44
  45 // CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_zero
     // A zero constant size in one dim is expected to yield an entirely empty
     // constant_mask ([0, 0]) regardless of the other size.
  46 func.func @create_vector_mask_to_constant_mask_truncation_zero() -> (vector<4x3xi1>) {
  47   %c2 = arith.constant 2 : index
  48   %c0 = arith.constant 0 : index
  49   // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1>
  50   %0 = vector.create_mask %c0, %c2 : vector<4x3xi1>
  51   return %0 : vector<4x3xi1>
  52 }
  53
  54 // -----
  55
  56 // CHECK-LABEL: create_vector_mask_to_constant_mask_scalable_all_true
     // The scalable [16] dim is masked with vscale * 16 and the fixed dim with its
     // full size 8; a constant_mask [8, 16] is expected.
  57 func.func @create_vector_mask_to_constant_mask_scalable_all_true() -> (vector<8x[16]xi1>) {
  58   %c8 = arith.constant 8 : index
  59   %c16 = arith.constant 16 : index
  60   %0 = vector.vscale
  61   %1 = arith.muli %0, %c16 : index
  62   // CHECK: vector.constant_mask [8, 16] : vector<8x[16]xi1>
  63   %10 = vector.create_mask %c8, %1 : vector<8x[16]xi1>
  64   return %10 : vector<8x[16]xi1>
  65 }
  66
  67 // -----
  68
  69 // CHECK-LABEL: create_mask_transpose_to_transposed_create_mask
  70 //  CHECK-SAME: %[[DIM0:.*]]: index, %[[DIM1:.*]]: index, %[[DIM2:.*]]: index
     // A transpose of a create_mask is expected to become a create_mask whose
     // operands are permuted the same way ([2, 0, 1]); no transpose should remain.
  71 func.func @create_mask_transpose_to_transposed_create_mask(
  72   %dim0: index, %dim1: index, %dim2: index) -> (vector<2x3x4xi1>, vector<4x2x3xi1>) {
  73   //     CHECK: vector.create_mask %[[DIM0]], %[[DIM1]], %[[DIM2]] : vector<2x3x4xi1>
  74   //     CHECK: vector.create_mask %[[DIM2]], %[[DIM0]], %[[DIM1]] : vector<4x2x3xi1>
  75   // CHECK-NOT: vector.transpose
  76   %0 = vector.create_mask %dim0, %dim1, %dim2 : vector<2x3x4xi1>
  77   %1 = vector.transpose %0, [2, 0, 1] : vector<2x3x4xi1> to vector<4x2x3xi1>
  78   return %0, %1 : vector<2x3x4xi1>, vector<4x2x3xi1>
  79 }
  80
  81 // -----
  82
  83 // CHECK-LABEL: extract_from_create_mask
  84 //  CHECK-SAME: %[[DIM0:.*]]: index, %[[DIM1:.*]]: index
     // Extracting at position 1, below the constant leading mask size 2, is
     // expected to fold to a create_mask over the remaining (scalable) dims.
  85 func.func @extract_from_create_mask(%dim0: index, %dim1: index) -> vector<[4]x[4]xi1> {
  86   %c2 = arith.constant 2 : index
  87   %mask = vector.create_mask %c2, %dim0, %dim1 : vector<4x[4]x[4]xi1>
  88   // CHECK: vector.create_mask %[[DIM0]], %[[DIM1]] : vector<[4]x[4]xi1>
  89   // CHECK-NOT: vector.extract
  90   %extract = vector.extract %mask[1] : vector<[4]x[4]xi1> from vector<4x[4]x[4]xi1>
  91   return %extract : vector<[4]x[4]xi1>
  92 }
  93
  94 // -----
  95
  96 // CHECK-LABEL: extract_from_create_mask_all_false
     // Extracting at position 2, which is not below the constant leading mask
     // size 2, is expected to fold to an all-false constant.
  97 func.func @extract_from_create_mask_all_false(%dim0: index, %dim1: index) -> vector<[4]x[4]xi1> {
  98   %c2 = arith.constant 2 : index
  99   %mask = vector.create_mask %c2, %dim0, %dim1 : vector<4x[4]x[4]xi1>
 100   // CHECK: arith.constant dense<false> : vector<[4]x[4]xi1>
 101   // CHECK-NOT: vector.extract
 102   %extract = vector.extract %mask[2] : vector<[4]x[4]xi1> from vector<4x[4]x[4]xi1>
 103   return %extract : vector<[4]x[4]xi1>
 104 }
 105
 106 // -----
 107
 108 // CHECK-LABEL: extract_from_create_mask_leading_scalable
 109 //  CHECK-SAME: %[[DIM0:.*]]: index
     // Same fold as above but with a scalable leading dim: position 1 is below the
     // constant size 3, so a create_mask over the trailing dim is expected.
 110 func.func @extract_from_create_mask_leading_scalable(%dim0: index) -> vector<8xi1> {
 111   %c3 = arith.constant 3 : index
 112   %mask = vector.create_mask %c3, %dim0 : vector<[4]x8xi1>
 113   // CHECK: vector.create_mask %[[DIM0]] : vector<8xi1>
 114   // CHECK-NOT: vector.extract
 115   %extract = vector.extract %mask[1] : vector<8xi1> from vector<[4]x8xi1>
 116   return %extract : vector<8xi1>
 117 }
 118
 119 // -----
 120
 121 // CHECK-LABEL: extract_from_create_mask_dynamic_position
 122 //  CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
     // The dynamic index addresses a dim whose constant mask size (4) equals the
     // dim size, and the static index 2 is below size 3; the extract is expected
     // to fold to a create_mask over the trailing dim.
 123 func.func @extract_from_create_mask_dynamic_position(%dim0: index, %index: index) -> vector<6xi1> {
 124   %c4 = arith.constant 4 : index
 125   %c3 = arith.constant 3 : index
 126   %mask = vector.create_mask %c3, %c4, %dim0 : vector<4x4x6xi1>
 127   // CHECK: vector.create_mask %[[DIM0]] : vector<6xi1>
 128   // CHECK-NOT: vector.extract
 129   %extract = vector.extract %mask[2, %index] : vector<6xi1> from vector<4x4x6xi1>
 130   return %extract : vector<6xi1>
 131 }
 132
 133 // -----
 134
 135 // CHECK-LABEL: @extract_scalar_poison_idx
     // A scalar extract with a static position of -1 is expected to be replaced by
     // ub.poison of the scalar type.
 136 func.func @extract_scalar_poison_idx(%a: vector<4x5xf32>) -> f32 {
 137   //  CHECK-NOT: vector.extract
 138   // CHECK-NEXT: ub.poison : f32
 139   %0 = vector.extract %a[-1, 0] : f32 from vector<4x5xf32>
 140   return %0 : f32
 141 }
 142
 143 // -----
 144
 145 // CHECK-LABEL: @extract_vector_poison_idx
     // A sub-vector extract with a static position of -1 is expected to be
     // replaced by ub.poison of the sub-vector type.
 146 func.func @extract_vector_poison_idx(%a: vector<4x5xf32>) -> vector<5xf32> {
 147   //  CHECK-NOT: vector.extract
 148   // CHECK-NEXT: ub.poison : vector<5xf32>
 149   %0 = vector.extract %a[-1] : vector<5xf32> from vector<4x5xf32>
 150   return %0 : vector<5xf32>
 151 }
 152
 153 // -----
 154
 155 // CHECK-LABEL: @extract_multiple_poison_idx
     // Several -1 static positions in one extract are likewise expected to fold to
     // a single ub.poison of the result type.
 156 func.func @extract_multiple_poison_idx(%a: vector<4x5x8xf32>)
 157     -> vector<8xf32> {
 158   //  CHECK-NOT: vector.extract
 159   // CHECK-NEXT: ub.poison : vector<8xf32>
 160   %0 = vector.extract %a[-1, -1] : vector<8xf32> from vector<4x5x8xf32>
 161   return %0 : vector<8xf32>
 162 }
 163
 164 // -----
 165
 166 // CHECK-LABEL: extract_from_create_mask_dynamic_position_all_false
 167 //  CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
     // The dynamic index addresses a dim whose constant mask size is 0, so the
     // extract is expected to fold to an all-false constant.
 168 func.func @extract_from_create_mask_dynamic_position_all_false(%dim0: index, %index: index) -> vector<6xi1> {
 169   %c0 = arith.constant 0 : index
 170   %c1 = arith.constant 1 : index
 171   %mask = vector.create_mask %c1, %c0, %dim0 : vector<1x4x6xi1>
 172   // CHECK: arith.constant dense<false> : vector<6xi1>
 173   // CHECK-NOT: vector.extract
 174   %extract = vector.extract %mask[0, %index] : vector<6xi1> from vector<1x4x6xi1>
 175   return %extract : vector<6xi1>
 176 }
 177
 178 // -----
 179
 180 // CHECK-LABEL: extract_from_create_mask_dynamic_position_unknown
 181 //  CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
     // Negative test: the dynamic index addresses a dim that is only partially
     // masked (size 2 of 4), so the create_mask and the extract must both remain.
 182 func.func @extract_from_create_mask_dynamic_position_unknown(%dim0: index, %index: index) -> vector<6xi1> {
 183   %c2 = arith.constant 2 : index
 184   %mask = vector.create_mask %c2, %dim0 : vector<4x6xi1>
 185   // CHECK: %[[C2:.*]] = arith.constant 2 : index
 186   // CHECK-NEXT: %[[MASK:.*]] = vector.create_mask %[[C2]], %[[DIM0]] : vector<4x6xi1>
 187   // CHECK-NEXT: vector.extract %[[MASK]][%[[INDEX]]] : vector<6xi1> from vector<4x6xi1>
 188   %extract = vector.extract %mask[%index] : vector<6xi1> from vector<4x6xi1>
 189   return %extract : vector<6xi1>
 190 }
 191
 192 // -----
 193
 194 // CHECK-LABEL: extract_from_create_mask_mixed_position_unknown
 195 //  CHECK-SAME: %[[DIM0:.*]]: index, %[[INDEX:.*]]: index
     // Negative test with a mixed static/dynamic position: the dynamic index
     // addresses a partially masked dim (size 2 of 4), so nothing is folded.
 196 func.func @extract_from_create_mask_mixed_position_unknown(%dim0: index, %index0: index) -> vector<4xi1> {
 197   %c2 = arith.constant 2 : index
 198   %mask = vector.create_mask %c2, %c2, %dim0 : vector<2x4x4xi1>
 199   // CHECK: %[[C2:.*]] = arith.constant 2 : index
 200   // CHECK-NEXT: %[[MASK:.*]] = vector.create_mask %[[C2]], %[[C2]], %[[DIM0]] : vector<2x4x4xi1>
 201   // CHECK-NEXT: vector.extract %[[MASK]][1, %[[INDEX]]] : vector<4xi1> from vector<2x4x4xi1>
 202   %extract = vector.extract %mask[1, %index0] : vector<4xi1> from vector<2x4x4xi1>
 203   return %extract : vector<4xi1>
 204 }
 205
 206 // -----
 207
 208 // CHECK-LABEL: extract_from_non_constant_create_mask
 209 //  CHECK-SAME: %[[DIM0:.*]]: index
     // Negative test: the leading mask size is not a constant, so the extract at
     // static position 0 must be left untouched.
 210 func.func @extract_from_non_constant_create_mask(%dim0: index) -> vector<[2]xi1> {
 211   %mask = vector.create_mask %dim0, %dim0 : vector<[2]x[2]xi1>
 212   // CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM0]], %[[DIM0]] : vector<[2]x[2]xi1>
 213   // CHECK-NEXT: vector.extract %[[MASK]][0] : vector<[2]xi1> from vector<[2]x[2]xi1>
 214   %extract = vector.extract %mask[0] : vector<[2]xi1> from vector<[2]x[2]xi1>
 215   return %extract : vector<[2]xi1>
 216 }
 217
 218 // -----
 219
 220 // CHECK-LABEL: constant_mask_transpose_to_transposed_constant_mask
     // A transpose of a constant_mask is expected to become a constant_mask whose
     // sizes are permuted the same way ([2, 0, 1]); no transpose should remain.
 221 func.func @constant_mask_transpose_to_transposed_constant_mask() -> (vector<2x3x4xi1>, vector<4x2x3xi1>) {
 222   //     CHECK: vector.constant_mask [1, 2, 3] : vector<2x3x4xi1>
 223   //     CHECK: vector.constant_mask [3, 1, 2] : vector<4x2x3xi1>
 224   // CHECK-NOT: vector.transpose
 225   %0 = vector.constant_mask [1, 2, 3] : vector<2x3x4xi1>
 226   %1 = vector.transpose %0, [2, 0, 1] : vector<2x3x4xi1> to vector<4x2x3xi1>
 227   return %0, %1 : vector<2x3x4xi1>, vector<4x2x3xi1>
 228 }
 229
 230 // -----
 231 // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // Slice at offsets [0, 0] of size 2x2 lies entirely inside the [2, 2] masked
     // region, so the sliced mask is expected to be [2, 2].
 232 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
 233   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 234   %1 = vector.extract_strided_slice %0
 235     {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]}
 236       : vector<4x3xi1> to vector<2x2xi1>
 237   // CHECK: vector.constant_mask [2, 2] : vector<2x2xi1>
 238   return %1 : vector<2x2xi1>
 239 }
 240
 241 // -----
 242 // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // Slice at row offset 1 keeps only one masked row (row 1) and both masked
     // columns, so the sliced mask is expected to be [1, 2].
 243 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
 244   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 245   %1 = vector.extract_strided_slice %0
 246     {offsets = [1, 0], sizes = [2, 2], strides = [1, 1]}
 247       : vector<4x3xi1> to vector<2x2xi1>
 248   // CHECK: vector.constant_mask [1, 2] : vector<2x2xi1>
 249   return %1 : vector<2x2xi1>
 250 }
 251
 252 // -----
 253 // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // Slice at column offset 1 keeps both masked rows and only one masked column
     // (column 1), so the sliced mask is expected to be [2, 1].
 254 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
 255   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 256   %1 = vector.extract_strided_slice %0
 257     {offsets = [0, 1], sizes = [2, 2], strides = [1, 1]}
 258       : vector<4x3xi1> to vector<2x2xi1>
 259   // CHECK: vector.constant_mask [2, 1] : vector<2x2xi1>
 260   return %1 : vector<2x2xi1>
 261 }
 262
 263 // -----
 264 // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // Slice at row offset 2 starts past the two masked rows, so the sliced mask
     // is expected to be empty ([0, 0]).
 265 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
 266   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 267   %1 = vector.extract_strided_slice %0
 268     {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]}
 269       : vector<4x3xi1> to vector<2x2xi1>
 270   // CHECK: vector.constant_mask [0, 0] : vector<2x2xi1>
 271   return %1 : vector<2x2xi1>
 272 }
 273
 274 // -----
 275 // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // Slice at column offset 2 starts past the two masked columns, so the sliced
     // mask is expected to be empty ([0, 0]).
 276 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
 277   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 278   %1 = vector.extract_strided_slice %0
 279     {offsets = [0, 2], sizes = [2, 1], strides = [1, 1]}
 280       : vector<4x3xi1> to vector<2x1xi1>
 281   // CHECK: vector.constant_mask [0, 0] : vector<2x1xi1>
 282   return %1 : vector<2x1xi1>
 283 }
 284
 285 // -----
 286 // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // A 2x1 slice at column offset 1 covers both masked rows and the single
     // masked column 1, so the sliced mask is expected to be [2, 1].
 287 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
 288   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 289   %1 = vector.extract_strided_slice %0
 290     {offsets = [0, 1], sizes = [2, 1], strides = [1, 1]}
 291       : vector<4x3xi1> to vector<2x1xi1>
 292   // CHECK: vector.constant_mask [2, 1] : vector<2x1xi1>
 293   return %1 : vector<2x1xi1>
 294 }
 295
 296 // -----
 297 // CHECK-LABEL: extract_strided_slice_of_constant_mask
     // A 2x1 slice at offsets [1, 1] keeps one masked row (row 1) and one masked
     // column (column 1), so the sliced mask is expected to be [1, 1].
 298 func.func @extract_strided_slice_of_constant_mask() -> (vector<2x1xi1>) {
 299   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
 300   %1 = vector.extract_strided_slice %0
 301     {offsets = [1, 1], sizes = [2, 1], strides = [1, 1]}
 302       : vector<4x3xi1> to vector<2x1xi1>
 303   // CHECK: vector.constant_mask [1, 1] : vector<2x1xi1>
 304   return %1 : vector<2x1xi1>
 305 }
 306
 307 // -----
 308
 309 // CHECK-LABEL: extract_strided_fold
 310 //  CHECK-SAME: (%[[ARG:.*]]: vector<4x3xi1>)
 311 //  CHECK-NEXT:   return %[[ARG]] : vector<4x3xi1>
     // A slice at offset zero that spans the whole source vector is expected to
     // fold to the source itself.
 312 func.func @extract_strided_fold(%arg : vector<4x3xi1>) -> (vector<4x3xi1>) {
 313   %0 = vector.extract_strided_slice %arg
 314     {offsets = [0, 0], sizes = [4, 3], strides = [1, 1]}
 315       : vector<4x3xi1> to vector<4x3xi1>
 316   return %0 : vector<4x3xi1>
 317 }
 318
 319 // -----
 320
 321 // CHECK-LABEL: extract_strided_fold_insert
 322 //  CHECK-SAME: (%[[ARG:.*]]: vector<4x4xf32>
 323 //  CHECK-NEXT:   return %[[ARG]] : vector<4x4xf32>
     // Extracting exactly the region that was just inserted (same offsets, same
     // 4x4 shape) is expected to fold to the inserted value.
 324 func.func @extract_strided_fold_insert(%a: vector<4x4xf32>, %b: vector<8x16xf32>)
 325   -> (vector<4x4xf32>) {
 326   %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
 327     : vector<4x4xf32> into vector<8x16xf32>
 328   %1 = vector.extract_strided_slice %0
 329     {offsets = [2, 2], sizes = [4, 4], strides = [1, 1]}
 330       : vector<8x16xf32> to vector<4x4xf32>
 331   return %1 : vector<4x4xf32>
 332 }
 333
 334 // -----
 335
 336 // Case where the vector extracted (4x4) is a subset of the vector inserted
     // (6x4): the extract is expected to read directly from the inserted value.
 337 // CHECK-LABEL: extract_strided_fold_insert
 338 //  CHECK-SAME: (%[[ARG0:.*]]: vector<6x4xf32>
 339 //  CHECK-NEXT:   %[[EXT:.*]] = vector.extract_strided_slice %[[ARG0]]
 340 //  CHECK-SAME:     {offsets = [0, 0], sizes = [4, 4], strides = [1, 1]}
 341 //  CHECK-SAME:       : vector<6x4xf32> to vector<4x4xf32>
 342 //  CHECK-NEXT:   return %[[EXT]] : vector<4x4xf32>
 343 func.func @extract_strided_fold_insert(%a: vector<6x4xf32>, %b: vector<8x16xf32>)
 344   -> (vector<4x4xf32>) {
 345   %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
 346     : vector<6x4xf32> into vector<8x16xf32>
 347   %1 = vector.extract_strided_slice %0
 348     {offsets = [2, 2], sizes = [4, 4], strides = [1, 1]}
 349       : vector<8x16xf32> to vector<4x4xf32>
 350   return %1 : vector<4x4xf32>
 351 }
 352
 353 // -----
 354
 355 // Negative test where the extracted slice (6x4) is not a subset of the
     // inserted slice (4x4); both ops are expected to remain unchanged.
 356 // CHECK-LABEL: extract_strided_fold_negative
 357 //  CHECK-SAME: (%[[ARG0:.*]]: vector<4x4xf32>, %[[ARG1:.*]]: vector<8x16xf32>
 358 //       CHECK:   %[[INS:.*]] = vector.insert_strided_slice %[[ARG0]], %[[ARG1]]
 359 //  CHECK-SAME:     {offsets = [2, 2], strides = [1, 1]}
 360 //  CHECK-SAME:       : vector<4x4xf32> into vector<8x16xf32>
 361 //       CHECK:   %[[EXT:.*]] = vector.extract_strided_slice %[[INS]]
 362 //  CHECK-SAME:     {offsets = [2, 2], sizes = [6, 4], strides = [1, 1]}
 363 //  CHECK-SAME:       : vector<8x16xf32> to vector<6x4xf32>
 364 //  CHECK-NEXT:   return %[[EXT]] : vector<6x4xf32>
 365 func.func @extract_strided_fold_negative(%a: vector<4x4xf32>, %b: vector<8x16xf32>)
 366   -> (vector<6x4xf32>) {
 367   %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
 368     : vector<4x4xf32> into vector<8x16xf32>
 369   %1 = vector.extract_strided_slice %0
 370     {offsets = [2, 2], sizes = [6, 4], strides = [1, 1]}
 371       : vector<8x16xf32> to vector<6x4xf32>
 372   return %1 : vector<6x4xf32>
 373 }
 374
 375 // -----
 376
 377 // Case where we need to go through 2 levels of insert_strided_slice: the
     // extracted element [0, 1] is not touched by the second insert (row 1) and
     // is the first element of the slice written by the first insert.
 378 // CHECK-LABEL: extract_strided_fold_insert
 379 //  CHECK-SAME: (%[[ARG0:.*]]: vector<2x8xf32>, %[[ARG1:.*]]: vector<1x4xf32>,
 380 //  CHECK-NEXT:   %[[EXT:.*]] = vector.extract_strided_slice %[[ARG1]]
 381 //  CHECK-SAME:     {offsets = [0, 0], sizes = [1, 1], strides = [1, 1]}
 382 //  CHECK-SAME:       : vector<1x4xf32> to vector<1x1xf32>
 383 //  CHECK-NEXT:   return %[[EXT]] : vector<1x1xf32>
 384 func.func @extract_strided_fold_insert(%a: vector<2x8xf32>, %b: vector<1x4xf32>,
 385                                   %c : vector<1x4xf32>) -> (vector<1x1xf32>) {
 386   %0 = vector.insert_strided_slice %b, %a {offsets = [0, 1], strides = [1, 1]}
 387     : vector<1x4xf32> into vector<2x8xf32>
 388   %1 = vector.insert_strided_slice %c, %0 {offsets = [1, 0], strides = [1, 1]}
 389     : vector<1x4xf32> into vector<2x8xf32>
 390   %2 = vector.extract_strided_slice %1
 391       {offsets = [0, 1], sizes = [1, 1], strides = [1, 1]}
 392         : vector<2x8xf32> to vector<1x1xf32>
 393   return %2 : vector<1x1xf32>
 394 }
 395
 396 // -----
 397
 398 // CHECK-LABEL: transpose_1D_identity
 399 // CHECK-SAME: ([[ARG:%.*]]: vector<4xf32>)
     // A 1-D transpose with the identity permutation is expected to fold away,
     // returning the argument directly.
 400 func.func @transpose_1D_identity(%arg : vector<4xf32>) -> vector<4xf32> {
 401   // CHECK-NOT: transpose
 402   %0 = vector.transpose %arg, [0] : vector<4xf32> to vector<4xf32>
 403   // CHECK-NEXT: return [[ARG]]
 404   return %0 : vector<4xf32>
 405 }
 406
 407 // -----
 408
 409 // CHECK-LABEL: transpose_2D_identity
 410 // CHECK-SAME: ([[ARG:%.*]]: vector<4x3xf32>)
     // A 2-D transpose with the identity permutation [0, 1] is expected to fold
     // away, returning the argument directly.
 411 func.func @transpose_2D_identity(%arg : vector<4x3xf32>) -> vector<4x3xf32> {
 412   // CHECK-NOT: transpose
 413   %0 = vector.transpose %arg, [0, 1] : vector<4x3xf32> to vector<4x3xf32>
 414   // CHECK-NEXT: return [[ARG]]
 415   return %0 : vector<4x3xf32>
 416 }
 417
 418 // -----
 419
 420 // CHECK-LABEL: transpose_3D_identity
 421 // CHECK-SAME: ([[ARG:%.*]]: vector<4x3x2xf32>)
     // A 3-D transpose with the identity permutation [0, 1, 2] is expected to fold
     // away, returning the argument directly.
 422 func.func @transpose_3D_identity(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> {
 423   // CHECK-NOT: transpose
 424   %0 = vector.transpose %arg, [0, 1, 2] : vector<4x3x2xf32> to vector<4x3x2xf32>
 425   // CHECK-NEXT: return [[ARG]]
 426   return %0 : vector<4x3x2xf32>
 427 }
 428
 429 // -----
 430
 431 // CHECK-LABEL: transpose_2D_sequence
 432 // CHECK-SAME: ([[ARG:%.*]]: vector<4x3xf32>)
     // A chain of 2-D transposes whose permutations compose to the identity is
     // expected to disappear entirely, so both addf operands become the argument.
 433 func.func @transpose_2D_sequence(%arg : vector<4x3xf32>) -> vector<4x3xf32> {
 434   // CHECK-NOT: transpose
 435   %0 = vector.transpose %arg, [1, 0] : vector<4x3xf32> to vector<3x4xf32>
 436   %1 = vector.transpose %0, [0, 1] : vector<3x4xf32> to vector<3x4xf32>
 437   %2 = vector.transpose %1, [1, 0] : vector<3x4xf32> to vector<4x3xf32>
 438   %3 = vector.transpose %2, [0, 1] : vector<4x3xf32> to vector<4x3xf32>
 439   // CHECK: [[ADD:%.*]] = arith.addf [[ARG]], [[ARG]]
 440   %4 = arith.addf %2, %3 : vector<4x3xf32>
 441   // CHECK-NEXT: return [[ADD]]
 442   return %4 : vector<4x3xf32>
 443 }
 444
 445 // -----
 446
 447 // CHECK-LABEL: transpose_3D_sequence
 448 // CHECK-SAME: ([[ARG:%.*]]: vector<4x3x2xf32>)
     // Chains of 3-D transposes are expected to be composed into single
     // transposes of the argument, and chains that compose to the identity are
     // expected to be replaced by the argument itself.
 449 func.func @transpose_3D_sequence(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> {
 450   // CHECK: [[T0:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
 451   %0 = vector.transpose %arg, [1, 2, 0] : vector<4x3x2xf32> to vector<3x2x4xf32>
 452   %1 = vector.transpose %0, [1, 0, 2] : vector<3x2x4xf32> to vector<2x3x4xf32>
 453   // CHECK: [[T1:%.*]] = vector.transpose [[ARG]], [2, 1, 0]
 454   %2 = vector.transpose %1, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
 455   %3 = vector.transpose %2, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
 456   // CHECK: [[MUL:%.*]] = arith.mulf [[T0]], [[T1]]
 457   %4 = arith.mulf %1, %3 : vector<2x3x4xf32>
 458   // CHECK: [[T5:%.*]] = vector.transpose [[MUL]], [2, 1, 0]
 459   %5 = vector.transpose %4, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
 460   // CHECK-NOT: transpose
 461   %6 = vector.transpose %3, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
 462   // CHECK: [[ADD:%.*]] = arith.addf [[T5]], [[ARG]]
 463   %7 = arith.addf %5, %6 : vector<4x3x2xf32>
 464   // CHECK-NEXT: return [[ADD]]
 465   return %7 : vector<4x3x2xf32>
 466 }
 467
 468 // -----
 469
 470 // CHECK-LABEL: cast_transfers
     // Transfers through a memref.cast to a dynamic shape are expected to operate
     // on the statically shaped source memref and be marked in_bounds.
 471 func.func @cast_transfers(%A: memref<4x8xf32>) -> (vector<4x8xf32>) {
 472   %c0 = arith.constant 0 : index
 473   %f0 = arith.constant 0.0 : f32
 474   %0 = memref.cast %A : memref<4x8xf32> to memref<?x?xf32>
 475
 476   // CHECK: vector.transfer_read %{{.*}} {in_bounds = [true, true]} : memref<4x8xf32>, vector<4x8xf32>
 477   %1 = vector.transfer_read %0[%c0, %c0], %f0 : memref<?x?xf32>, vector<4x8xf32>
 478
 479   // CHECK: vector.transfer_write %{{.*}} {in_bounds = [true, true]} : vector<4x8xf32>, memref<4x8xf32>
 480   vector.transfer_write %1, %0[%c0, %c0] : vector<4x8xf32>, memref<?x?xf32>
 481   return %1 : vector<4x8xf32>
 482 }
 483
 484 // -----
 485
 486 // CHECK-LABEL: cast_transfers
     // Tensor variant: a transfer_read through a tensor.cast to a dynamic shape is
     // expected to read the statically shaped source tensor and be marked
     // in_bounds.
 487 func.func @cast_transfers(%A: tensor<4x8xf32>) -> (vector<4x8xf32>) {
 488   %c0 = arith.constant 0 : index
 489   %f0 = arith.constant 0.0 : f32
 490   %0 = tensor.cast %A : tensor<4x8xf32> to tensor<?x?xf32>
 491
 492   // CHECK: vector.transfer_read %{{.*}} {in_bounds = [true, true]} : tensor<4x8xf32>, vector<4x8xf32>
 493   %1 = vector.transfer_read %0[%c0, %c0], %f0 : tensor<?x?xf32>, vector<4x8xf32>
 494
 495   return %1 : vector<4x8xf32>
 496 }
 497
 498 // -----
 499
 500 // CHECK-LABEL: func @insert_extract_transpose_2d(
 501 //  CHECK-SAME: %[[V:[a-zA-Z0-9]*]]: vector<2x3xf32>,
 502 //  CHECK-SAME: %[[F0:[a-zA-Z0-9]*]]: f32,
 503 //  CHECK-SAME: %[[F1:[a-zA-Z0-9]*]]: f32,
 504 //  CHECK-SAME: %[[F2:[a-zA-Z0-9]*]]: f32,
 505 //  CHECK-SAME: %[[F3:[a-zA-Z0-9]*]]: f32
     // Scalar extracts are expected to be traced back through chains of scalar
     // inserts and 2-D transposes to the originally inserted scalars, leaving only
     // the return.
 506 func.func @insert_extract_transpose_2d(
 507     %v: vector<2x3xf32>, %f0: f32, %f1: f32, %f2: f32, %f3: f32)
 508 -> (f32, f32, f32)
 509 {
 510   %0 = vector.insert %f0, %v[0, 0] : f32 into vector<2x3xf32>
 511   %1 = vector.insert %f1, %0[0, 1] : f32 into vector<2x3xf32>
 512   %2 = vector.insert %f2, %1[1, 0] : f32 into vector<2x3xf32>
 513   %3 = vector.insert %f3, %2[1, 1] : f32 into vector<2x3xf32>
 514   %4 = vector.transpose %3, [1, 0] : vector<2x3xf32> to vector<3x2xf32>
 515   %5 = vector.insert %f3, %4[1, 0] : f32 into vector<3x2xf32>
 516   %6 = vector.transpose %5, [1, 0] : vector<3x2xf32> to vector<2x3xf32>
 517
 518   // Expected %f2 from %2 = vector.insert %f2, %1[1, 0].
 519   %r1 = vector.extract %3[1, 0] : f32 from vector<2x3xf32>
 520
 521   // Expected %f1 from %1 = vector.insert %f1, %0[0, 1] followed by
 522   // transpose [1, 0].
 523   %r2 = vector.extract %4[1, 0] : f32 from vector<3x2xf32>
 524
 525   // Expected %f2 from %2 = vector.insert %f2, %1[1, 0] followed by double
 526   // transpose [1, 0].
 527   %r3 = vector.extract %6[1, 0] : f32 from vector<2x3xf32>
 528
 529   // CHECK-NEXT: return %[[F2]], %[[F1]], %[[F2]] : f32, f32, f32
 530   return %r1, %r2, %r3 : f32, f32, f32
 531 }
 532
 533 // -----
 534
 535 // CHECK-LABEL: insert_extract_chain
 536 //  CHECK-SAME: %[[V234:[a-zA-Z0-9]*]]: vector<2x3x4xf32>
 537 //  CHECK-SAME: %[[V34:[a-zA-Z0-9]*]]: vector<3x4xf32>
 538 //  CHECK-SAME: %[[V4:[a-zA-Z0-9]*]]: vector<4xf32>
     // Exercises how an extract is folded through a chain of inserts, depending on
     // how the extract position relates to each insert position (equal, prefix of
     // one another, or disjoint).
 539 func.func @insert_extract_chain(%v234: vector<2x3x4xf32>, %v34: vector<3x4xf32>, %v4: vector<4xf32>)
 540     -> (vector<4xf32>, vector<4xf32>, vector<3x4xf32>, vector<3x4xf32>) {
 541   // CHECK-NEXT: %[[A34:.*]] = vector.insert
 542   %A34 = vector.insert %v34, %v234[0]: vector<3x4xf32> into vector<2x3x4xf32>
 543   // CHECK-NEXT: %[[B34:.*]] = vector.insert
 544   %B34 = vector.insert %v34, %A34[1]: vector<3x4xf32> into vector<2x3x4xf32>
 545   // CHECK-NEXT: %[[A4:.*]] = vector.insert
 546   %A4 = vector.insert %v4, %B34[1, 0]: vector<4xf32> into vector<2x3x4xf32>
 547   // CHECK-NEXT: %[[B4:.*]] = vector.insert
 548   %B4 = vector.insert %v4, %A4[1, 1]: vector<4xf32> into vector<2x3x4xf32>
 549
 550   // Case 2.a. [1, 1] == insertpos ([1, 1])
 551   // Match %B4 insertionpos and fold to its source(i.e. %V4).
 552    %r0 = vector.extract %B4[1, 1]: vector<4xf32> from vector<2x3x4xf32>
 553
 554   // Case 3.a. insertpos ([1]) is a prefix of [1, 0].
 555   // Traverse %B34 to its source(i.e. %V34@[*0*]).
 556   // CHECK-NEXT: %[[R1:.*]] = vector.extract %[[V34]][0]
 557    %r1 = vector.extract %B34[1, 0]: vector<4xf32> from vector<2x3x4xf32>
 558
 559   // Case 4. [1] is a prefix of insertpos ([1, 1]).
 560   // Cannot traverse %B4.
 561   // CHECK-NEXT: %[[R2:.*]] = vector.extract %[[B4]][1]
 562    %r2 = vector.extract %B4[1]: vector<3x4xf32> from vector<2x3x4xf32>
 563
 564   // Case 5. [0] is disjoint from insertpos ([1, 1]).
 565   // Traverse %B4 to its dest(i.e. %A4@[0]).
 566   // Traverse %A4 to its dest(i.e. %B34@[0]).
 567   // Traverse %B34 to its dest(i.e. %A34@[0]).
 568   // Match %A34 insertionpos and fold to its source(i.e. %V34).
 569    %r3 = vector.extract %B4[0]: vector<3x4xf32> from vector<2x3x4xf32>
 570
 571   // CHECK: return %[[V4]], %[[R1]], %[[R2]], %[[V34]]
 572   return %r0, %r1, %r2, %r3:
 573     vector<4xf32>, vector<4xf32>, vector<3x4xf32>, vector<3x4xf32>
 574 }
 575
 576 // -----
 577
 578 // CHECK-LABEL: func @insert_extract_transpose_3d(
 579 //  CHECK-SAME: %[[V234:[a-zA-Z0-9]*]]: vector<2x3x4xf32>
     // Exercises extract folding through transpose/insert/transpose chains on 3-D
     // vectors: one case that folds to an extract from the argument and three
     // cases where the chain is expected to be kept.
 580 func.func @insert_extract_transpose_3d(
 581   %v234: vector<2x3x4xf32>, %v43: vector<4x3xf32>, %f0: f32)
 582     -> (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<3x4xf32>) {
 583
 584   %a432 = vector.transpose %v234, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
 585   %b432 = vector.insert %f0, %a432[0, 0, 1] : f32 into vector<4x3x2xf32>
 586   %c234 = vector.transpose %b432, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
 587   // Case 1. %c234 = transpose [2,1,0] posWithSentinels [1,2,-1] -> [-1,2,1]
 588   // Case 5. %b432 = insert [0,0,1] (inter([.,2,1], [.,0,1]) == 0) prop to %a432
 589   // Case 1. %a432 = transpose [2,1,0] posWithSentinels [-1,2,1] -> [1,2,-1]
 590   // can extract directly from %v234, the rest folds.
 591   // CHECK: %[[R0:.*]] = vector.extract %[[V234]][1, 2]
 592   %r0 = vector.extract %c234[1, 2] : vector<4xf32> from vector<2x3x4xf32>
 593
 594   // CHECK-NEXT: vector.transpose
 595   // CHECK-NEXT: vector.insert
 596   // CHECK-NEXT: %[[F234:.*]] = vector.transpose
 597   %d432 = vector.transpose %v234, [2, 1, 0] : vector<2x3x4xf32> to vector<4x3x2xf32>
 598   %e432 = vector.insert %f0, %d432[0, 2, 1] : f32 into vector<4x3x2xf32>
 599   %f234 = vector.transpose %e432, [2, 1, 0] : vector<4x3x2xf32> to vector<2x3x4xf32>
 600   // Case 1. %f234 = transpose [2,1,0] posWithSentinels [1,2,-1] -> [-1,2,1]
 601   // Case 4. %e432 = insert [0,2,1] (inter([.,2,1], [.,2,1]) != 0)
 602   // Bail, cannot do better than the current.
 603   // CHECK: %[[R1:.*]] = vector.extract %[[F234]]
 604   %r1 = vector.extract %f234[1, 2] : vector<4xf32> from vector<2x3x4xf32>
 605
 606   // CHECK-NEXT: vector.transpose
 607   // CHECK-NEXT: vector.insert
 608   // CHECK-NEXT: %[[H234:.*]] = vector.transpose
 609   %g243 = vector.transpose %v234, [0, 2, 1] : vector<2x3x4xf32> to vector<2x4x3xf32>
 610   %h243 = vector.insert %v43, %g243[0] : vector<4x3xf32> into vector<2x4x3xf32>
 611   %i234 = vector.transpose %h243, [0, 2, 1] : vector<2x4x3xf32> to vector<2x3x4xf32>
 612   // Case 1. %i234 = transpose [0,2,1] posWithSentinels [0,-1,-2] -> [0,-2,-1]
 613   // Case 3.b. %h243 = insert [0] is prefix of [0,.,.] but internal transpose.
 614   // Bail, cannot do better than the current.
 615   // CHECK: %[[R2:.*]] = vector.extract %[[H234]][0, 1]
 616   %r2 = vector.extract %i234[0, 1] : vector<4xf32> from vector<2x3x4xf32>
 617
 618   // CHECK-NEXT: vector.transpose
 619   // CHECK-NEXT: vector.insert
 620   // CHECK-NEXT: %[[K234:.*]] = vector.transpose
 621   %j243 = vector.transpose %v234, [0, 2, 1] : vector<2x3x4xf32> to vector<2x4x3xf32>
 622   %k243 = vector.insert %v43, %j243[0] : vector<4x3xf32> into vector<2x4x3xf32>
 623   %l234 = vector.transpose %k243, [0, 2, 1] : vector<2x4x3xf32> to vector<2x3x4xf32>
 624   // Case 1. %l234 = transpose [0,2,1] posWithSentinels [0,-1,-2] -> [0,-2,-1]
 625   // Case 2.b. %k243 = insert [0] == [0,.,.] but internal transpose.
 626   // Bail, cannot do better than the current.
 627   // CHECK: %[[R3:.*]] = vector.extract %[[K234]][0]
 628   %r3 = vector.extract %l234[0] : vector<3x4xf32> from vector<2x3x4xf32>
 629
 630   // CHECK-NEXT: return %[[R0]], %[[R1]], %[[R2]], %[[R3]]
 631   return %r0, %r1, %r2, %r3: vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<3x4xf32>
 632 }
 633
 634 // -----
 635
 636 // CHECK-LABEL: fold_extracts
 637 //  CHECK-SAME:   %[[A:[a-zA-Z0-9]*]]: vector<3x4x5x6xf32>
     // A chain of extracts is expected to collapse into one extract from the
     // original vector with the positions concatenated ([0] + [1, 2] + [3]).
 638 func.func @fold_extracts(%a : vector<3x4x5x6xf32>) -> (f32, vector<4x5x6xf32>) {
 639   %b = vector.extract %a[0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
 640   %c = vector.extract %b[1, 2] : vector<6xf32> from vector<4x5x6xf32>
 641   //  CHECK-NEXT: vector.extract %[[A]][0, 1, 2, 3] : f32 from vector<3x4x5x6xf32>
 642   %d = vector.extract %c[3] : f32 from vector<6xf32>
 643
 644   //  CHECK-NEXT: vector.extract %[[A]][0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
 645   %e = vector.extract %a[0] : vector<4x5x6xf32> from vector<3x4x5x6xf32>
 646
 647   //  CHECK-NEXT: return
 648   return %d, %e : f32, vector<4x5x6xf32>
 649 }
 650
 651 // -----
 652
 653 // CHECK-LABEL: fold_extract_transpose
 654 //  CHECK-SAME:   %[[A:[a-zA-Z0-9]*]]: vector<3x4x5x6xf32>
 655 //  CHECK-SAME:   %[[B:[a-zA-Z0-9]*]]: vector<3x6x5x6xf32>
     // An extract of a transpose is expected to fold into an extract of the
     // transpose source (with the position permuted) only when the transpose
     // leaves the extracted minor dim in place.
 656 func.func @fold_extract_transpose(
 657     %a : vector<3x4x5x6xf32>, %b : vector<3x6x5x6xf32>) -> (
 658       vector<6xf32>, vector<6xf32>, vector<6xf32>) {
 659   // [3] is a proper most minor identity map in transpose.
 660   // Permutation is a self inverse and we have:
 661   // [0, 2, 1] ^ -1 o [0, 1, 2] = [0, 2, 1] o [0, 1, 2]
 662   //                            = [0, 2, 1]
 663   //  CHECK-NEXT: vector.extract %[[A]][0, 2, 1] : vector<6xf32> from vector<3x4x5x6xf32>
 664   %0 = vector.transpose %a, [0, 2, 1, 3] : vector<3x4x5x6xf32> to vector<3x5x4x6xf32>
 665   %1 = vector.extract %0[0, 1, 2] : vector<6xf32> from vector<3x5x4x6xf32>
 666
 667   // [3] is a proper most minor identity map in transpose.
 668   // Permutation is not a self inverse and we have:
 669   // [1, 2, 0] ^ -1 o [0, 1, 2] = [2, 0, 1] o [0, 1, 2]
 670   //                            = [2, 0, 1]
 671   //  CHECK-NEXT: vector.extract %[[A]][2, 0, 1] : vector<6xf32> from vector<3x4x5x6xf32>
 672   %2 = vector.transpose %a, [1, 2, 0, 3] : vector<3x4x5x6xf32> to vector<4x5x3x6xf32>
 673   %3 = vector.extract %2[0, 1, 2] : vector<6xf32> from vector<4x5x3x6xf32>
 674
 675   // Not a minor identity map, so the intra-vector level has been permuted and
 676   //  the transpose and extract are expected to be kept.
 677   //  CHECK-NEXT: vector.transpose %[[B]], [0, 2, 3, 1]
     //  CHECK-NEXT: vector.extract %{{.*}}[0, 1, 2]
 678   %4 = vector.transpose %b, [0, 2, 3, 1] : vector<3x6x5x6xf32> to vector<3x5x6x6xf32>
 679   %5 = vector.extract %4[0, 1, 2] : vector<6xf32> from vector<3x5x6x6xf32>
 680
 681   return %1, %3, %5 : vector<6xf32>, vector<6xf32>, vector<6xf32>
 682 }
 683
 684 // -----
 685
 686 // CHECK-LABEL: fold_extract_broadcast
 687 //  CHECK-SAME:   %[[A:.*]]: f32
 688 //       CHECK:   return %[[A]] : f32
     // Extracting a scalar from a broadcast of a scalar is expected to fold to the
     // broadcast source.
 689 func.func @fold_extract_broadcast(%a : f32) -> f32 {
 690   %b = vector.broadcast %a : f32 to vector<1x2x4xf32>
 691   %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
 692   return %r : f32
 693 }
 694
 695 // -----
 696
 697 // CHECK-LABEL: fold_extract_broadcast_0dvec
 698 //  CHECK-SAME:   %[[A:.*]]: vector<f32>
 699 //       CHECK:   %[[B:.+]] = vector.extractelement %[[A]][] : vector<f32>
 700 //       CHECK:   return %[[B]] : f32
     // Extracting a scalar from a broadcast of a 0-D vector is expected to become
     // an extractelement from the 0-D source.
 701 func.func @fold_extract_broadcast_0dvec(%a : vector<f32>) -> f32 {
 702   %b = vector.broadcast %a : vector<f32> to vector<1x2x4xf32>
 703   %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
 704   return %r : f32
 705 }
 706
 707 // -----
 708
 709 // CHECK-LABEL: fold_extract_broadcast_negative
 710 //       CHECK:   vector.broadcast %{{.*}} : vector<1x1xf32> to vector<1x1x4xf32>
 711 //       CHECK:   vector.extract %{{.*}}[0, 0] : vector<4xf32> from vector<1x1x4xf32>
     // Negative case: the innermost unit dim of the vector<1x1xf32> source is
     // stretched to 4 by the broadcast, and the extracted value is a
     // vector<4xf32>. Both the broadcast and the extract are expected to remain.
 712 func.func @fold_extract_broadcast_negative(%a : vector<1x1xf32>) -> vector<4xf32> {
 713   %b = vector.broadcast %a : vector<1x1xf32> to vector<1x1x4xf32>
 714   %r = vector.extract %b[0, 0] : vector<4xf32> from vector<1x1x4xf32>
 715   return %r : vector<4xf32>
 716 }
 717
 718 // -----
 719
 720 // CHECK-LABEL: fold_extract_splat
 721 //  CHECK-SAME:   %[[A:.*]]: f32
 722 //       CHECK:   return %[[A]] : f32
     // Extracting a single element from a vector.splat of the scalar %a is
     // expected to fold to %a itself.
 723 func.func @fold_extract_splat(%a : f32) -> f32 {
 724   %b = vector.splat %a : vector<1x2x4xf32>
 725   %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
 726   return %r : f32
 727 }
 728
 729 // -----
 730
 731 // CHECK-LABEL: fold_extract_broadcast_vector
 732 //  CHECK-SAME:   %[[A:.*]]: vector<4xf32>
 733 //       CHECK:   return %[[A]] : vector<4xf32>
     // The extracted sub-vector has the same type as the broadcast source
     // (vector<4xf32>), so the extract is expected to fold to the source.
 734 func.func @fold_extract_broadcast_vector(%a : vector<4xf32>) -> vector<4xf32> {
 735   %b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32>
 736   %r = vector.extract %b[0, 1] : vector<4xf32> from vector<1x2x4xf32>
 737   return %r : vector<4xf32>
 738 }
 739
 740 // -----
 741
 742 // CHECK-LABEL: fold_extract_broadcast
 743 //  CHECK-SAME:   %[[A:.*]]: vector<4xf32>
 744 //       CHECK:   %[[R:.*]] = vector.extract %[[A]][2] : f32 from vector<4xf32>
 745 //       CHECK:   return %[[R]] : f32
     // A scalar is extracted at [0, 1, 2] from a vector<4xf32> broadcast to
     // 1x2x4. The expected output extracts position [2] directly from the
     // broadcast source, dropping the indices of the broadcasted leading dims.
     // NOTE(review): this function name is also used by other test cases in this
     // file.
 746 func.func @fold_extract_broadcast(%a : vector<4xf32>) -> f32 {
 747   %b = vector.broadcast %a : vector<4xf32> to vector<1x2x4xf32>
 748   %r = vector.extract %b[0, 1, 2] : f32 from vector<1x2x4xf32>
 749   return %r : f32
 750 }
 751
 752 // -----
 753
 754 // CHECK-LABEL: fold_extract_broadcast
 755 //       CHECK:   %[[B:.*]] = vector.broadcast %{{.*}} : f32 to vector<4xf32>
 756 //       CHECK:   return %[[B]] : vector<4xf32>
     // A vector<4xf32> is extracted from a scalar broadcast to 1x2x4. The
     // expected output is a single broadcast of the scalar straight to
     // vector<4xf32>.
     // NOTE(review): this function name is also used by other test cases in this
     // file.
 757 func.func @fold_extract_broadcast(%a : f32) -> vector<4xf32> {
 758   %b = vector.broadcast %a : f32 to vector<1x2x4xf32>
 759   %r = vector.extract %b[0, 1] : vector<4xf32> from vector<1x2x4xf32>
 760   return %r : vector<4xf32>
 761 }
 762
 763 // -----
 764
 765 // CHECK-LABEL: fold_extract_broadcast
 766 //  CHECK-SAME:   %[[A:.*]]: vector<1xf32>
 767 //       CHECK:   %[[R:.*]] = vector.broadcast %[[A]] : vector<1xf32> to vector<8xf32>
 768 //       CHECK:   return %[[R]] : vector<8xf32>
     // A vector<8xf32> is extracted from a vector<1xf32> broadcast to 1x8. The
     // expected output is a single broadcast from vector<1xf32> to vector<8xf32>.
     // NOTE(review): this function name is also used by other test cases in this
     // file.
 769 func.func @fold_extract_broadcast(%a : vector<1xf32>) -> vector<8xf32> {
 770   %b = vector.broadcast %a : vector<1xf32> to vector<1x8xf32>
 771   %r = vector.extract %b[0] : vector<8xf32> from vector<1x8xf32>
 772   return %r : vector<8xf32>
 773 }
 774 // -----
 775
 776 // CHECK-LABEL: @fold_extract_shuffle
 777 //  CHECK-SAME:   %[[A:.*]]: vector<8xf32>, %[[B:.*]]: vector<8xf32>
 778 //   CHECK-NOT:   vector.shuffle
 779 //       CHECK:   vector.extract %[[A]][0] : f32 from vector<8xf32>
 780 //       CHECK:   vector.extract %[[B]][0] : f32 from vector<8xf32>
 781 //       CHECK:   vector.extract %[[A]][7] : f32 from vector<8xf32>
 782 //       CHECK:   vector.extract %[[B]][7] : f32 from vector<8xf32>
     // Each scalar extract from the shuffle result is expected to read directly
     // from the shuffle operand that supplied that lane, with no shuffle left.
     // Mask entries 0 and 7 map to lanes 0 and 7 of %a; entries 8 and 15 map to
     // lanes 0 and 7 of %b.
 783 func.func @fold_extract_shuffle(%a : vector<8xf32>, %b : vector<8xf32>)
 784                                 -> (f32, f32, f32, f32) {
 785   %shuffle = vector.shuffle %a, %b [0, 8, 7, 15] : vector<8xf32>, vector<8xf32>
 786   %e0 = vector.extract %shuffle[0] : f32 from vector<4xf32>
 787   %e1 = vector.extract %shuffle[1] : f32 from vector<4xf32>
 788   %e2 = vector.extract %shuffle[2] : f32 from vector<4xf32>
 789   %e3 = vector.extract %shuffle[3] : f32 from vector<4xf32>
 790   return %e0, %e1, %e2, %e3 : f32, f32, f32, f32
 791 }
 792
 793 // -----
 794
 795 // CHECK-LABEL: func @fold_extract_shapecast
 796 //  CHECK-SAME: (%[[A0:.*]]: vector<5x1x3x2xf32>, %[[A1:.*]]: vector<8x4x2xf32>
 797 //       CHECK:   %[[R0:.*]] = vector.extract %[[A0]][1, 0, 1, 1] : f32 from vector<5x1x3x2xf32>
 798 //       CHECK:   %[[R1:.*]] = vector.extract %[[A0]][1, 0, 2] : vector<2xf32> from vector<5x1x3x2xf32>
 799 //       CHECK:   %[[R2:.*]] = vector.extract %[[A1]][7] : vector<4x2xf32> from vector<8x4x2xf32>
 800 //       CHECK:   return %[[R0]], %[[R1]], %[[R2]], %[[A1]] : f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>
     // Extracts from shape_cast results are expected to be rewritten as extracts
     // from the shape_cast source with remapped positions. The last extract takes
     // the whole 8x4x2 payload out of a 1x8x4x2 cast and is expected to fold to
     // the original %arg1.
 801 func.func @fold_extract_shapecast(%arg0 : vector<5x1x3x2xf32>,
 802                              %arg1 : vector<8x4x2xf32>)
 803   -> (f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>) {
 804   %0 = vector.shape_cast %arg0 : vector<5x1x3x2xf32> to vector<15x2xf32>
 805   %1 = vector.shape_cast %arg1 : vector<8x4x2xf32> to vector<4x2x4x2xf32>
 806   %2 = vector.shape_cast %arg1 : vector<8x4x2xf32> to vector<1x8x4x2xf32>
 807   %r1 = vector.extract %0[4, 1] : f32 from vector<15x2xf32>
 808   %r2 = vector.extract %0[5] : vector<2xf32> from vector<15x2xf32>
 809   %r3 = vector.extract %1[3, 1] : vector<4x2xf32> from vector<4x2x4x2xf32>
 810   %r4 = vector.extract %2[0] : vector<8x4x2xf32> from vector<1x8x4x2xf32>
 811   return %r1, %r2, %r3, %r4 : f32, vector<2xf32>, vector<4x2xf32>, vector<8x4x2xf32>
 812 }
 813
 814 // -----
 815
 816 // CHECK-LABEL: fold_extract_shapecast_0d_result
 817 //  CHECK-SAME: %[[IN:.*]]: vector<1x1x1xf32>
 818 //       CHECK:   %[[R:.*]] = vector.extract %[[IN]][0, 0, 0] : f32 from vector<1x1x1xf32>
 819 //       CHECK:   return %[[R]] : f32
     // The shape_cast produces a 0-D vector. The scalar extract from it is
     // expected to become an extract at [0, 0, 0] from the 1x1x1 source.
 820 func.func @fold_extract_shapecast_0d_result(%arg0 : vector<1x1x1xf32>) -> f32 {
 821   %0 = vector.shape_cast %arg0 : vector<1x1x1xf32> to vector<f32>
 822   %r = vector.extract %0[] : f32 from vector<f32>
 823   return %r : f32
 824 }
 825
 826 // -----
 827
 828 // CHECK-LABEL: fold_extract_shapecast_0d_source
 829 //  CHECK-SAME: %[[IN:.*]]: vector<f32>
 830 //       CHECK:   %[[R:.*]] = vector.extract %[[IN]][] : f32 from vector<f32>
 831 //       CHECK:   return %[[R]] : f32
     // The shape_cast source is a 0-D vector. The extract at [0] from the
     // vector<1xf32> result is expected to become an extract with an empty
     // position from the 0-D source.
 832 func.func @fold_extract_shapecast_0d_source(%arg0 : vector<f32>) -> f32 {
 833   %0 = vector.shape_cast %arg0 : vector<f32> to vector<1xf32>
 834   %r = vector.extract %0[0] : f32 from vector<1xf32>
 835   return %r : f32
 836 }
 837
 838 // -----
 839
 840 // CHECK-LABEL: fold_extract_shapecast_negative
 841 //       CHECK:   %[[V:.*]] = vector.shape_cast %{{.*}} : vector<16xf32> to vector<2x4x2xf32>
 842 //       CHECK:   %[[R:.*]] = vector.extract %[[V]][1] : vector<4x2xf32> from vector<2x4x2xf32>
 843 //       CHECK:   return %[[R]] : vector<4x2xf32>
     // Negative case: a vector<4x2xf32> is extracted from a shape_cast of a 1-D
     // vector<16xf32>. Both the shape_cast and the extract are expected to remain.
 844 func.func @fold_extract_shapecast_negative(%arg0 : vector<16xf32>) -> vector<4x2xf32> {
 845   %0 = vector.shape_cast %arg0 : vector<16xf32> to vector<2x4x2xf32>
 846   %r = vector.extract %0[1] : vector<4x2xf32> from vector<2x4x2xf32>
 847   return %r : vector<4x2xf32>
 848 }
 849
 850 // -----
 851
 852 // CHECK-LABEL: fold_extract_shapecast_to_shapecast
 853 //  CHECK-SAME: (%[[ARG:.+]]: vector<3x4xf32>)
 854 //       CHECK:   %[[R:.+]] = vector.shape_cast %[[ARG]] : vector<3x4xf32> to vector<12xf32>
 855 //       CHECK:   return %[[R]]
     // Extracting the only row of a 1x12 shape_cast result is expected to be
     // rewritten as one shape_cast from the 3x4 source directly to vector<12xf32>.
 856 func.func @fold_extract_shapecast_to_shapecast(%arg0 : vector<3x4xf32>) -> vector<12xf32> {
 857   %0 = vector.shape_cast %arg0 : vector<3x4xf32> to vector<1x12xf32>
 858   %r = vector.extract %0[0] : vector<12xf32> from vector<1x12xf32>
 859   return %r : vector<12xf32>
 860 }
 861
 862 // -----
 863
 864 // CHECK-LABEL: func @extract_no_fold_scalar_to_0d(
 865 //  CHECK-SAME:     %[[v:.*]]: vector<f32>)
 866 //       CHECK:   %[[extract:.*]] = vector.extract %[[v]][] : f32 from vector<f32>
 867 //       CHECK:   return %[[extract]]
     // Extracting the scalar from a 0-D vector with an empty position is expected
     // to be left untouched: the same extract must appear in the output.
 868 func.func @extract_no_fold_scalar_to_0d(%v: vector<f32>) -> f32 {
 869   %0 = vector.extract %v[] : f32 from vector<f32>
 870   return %0 : f32
 871 }
 872
 873 // -----
 874
 875 // CHECK-LABEL: func @insert_fold_same_rank(
 876 //  CHECK-SAME:     %[[v:.*]]: vector<2x2xf32>)
 877 //       CHECK:      %[[CST:.+]] = arith.constant
 878 //  CHECK-SAME:                    : vector<2x2xf32>
 879 //       CHECK-NOT:  vector.insert
 880 //       CHECK:   return %[[CST]]
     // The inserted value has the same type as the destination and the position
     // is empty, so it overwrites the whole destination. The insert is expected
     // to fold to the inserted constant, with no vector.insert left.
 881 func.func @insert_fold_same_rank(%v: vector<2x2xf32>) -> vector<2x2xf32> {
 882   %cst = arith.constant dense<0.000000e+00> : vector<2x2xf32>
 883   %0 = vector.insert %cst, %v [] : vector<2x2xf32> into vector<2x2xf32>
 884   return %0 : vector<2x2xf32>
 885 }
 886
 887 // -----
 888
 889 // CHECK-LABEL: func @insert_no_fold_scalar_to_0d(
 890 //  CHECK-SAME:     %[[v:.*]]: vector<f32>)
 891 //       CHECK:   %[[insert:.*]] = vector.insert %{{.*}}, %[[v]] [] : f32 into vector<f32>
 892 //       CHECK:   return %[[insert]]
     // Inserting a scalar into a 0-D vector with an empty position is expected to
     // be left untouched: the same insert must appear in the output. The pattern
     // variable is named after the op it captures (a vector.insert result).
 893 func.func @insert_no_fold_scalar_to_0d(%v: vector<f32>) -> vector<f32> {
 894   %cst = arith.constant 0.000000e+00 : f32
 895   %0 = vector.insert %cst, %v [] : f32 into vector<f32>
 896   return %0 : vector<f32>
 897 }
 898
 899 // -----
 900
 901 // CHECK-LABEL: dont_fold_expand_collapse
 902 //       CHECK:   %[[A:.*]] = vector.shape_cast %{{.*}} : vector<1x1x64xf32> to vector<1x1x8x8xf32>
 903 //       CHECK:   %[[B:.*]] = vector.shape_cast %{{.*}} : vector<1x1x8x8xf32> to vector<8x8xf32>
 904 //       CHECK:   return %[[B]] : vector<8x8xf32>
     // Negative case: two back-to-back shape_casts (1x1x64 -> 1x1x8x8 -> 8x8).
     // Both are expected to remain in the output rather than being merged.
 905 func.func @dont_fold_expand_collapse(%arg0: vector<1x1x64xf32>) -> vector<8x8xf32> {
 906     %0 = vector.shape_cast %arg0 : vector<1x1x64xf32> to vector<1x1x8x8xf32>
 907     %1 = vector.shape_cast %0 : vector<1x1x8x8xf32> to vector<8x8xf32>
 908     return %1 : vector<8x8xf32>
 909 }
 910
 911 // -----
 912
 913 // CHECK-LABEL: func @fold_broadcast_shapecast
 914 //  CHECK-SAME: (%[[V:.+]]: vector<4xf32>)
 915 //       CHECK:   return %[[V]]
     // The broadcast only adds leading unit dims and the shape_cast removes them
     // again, so the pair is expected to fold to the original argument.
 916 func.func @fold_broadcast_shapecast(%arg0: vector<4xf32>) -> vector<4xf32> {
 917     %0 = vector.broadcast %arg0 : vector<4xf32> to vector<1x1x4xf32>
 918     %1 = vector.shape_cast %0 : vector<1x1x4xf32> to vector<4xf32>
 919     return %1 : vector<4xf32>
 920 }
 921
 922 // -----
 923
 924 // CHECK-LABEL: func @canonicalize_broadcast_shapecast_scalar
 925 //       CHECK:   vector.broadcast
 926 //   CHECK-NOT:   vector.shape_cast
     // A scalar is broadcast to 1x1x1 and then shape_cast to vector<1xf32>. The
     // output is expected to contain a broadcast with no shape_cast after it.
 927 func.func @canonicalize_broadcast_shapecast_scalar(%arg0: f32) -> vector<1xf32> {
 928     %0 = vector.broadcast %arg0 : f32 to vector<1x1x1xf32>
 929     %1 = vector.shape_cast %0 : vector<1x1x1xf32> to vector<1xf32>
 930     return %1 : vector<1xf32>
 931 }
 932
 933 // -----
 934
 935 // CHECK-LABEL: func @dont_fold_broadcast_shapecast_diff_shape
 936 //       CHECK:   vector.broadcast
 937 //       CHECK:   vector.shape_cast
     // Negative case: the broadcast duplicates data (4 -> 1x2x4) and the
     // shape_cast flattens it to vector<8xf32>. Both ops are expected to remain.
 938 func.func @dont_fold_broadcast_shapecast_diff_shape(%arg0: vector<4xf32>) -> vector<8xf32> {
 939     %0 = vector.broadcast %arg0 : vector<4xf32> to vector<1x2x4xf32>
 940     %1 = vector.shape_cast %0 : vector<1x2x4xf32> to vector<8xf32>
 941     return %1 : vector<8xf32>
 942 }
 943
 944 // -----
 945
 946 // CHECK-LABEL: func @canonicalize_broadcast_shapecast_to_broadcast
 947 //       CHECK:   vector.broadcast
 948 //   CHECK-NOT:   vector.shape_cast
     // A vector<3xf32> is broadcast to 2x4x3 and then shape_cast to 8x3. The
     // output is expected to contain a broadcast with no shape_cast after it.
 949 func.func @canonicalize_broadcast_shapecast_to_broadcast(%arg0: vector<3xf32>) -> vector<8x3xf32> {
 950     %0 = vector.broadcast %arg0 : vector<3xf32> to vector<2x4x3xf32>
 951     %1 = vector.shape_cast %0 : vector<2x4x3xf32> to vector<8x3xf32>
 952     return %1 : vector<8x3xf32>
 953 }
 954
 955 // -----
 956
 957 // CHECK-LABEL: func @canonicalize_broadcast_shapecast_to_shapecast
 958 //   CHECK-NOT:   vector.broadcast
 959 //       CHECK:   vector.shape_cast {{.+}} : vector<3x4xf32> to vector<1x12xf32>
     // The broadcast only adds leading unit dims (3x4 -> 1x1x3x4) before the
     // shape_cast to 1x12. The output is expected to be one shape_cast from 3x4
     // to 1x12, with no broadcast before it.
 960 func.func @canonicalize_broadcast_shapecast_to_shapecast(%arg0: vector<3x4xf32>) -> vector<1x12xf32> {
 961     %0 = vector.broadcast %arg0 : vector<3x4xf32> to vector<1x1x3x4xf32>
 962     %1 = vector.shape_cast %0 : vector<1x1x3x4xf32> to vector<1x12xf32>
 963     return %1 : vector<1x12xf32>
 964 }
 965
 966 // -----
 967
 968 // CHECK-LABEL: fold_vector_transfer_masks
     // Transfers that carry an all-true mask are expected to lose the mask
     // operand: every expected transfer op below is matched without a mask.
     // Two mask producers are covered: a full vector.constant_mask and an
     // arith.constant dense<true> with a scalable dim.
 969 func.func @fold_vector_transfer_masks(%A: memref<?x?xf32>) -> (vector<4x8xf32>, vector<4x[4]xf32>) {
 970   // CHECK: %[[C0:.+]] = arith.constant 0 : index
 971   %c0 = arith.constant 0 : index
 972   // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
 973   %f0 = arith.constant 0.0 : f32
 974
       // The mask extents [8, 4] equal the mask shape 8x4, so every lane is set.
 975   %mask = vector.constant_mask [8, 4] : vector<8x4xi1>
 976
 977   %arith_all_true_mask = arith.constant dense<true> : vector<4x[4]xi1>
 978
 979   // CHECK: vector.transfer_read %{{.*}}, %[[F0]] {permutation_map
 980   %1 = vector.transfer_read %A[%c0, %c0], %f0, %mask
 981       {permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : memref<?x?xf32>, vector<4x8xf32>
 982
 983   // CHECK: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] {permutation_map
 984   vector.transfer_write %1, %A[%c0, %c0], %mask
 985       {permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<4x8xf32>, memref<?x?xf32>
 986
 987   // CHECK: vector.transfer_read %{{.*}}, %[[F0]] :
 988   %2 = vector.transfer_read %A[%c0, %c0], %f0, %arith_all_true_mask : memref<?x?xf32>, vector<4x[4]xf32>
 989
 990   // CHECK: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] :
 991   vector.transfer_write %2, %A[%c0, %c0], %arith_all_true_mask : vector<4x[4]xf32>, memref<?x?xf32>
 992
 993   // CHECK: return
 994   return %1, %2 : vector<4x8xf32>, vector<4x[4]xf32>
 995 }
 996
 997 // -----
 998
 999 // CHECK-LABEL: fold_vector_transfers
     // Transfers on memref<?x8xf32> starting at index 0 are expected to get an
     // inferred in_bounds attribute where the static sizes allow it. With
     // vector<4x8xf32> only the second dim is marked in bounds; with
     // vector<4x9xf32> no in_bounds attribute is expected.
1000 func.func @fold_vector_transfers(%A: memref<?x8xf32>) -> (vector<4x8xf32>, vector<4x9xf32>) {
1001   %c0 = arith.constant 0 : index
1002   %f0 = arith.constant 0.0 : f32
1003
1004   // CHECK: vector.transfer_read %{{.*}} {in_bounds = [false, true]}
1005   %1 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x8xf32>, vector<4x8xf32>
1006
1007   // CHECK: vector.transfer_write %{{.*}} {in_bounds = [false, true]}
1008   vector.transfer_write %1, %A[%c0, %c0] : vector<4x8xf32>, memref<?x8xf32>
1009
1010   // Both dims may be out-of-bounds, attribute is elided.
1011   // CHECK: vector.transfer_read %{{.*}}
1012   // CHECK-NOT: in_bounds
1013   %2 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x8xf32>, vector<4x9xf32>
1014
1015   // Both dims may be out-of-bounds, attribute is elided.
1016   // CHECK: vector.transfer_write %{{.*}}
1017   // CHECK-NOT: in_bounds
1018   vector.transfer_write %2, %A[%c0, %c0] : vector<4x9xf32>, memref<?x8xf32>
1019
1020   // CHECK: return
1021   return %1, %2 : vector<4x8xf32>, vector<4x9xf32>
1022 }
1023
1024 // -----
1025
1026 // CHECK-LABEL: bitcast_folding
1027 //  CHECK-SAME:   %[[A:.*]]: vector<4x8xf32>
1028 //  CHECK-SAME:   %[[B:.*]]: vector<2xi32>
1029 //  CHECK:        return %[[A]], %[[B]] : vector<4x8xf32>, vector<2xi32>
     // Two bitcast folds: a bitcast to the identical type, and a round trip
     // (2xi32 -> 4xi16 -> 2xi32). Both are expected to fold to the original
     // function arguments.
1030 func.func @bitcast_folding(%I1: vector<4x8xf32>, %I2: vector<2xi32>) -> (vector<4x8xf32>, vector<2xi32>) {
1031   %0 = vector.bitcast %I1 : vector<4x8xf32> to vector<4x8xf32>
1032   %1 = vector.bitcast %I2 : vector<2xi32> to vector<4xi16>
1033   %2 = vector.bitcast %1 : vector<4xi16> to vector<2xi32>
1034   return %0, %2 : vector<4x8xf32>, vector<2xi32>
1035 }
1036
1037 // CHECK-LABEL: func @bitcast_f16_to_f32
1038 //              bit pattern: 0x40004000
1039 //       CHECK-DAG: %[[CST1:.+]] = arith.constant dense<2.00390625> : vector<4xf32>
1040 //              bit pattern: 0x00000000
1041 //       CHECK-DAG: %[[CST0:.+]] = arith.constant dense<0.000000e+00> : vector<4xf32>
1042 //       CHECK: return %[[CST0]], %[[CST1]]
     // Bitcasts of splat f16 constants to f32 are expected to fold into splat f32
     // constants. Each f32 element is formed from two adjacent f16 bit patterns,
     // e.g. 0x4000 twice gives 0x40004000, which reads as 2.00390625.
1043 func.func @bitcast_f16_to_f32() -> (vector<4xf32>, vector<4xf32>) {
1044   %cst0 = arith.constant dense<0.0> : vector<8xf16> // bit pattern: 0x0000
1045   %cst1 = arith.constant dense<2.0> : vector<8xf16> // bit pattern: 0x4000
1046   %cast0 = vector.bitcast %cst0: vector<8xf16> to vector<4xf32>
1047   %cast1 = vector.bitcast %cst1: vector<8xf16> to vector<4xf32>
1048   return %cast0, %cast1: vector<4xf32>, vector<4xf32>
1049 }
1050
1051 // CHECK-LABEL: func @bitcast_i8_to_i32
1052 //              bit pattern: 0xA0A0A0A0
1053 //       CHECK-DAG: %[[CST1:.+]] = arith.constant dense<-1600085856> : vector<4xi32>
1054 //              bit pattern: 0x00000000
1055 //       CHECK-DAG: %[[CST0:.+]] = arith.constant dense<0> : vector<4xi32>
1056 //       CHECK: return %[[CST0]], %[[CST1]]
     // Bitcasts of splat i8 constants to i32 are expected to fold into splat i32
     // constants. Each i32 element is formed from four adjacent i8 bit patterns,
     // e.g. 0xA0 four times gives 0xA0A0A0A0, printed as signed -1600085856.
1057 func.func @bitcast_i8_to_i32() -> (vector<4xi32>, vector<4xi32>) {
1058   %cst0 = arith.constant dense<0> : vector<16xi8> // bit pattern: 0x00
1059   %cst1 = arith.constant dense<160> : vector<16xi8> // bit pattern: 0xA0
1060   %cast0 = vector.bitcast %cst0: vector<16xi8> to vector<4xi32>
1061   %cast1 = vector.bitcast %cst1: vector<16xi8> to vector<4xi32>
1062   return %cast0, %cast1: vector<4xi32>, vector<4xi32>
1063 }
1064
1065 // -----
1066
1067 // CHECK-LABEL: broadcast_folding1
1068 //       CHECK: %[[CST:.*]] = arith.constant dense<42> : vector<4xi32>
1069 //   CHECK-NOT: vector.broadcast
1070 //       CHECK: return %[[CST]]
     // A broadcast of a constant scalar is expected to fold into a dense splat
     // constant of the result type, with no broadcast left.
1071 func.func @broadcast_folding1() -> vector<4xi32> {
1072   %0 = arith.constant 42 : i32
1073   %1 = vector.broadcast %0 : i32 to vector<4xi32>
1074   return %1 : vector<4xi32>
1075 }
1076
1077 // -----
1078
1079 // CHECK-LABEL: @broadcast_folding2
1080 //       CHECK: %[[CST:.*]] = arith.constant dense<42> : vector<4x16xi32>
1081 //   CHECK-NOT: vector.broadcast
1082 //       CHECK: return %[[CST]]
     // A chain of two broadcasts that starts from a constant scalar is expected
     // to fold into one dense splat constant of the final type.
1083 func.func @broadcast_folding2() -> vector<4x16xi32> {
1084   %0 = arith.constant 42 : i32
1085   %1 = vector.broadcast %0 : i32 to vector<16xi32>
1086   %2 = vector.broadcast %1 : vector<16xi32> to vector<4x16xi32>
1087   return %2 : vector<4x16xi32>
1088 }
1089
1090 // -----
1091
1092 // CHECK-LABEL: @fold_consecutive_broadcasts(
1093 //  CHECK-SAME:                              %[[ARG0:.*]]: i32
1094 //       CHECK: %[[RESULT:.*]] = vector.broadcast %[[ARG0]] : i32 to vector<4x16xi32>
1095 //       CHECK: return %[[RESULT]]
     // Two chained broadcasts of a non-constant scalar are expected to be merged
     // into one broadcast from the scalar to the final vector type.
1096 func.func @fold_consecutive_broadcasts(%a : i32) -> vector<4x16xi32> {
1097   %1 = vector.broadcast %a : i32 to vector<16xi32>
1098   %2 = vector.broadcast %1 : vector<16xi32> to vector<4x16xi32>
1099   return %2 : vector<4x16xi32>
1100 }
1101
1102 // -----
1103
1104 // CHECK-LABEL: shape_cast_constant
1105 //       CHECK-DAG: %[[CST1:.*]] = arith.constant dense<1> : vector<3x4x2xi32>
1106 //       CHECK-DAG: %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<20x2xf32>
1107 //       CHECK: return %[[CST0]], %[[CST1]] : vector<20x2xf32>, vector<3x4x2xi32>
     // shape_casts of splat constants (one float, one integer) are expected to
     // fold into splat constants of the shape_cast result type.
1108 func.func @shape_cast_constant() -> (vector<20x2xf32>, vector<3x4x2xi32>) {
1109   %cst = arith.constant dense<2.000000e+00> : vector<5x4x2xf32>
1110   %cst_1 = arith.constant dense<1> : vector<12x2xi32>
1111   %0 = vector.shape_cast %cst : vector<5x4x2xf32> to vector<20x2xf32>
1112   %1 = vector.shape_cast %cst_1 : vector<12x2xi32> to vector<3x4x2xi32>
1113   return %0, %1 : vector<20x2xf32>, vector<3x4x2xi32>
1114 }
1115
1116 // -----
1117
1118 // CHECK-LABEL: extract_strided_constant
1119 //       CHECK-DAG: %[[CST1:.*]] = arith.constant dense<1> : vector<2x13x3xi32>
1120 //       CHECK-DAG: %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<12x2xf32>
1121 //       CHECK: return %[[CST0]], %[[CST1]] : vector<12x2xf32>, vector<2x13x3xi32>
     // extract_strided_slice of splat constants (one float, one integer) is
     // expected to fold into splat constants of the slice type, independent of
     // the offsets used.
1122 func.func @extract_strided_constant() -> (vector<12x2xf32>, vector<2x13x3xi32>) {
1123   %cst = arith.constant dense<2.000000e+00> : vector<29x7xf32>
1124   %cst_1 = arith.constant dense<1> : vector<4x37x9xi32>
1125   %0 = vector.extract_strided_slice %cst
1126     {offsets = [2, 3], sizes = [12, 2], strides = [1, 1]}
1127       : vector<29x7xf32> to vector<12x2xf32>
1128   %1 = vector.extract_strided_slice %cst_1
1129     {offsets = [1, 2, 5], sizes = [2, 13, 3], strides = [1, 1, 1]}
1130       : vector<4x37x9xi32> to vector<2x13x3xi32>
1131   return %0, %1 : vector<12x2xf32>, vector<2x13x3xi32>
1132 }
1133
1134 // -----
1135
1136 // CHECK-LABEL: extract_strided_broadcast
1137 //       CHECK:   %[[B:.*]] = vector.broadcast %{{.*}} : vector<4xf16> to vector<2x4xf16>
1138 //  CHECK-NEXT:   return %[[B]] : vector<2x4xf16>
     // The slice keeps the full inner dim (4) of the broadcast source and only
     // narrows the broadcasted outer dim (16 -> 2). The expected output is a
     // single broadcast from the source to the slice type.
1139 func.func @extract_strided_broadcast(%arg0: vector<4xf16>) -> vector<2x4xf16> {
1140  %0 = vector.broadcast %arg0 : vector<4xf16> to vector<16x4xf16>
1141  %1 = vector.extract_strided_slice %0
1142   {offsets = [0, 0], sizes = [2, 4], strides = [1, 1]} :
1143   vector<16x4xf16> to vector<2x4xf16>
1144   return %1 : vector<2x4xf16>
1145 }
1146
1147 // -----
1148
1149 // CHECK-LABEL: extract_strided_broadcast2
1150 //       CHECK:   %[[E:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0], sizes = [2], strides = [1]} : vector<4xf16> to vector<2xf16>
1151 //  CHECK-NEXT:   %[[B:.*]] = vector.broadcast %[[E]] : vector<2xf16> to vector<2x2xf16>
1152 //  CHECK-NEXT:   return %[[B]] : vector<2x2xf16>
     // The slice also narrows the inner dim that comes from the broadcast source
     // (4 -> 2). The expected output slices the 1-D source first and then
     // broadcasts the smaller vector to the slice type.
1153 func.func @extract_strided_broadcast2(%arg0: vector<4xf16>) -> vector<2x2xf16> {
1154  %0 = vector.broadcast %arg0 : vector<4xf16> to vector<16x4xf16>
1155  %1 = vector.extract_strided_slice %0
1156   {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} :
1157   vector<16x4xf16> to vector<2x2xf16>
1158   return %1 : vector<2x2xf16>
1159 }
1160
1161 // -----
1162
1163 // CHECK-LABEL: func @extract_strided_broadcast3
1164 //  CHECK-SAME: (%[[ARG:.+]]: vector<1xf32>)
1165 //       CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : vector<1xf32> to vector<1x4xf32>
1166 //       CHECK: return %[[V]]
     // The source is a vector<1xf32> whose unit dim is stretched to 8, and the
     // slice starts at a non-zero offset in that stretched dim. The expected
     // output is one broadcast from the source to the slice type.
1167 func.func @extract_strided_broadcast3(%arg0: vector<1xf32>) -> vector<1x4xf32> {
1168  %0 = vector.broadcast %arg0 : vector<1xf32> to vector<1x8xf32>
1169  %1 = vector.extract_strided_slice %0
1170       {offsets = [0, 4], sizes = [1, 4], strides = [1, 1]}
1171       : vector<1x8xf32> to vector<1x4xf32>
1172   return %1 : vector<1x4xf32>
1173 }
1174
1175 // -----
1176
1177 // CHECK-LABEL: func @extract_strided_broadcast4
1178 //  CHECK-SAME: (%[[ARG:.+]]: f32)
1179 //       CHECK: %[[V:.+]] = vector.broadcast %[[ARG]] : f32 to vector<1x4xf32>
1180 //       CHECK: return %[[V]]
     // The broadcast source is a scalar and the slice starts at a non-zero
     // offset. The expected output is one broadcast from the scalar to the slice
     // type.
1181 func.func @extract_strided_broadcast4(%arg0: f32) -> vector<1x4xf32> {
1182  %0 = vector.broadcast %arg0 : f32 to vector<1x8xf32>
1183  %1 = vector.extract_strided_slice %0
1184       {offsets = [0, 4], sizes = [1, 4], strides = [1, 1]}
1185       : vector<1x8xf32> to vector<1x4xf32>
1186   return %1 : vector<1x4xf32>
1187 }
1188
1189 // -----
1190
1191 // CHECK-LABEL: consecutive_shape_cast
1192 //       CHECK:   %[[C:.*]] = vector.shape_cast %{{.*}} : vector<16xf16> to vector<4x4xf16>
1193 //  CHECK-NEXT:   return %[[C]] : vector<4x4xf16>
     // Two chained shape_casts (16 -> 2x8 -> 4x4) are expected to be merged into
     // one shape_cast from the original type to the final type.
1194 func.func @consecutive_shape_cast(%arg0: vector<16xf16>) -> vector<4x4xf16> {
1195   %0 = vector.shape_cast %arg0 : vector<16xf16> to vector<2x8xf16>
1196   %1 = vector.shape_cast %0 : vector<2x8xf16> to vector<4x4xf16>
1197   return %1 : vector<4x4xf16>
1198 }
1199
1200 // -----
1201
1202 // CHECK-LABEL: func @dead_transfer_op
1203 //   CHECK-NOT:   vector.transfer_read
1204 //   CHECK-NOT:   vector.transfer_write
1205 //       CHECK:   return
     // Neither result is used: %r is a read from a memref and %w is the tensor
     // result of a write into a tensor. Both ops are expected to be erased.
1206 func.func @dead_transfer_op(%arg0 : tensor<4x4xf32>, %arg1 : memref<4x4xf32>,
1207                        %v0 : vector<1x4xf32>) {
1208   %c0 = arith.constant 0 : index
1209   %cf0 = arith.constant 0.0 : f32
1210   %r = vector.transfer_read %arg1[%c0, %c0], %cf0 :
1211     memref<4x4xf32>, vector<1x4xf32>
1212   %w = vector.transfer_write %v0, %arg0[%c0, %c0] :
1213     vector<1x4xf32>, tensor<4x4xf32>
1214   return
1215 }
1216
1217 // -----
1218
1219 // CHECK-LABEL: func @dead_load
1220 //   CHECK-NOT:   vector.maskedload
1221 //   CHECK-NOT:   vector.gather
1222 //   CHECK-NOT:   vector.expandload
1223 //       CHECK:   return
     // The results of the maskedload, gather and expandload are all unused, so
     // the three loads are expected to be erased.
1224 func.func @dead_load(%base: memref<?xf32>, %indices: vector<16xi32>,
1225                           %mask: vector<16xi1>, %passthru: vector<16xf32>) {
1226   %c0 = arith.constant 0 : index
1227   %0 = vector.maskedload %base[%c0], %mask, %passthru :
1228     memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1229   %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
1230     memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1231   %2 = vector.expandload %base[%c0], %mask, %passthru :
1232     memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
1233   return
1234 }
1235
1236 // -----
1237
1238 #contraction_accesses0 = [
1239   affine_map<(i, j, k) -> (i, k)>,
1240   affine_map<(i, j, k) -> (k, j)>,
1241   affine_map<(i, j, k) -> (i, j)>
1242 ]
     // Contraction trait built from the maps above: operands are indexed (i, k)
     // and (k, j), the accumulator/result (i, j), and k is the reduction dim.
1243 #contraction_trait0 = {
1244   indexing_maps = #contraction_accesses0,
1245   iterator_types = ["parallel", "parallel", "reduction"]
1246 }
1247
1248 // CHECK-LABEL: func @contractions
1249 //  CHECK-SAME:   %[[A:[0-9a-zA-Z]+]]: vector<2x3xf32>
1250 //  CHECK-SAME:   %[[B:[0-9a-zA-Z]+]]: vector<3x4xf32>
1251 //  CHECK-SAME:   %[[C:[0-9a-zA-Z]+]]: vector<2x4xf32>
1252 //  CHECK-SAME:   %[[A_I8:[0-9a-zA-Z]+]]: vector<2x3xi8>
1253 //  CHECK-SAME:   %[[B_I8:[0-9a-zA-Z]+]]: vector<3x4xi8>
1254 //  CHECK-SAME:   %[[C_I8:[0-9a-zA-Z]+]]: vector<2x4xi8>
     // A contract with an all-zero accumulator followed by an add of %c is
     // expected to become a contract that takes %c as its accumulator, removing
     // both the zero constant and the add. Covered for f32 (arith.addf) and i8
     // (arith.addi).
1255 func.func @contractions(%a: vector<2x3xf32>, %b: vector<3x4xf32>, %c: vector<2x4xf32>,
1256                    %a_i8: vector<2x3xi8>, %b_i8: vector<3x4xi8>, %c_i8: vector<2x4xi8>)
1257   -> (vector<2x4xf32>, vector<2x4xi8>)
1258 {
1259   // CHECK-NOT: arith.constant
1260   %vf_0 = arith.constant dense <0.0>: vector<2x4xf32>
1261   // CHECK-NOT: arith.addf
1262   //     CHECK: %[[D:.*]] = vector.contract {{.*}} %[[A]], %[[B]], %[[C]]
1263   %0 = vector.contract #contraction_trait0 %a, %b, %vf_0:
1264     vector<2x3xf32>, vector<3x4xf32> into vector<2x4xf32>
1265   // CHECK-NOT: arith.addf
1266   %1 = arith.addf %0, %c: vector<2x4xf32>
1267
1268   // CHECK-NOT: arith.constant
1269   %vi8_0 = arith.constant dense <0>: vector<2x4xi8>
1270   // CHECK-NOT: arith.addi
1271   //     CHECK: %[[D_I8:.*]] = vector.contract {{.*}} %[[A_I8]], %[[B_I8]], %[[C_I8]]
1272   %i8_0 = vector.contract #contraction_trait0 %a_i8, %b_i8, %vi8_0:
1273     vector<2x3xi8>, vector<3x4xi8> into vector<2x4xi8>
1274   // CHECK-NOT: arith.addi
1275   %i8_1 = arith.addi %i8_0, %c_i8: vector<2x4xi8>
1276
1277   // CHECK: return %[[D]], %[[D_I8]]
1278   return %1, %i8_1: vector<2x4xf32>, vector<2x4xi8>
1279 }
1280
1281 // -----
1282
1283 // CHECK-LABEL: func @transfer_folding_1
1284 //  CHECK-SAME:   %[[T0:[0-9a-zA-Z]+]]: tensor<2x3x4xf32>
1285 //  CHECK-SAME:   %[[T1:[0-9a-zA-Z]+]]: tensor<2x3x4xf32>
     // %v is an in-bounds read of the whole of %t0. Writing %v over the whole of
     // another tensor of the same shape is expected to fold to %t0, whatever the
     // destination is (%t1, a test.constant result, or %t0 itself). Only the op
     // with unknown side effects is expected to remain before the return.
1286 func.func @transfer_folding_1(%t0: tensor<2x3x4xf32>, %t1: tensor<2x3x4xf32>)
1287   -> (tensor<2x3x4xf32>, tensor<2x3x4xf32>, tensor<2x3x4xf32>)
1288 {
1289   %c0 = arith.constant 0 : index
1290   %pad = arith.constant 0.0 : f32
1291   %v = vector.transfer_read %t0[%c0, %c0, %c0], %pad {in_bounds = [true, true, true]} :
1292     tensor<2x3x4xf32>, vector<2x3x4xf32>
1293
1294   %r0 = vector.transfer_write %v, %t1[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
1295     vector<2x3x4xf32>, tensor<2x3x4xf32>
1296
1297   %t2 = "test.constant"() { value = dense<6.0> : tensor<2x3x4xf32>} : () -> (tensor<2x3x4xf32>)
1298   %r1 = vector.transfer_write %v, %t2[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
1299     vector<2x3x4xf32>, tensor<2x3x4xf32>
1300
1301
1302   // CHECK-NEXT: some_op_that_may_have_side_effects
1303   %t3 = "some_op_that_may_have_side_effects"() : () -> (tensor<2x3x4xf32>)
1304   %r2 = vector.transfer_write %v, %t0[%c0, %c0, %c0] {in_bounds = [true, true, true]} :
1305     vector<2x3x4xf32>, tensor<2x3x4xf32>
1306
1307   // CHECK-NEXT: return %[[T0]], %[[T0]], %[[T0]]
1308   return %r0, %r1, %r2: tensor<2x3x4xf32>, tensor<2x3x4xf32>, tensor<2x3x4xf32>
1309 }
1310
1311 // -----
1312
1313 // CHECK-LABEL: func @store_after_load_tensor
1314 //  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>)
1315 //   CHECK-NOT:   vector.transfer_read
1316 //   CHECK-NOT:   vector.transfer_write
1317 //       CHECK:   return %[[ARG]] : tensor<4x4xf32>
     // The vector is written back to the same tensor at the same indices it was
     // read from, so the write is expected to fold to the original tensor and
     // both transfer ops disappear.
1318 func.func @store_after_load_tensor(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
1319   %c1 = arith.constant 1 : index
1320   %c0 = arith.constant 0 : index
1321   %cf0 = arith.constant 0.0 : f32
1322   %0 = vector.transfer_read %arg0[%c1, %c0], %cf0 :
1323     tensor<4x4xf32>, vector<1x4xf32>
1324   %w0 = vector.transfer_write %0, %arg0[%c1, %c0] :
1325     vector<1x4xf32>, tensor<4x4xf32>
1326   return %w0 : tensor<4x4xf32>
1327 }
1328
1329 // -----
1330
1331 // CHECK-LABEL: func @store_after_load_tensor_negative
1332 //       CHECK:   vector.transfer_read
1333 //       CHECK:   vector.transfer_write
1334 //       CHECK:   return
     // Negative case: the write goes to row %c0 while the read came from row
     // %c1, so both transfer ops are expected to remain.
1335 func.func @store_after_load_tensor_negative(%arg0 : tensor<4x4xf32>) -> tensor<4x4xf32> {
1336   %c1 = arith.constant 1 : index
1337   %c0 = arith.constant 0 : index
1338   %cf0 = arith.constant 0.0 : f32
1339   %0 = vector.transfer_read %arg0[%c1, %c0], %cf0 :
1340     tensor<4x4xf32>, vector<1x4xf32>
1341   %w0 = vector.transfer_write %0, %arg0[%c0, %c0] :
1342     vector<1x4xf32>, tensor<4x4xf32>
1343   return %w0 : tensor<4x4xf32>
1344 }
1345
1346 // -----
1347
1348 // CHECK-LABEL: func @store_to_load_tensor
1349 //  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>, %[[V0:.*]]: vector<1x4xf32>, %[[V1:.*]]: vector<1x4xf32>)
1350 //   CHECK-NOT:   vector.transfer_write
1351 //   CHECK-NOT:   vector.transfer_read
1352 //       CHECK:   return %[[V0]] : vector<1x4xf32>
     // The read at [%c1, %c0] follows a write of %v0 at the same indices and a
     // later write of %v1 at a different constant row (%c2). The read is
     // expected to be forwarded %v0, leaving no transfer ops.
1353 func.func @store_to_load_tensor(%arg0 : tensor<4x4xf32>,
1354   %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>) -> vector<1x4xf32> {
1355   %c1 = arith.constant 1 : index
1356   %c2 = arith.constant 2 : index
1357   %c0 = arith.constant 0 : index
1358   %cf0 = arith.constant 0.0 : f32
1359   %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
1360     vector<1x4xf32>, tensor<4x4xf32>
1361   %w1 = vector.transfer_write %v1, %w0[%c2, %c0] {in_bounds = [true, true]} :
1362     vector<1x4xf32>, tensor<4x4xf32>
1363   %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
1364     tensor<4x4xf32>, vector<1x4xf32>
1365   return %0 : vector<1x4xf32>
1366 }
1367
1368 // -----
1369
1370 // CHECK-LABEL: func @store_to_load_negative_tensor
1371 //       CHECK:   vector.transfer_write
1372 //       CHECK:   vector.transfer_write
1373 //       CHECK:   %[[V:.*]] = vector.transfer_read
1374 //       CHECK:   return %[[V]] : vector<1x4xf32>
     // Negative case: the write between the first write and the read uses the
     // dynamic index %i. All three transfer ops are expected to remain and the
     // function returns the result of the read.
     // NOTE(review): presumably forwarding is blocked because the dynamic write
     // may overlap the read location -- confirm against the fold implementation.
1375 func.func @store_to_load_negative_tensor(%arg0 : tensor<4x4xf32>,
1376   %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> vector<1x4xf32> {
1377   %c1 = arith.constant 1 : index
1378   %c2 = arith.constant 2 : index
1379   %c0 = arith.constant 0 : index
1380   %cf0 = arith.constant 0.0 : f32
1381   %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
1382     vector<1x4xf32>, tensor<4x4xf32>
1383   %w1 = vector.transfer_write %v0, %w0[%i, %i] {in_bounds = [true, true]} :
1384     vector<1x4xf32>, tensor<4x4xf32>
1385   %0 = vector.transfer_read %w1[%c1, %c0], %cf0 {in_bounds = [true, true]} :
1386     tensor<4x4xf32>, vector<1x4xf32>
1387   return %0 : vector<1x4xf32>
1388 }
1389
1390 // -----
1391
1392 // CHECK-LABEL: func @store_to_load_tensor_broadcast
1393 //  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4xf32>, %[[V0:.*]]: vector<4x2xf32>)
1394 //       CHECK:   %[[B:.*]] = vector.broadcast %[[V0]] : vector<4x2xf32> to vector<6x4x2xf32>
1395 //       CHECK:   %[[T:.*]] = vector.transpose %[[B]], [1, 2, 0] : vector<6x4x2xf32> to vector<4x2x6xf32>
1396 //       CHECK:   return %[[T]] : vector<4x2x6xf32>
     // The read uses a permutation map with a trailing broadcast dim
     // ((d0, d1) -> (d0, d1, 0)) on a tensor that was just written with %v0 at
     // the same indices. Instead of transfer ops, the expected output broadcasts
     // %v0 and transposes the new leading dim into the last position.
1397 func.func @store_to_load_tensor_broadcast(%arg0 : tensor<4x4xf32>,
1398   %v0 : vector<4x2xf32>) -> vector<4x2x6xf32> {
1399   %c0 = arith.constant 0 : index
1400   %cf0 = arith.constant 0.0 : f32
1401   %w0 = vector.transfer_write %v0, %arg0[%c0, %c0] {in_bounds = [true, true]} :
1402     vector<4x2xf32>, tensor<4x4xf32>
1403   %0 = vector.transfer_read %w0[%c0, %c0], %cf0 {in_bounds = [true, true, true],
1404   permutation_map = affine_map<(d0, d1) -> (d0, d1, 0)>} :
1405     tensor<4x4xf32>, vector<4x2x6xf32>
1406   return %0 : vector<4x2x6xf32>
1407 }
1408
1409 // -----
1410
1411 // CHECK-LABEL: func @store_to_load_tensor_broadcast_scalable
1412 //  CHECK-SAME: (%[[ARG:.*]]: tensor<?xf32>, %[[V0:.*]]: vector<[4]xf32>)
1413 //       CHECK:   %[[B:.*]] = vector.broadcast %[[V0]] : vector<[4]xf32> to vector<6x[4]xf32>
1414 //       CHECK:   return %[[B]] : vector<6x[4]xf32>
     // Scalable-vector variant of the write-then-broadcast-read case: the read's
     // permutation map puts the broadcast dimension first, and the expected
     // output is a single vector.broadcast of %v0 with no transpose.
1415 func.func @store_to_load_tensor_broadcast_scalable(%arg0 : tensor<?xf32>,
1416   %v0 : vector<[4]xf32>) -> vector<6x[4]xf32> {
1417   %c0 = arith.constant 0 : index
1418   %cf0 = arith.constant 0.0 : f32
1419   %w0 = vector.transfer_write %v0, %arg0[%c0] {in_bounds = [true]} :
1420     vector<[4]xf32>, tensor<?xf32>
1421   %0 = vector.transfer_read %w0[%c0], %cf0 {in_bounds = [true, true],
1422   permutation_map = affine_map<(d0) -> (0, d0)>} :
1423     tensor<?xf32>, vector<6x[4]xf32>
1424   return %0 : vector<6x[4]xf32>
1425 }
1426
1427 // -----
1428
1429 // CHECK-LABEL: func @store_to_load_tensor_perm_broadcast
1430 //  CHECK-SAME: (%[[ARG:.*]]: tensor<4x4x4xf32>, %[[V0:.*]]: vector<4x1xf32>)
1431 //       CHECK:   %[[B:.*]] = vector.broadcast %[[V0]] : vector<4x1xf32> to vector<100x5x4x1xf32>
1432 //       CHECK:   %[[T:.*]] = vector.transpose %[[B]], [3, 0, 2, 1] : vector<100x5x4x1xf32> to vector<1x100x4x5xf32>
1433 //       CHECK:   return %[[T]] : vector<1x100x4x5xf32>
     // Both the write and the read carry a permutation map, and the read map also
     // has two broadcast dimensions. The expected output replaces the pair with a
     // vector.broadcast of %v0 followed by a vector.transpose.
1434 func.func @store_to_load_tensor_perm_broadcast(%arg0 : tensor<4x4x4xf32>,
1435   %v0 : vector<4x1xf32>) -> vector<1x100x4x5xf32> {
1436   %c0 = arith.constant 0 : index
1437   %cf0 = arith.constant 0.0 : f32
1438   %w0 = vector.transfer_write %v0, %arg0[%c0, %c0, %c0] {in_bounds = [true, true],
1439   permutation_map = affine_map<(d0, d1, d2) -> (d2, d1)>} :
1440     vector<4x1xf32>, tensor<4x4x4xf32>
1441   %0 = vector.transfer_read %w0[%c0, %c0, %c0], %cf0 {in_bounds = [true, true, true, true],
1442   permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} :
1443     tensor<4x4x4xf32>, vector<1x100x4x5xf32>
1444   return %0 : vector<1x100x4x5xf32>
1445 }
1446
1447 // -----
1448
1449
1450 // CHECK-LABEL: func @dead_store_tensor
1451 //   CHECK-DAG:      %[[C0:.*]] = arith.constant 0 : index
1452 //   CHECK-DAG:      %[[C1:.*]] = arith.constant 1 : index
1453 //   CHECK-DAG:      %[[C2:.*]] = arith.constant 2 : index
1454 //   CHECK-NOT:   vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]
1455 //       CHECK:   vector.transfer_write {{.*}}, {{.*}}[%[[C2]], %[[C0]]
1456 //       CHECK:   %[[VTW:.*]] = vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]
1457 //       CHECK:   return %[[VTW]] : tensor<4x4xf32>
     // Three chained writes: %w0 at [%c1, %c0], %w1 at [%c2, %c0], and %w2 again
     // at [%c1, %c0], with no read in between. The check lines above expect the
     // first write to be removed, leaving only the [%c2, %c0] write followed by
     // the final [%c1, %c0] write.
1458 func.func @dead_store_tensor(%arg0 : tensor<4x4xf32>,
1459   %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> tensor<4x4xf32> {
1460   %c1 = arith.constant 1 : index
1461   %c2 = arith.constant 2 : index
1462   %c0 = arith.constant 0 : index
1463   %cf0 = arith.constant 0.0 : f32
1464   %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
1465     vector<1x4xf32>, tensor<4x4xf32>
1466   %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = [true, true]} :
1467     vector<1x4xf32>, tensor<4x4xf32>
1468   %w2 = vector.transfer_write %v1, %w1[%c1, %c0] {in_bounds = [true, true]} :
1469     vector<1x4xf32>, tensor<4x4xf32>
1470   return %w2 : tensor<4x4xf32>
1471 }
1472
1473 // -----
1474
1475 // CHECK-LABEL: func @dead_store_tensor_negative
1476 //   CHECK-DAG:      %[[C0:.*]] = arith.constant 0 : index
1477 //   CHECK-DAG:      %[[C1:.*]] = arith.constant 1 : index
1478 //       CHECK:   vector.transfer_write
1479 //       CHECK:   vector.transfer_write
1480 //       CHECK:   vector.transfer_read
1481 //       CHECK:   %[[VTW:.*]] = vector.transfer_write {{.*}}, {{.*}}[%[[C1]], %[[C0]]]
1482 //       CHECK:   return %[[VTW]] : tensor<4x4xf32>
     // Negative test: a read at the dynamic indices [%i, %i] sits between the
     // earlier writes and the final write at [%c1, %c0]. The check lines above
     // expect all three writes and the read to remain (presumably because the
     // read may observe the data stored by the first write).
1483 func.func @dead_store_tensor_negative(%arg0 : tensor<4x4xf32>,
1484   %v0 : vector<1x4xf32>, %v1 : vector<1x4xf32>, %i : index) -> tensor<4x4xf32> {
1485   %c1 = arith.constant 1 : index
1486   %c2 = arith.constant 2 : index
1487   %c0 = arith.constant 0 : index
1488   %cf0 = arith.constant 0.0 : f32
1489   %w0 = vector.transfer_write %v0, %arg0[%c1, %c0] {in_bounds = [true, true]} :
1490     vector<1x4xf32>, tensor<4x4xf32>
1491   %w1 = vector.transfer_write %v0, %w0[%c2, %c0] {in_bounds = [true, true]} :
1492     vector<1x4xf32>, tensor<4x4xf32>
1493   %0 = vector.transfer_read %w1[%i, %i], %cf0 {in_bounds = [true, true]} :
1494     tensor<4x4xf32>, vector<1x4xf32>
1495   %x = arith.addf %0, %0 : vector<1x4xf32>
1496   %w2 = vector.transfer_write %x, %w0[%c1, %c0] {in_bounds = [true, true]} :
1497     vector<1x4xf32>, tensor<4x4xf32>
1498   return %w2 : tensor<4x4xf32>
1499 }
1500
1501 // -----
1502
1503 //       CHECK: #[[$MAP:[0-9a-z]+]] = affine_map<(d0, d1) -> (d1, d0)>
1504
1505 // CHECK-LABEL: func @swap_extract_slice_transfer_write
1506 //  CHECK-SAME:   %[[VEC:.*]]: vector<8x4xf32>
1507 //  CHECK-SAME:   %[[INIT_TENSOR:.*]]: tensor<4x8xf32>,
1508 //  CHECK-SAME:   %[[ITER_ARG:.*]]: tensor<64x64xf32>,
1509 //  CHECK-SAME:   %[[IV:.*]]: index, %[[SZ:.*]]: index)
     // Input order: transfer_write into %arg1, extract_slice of the result, then
     // insert_slice into %arg2. The expected output swaps this around: a slice is
     // first extracted from %arg2, the vector is written into that slice (with
     // the second in_bounds entry now false), and the result is inserted back
     // into %arg2.
1510 func.func @swap_extract_slice_transfer_write(%arg0 : vector<8x4xf32>,
1511                                              %arg1 : tensor<4x8xf32>,
1512                                              %arg2 : tensor<64x64xf32>,
1513                                              %iv : index, %sz : index) -> tensor<64x64xf32> {
1514   //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
1515   %c0 = arith.constant 0 : index
1516
1517   //       CHECK:   %[[T0:.*]] = tensor.extract_slice %[[ITER_ARG]]
1518   //  CHECK-SAME:                 [%[[IV]], 16] [%[[SZ]], 8]
1519   //       CHECK:   %[[T1:.*]] = vector.transfer_write %[[VEC]]
1520   //  CHECK-SAME:                 %[[T0]][%[[C0]], %[[C0]]]
1521   //  CHECK-SAME:                 in_bounds = [true, false]
1522   //  CHECK-SAME:                 permutation_map = #[[$MAP]]
1523   //       CHECK:   %[[T2:.*]] = tensor.insert_slice %[[T1]] into %[[ITER_ARG]]
1524   //  CHECK-SAME:                 [%[[IV]], 16] [%[[SZ]], 8]
1525   %0 = vector.transfer_write %arg0, %arg1[%c0, %c0] {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d1, d0)>} : vector<8x4xf32>, tensor<4x8xf32>
1526   %1 = tensor.extract_slice %0[0, 0] [%sz, 8] [1, 1] : tensor<4x8xf32> to tensor<?x8xf32>
1527   %2 = tensor.insert_slice %1 into %arg2[%iv, 16] [%sz, 8] [1, 1] : tensor<?x8xf32> into tensor<64x64xf32>
1528
1529   //       CHECK:   return %[[T2]]
1530   func.return %2 : tensor<64x64xf32>
1531 }
1532
1533 // -----
1534
1535 // CHECK-LABEL: func @do_not_swap_extract_slice_transfer_write
1536 //  CHECK-SAME:   %[[VEC:.*]]: vector<8xf32>,
1537 //  CHECK-SAME:   %[[VEC_SMALL:.*]]: vector<4xf32>,
1538 //  CHECK-SAME:   %[[INIT_TENSOR:.*]]: tensor<8xf32>,
1539 //  CHECK-SAME:   %[[ITER_ARG:.*]]: tensor<64xf32>,
1540 //  CHECK-SAME:   %[[IV:.*]]: index, %[[SZ:.*]]: index)
     // Negative cases: each of the three transfer_write / extract_slice /
     // insert_slice chains below is expected to keep its original op order.
1541 func.func @do_not_swap_extract_slice_transfer_write(%arg0 : vector<8xf32>,
1542                                                     %arg1 : vector<4xf32>,
1543                                                     %arg2 : tensor<8xf32>,
1544                                                     %arg3 : tensor<64xf32>,
1545                                                     %iv : index, %sz : index) -> (tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) {
1546   //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
1547   %c0 = arith.constant 0 : index
1548
1549   // Don't swap if the extracted and inserted slices do not match.
1550   //       CHECK:   %[[T0:.*]] = vector.transfer_write %[[VEC]]
1551   //       CHECK:   %[[T1:.*]] = tensor.extract_slice %[[T0]]
1552   //       CHECK:   %[[T2:.*]] = tensor.insert_slice %[[T1]]
1553   %0 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = [true]} : vector<8xf32>, tensor<8xf32>
1554   %1 = tensor.extract_slice %0[0] [%iv] [1] : tensor<8xf32> to tensor<?xf32>
1555   %2 = tensor.insert_slice %1 into %arg3[%iv] [%sz] [1] : tensor<?xf32> into tensor<64xf32>
1556
1557   // Don't swap if the TransferWriteOp takes a small vector.
1558   //       CHECK:   %[[T3:.*]] = vector.transfer_write %[[VEC_SMALL]]
1559   //       CHECK:   %[[T4:.*]] = tensor.extract_slice %[[T3]]
1560   //       CHECK:   %[[T5:.*]] = tensor.insert_slice %[[T4]]
1561   %3 = vector.transfer_write %arg1, %arg2[%c0] {in_bounds = [true]} : vector<4xf32>, tensor<8xf32>
1562   %4 = tensor.extract_slice %3[0] [%sz] [1] : tensor<8xf32> to tensor<?xf32>
1563   %5 = tensor.insert_slice %4 into %arg3[%iv] [%sz] [1] : tensor<?xf32> into tensor<64xf32>
1564
1565   // Don't swap if one of the operations is rank-reducing.
1566   //       CHECK:   %[[T6:.*]] = vector.transfer_write %[[VEC]]
1567   //       CHECK:   %[[T7:.*]] = tensor.extract_slice %[[T6]]
1568   //       CHECK:   %[[T8:.*]] = tensor.insert_slice %[[T7]]
1569   %6 = vector.transfer_write %arg0, %arg2[%c0] {in_bounds = [true]} : vector<8xf32>, tensor<8xf32>
1570   %7 = tensor.extract_slice %6[0] [1] [1] : tensor<8xf32> to tensor<f32>
1571   %8 = tensor.insert_slice %7 into %arg3[%iv] [1] [1] : tensor<f32> into tensor<64xf32>
1572
1573   //       CHECK:   return %[[T2]], %[[T5]], %[[T8]]
1574   func.return %2, %5, %8 : tensor<64xf32>, tensor<64xf32>, tensor<64xf32>
1575 }
1576
1577 // -----
1578
1579 // CHECK-LABEL: func @vector_multi_reduction_single_parallel(
1580 //  CHECK-SAME:     %[[v:.*]]: vector<2xf32>,
     // The multi_reduction has an empty reduction-dimension list, so no dimension
     // is reduced. The expected output returns the source operand %arg0 directly.
1581 func.func @vector_multi_reduction_single_parallel(%arg0: vector<2xf32>, %acc: vector<2xf32>) -> vector<2xf32> {
1582     %0 = vector.multi_reduction <mul>, %arg0, %acc [] : vector<2xf32> to vector<2xf32>
1583
1584 //       CHECK:     return %[[v]] : vector<2xf32>
1585     return %0 : vector<2xf32>
1586 }
1587
1588 // -----
1589
1590 // CHECK-LABEL: func @masked_vector_multi_reduction_single_parallel(
1591 //  CHECK-SAME:     %[[VAL_0:.*]]: vector<2xf32>, %{{.*}}: vector<2xf32>,
     // Masked variant of the empty-reduction-list case: the multi_reduction is
     // wrapped in vector.mask, and the expected output still returns the source
     // operand %arg0 directly.
1592 func.func @masked_vector_multi_reduction_single_parallel(%arg0: vector<2xf32>, %acc: vector<2xf32>, %mask: vector<2xi1>) -> vector<2xf32> {
1593     %0 = vector.mask %mask { vector.multi_reduction <mul>, %arg0, %acc [] : vector<2xf32> to vector<2xf32> } : vector<2xi1> -> vector<2xf32>
1594 //       CHECK:   return %[[VAL_0]] : vector<2xf32>
1595     return %0 : vector<2xf32>
1596 }
1597
1598 // -----
1599
1600 // CHECK-LABEL: func @vector_multi_reduction_unit_dimensions(
1601 //  CHECK-SAME: %[[SOURCE:.+]]: vector<5x1x4x1x20xf32>, %[[ACC:.+]]: vector<5x4x20xf32>
     // The reduced dimensions [1, 3] both have size 1. The expected output drops
     // them with a vector.shape_cast and combines the result with the accumulator
     // using arith.mulf.
1602 func.func @vector_multi_reduction_unit_dimensions(%source: vector<5x1x4x1x20xf32>, %acc: vector<5x4x20xf32>) -> vector<5x4x20xf32> {
1603 //       CHECK:   %[[CAST:.+]] = vector.shape_cast  %[[SOURCE]] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
1604 //       CHECK:   %[[RESULT:.+]] = arith.mulf  %[[ACC]], %[[CAST]] : vector<5x4x20xf32>
1605     %0 = vector.multi_reduction <mul>, %source, %acc [1, 3] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
1606
1607 //       CHECK:     return %[[RESULT]] : vector<5x4x20xf32>
1608     return %0 : vector<5x4x20xf32>
1609 }
1610
1611 // -----
1612 // CHECK-LABEL:   func.func @vector_multi_reduction_scalable(
1613 // CHECK-SAME:     %[[VAL_0:.*]]: vector<1x[4]x1xf32>,
1614 // CHECK-SAME:     %[[VAL_1:.*]]: vector<1x[4]xf32>,
1615 // CHECK-SAME:     %[[VAL_2:.*]]: vector<1x[4]x1xi1>)
     // Masked <add> reduction over the trailing unit dimension of a vector that
     // also has a scalable dimension. The expected output shape_casts the mask
     // and the source, adds the accumulator with arith.addf, and selects between
     // the sum and the cast source using the cast mask.
1616 func.func @vector_multi_reduction_scalable(%source: vector<1x[4]x1xf32>,
1617                                            %acc: vector<1x[4]xf32>,
1618                                            %mask: vector<1x[4]x1xi1>) -> vector<1x[4]xf32> {
1619 // CHECK:           %[[VAL_3:.*]] = vector.shape_cast %[[VAL_2]] : vector<1x[4]x1xi1> to vector<1x[4]xi1>
1620 // CHECK:           %[[VAL_4:.*]] = vector.shape_cast %[[VAL_0]] : vector<1x[4]x1xf32> to vector<1x[4]xf32>
1621 // CHECK:           %[[VAL_5:.*]] = arith.addf %[[VAL_1]], %[[VAL_4]] : vector<1x[4]xf32>
1622 // CHECK:           %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : vector<1x[4]xi1>, vector<1x[4]xf32>
1623     %0 = vector.mask %mask { vector.multi_reduction <add>, %source, %acc [2] : vector<1x[4]x1xf32> to vector<1x[4]xf32> } :
1624           vector<1x[4]x1xi1> -> vector<1x[4]xf32>
1625
1626     return %0 : vector<1x[4]xf32>
1627 }
1628
1629 // -----
1630
1631 // CHECK-LABEL: func @masked_vector_multi_reduction_unit_dimensions
1632 //  CHECK-SAME: %[[VAL_0:.*]]: vector<5x1x4x1x20xf32>, %[[VAL_1:.*]]: vector<5x4x20xf32>,
1633 //  CHECK-SAME: %[[VAL_2:.*]]: vector<5x1x4x1x20xi1>)
     // Masked variant of the unit-dimension reduction: the expected output
     // shape_casts both the mask and the source, multiplies with the accumulator,
     // and uses arith.select on the cast mask to pick between the product and the
     // cast source.
1634 func.func @masked_vector_multi_reduction_unit_dimensions(%source: vector<5x1x4x1x20xf32>,
1635                                                          %acc: vector<5x4x20xf32>,
1636                                                          %mask: vector<5x1x4x1x20xi1>) -> vector<5x4x20xf32> {
1637 //       CHECK:   %[[VAL_3:.*]] = vector.shape_cast %[[VAL_2]] : vector<5x1x4x1x20xi1> to vector<5x4x20xi1>
1638 //       CHECK:   %[[VAL_4:.*]] = vector.shape_cast %[[VAL_0]] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32>
1639 //       CHECK:   %[[VAL_5:.*]] = arith.mulf %[[VAL_1]], %[[VAL_4]] : vector<5x4x20xf32>
1640 //       CHECK:   %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : vector<5x4x20xi1>, vector<5x4x20xf32>
1641 %0 = vector.mask %mask { vector.multi_reduction <mul>, %source, %acc [1, 3] : vector<5x1x4x1x20xf32> to vector<5x4x20xf32> } :
1642            vector<5x1x4x1x20xi1> -> vector<5x4x20xf32>
1643     return %0 : vector<5x4x20xf32>
1644 }
1645
1646 // -----
1647
1648 // CHECK-LABEL: func @vector_multi_reduction_unit_dimensions_fail(
1649 //  CHECK-SAME: %[[SRC:.+]]: vector<5x1x4x1x20xf32>, %[[ACCUM:.+]]: vector<5x1x20xf32>
     // Negative test: the reduced dimensions [1, 2] include dimension 2, which
     // has size 4 rather than 1. The expected output keeps the multi_reduction
     // unchanged.
1650 func.func @vector_multi_reduction_unit_dimensions_fail(%source: vector<5x1x4x1x20xf32>, %acc: vector<5x1x20xf32>) -> vector<5x1x20xf32> {
1651 //       CHECK:   %[[RES:.+]] = vector.multi_reduction  <mul>, %[[SRC]], %[[ACCUM]] [1, 2] : vector<5x1x4x1x20xf32> to vector<5x1x20xf32>
1652     %0 = vector.multi_reduction <mul>, %source, %acc [1, 2] : vector<5x1x4x1x20xf32> to vector<5x1x20xf32>
1653
1654 //       CHECK:     return %[[RES]] : vector<5x1x20xf32>
1655     return %0 : vector<5x1x20xf32>
1656 }
1657
1658 // -----
1659
1660 // CHECK-LABEL: func @vector_multi_reduction_unit_dimensions_single_elem(
1661 //  CHECK-SAME: %[[SOURCE:.+]]: vector<1x1x1xf32>, %[[ACC:.+]]: f32
     // All three dimensions have size 1 and all are reduced, so the result is a
     // scalar. The expected output extracts the single element and multiplies it
     // with the scalar accumulator.
1662 func.func @vector_multi_reduction_unit_dimensions_single_elem(%source: vector<1x1x1xf32>, %acc: f32) -> f32 {
1663 //       CHECK:   %[[CAST:.+]] = vector.extract  %[[SOURCE]][0, 0, 0] : f32 from vector<1x1x1xf32>
1664 //       CHECK:   %[[RESULT:.+]] = arith.mulf  %[[ACC]], %[[CAST]] : f32
1665     %0 = vector.multi_reduction <mul>, %source, %acc [0,1,2] : vector<1x1x1xf32> to f32
1666
1667 //       CHECK:     return %[[RESULT]] : f32
1668     return %0 : f32
1669 }
1670
1671 // -----
1672
1673 // CHECK-LABEL: func @masked_vector_multi_reduction_unit_dimensions_single_elem(
1674 //  CHECK-SAME: %[[VAL_0:.*]]: vector<1x1x1xf32>, %[[VAL_1:.*]]: f32,
1675 //  CHECK-SAME: %[[VAL_2:.*]]: vector<1x1x1xi1>)
     // Masked variant of the single-element reduction: the expected output
     // extracts the single mask bit and the single source element, multiplies
     // with the accumulator, and selects between the product and the extracted
     // source element.
1676 func.func @masked_vector_multi_reduction_unit_dimensions_single_elem(%source: vector<1x1x1xf32>, %acc: f32, %mask: vector<1x1x1xi1>) -> f32 {
1677       // CHECK:           %[[VAL_3:.*]] = vector.extract %[[VAL_2]][0, 0, 0] : i1 from vector<1x1x1xi1>
1678       // CHECK:           %[[VAL_4:.*]] = vector.extract %[[VAL_0]][0, 0, 0] : f32 from vector<1x1x1xf32>
1679       // CHECK:           %[[VAL_5:.*]] = arith.mulf %[[VAL_1]], %[[VAL_4]] : f32
1680       // CHECK:           %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_4]] : f32
1681   %0 = vector.mask %mask { vector.multi_reduction <mul>, %source, %acc [0,1,2] : vector<1x1x1xf32> to f32 } : vector<1x1x1xi1> -> f32
1682     return %0 : f32
1683 }
1684
1685 // -----
1686
1687 // CHECK-LABEL: func @insert_strided_slice_full_range
1688 //  CHECK-SAME: %[[SOURCE:.+]]: vector<16x16xf16>, %{{.+}}: vector<16x16xf16>
     // The inserted vector has the same shape as the destination and is placed at
     // offsets [0, 0], so it covers the whole destination. The expected output
     // returns %source directly.
1689 func.func @insert_strided_slice_full_range(%source: vector<16x16xf16>, %dest: vector<16x16xf16>) -> vector<16x16xf16> {
1690   %0 = vector.insert_strided_slice %source, %dest {offsets = [0, 0], strides = [1, 1]} : vector<16x16xf16> into vector<16x16xf16>
1691   // CHECK: return %[[SOURCE]]
1692   return %0: vector<16x16xf16>
1693 }
1694
1695 // -----
1696
1697 // CHECK-LABEL: extract_strided_splat
1698 //       CHECK:   %[[B:.*]] = vector.splat %{{.*}} : vector<2x4xf16>
1699 //  CHECK-NEXT:   return %[[B]] : vector<2x4xf16>
     // A strided slice taken from a splat is expected to become a splat of the
     // slice's result type, with no extract_strided_slice left.
1700 func.func @extract_strided_splat(%arg0: f16) -> vector<2x4xf16> {
1701  %0 = vector.splat %arg0 : vector<16x4xf16>
1702  %1 = vector.extract_strided_slice %0
1703   {offsets = [1, 0], sizes = [2, 4], strides = [1, 1]} :
1704   vector<16x4xf16> to vector<2x4xf16>
1705   return %1 : vector<2x4xf16>
1706 }
1707
1708 // -----
1709
1710 // CHECK-LABEL: func @insert_extract_to_broadcast
1711 //  CHECK-SAME: (%[[ARG0:.*]]: vector<1x1x4xf32>, %[[ARG1:.*]]: vector<4xf32>)
1712 //       CHECK:   %[[V0:.*]] = vector.extract %[[ARG0]][0, 0] : vector<4xf32> from vector<1x1x4xf32>
1713 //       CHECK:   %[[V1:.*]] = vector.broadcast %[[ARG1]] : vector<4xf32> to vector<1x1x4xf32>
1714 //       CHECK:   return %[[V0]], %[[V1]] : vector<4xf32>, vector<1x1x4xf32>
     // The vector.insert fills the only [0, 0] position of a 1x1x4 vector, and the
     // expected output rewrites it to a vector.broadcast of %arg1. The
     // vector.extract is expected to stay as an extract.
1715 func.func @insert_extract_to_broadcast(%arg0 : vector<1x1x4xf32>,
1716   %arg1 : vector<4xf32>) -> (vector<4xf32>, vector<1x1x4xf32>) {
1717   %0 = vector.extract %arg0[0, 0] : vector<4xf32> from vector<1x1x4xf32>
1718   %1 = vector.insert %arg1, %arg0 [0, 0] : vector<4xf32> into vector<1x1x4xf32>
1719   return %0, %1 : vector<4xf32>, vector<1x1x4xf32>
1720 }
1721
1722 // -----
1723
1724 // CHECK-LABEL: func.func @extract_splat_constant
1725 //   CHECK-DAG:   %[[CST1:.*]] = arith.constant 1 : i32
1726 //   CHECK-DAG:   %[[CST0:.*]] = arith.constant dense<2.000000e+00> : vector<7xf32>
1727 //  CHECK-NEXT:   return %[[CST0]], %[[CST1]] : vector<7xf32>, i32
     // Extracting from splat constants is expected to fold to constants: a
     // smaller splat for the sub-vector extract, a scalar for the element extract.
1728 func.func @extract_splat_constant() -> (vector<7xf32>, i32) {
1729   %cst = arith.constant dense<2.000000e+00> : vector<29x7xf32>
1730   %cst_1 = arith.constant dense<1> : vector<4x37x9xi32>
1731   %0 = vector.extract %cst[2] : vector<7xf32> from vector<29x7xf32>
1732   %1 = vector.extract %cst_1[1, 4, 5] : i32 from vector<4x37x9xi32>
1733   return %0, %1 : vector<7xf32>, i32
1734 }
1735
1736 // -----
1737
1738 // CHECK-LABEL: func.func @extract_1d_constant
1739 //   CHECK-DAG: %[[I32CST:.*]] = arith.constant 3 : i32
1740 //   CHECK-DAG: %[[IDXCST:.*]] = arith.constant 1 : index
1741 //   CHECK-DAG: %[[F32CST:.*]] = arith.constant 2.000000e+00 : f32
1742 //  CHECK-NEXT: return %[[I32CST]], %[[IDXCST]], %[[F32CST]] : i32, index, f32
     // Element extracts from non-splat 1-D dense constants (i32, index and f32
     // element types) are expected to fold to the corresponding scalar constants.
1743 func.func @extract_1d_constant() -> (i32, index, f32) {
1744   %icst = arith.constant dense<[1, 2, 3, 4]> : vector<4xi32>
1745   %e = vector.extract %icst[2] : i32 from vector<4xi32>
1746   %idx_cst = arith.constant dense<[0, 1, 2]> : vector<3xindex>
1747   %f = vector.extract %idx_cst[1] : index from vector<3xindex>
1748   %fcst = arith.constant dense<[2.000000e+00, 3.000000e+00, 4.000000e+00]> : vector<3xf32>
1749   %g = vector.extract %fcst[0] : f32 from vector<3xf32>
1750   return %e, %f, %g : i32, index, f32
1751 }
1752
1753 // -----
1754
1755 // CHECK-LABEL: func.func @extract_2d_constant
1756 //   CHECK-DAG: %[[ACST:.*]] = arith.constant 0 : i32
1757 //   CHECK-DAG: %[[BCST:.*]] = arith.constant 2 : i32
1758 //   CHECK-DAG: %[[CCST:.*]] = arith.constant 3 : i32
1759 //   CHECK-DAG: %[[DCST:.*]] = arith.constant 5 : i32
1760 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : i32, i32, i32, i32
     // Element extracts at [0, 0], [0, 2], [1, 0] and [1, 2] from a 2x3 dense
     // constant are expected to fold to the scalar constants 0, 2, 3 and 5.
1761 func.func @extract_2d_constant() -> (i32, i32, i32, i32) {
1762   %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
1763   %a = vector.extract %cst[0, 0] : i32 from vector<2x3xi32>
1764   %b = vector.extract %cst[0, 2] : i32 from vector<2x3xi32>
1765   %c = vector.extract %cst[1, 0] : i32 from vector<2x3xi32>
1766   %d = vector.extract %cst[1, 2] : i32 from vector<2x3xi32>
1767   return %a, %b, %c, %d : i32, i32, i32, i32
1768 }
1769
1770 // -----
1771
1772 // CHECK-LABEL: func.func @extract_vector_2d_constant
1773 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1774 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[3, 4, 5]> : vector<3xi32>
1775 //  CHECK-NEXT: return %[[ACST]], %[[BCST]] : vector<3xi32>, vector<3xi32>
     // Extracting each row of a 2x3 dense constant is expected to fold to a
     // 1-D dense constant holding that row.
1776 func.func @extract_vector_2d_constant() -> (vector<3xi32>, vector<3xi32>) {
1777   %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
1778   %a = vector.extract %cst[0] : vector<3xi32> from vector<2x3xi32>
1779   %b = vector.extract %cst[1] : vector<3xi32> from vector<2x3xi32>
1780   return %a, %b : vector<3xi32>, vector<3xi32>
1781 }
1782
1783 // -----
1784
1785 // CHECK-LABEL: func.func @extract_3d_constant
1786 //   CHECK-DAG: %[[ACST:.*]] = arith.constant 0 : i32
1787 //   CHECK-DAG: %[[BCST:.*]] = arith.constant 1 : i32
1788 //   CHECK-DAG: %[[CCST:.*]] = arith.constant 9 : i32
1789 //   CHECK-DAG: %[[DCST:.*]] = arith.constant 10 : i32
1790 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : i32, i32, i32, i32
     // Element extracts at [0, 0, 0], [0, 0, 1], [1, 1, 1] and [1, 2, 0] from a
     // 2x3x2 dense constant are expected to fold to the scalars 0, 1, 9 and 10.
1791 func.func @extract_3d_constant() -> (i32, i32, i32, i32) {
1792   %cst = arith.constant dense<[[[0, 1], [2, 3], [4, 5]], [[6, 7], [8, 9], [10, 11]]]> : vector<2x3x2xi32>
1793   %a = vector.extract %cst[0, 0, 0] : i32 from vector<2x3x2xi32>
1794   %b = vector.extract %cst[0, 0, 1] : i32 from vector<2x3x2xi32>
1795   %c = vector.extract %cst[1, 1, 1] : i32 from vector<2x3x2xi32>
1796   %d = vector.extract %cst[1, 2, 0] : i32 from vector<2x3x2xi32>
1797   return %a, %b, %c, %d : i32, i32, i32, i32
1798 }
1799
1800 // -----
1801
1802 // CHECK-LABEL: func.func @extract_vector_3d_constant
1803 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
1804 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[6, 7\], \[8, 9\], \[10, 11\]\]}}> : vector<3x2xi32>
1805 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[8, 9]> : vector<2xi32>
1806 //   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<[10, 11]> : vector<2xi32>
1807 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]] : vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>
     // Sub-vector extracts (one and two leading indices) from a 2x3x2 dense
     // constant are expected to fold to dense constants of the extracted shape.
1808 func.func @extract_vector_3d_constant() -> (vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>) {
1809   %cst = arith.constant dense<[[[0, 1], [2, 3], [4, 5]], [[6, 7], [8, 9], [10, 11]]]> : vector<2x3x2xi32>
1810   %a = vector.extract %cst[0] : vector<3x2xi32> from vector<2x3x2xi32>
1811   %b = vector.extract %cst[1] : vector<3x2xi32> from vector<2x3x2xi32>
1812   %c = vector.extract %cst[1, 1] : vector<2xi32> from vector<2x3x2xi32>
1813   %d = vector.extract %cst[1, 2] : vector<2xi32> from vector<2x3x2xi32>
1814   return %a, %b, %c, %d : vector<3x2xi32>, vector<3x2xi32>, vector<2xi32>, vector<2xi32>
1815 }
1816
1817 // -----
1818
1819 // CHECK-LABEL: func.func @extract_splat_vector_3d_constant
1820 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<2xi32>
1821 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<4> : vector<2xi32>
1822 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<5> : vector<2xi32>
1823 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<2xi32>, vector<2xi32>, vector<2xi32>
     // The source constant is not a splat, but every extracted innermost vector
     // holds a single repeated value. The expected folded constants are printed
     // in splat form.
1824 func.func @extract_splat_vector_3d_constant() -> (vector<2xi32>, vector<2xi32>, vector<2xi32>) {
1825   %cst = arith.constant dense<[[[0, 0], [1, 1], [2, 2]], [[3, 3], [4, 4], [5, 5]]]> : vector<2x3x2xi32>
1826   %a = vector.extract %cst[0, 0] : vector<2xi32> from vector<2x3x2xi32>
1827   %b = vector.extract %cst[1, 1] : vector<2xi32> from vector<2x3x2xi32>
1828   %c = vector.extract %cst[1, 2] : vector<2xi32> from vector<2x3x2xi32>
1829   return %a, %b, %c : vector<2xi32>, vector<2xi32>, vector<2xi32>
1830 }
1831
1832 // -----
1833
1834 // CHECK-LABEL: func.func @extract_strided_slice_1d_constant
1835 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1836 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[1, 2]> : vector<2xi32>
1837 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<2> : vector<1xi32>
1838 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<3xi32>, vector<2xi32>, vector<1xi32>
     // Strided slices of a 1-D dense constant (the full range, the last two
     // elements, and the last element) are expected to fold to dense constants.
1839 func.func @extract_strided_slice_1d_constant() -> (vector<3xi32>, vector<2xi32>, vector<1xi32>) {
1840   %cst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
1841   %a = vector.extract_strided_slice %cst
1842    {offsets = [0], sizes = [3], strides = [1]} : vector<3xi32> to vector<3xi32>
1843   %b = vector.extract_strided_slice %cst
1844    {offsets = [1], sizes = [2], strides = [1]} : vector<3xi32> to vector<2xi32>
1845   %c = vector.extract_strided_slice %cst
1846    {offsets = [2], sizes = [1], strides = [1]} : vector<3xi32> to vector<1xi32>
1847   return %a, %b, %c : vector<3xi32>, vector<2xi32>, vector<1xi32>
1848 }
1849
1850 // -----
1851
1852 // CHECK-LABEL: func.func @extract_strided_slice_2d_constant
1853 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<1x1xi32>
1854 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[4, 5\]\]}}> : vector<1x2xi32>
1855 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[1, 2\], \[4, 5\]\]}}> : vector<2x2xi32>
1856 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>
     // Strided slices of a 2x3 dense constant (a single element, part of one row,
     // and a 2x2 window) are expected to fold to dense constants.
1857 func.func @extract_strided_slice_2d_constant() -> (vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>) {
1858   %cst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
1859   %a = vector.extract_strided_slice %cst
1860    {offsets = [0, 0], sizes = [1, 1], strides = [1, 1]} : vector<2x3xi32> to vector<1x1xi32>
1861   %b = vector.extract_strided_slice %cst
1862    {offsets = [1, 1], sizes = [1, 2], strides = [1, 1]} : vector<2x3xi32> to vector<1x2xi32>
1863   %c = vector.extract_strided_slice %cst
1864    {offsets = [0, 1], sizes = [2, 2], strides = [1, 1]} : vector<2x3xi32> to vector<2x2xi32>
1865   return %a, %b, %c : vector<1x1xi32>, vector<1x2xi32>, vector<2x2xi32>
1866 }
1867
1868 // -----
1869
1870 // CHECK-LABEL: func.func @extract_strided_slice_3d_constant
1871 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[\[8, 9\], \[10, 11\]\]\]}}> : vector<1x2x2xi32>
1872 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[\[2, 3\]\]\]}}> : vector<1x1x2xi32>
1873 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[\[6, 7\]\], \[\[10, 11\]\]\]}}> : vector<2x1x2xi32>
1874 //   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<11> : vector<1x1x1xi32>
1875 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]]
     // Strided slices of a 3x2x2 dense constant are expected to fold to dense
     // constants. %a and %b give offsets/sizes for fewer dimensions than the
     // vector rank; the result types show the trailing dimensions kept whole.
1876 func.func @extract_strided_slice_3d_constant() -> (vector<1x2x2xi32>, vector<1x1x2xi32>, vector<2x1x2xi32>, vector<1x1x1xi32>) {
1877   %cst = arith.constant dense<[[[0, 1], [2, 3]], [[4, 5], [6, 7]], [[8, 9], [10, 11]]]> : vector<3x2x2xi32>
1878   %a = vector.extract_strided_slice %cst
1879    {offsets = [2], sizes = [1], strides = [1]} : vector<3x2x2xi32> to vector<1x2x2xi32>
1880   %b = vector.extract_strided_slice %cst
1881    {offsets = [0, 1], sizes = [1, 1], strides = [1, 1]} : vector<3x2x2xi32> to vector<1x1x2xi32>
1882   %c = vector.extract_strided_slice %cst
1883    {offsets = [1, 1, 0], sizes = [2, 1, 2], strides = [1, 1, 1]} : vector<3x2x2xi32> to vector<2x1x2xi32>
1884   %d = vector.extract_strided_slice %cst
1885    {offsets = [2, 1, 1], sizes = [1, 1, 1], strides = [1, 1, 1]} : vector<3x2x2xi32> to vector<1x1x1xi32>
1886   return %a, %b, %c, %d : vector<1x2x2xi32>, vector<1x1x2xi32>, vector<2x1x2xi32>, vector<1x1x1xi32>
1887 }
1888
1889 // -----
1890
1891 // CHECK-LABEL: extract_extract_strided
1892 //  CHECK-SAME: %[[A:.*]]: vector<32x16x4xf16>
1893 //       CHECK: %[[V:.*]] = vector.extract %[[A]][9, 7] : vector<4xf16> from vector<32x16x4xf16>
1894 //       CHECK: return %[[V]] : vector<4xf16>
     // An extract at [2, 4] from a strided slice taken at offsets [7, 3] is
     // expected to become a single extract from the original vector at
     // [7 + 2, 3 + 4] = [9, 7].
1895 func.func @extract_extract_strided(%arg0: vector<32x16x4xf16>) -> vector<4xf16> {
1896  %1 = vector.extract_strided_slice %arg0
1897   {offsets = [7, 3], sizes = [10, 8], strides = [1, 1]} :
1898   vector<32x16x4xf16> to vector<10x8x4xf16>
1899   %2 = vector.extract %1[2, 4] : vector<4xf16> from vector<10x8x4xf16>
1900   return %2 : vector<4xf16>
1901 }
1902
1903 // -----
1904
1905 // CHECK-LABEL: extract_insert_strided
1906 //  CHECK-SAME: %[[A:.*]]: vector<6x4xf32>
1907 //       CHECK: %[[V:.*]] = vector.extract %[[A]][0, 2] : f32 from vector<6x4xf32>
1908 //       CHECK: return %[[V]] : f32
     // %a is inserted into %b at offsets [2, 2]; extracting element [2, 4] of the
     // result is expected to become an extract from %a at [2 - 2, 4 - 2] = [0, 2].
1909 func.func @extract_insert_strided(%a: vector<6x4xf32>, %b: vector<8x16xf32>)
1910   -> f32 {
1911   %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]}
1912     : vector<6x4xf32> into vector<8x16xf32>
1913   %2 = vector.extract %0[2, 4] : f32 from vector<8x16xf32>
1914   return %2 : f32
1915 }
1916
1917 // -----
1918
1919 // CHECK-LABEL: extract_insert_rank_reduce
1920 //  CHECK-SAME: %[[A:.*]]: vector<4xf32>
1921 //       CHECK: %[[V:.*]] = vector.extract %[[A]][2] : f32 from vector<4xf32>
1922 //       CHECK: return %[[V]] : f32
     // Same as the 2-D insert case, but the inserted vector %a is 1-D while the
     // destination is 2-D. Extracting [2, 4] is expected to become an extract
     // from %a at [4 - 2] = [2].
1923 func.func @extract_insert_rank_reduce(%a: vector<4xf32>, %b: vector<8x16xf32>)
1924   -> f32 {
1925   %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1]}
1926     : vector<4xf32> into vector<8x16xf32>
1927   %2 = vector.extract %0[2, 4] : f32 from vector<8x16xf32>
1928   return %2 : f32
1929 }
1930
1931 // -----
1932
1933 // CHECK-LABEL: extract_insert_negative
1934 //       CHECK: vector.insert_strided_slice
1935 //       CHECK: vector.extract
     // Negative test: the inserted vector is 15 elements wide in the innermost
     // dimension, while the extract reads a full 16-element row. The expected
     // output keeps both ops (presumably because the extracted row is only
     // partially covered by the inserted data).
1936 func.func @extract_insert_negative(%a: vector<2x15xf32>, %b: vector<12x8x16xf32>)
1937   -> vector<16xf32> {
1938   %0 = vector.insert_strided_slice %a, %b {offsets = [4, 2, 0], strides = [1, 1]}
1939     : vector<2x15xf32> into vector<12x8x16xf32>
1940   %2 = vector.extract %0[4, 2] : vector<16xf32> from vector<12x8x16xf32>
1941   return %2 : vector<16xf32>
1942 }
1943
1944 // -----
1945
1946 // CHECK-LABEL: extract_insert_chain
1947 //  CHECK-SAME: (%[[A:.*]]: vector<2x16xf32>, %[[B:.*]]: vector<12x8x16xf32>, %[[C:.*]]: vector<2x16xf32>)
1948 //       CHECK: %[[V:.*]] = vector.extract %[[C]][0] : vector<16xf32> from vector<2x16xf32>
1949 //       CHECK: return %[[V]] : vector<16xf32>
     // Two chained inserts: %c at offsets [4, 2, 0], then %a at offsets [0, 2, 0].
     // The extract at [4, 2] is expected to be resolved through the later insert
     // of %a to the earlier insert of %c, becoming an extract of %c at [0].
1950 func.func @extract_insert_chain(%a: vector<2x16xf32>, %b: vector<12x8x16xf32>, %c: vector<2x16xf32>)
1951   -> vector<16xf32> {
1952   %0 = vector.insert_strided_slice %c, %b {offsets = [4, 2, 0], strides = [1, 1]}
1953     : vector<2x16xf32> into vector<12x8x16xf32>
1954   %1 = vector.insert_strided_slice %a, %0 {offsets = [0, 2, 0], strides = [1, 1]}
1955     : vector<2x16xf32> into vector<12x8x16xf32>
1956   %2 = vector.extract %1[4, 2] : vector<16xf32> from vector<12x8x16xf32>
1957   return %2 : vector<16xf32>
1958 }
1959
1960 // -----
1961
1962 // CHECK-LABEL: extract_from_extract_chain_should_not_fold_dynamic_extracts
1963 //  CHECK-SAME: (%[[VEC:.*]]: vector<2x4xf32>, %[[IDX:.*]]: index)
1964 //       CHECK: %[[A:.*]] = vector.extract %[[VEC]][%[[IDX]]] : vector<4xf32> from vector<2x4xf32>
1965 //       CHECK: %[[B:.*]] = vector.extract %[[A]][1] : f32 from vector<4xf32>
     // Negative test: the first extract uses a dynamic index (%index). The
     // expected output keeps the two extracts separate instead of merging them.
1966 func.func @extract_from_extract_chain_should_not_fold_dynamic_extracts(%v: vector<2x4xf32>, %index: index) -> f32 {
1967   %0 = vector.extract %v[%index] : vector<4xf32> from vector<2x4xf32>
1968   %1 = vector.extract %0[1] : f32 from vector<4xf32>
1969   return %1 : f32
1970 }
1971
1972 // -----
1973
1974 // CHECK-LABEL: extract_extract_strided2
1975 //  CHECK-SAME: %[[A:.*]]: vector<2x4xf32>
1976 //       CHECK: %[[V:.*]] = vector.extract %[[A]][1] : vector<4xf32> from vector<2x4xf32>
1977 //       CHECK: return %[[V]] : vector<4xf32>
     // The strided slice selects row 1 of %A as a 1x4 vector, and the extract
     // then takes row 0 of that slice. The expected output is a single extract
     // of %A at [1].
1978 func.func @extract_extract_strided2(%A: vector<2x4xf32>)
1979   -> (vector<4xf32>) {
1980  %0 = vector.extract_strided_slice %A {offsets = [1, 0], sizes = [1, 4], strides = [1, 1]} : vector<2x4xf32> to vector<1x4xf32>
1981  %1 = vector.extract %0[0] : vector<4xf32> from vector<1x4xf32>
1982  return %1 : vector<4xf32>
1983 }
1984
1985 // -----
1986
1987 // CHECK-LABEL: func @splat_fold
1988 func.func @splat_fold() -> vector<4xf32> {
1989   %c = arith.constant 1.0 : f32
1990   %v = vector.splat %c : vector<4xf32>
1991   return %v : vector<4xf32>
1992
1993   // CHECK-NEXT: [[V:%.*]] = arith.constant dense<1.000000e+00> : vector<4xf32>
1994   // CHECK-NEXT: return [[V]] : vector<4xf32>
1995 }
1996
1997 // -----
1998
1999 // CHECK-LABEL: func @shuffle_1d
2000 //       CHECK:   %[[V:.+]] = arith.constant dense<[3, 2, 5, 1]> : vector<4xi32>
2001 //       CHECK:   return %[[V]]
2002 func.func @shuffle_1d() -> vector<4xi32> {
2003   %v0 = arith.constant dense<[0, 1, 2]> : vector<3xi32>
2004   %v1 = arith.constant dense<[3, 4, 5]> : vector<3xi32>
2005   %shuffle = vector.shuffle %v0, %v1 [3, 2, 5, 1] : vector<3xi32>, vector<3xi32>
2006   return %shuffle : vector<4xi32>
2007 }
2008
2009 // -----
2010
2011 // Check that poison indices pick the first element of the first non-poison
2012 // input vector. That is, %v0[0] (i.e., 5) in this test.
2013
2014 // CHECK-LABEL: func @shuffle_1d_poison_idx
2015 //       CHECK:   %[[V:.+]] = arith.constant dense<[2, 5, 0, 5]> : vector<4xi32>
2016 //       CHECK:   return %[[V]]
2017 func.func @shuffle_1d_poison_idx() -> vector<4xi32> {
2018   %v0 = arith.constant dense<[5, 4, 3]> : vector<3xi32>
2019   %v1 = arith.constant dense<[2, 1, 0]> : vector<3xi32>
2020   %shuffle = vector.shuffle %v0, %v1 [3, -1, 5, -1] : vector<3xi32>, vector<3xi32>
2021   return %shuffle : vector<4xi32>
2022 }
2023
2024 // -----
2025
2026 // CHECK-LABEL: func @shuffle_canonicalize_0d
2027 func.func @shuffle_canonicalize_0d(%v0 : vector<i32>, %v1 : vector<i32>) -> vector<1xi32> {
2028   // CHECK: vector.broadcast %{{.*}} : vector<i32> to vector<1xi32>
2029   %shuffle = vector.shuffle %v0, %v1 [0] : vector<i32>, vector<i32>
2030   return %shuffle : vector<1xi32>
2031 }
2032
2033 // -----
2034
2035 // CHECK-LABEL: func @shuffle_fold1
2036 //       CHECK:   return %arg0 : vector<4xi32>
2037 func.func @shuffle_fold1(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<4xi32> {
2038   %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3] : vector<4xi32>, vector<2xi32>
2039   return %shuffle : vector<4xi32>
2040 }
2041
2042 // -----
2043
2044 // CHECK-LABEL: func @shuffle_fold2
2045 //       CHECK:   return %arg1 : vector<2xi32>
2046 func.func @shuffle_fold2(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<2xi32> {
2047   %shuffle = vector.shuffle %v0, %v1 [4, 5] : vector<4xi32>, vector<2xi32>
2048   return %shuffle : vector<2xi32>
2049 }
2050
2051 // -----
2052
2053 // CHECK-LABEL: func @shuffle_fold3
2054 //       CHECK:   return %arg0 : vector<4x5x6xi32>
2055 func.func @shuffle_fold3(%v0 : vector<4x5x6xi32>, %v1 : vector<2x5x6xi32>) -> vector<4x5x6xi32> {
2056   %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3] : vector<4x5x6xi32>, vector<2x5x6xi32>
2057   return %shuffle : vector<4x5x6xi32>
2058 }
2059
2060 // -----
2061
2062 // CHECK-LABEL: func @shuffle_fold4
2063 //       CHECK:   return %arg1 : vector<2x5x6xi32>
2064 func.func @shuffle_fold4(%v0 : vector<4x5x6xi32>, %v1 : vector<2x5x6xi32>) -> vector<2x5x6xi32> {
2065   %shuffle = vector.shuffle %v0, %v1 [4, 5] : vector<4x5x6xi32>, vector<2x5x6xi32>
2066   return %shuffle : vector<2x5x6xi32>
2067 }
2068
2069 // -----
2070
2071 // CHECK-LABEL: func @shuffle_nofold1
2072 //       CHECK:   %[[V:.+]] = vector.shuffle %arg0, %arg1 [0, 1, 2, 3, 4] : vector<4xi32>, vector<2xi32>
2073 //       CHECK:   return %[[V]]
2074 func.func @shuffle_nofold1(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<5xi32> {
2075   %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3, 4] : vector<4xi32>, vector<2xi32>
2076   return %shuffle : vector<5xi32>
2077 }
2078
2079 // -----
2080
2081 // CHECK-LABEL: func @transpose_scalar_broadcast1
2082 //  CHECK-SAME: (%[[ARG:.+]]: vector<1xf32>)
2083 //       CHECK:   %[[V:.+]] = vector.broadcast %[[ARG]] : vector<1xf32> to vector<1x8xf32>
2084 //       CHECK:   return %[[V]] : vector<1x8xf32>
2085 func.func @transpose_scalar_broadcast1(%value: vector<1xf32>) -> vector<1x8xf32> {
2086   %bcast = vector.broadcast %value : vector<1xf32> to vector<8x1xf32>
2087   %t = vector.transpose %bcast, [1, 0] : vector<8x1xf32> to vector<1x8xf32>
2088   return %t : vector<1x8xf32>
2089 }
2090
2091 // -----
2092
2093 // CHECK-LABEL: func @transpose_scalar_broadcast2
2094 //  CHECK-SAME: (%[[ARG:.+]]: f32)
2095 //       CHECK:   %[[V:.+]] = vector.broadcast %[[ARG]] : f32 to vector<1x8xf32>
2096 //       CHECK:   return %[[V]] : vector<1x8xf32>
2097 func.func @transpose_scalar_broadcast2(%value: f32) -> vector<1x8xf32> {
2098   %bcast = vector.broadcast %value : f32 to vector<8x1xf32>
2099   %t = vector.transpose %bcast, [1, 0] : vector<8x1xf32> to vector<1x8xf32>
2100   return %t : vector<1x8xf32>
2101 }
2102
2103 // -----
2104
2105 // CHECK-LABEL: func @transpose_splat_constant
2106 //       CHECK:   %[[CST:.+]] = arith.constant dense<5.000000e+00> : vector<8x4xf32>
2107 //       CHECK:   return %[[CST]]
2108 func.func @transpose_splat_constant() -> vector<8x4xf32> {
2109   %cst = arith.constant dense<5.0> : vector<4x8xf32>
2110   %0 = vector.transpose %cst, [1, 0] : vector<4x8xf32> to vector<8x4xf32>
2111   return %0 : vector<8x4xf32>
2112 }
2113
2114 // CHECK-LABEL:   func @transpose_splat2(
2115 // CHECK-SAME:                           %[[VAL_0:.*]]: f32) -> vector<3x4xf32> {
2116 // CHECK:           %[[VAL_1:.*]] = vector.splat %[[VAL_0]] : vector<3x4xf32>
2117 // CHECK:           return %[[VAL_1]] : vector<3x4xf32>
2118 // CHECK:         }
2119 func.func @transpose_splat2(%arg : f32) -> vector<3x4xf32> {
2120   %splat = vector.splat %arg : vector<4x3xf32>
2121   %0 = vector.transpose %splat, [1, 0] : vector<4x3xf32> to vector<3x4xf32>
2122   return %0 : vector<3x4xf32>
2123 }
2124
2125 // -----
2126
2127 // CHECK-LABEL: func.func @insert_1d_constant
2128 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[9, 1, 2]> : vector<3xi32>
2129 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[0, 9, 2]> : vector<3xi32>
2130 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[0, 1, 9]> : vector<3xi32>
2131 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]] : vector<3xi32>, vector<3xi32>, vector<3xi32>
2132 func.func @insert_1d_constant() -> (vector<3xi32>, vector<3xi32>, vector<3xi32>) {
2133   %vcst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
2134   %icst = arith.constant 9 : i32
2135   %a = vector.insert %icst, %vcst[0] : i32 into vector<3xi32>
2136   %b = vector.insert %icst, %vcst[1] : i32 into vector<3xi32>
2137   %c = vector.insert %icst, %vcst[2] : i32 into vector<3xi32>
2138   return %a, %b, %c : vector<3xi32>, vector<3xi32>, vector<3xi32>
2139 }
2140
2141 // -----
2142
2143 // CHECK-LABEL: func.func @insert_2d_constant
2144 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[99, 1, 2\], \[3, 4, 5\]\]}}> : vector<2x3xi32>
2145 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[0, 1, 2\], \[3, 4, 99\]\]}}> : vector<2x3xi32>
2146 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[90, 91, 92\], \[3, 4, 5\]\]}}> : vector<2x3xi32>
2147 //   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[0, 1, 2\], \[90, 91, 92\]\]}}> : vector<2x3xi32>
2148 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]]
2149 func.func @insert_2d_constant() -> (vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>) {
2150   %vcst = arith.constant dense<[[0, 1, 2], [3, 4, 5]]> : vector<2x3xi32>
2151   %cst_scalar = arith.constant 99 : i32
2152   %cst_1d = arith.constant dense<[90, 91, 92]> : vector<3xi32>
2153   %a = vector.insert %cst_scalar, %vcst[0, 0] : i32 into vector<2x3xi32>
2154   %b = vector.insert %cst_scalar, %vcst[1, 2] : i32 into vector<2x3xi32>
2155   %c = vector.insert %cst_1d, %vcst[0] : vector<3xi32> into vector<2x3xi32>
2156   %d = vector.insert %cst_1d, %vcst[1] : vector<3xi32> into vector<2x3xi32>
2157   return %a, %b, %c, %d : vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>
2158 }
2159
2160 // -----
2161
2162 // CHECK-LABEL: func.func @insert_2d_splat_constant
2163 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<0> : vector<2x3xi32>
2164 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[99, 0, 0\], \[0, 0, 0\]\]}}> : vector<2x3xi32>
2165 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[0, 0, 0\], \[0, 99, 0\]\]}}> : vector<2x3xi32>
2166 //   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[33, 33, 33\], \[0, 0, 0\]\]}}> : vector<2x3xi32>
2167 //   CHECK-DAG: %[[ECST:.*]] = arith.constant dense<{{\[\[0, 0, 0\], \[33, 33, 33\]\]}}> : vector<2x3xi32>
2168 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]]
2169 func.func @insert_2d_splat_constant()
2170   -> (vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>) {
2171   %vcst = arith.constant dense<0> : vector<2x3xi32>
2172   %cst_zero = arith.constant 0 : i32
2173   %cst_scalar = arith.constant 99 : i32
2174   %cst_1d = arith.constant dense<33> : vector<3xi32>
2175   %a = vector.insert %cst_zero, %vcst[0, 0] : i32 into vector<2x3xi32>
2176   %b = vector.insert %cst_scalar, %vcst[0, 0] : i32 into vector<2x3xi32>
2177   %c = vector.insert %cst_scalar, %vcst[1, 1] : i32 into vector<2x3xi32>
2178   %d = vector.insert %cst_1d, %vcst[0] : vector<3xi32> into vector<2x3xi32>
2179   %e = vector.insert %cst_1d, %vcst[1] : vector<3xi32> into vector<2x3xi32>
2180   return %a, %b, %c, %d, %e : vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>, vector<2x3xi32>
2181 }
2182
2183 // -----
2184
2185 // CHECK-LABEL: func @insert_element_fold
2186 //       CHECK:   %[[V:.+]] = arith.constant dense<[0, 1, 7, 3]> : vector<4xi32>
2187 //       CHECK:   return %[[V]]
2188 func.func @insert_element_fold() -> vector<4xi32> {
2189   %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
2190   %s = arith.constant 7 : i32
2191   %i = arith.constant 2 : i32
2192   %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
2193   return %1 : vector<4xi32>
2194 }
2195
2196 // -----
2197
2198 // CHECK-LABEL: func @insert_element_invalid_fold
2199 func.func @insert_element_invalid_fold() -> vector<1xf32> {
2200   // Out-of-bound index here.
2201   %c26 = arith.constant 26 : index
2202   %cst_2 = arith.constant 1.60215309E+9 : f32
2203   %cst_20 = arith.constant dense<1.60215309E+9> : vector<1xf32>
2204 // CHECK: vector.insertelement
2205   %46 = vector.insertelement %cst_2, %cst_20[%c26 : index] : vector<1xf32>
2206   return %46 : vector<1xf32>
2207 }
2208
2209
2210 // -----
2211
2212 // Do not crash on poison
2213 // CHECK-LABEL: func @insert_poison_fold1
2214 //       CHECK:   vector.insertelement
2215 func.func @insert_poison_fold1() -> vector<4xi32> {
2216   %v = ub.poison : vector<4xi32>
2217   %s = arith.constant 7 : i32
2218   %i = arith.constant 2 : i32
2219   %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
2220   return %1 : vector<4xi32>
2221 }
2222
2223 // -----
2224
2225 // Do not crash on poison
2226 // CHECK-LABEL: func @insert_poison_fold2
2227 //       CHECK:   vector.insertelement
2228 func.func @insert_poison_fold2() -> vector<4xi32> {
2229   %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
2230   %s = ub.poison : i32
2231   %i = arith.constant 2 : i32
2232   %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
2233   return %1 : vector<4xi32>
2234 }
2235
2236 // -----
2237
2238 // Do not crash on poison
2239 // CHECK-LABEL: func @insert_poison_fold3
2240 //       CHECK:   vector.insertelement
2241 func.func @insert_poison_fold3() -> vector<4xi32> {
2242   %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
2243   %s = arith.constant 7 : i32
2244   %i = ub.poison : i32
2245   %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32>
2246   return %1 : vector<4xi32>
2247 }
2248
2249 // -----
2250
2251 // CHECK-LABEL: func @extract_element_fold
2252 //       CHECK:   %[[C:.+]] = arith.constant 5 : i32
2253 //       CHECK:   return %[[C]]
2254 func.func @extract_element_fold() -> i32 {
2255   %v = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32>
2256   %i = arith.constant 2 : i32
2257   %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
2258   return %1 : i32
2259 }
2260
2261 // CHECK-LABEL: func @extract_element_splat_fold
2262 //  CHECK-SAME: (%[[ARG:.+]]: i32)
2263 //       CHECK:   return %[[ARG]]
2264 func.func @extract_element_splat_fold(%a : i32) -> i32 {
2265   %v = vector.splat %a : vector<4xi32>
2266   %i = arith.constant 2 : i32
2267   %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
2268   return %1 : i32
2269 }
2270
2271 // -----
2272
2273 // Do not crash on poison
2274 // CHECK-LABEL: func @extract_element_poison_fold1
2275 //       CHECK:   vector.extractelement
2276 func.func @extract_element_poison_fold1() -> i32 {
2277   %v = ub.poison : vector<4xi32>
2278   %i = arith.constant 2 : i32
2279   %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
2280   return %1 : i32
2281 }
2282
2283 // -----
2284
2285 // Do not crash on poison
2286 // CHECK-LABEL: func @extract_element_poison_fold2
2287 //       CHECK:   vector.extractelement
2288 func.func @extract_element_poison_fold2() -> i32 {
2289   %v = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32>
2290   %i = ub.poison : i32
2291   %1 = vector.extractelement %v[%i : i32] : vector<4xi32>
2292   return %1 : i32
2293 }
2294
2295 // -----
2296
2297 // CHECK-LABEL: func @reduce_one_element_vector_extract
2298 //  CHECK-SAME: (%[[V:.+]]: vector<1xf32>)
2299 //       CHECK:   %[[S:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2300 //       CHECK:   return %[[S]] : f32
2301 func.func @reduce_one_element_vector_extract(%a : vector<1xf32>) -> f32 {
2302   %s = vector.reduction <add>, %a : vector<1xf32> into f32
2303   return %s : f32
2304 }
2305
2306 // -----
2307
2308 // CHECK-LABEL: func @masked_reduce_one_element_vector_extract
2309 //  CHECK-SAME: %[[VAL_0:.*]]: vector<1xf32>, %[[VAL_1:.*]]: vector<1xi1>)
2310 func.func @masked_reduce_one_element_vector_extract(%a : vector<1xf32>, %mask : vector<1xi1>) -> f32 {
2311 //       CHECK:   %[[VAL_2:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<1xf32>
2312   %s = vector.mask %mask { vector.reduction <add>, %a : vector<1xf32> into f32 }
2313          : vector<1xi1> -> f32
2314   return %s : f32
2315 }
2316
2317 // -----
2318
2319 // CHECK-LABEL: func @reduce_one_element_vector_addf
2320 //  CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2321 //       CHECK:   %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2322 //       CHECK:   %[[S:.+]] = arith.addf %[[A]], %[[B]] : f32
2323 //       CHECK:   return %[[S]]
2324 func.func @reduce_one_element_vector_addf(%a : vector<1xf32>, %b: f32) -> f32 {
2325   %s = vector.reduction <add>, %a, %b : vector<1xf32> into f32
2326   return %s : f32
2327 }
2328
2329 // -----
2330
2331 // CHECK-LABEL: func @reduce_one_element_vector_addf_fastmath
2332 //  CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2333 //       CHECK:   %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2334 //       CHECK:   %[[S:.+]] = arith.addf %[[A]], %[[B]] fastmath<nnan,ninf> : f32
2335 //       CHECK:   return %[[S]]
2336 func.func @reduce_one_element_vector_addf_fastmath(%a : vector<1xf32>, %b: f32) -> f32 {
2337   %s = vector.reduction <add>, %a, %b fastmath<nnan,ninf> : vector<1xf32> into f32
2338   return %s : f32
2339 }
2340
2341 // -----
2342
2343 // CHECK-LABEL: func @masked_reduce_one_element_vector_addf
2344 //  CHECK-SAME: %[[VAL_0:.*]]: vector<1xf32>, %[[VAL_1:.*]]: f32,
2345 //  CHECK-SAME: %[[VAL_2:.*]]: vector<1xi1>)
2346 func.func @masked_reduce_one_element_vector_addf(%a: vector<1xf32>,
2347                                                  %b: f32,
2348                                                  %mask: vector<1xi1>) -> f32 {
2349 //       CHECK:   %[[VAL_3:.*]] = vector.extract %[[VAL_2]][0] : i1 from vector<1xi1>
2350 //       CHECK:   %[[VAL_4:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<1xf32>
2351 //       CHECK:   %[[VAL_5:.*]] = arith.addf %[[VAL_4]], %[[VAL_1]] : f32
2352 //       CHECK:   %[[VAL_6:.*]] = arith.select %[[VAL_3]], %[[VAL_5]], %[[VAL_1]] : f32
2353   %s = vector.mask %mask { vector.reduction <add>, %a, %b : vector<1xf32> into f32 }
2354          : vector<1xi1> -> f32
2355   return %s : f32
2356 }
2357
2358 // -----
2359
2360 // CHECK-LABEL: func @reduce_one_element_vector_mulf
2361 //  CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2362 //       CHECK:   %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2363 //       CHECK:   %[[S:.+]] = arith.mulf %[[A]], %[[B]] : f32
2364 //       CHECK:   return %[[S]]
2365 func.func @reduce_one_element_vector_mulf(%a : vector<1xf32>, %b: f32) -> f32 {
2366   %s = vector.reduction <mul>, %a, %b : vector<1xf32> into f32
2367   return %s : f32
2368 }
2369
2370 // -----
2371
2372 // CHECK-LABEL: func @dont_reduce_one_element_vector
2373 //       CHECK: vector.reduction
2374 func.func @dont_reduce_one_element_vector(%a : vector<4xf32>) -> f32 {
2375   %s = vector.reduction <add>, %a : vector<4xf32> into f32
2376   return %s : f32
2377 }
2378
2379 // -----
2380
2381 // CHECK-LABEL: func @reduce_one_element_vector_maximumf
2382 //  CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
2383 //       CHECK:   %[[A:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32>
2384 //       CHECK:   %[[S:.+]] = arith.maximumf %[[A]], %[[B]] : f32
2385 //       CHECK:   return %[[S]]
2386 func.func @reduce_one_element_vector_maximumf(%a : vector<1xf32>, %b: f32) -> f32 {
2387   %s = vector.reduction <maximumf>, %a, %b : vector<1xf32> into f32
2388   return %s : f32
2389 }
2390
2391 // -----
2392
2393 // CHECK-LABEL: func @bitcast(
2394 //  CHECK-SAME:               %[[ARG:.*]]: vector<4x8xf32>) -> vector<4x16xi16> {
2395 //       CHECK: vector.bitcast %[[ARG]] : vector<4x8xf32> to vector<4x16xi16>
2396 func.func @bitcast(%a: vector<4x8xf32>) -> vector<4x16xi16> {
2397   %0 = vector.bitcast %a : vector<4x8xf32> to vector<4x8xi32>
2398   %1 = vector.bitcast %0 : vector<4x8xi32> to vector<4x16xi16>
2399   return %1 : vector<4x16xi16>
2400 }
2401
2402 // -----
2403
2404 // CHECK-LABEL: @insert_strided_slice_splat
2405 //  CHECK-SAME: (%[[ARG:.*]]: f32)
2406 //  CHECK-NEXT:   %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<8x16xf32>
2407 //  CHECK-NEXT:   return %[[SPLAT]] : vector<8x16xf32>
2408 func.func @insert_strided_slice_splat(%x: f32) -> (vector<8x16xf32>) {
2409   %splat0 = vector.splat %x : vector<4x4xf32>
2410   %splat1 = vector.splat %x : vector<8x16xf32>
2411   %0 = vector.insert_strided_slice %splat0, %splat1 {offsets = [2, 2], strides = [1, 1]}
2412     : vector<4x4xf32> into vector<8x16xf32>
2413   return %0 : vector<8x16xf32>
2414 }
2415
2416
2417 // -----
2418
2419 // CHECK-LABEL: @insert_extract_strided_slice
2420 //  CHECK-SAME: (%[[ARG:.*]]: vector<8x16xf32>)
2421 //  CHECK-NEXT:   return %[[ARG]] : vector<8x16xf32>
2422 func.func @insert_extract_strided_slice(%x: vector<8x16xf32>) -> (vector<8x16xf32>) {
2423   %0 = vector.extract_strided_slice %x {offsets = [0, 8], sizes = [2, 4], strides = [1, 1]}
2424         : vector<8x16xf32> to vector<2x4xf32>
2425   %1 = vector.insert_strided_slice %0, %x {offsets = [0, 8], strides = [1, 1]}
2426         : vector<2x4xf32> into vector<8x16xf32>
2427   return %1 : vector<8x16xf32>
2428 }
2429
2430 // -----
2431
2432 // CHECK-LABEL: func.func @insert_strided_1d_constant
2433 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<[4, 1, 2]> : vector<3xi32>
2434 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<[0, 1, 4]> : vector<3xi32>
2435 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<[5, 6, 2]> : vector<3xi32>
2436 //   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<[0, 5, 6]> : vector<3xi32>
2437 //   CHECK-DAG: %[[ECST:.*]] = arith.constant dense<[7, 8, 9]> : vector<3xi32>
2438 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]]
2439 func.func @insert_strided_1d_constant() ->
2440   (vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>) {
2441   %vcst = arith.constant dense<[0, 1, 2]> : vector<3xi32>
2442   %cst_1 = arith.constant dense<4> : vector<1xi32>
2443   %cst_2 = arith.constant dense<[5, 6]> : vector<2xi32>
2444   %cst_3 = arith.constant dense<[7, 8, 9]> : vector<3xi32>
2445   %a = vector.insert_strided_slice %cst_1, %vcst {offsets = [0], strides = [1]} : vector<1xi32> into vector<3xi32>
2446   %b = vector.insert_strided_slice %cst_1, %vcst {offsets = [2], strides = [1]} : vector<1xi32> into vector<3xi32>
2447   %c = vector.insert_strided_slice %cst_2, %vcst {offsets = [0], strides = [1]} : vector<2xi32> into vector<3xi32>
2448   %d = vector.insert_strided_slice %cst_2, %vcst {offsets = [1], strides = [1]} : vector<2xi32> into vector<3xi32>
2449   %e = vector.insert_strided_slice %cst_3, %vcst {offsets = [0], strides = [1]} : vector<3xi32> into vector<3xi32>
2450   return %a, %b, %c, %d, %e : vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>, vector<3xi32>
2451 }
2452
2453 // -----
2454
2455 // CHECK-LABEL: func.func @insert_strided_2d_constant
2456 //   CHECK-DAG: %[[ACST:.*]] = arith.constant dense<{{\[\[0, 1\], \[9, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
2457 //   CHECK-DAG: %[[BCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[4, 9\]\]}}> : vector<3x2xi32>
2458 //   CHECK-DAG: %[[CCST:.*]] = arith.constant dense<{{\[\[18, 19\], \[2, 3\], \[4, 5\]\]}}> : vector<3x2xi32>
2459 //   CHECK-DAG: %[[DCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[18, 19\], \[4, 5\]\]}}> : vector<3x2xi32>
2460 //   CHECK-DAG: %[[ECST:.*]] = arith.constant dense<{{\[\[0, 1\], \[2, 3\], \[18, 19\]\]}}> : vector<3x2xi32>
2461 //   CHECK-DAG: %[[FCST:.*]] = arith.constant dense<{{\[\[28, 29\], \[38, 39\], \[4, 5\]\]}}> : vector<3x2xi32>
2462 //   CHECK-DAG: %[[GCST:.*]] = arith.constant dense<{{\[\[0, 1\], \[28, 29\], \[38, 39\]\]}}> : vector<3x2xi32>
2463 //  CHECK-NEXT: return %[[ACST]], %[[BCST]], %[[CCST]], %[[DCST]], %[[ECST]], %[[FCST]], %[[GCST]]
2464 func.func @insert_strided_2d_constant() ->
2465   (vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>) {
2466   %vcst = arith.constant dense<[[0, 1], [2, 3], [4, 5]]> : vector<3x2xi32>
2467   %cst_1 = arith.constant dense<9> : vector<1xi32>
2468   %cst_2 = arith.constant dense<[18, 19]> : vector<2xi32>
2469   %cst_3 = arith.constant dense<[[28, 29], [38, 39]]> : vector<2x2xi32>
2470   %a = vector.insert_strided_slice %cst_1, %vcst {offsets = [1, 0], strides = [1]} : vector<1xi32> into vector<3x2xi32>
2471   %b = vector.insert_strided_slice %cst_1, %vcst {offsets = [2, 1], strides = [1]} : vector<1xi32> into vector<3x2xi32>
2472   %c = vector.insert_strided_slice %cst_2, %vcst {offsets = [0, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
2473   %d = vector.insert_strided_slice %cst_2, %vcst {offsets = [1, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
2474   %e = vector.insert_strided_slice %cst_2, %vcst {offsets = [2, 0], strides = [1]} : vector<2xi32> into vector<3x2xi32>
2475   %f = vector.insert_strided_slice %cst_3, %vcst {offsets = [0, 0], strides = [1, 1]} : vector<2x2xi32> into vector<3x2xi32>
2476   %g = vector.insert_strided_slice %cst_3, %vcst {offsets = [1, 0], strides = [1, 1]} : vector<2x2xi32> into vector<3x2xi32>
2477   return %a, %b, %c, %d, %e, %f, %g :
2478     vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>, vector<3x2xi32>
2479 }
2480
2481 // -----
2482
2483 // CHECK-LABEL: func @shuffle_splat
2484 //  CHECK-SAME:   (%[[ARG:.*]]: i32)
2485 //  CHECK-NEXT:   %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<4xi32>
2486 //  CHECK-NEXT:   return %[[SPLAT]] : vector<4xi32>
2487 func.func @shuffle_splat(%x : i32) -> vector<4xi32> {
2488   %v0 = vector.splat %x : vector<4xi32>
2489   %v1 = vector.splat %x : vector<2xi32>
2490   %shuffle = vector.shuffle %v0, %v1 [2, 3, 4, 5] : vector<4xi32>, vector<2xi32>
2491   return %shuffle : vector<4xi32>
2492 }
2493
2494
2495 // -----
2496
2497 // CHECK-LABEL: func @insert_splat
2498 //  CHECK-SAME:   (%[[ARG:.*]]: i32)
2499 //  CHECK-NEXT:   %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<2x4x3xi32>
2500 //  CHECK-NEXT:   return %[[SPLAT]] : vector<2x4x3xi32>
2501 func.func @insert_splat(%x : i32) -> vector<2x4x3xi32> {
2502   %v0 = vector.splat %x : vector<4x3xi32>
2503   %v1 = vector.splat %x : vector<2x4x3xi32>
2504   %insert = vector.insert %v0, %v1[0] : vector<4x3xi32> into vector<2x4x3xi32>
2505   return %insert : vector<2x4x3xi32>
2506 }
2507
2508 // -----
2509
2510 // CHECK-LABEL: func.func @transfer_read_from_rank_reducing_extract_slice
2511 //       CHECK:   tensor.extract_slice
2512 //       CHECK:   vector.transfer_read
2513 func.func @transfer_read_from_rank_reducing_extract_slice(%src: tensor<1x8x8x8xf32>, %i1: index, %i2: index, %i3: index, %i4: index) -> vector<4xf32> {
2514   %c0 = arith.constant 0 : index
2515   %f0 = arith.constant 0.000000e+00 : f32
2516   %0 = tensor.extract_slice %src[0, %i1, %i2, %i3] [1, 4, 1, 4] [1, 1, 1, 1] : tensor<1x8x8x8xf32> to tensor<1x4x4xf32>
2517   %1 = vector.transfer_read %0[%c0, %i4, %c0], %f0 {in_bounds = [true]} : tensor<1x4x4xf32>, vector<4xf32>
2518   return %1 : vector<4xf32>
2519 }
2520
2521 // -----
2522
2523 // CHECK-LABEL: func.func @extract_from_broadcast
2524 func.func @extract_from_broadcast(%src: vector<1x1x1xf32>) -> vector<1xf32> {
2525   %0 = vector.broadcast %src : vector<1x1x1xf32> to vector<1x1x32x1xf32>
2526
2527   //  CHECK-NEXT:   %[[RES:.*]] = vector.extract {{.*}}[0, 0] : vector<1xf32> from vector<1x1x1xf32>
2528   //  CHECK-NEXT:   return %[[RES]] : vector<1xf32>
2529   %1 = vector.extract %0[0, 0, 31] : vector<1xf32> from vector<1x1x32x1xf32>
2530   return %1: vector<1xf32>
2531 }
2532
2533 // CHECK-LABEL: func.func @extract_from_stretch_broadcast
2534 func.func @extract_from_stretch_broadcast(%src: vector<3x1x2xf32>) -> f32 {
2535   //  CHECK-NEXT:  %[[RES:.*]] = vector.extract {{.*}}[0, 0, 0] : f32 from vector<3x1x2xf32>
2536   //  CHECK-NEXT:  return %[[RES]] : f32
2537   %0 = vector.broadcast %src : vector<3x1x2xf32> to vector<3x4x2xf32>
2538   %1 = vector.extract %0[0, 2, 0] : f32 from vector<3x4x2xf32>
2539   return %1: f32
2540 }
2541
2542 // -----
2543 // CHECK-LABEL: func.func @extract_strided_slice_of_constant_mask
2544 func.func @extract_strided_slice_of_constant_mask() -> vector<5x7xi1>{
2545   //  CHECK-NEXT:   %[[RES:.*]] = vector.constant_mask [5, 4] : vector<5x7xi1>
2546   //  CHECK-NEXT:   return %[[RES]] : vector<5x7xi1>
2547   %c4 = arith.constant 4 : index
2548   %c10 = arith.constant 10 : index
2549   %mask = vector.create_mask %c10, %c4 : vector<12x7xi1>
2550   %res = vector.extract_strided_slice %mask {offsets = [3], sizes = [5], strides = [1]} : vector<12x7xi1> to vector<5x7xi1>
2551   return %res : vector<5x7xi1>
2552 }
2553
2554 // -----
2555
2556 // CHECK-LABEL: func.func @fold_extractelement_of_broadcast(
2557 //  CHECK-SAME:     %[[f:.*]]: f32
2558 //       CHECK:   return %[[f]]
2559 func.func @fold_extractelement_of_broadcast(%f: f32) -> f32 {
2560   %0 = vector.broadcast %f : f32 to vector<15xf32>
2561   %c5 = arith.constant 5 : index
2562   %1 = vector.extractelement %0 [%c5 : index] : vector<15xf32>
2563   return %1 : f32
2564 }
2565
2566 // -----
2567
2568 // CHECK-LABEL: func.func @fold_0d_vector_reduction
2569 func.func @fold_0d_vector_reduction(%arg0: vector<f32>) -> f32 {
2570   // CHECK-NEXT: %[[RES:.*]] = vector.extractelement %arg{{.*}}[] : vector<f32>
2571   // CHECK-NEXT: return %[[RES]] : f32
2572   %0 = vector.reduction <add>, %arg0 : vector<f32> into f32
2573   return %0 : f32
2574 }
2575
2576 // -----
2577
2578 // CHECK-LABEL: func @empty_vector_mask
2579 func.func @empty_vector_mask(%mask : vector<8xi1>) {
2580 //   CHECK-NOT:   vector.mask
2581   vector.mask %mask { } : vector<8xi1>
2582   return
2583 }
2584
2585 // -----
2586
2587 // CHECK-LABEL: func @empty_vector_mask_with_return
2588 //  CHECK-SAME:     %[[IN:.*]]: vector<8xf32>
2589 func.func @empty_vector_mask_with_return(%a : vector<8xf32>, %mask : vector<8xi1>) -> vector<8xf32> {
2590 //   CHECK-NOT:   vector.mask
2591 //       CHECK:   return %[[IN]] : vector<8xf32>
2592   %0 = vector.mask %mask { vector.yield %a : vector<8xf32> } : vector<8xi1> -> vector<8xf32>
2593   return %0 : vector<8xf32>
2594 }
2595
2596 // -----
2597
2598 // CHECK-LABEL: func @all_true_vector_mask
2599 //  CHECK-SAME:     %[[IN:.*]]: tensor<3x4xf32>
2600 func.func @all_true_vector_mask(%ta : tensor<3x4xf32>) -> vector<3x4xf32> {
2601 //   CHECK-NOT:   vector.mask
2602 //       CHECK:   %[[LD:.*]] = vector.transfer_read %[[IN]]
2603 //       CHECK:   return %[[LD]] : vector<3x4xf32>
2604   %c0 = arith.constant 0 : index
2605   %cf0 = arith.constant 0.0 : f32
2606   %all_true = vector.constant_mask [3, 4] : vector<3x4xi1>
2607   %0 = vector.mask %all_true { vector.transfer_read %ta[%c0, %c0], %cf0 : tensor<3x4xf32>, vector<3x4xf32> } : vector<3x4xi1> -> vector<3x4xf32>
2608   return %0 : vector<3x4xf32>
2609 }
2610
2611 // -----
2612
2613 // CHECK-LABEL: func @all_true_vector_mask_no_result(
2614 func.func @all_true_vector_mask_no_result(%a : vector<3x4xf32>, %m : memref<3x4xf32>) {
2615 //   CHECK-NOT:   vector.mask
2616 //       CHECK:   vector.transfer_write
2617   %c0 = arith.constant 0 : index
2618   %all_true = vector.constant_mask [3, 4] : vector<3x4xi1>
2619   vector.mask %all_true { vector.transfer_write %a, %m[%c0, %c0] : vector<3x4xf32>, memref<3x4xf32> } : vector<3x4xi1>
2620   return
2621 }
2622
2623 // -----
2624
2625 // CHECK-LABEL:   func.func @fold_shape_cast_with_mask(
2626 // CHECK-SAME:     %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x4xi1> {
2627 func.func @fold_shape_cast_with_mask(%arg0: tensor<1x?xf32>) -> vector<1x4xi1> {
2628 // CHECK-NOT: vector.shape_cast
2629 // CHECK:     %[[VAL_1:.*]] = arith.constant 1 : index
2630 // CHECK:     %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
2631 // CHECK:     %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x4xi1>
2632 // CHECK:     return %[[VAL_3]] : vector<1x4xi1>
2633   %c1 = arith.constant 1 : index
2634   %dim = tensor.dim %arg0, %c1 : tensor<1x?xf32>
2635   %1 = vector.create_mask %c1, %dim, %c1, %c1 : vector<1x4x1x1xi1>
2636   %2 = vector.shape_cast %1 : vector<1x4x1x1xi1> to vector<1x4xi1>
2637   return %2 : vector<1x4xi1>
2638 }
2639
2640 // -----
2641
2642 // CHECK-LABEL:   func.func @fold_shape_cast_with_mask_scalable(
2643 // CHECK-SAME:    %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x[4]xi1> {
2644 func.func @fold_shape_cast_with_mask_scalable(%arg0: tensor<1x?xf32>) -> vector<1x[4]xi1> {
2645 // CHECK-NOT: vector.shape_cast
2646 // CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
2647 // CHECK:           %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
2648 // CHECK:           %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x[4]xi1>
2649 // CHECK:           return %[[VAL_3]] : vector<1x[4]xi1>
2650   %c1 = arith.constant 1 : index
2651   %dim = tensor.dim %arg0, %c1 : tensor<1x?xf32>
2652   %1 = vector.create_mask %c1, %dim, %c1, %c1 : vector<1x[4]x1x1xi1>
2653   %2 = vector.shape_cast %1 : vector<1x[4]x1x1xi1> to vector<1x[4]xi1>
2654   return %2 : vector<1x[4]xi1>
2655 }
2656
2657 // -----
2658
2659 // Check that a scalable unit dim (i.e. [1]) is not folded away: only the
     // trailing fixed unit dim of vector<1x[1]x1xi1> is dropped, so the expected
     // mask type remains vector<1x[1]xi1>.
2660 // CHECK-LABEL:   func.func @fold_shape_cast_with_mask_scalable_one(
2661 // CHECK-SAME:    %[[VAL_0:.*]]: tensor<1x?xf32>) -> vector<1x[1]xi1> {
2662 func.func @fold_shape_cast_with_mask_scalable_one(%arg0: tensor<1x?xf32>) -> vector<1x[1]xi1>{
2663 // CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
2664 // CHECK:           %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<1x?xf32>
2665 // CHECK:           %[[VAL_3:.*]] = vector.create_mask %[[VAL_1]], %[[VAL_2]] : vector<1x[1]xi1>
2666 // CHECK:           return %[[VAL_3]] : vector<1x[1]xi1>
2667   %c1 = arith.constant 1 : index
2668   %dim = tensor.dim %arg0, %c1 : tensor<1x?xf32>
2669   %1 = vector.create_mask %c1, %dim, %c1 : vector<1x[1]x1xi1>
2670   %2 = vector.shape_cast %1 : vector<1x[1]x1xi1> to vector<1x[1]xi1>
2671   return %2 : vector<1x[1]xi1>
2672 }
2673
2674 // -----
2675
2676 // CHECK-LABEL:   func.func @fold_shape_cast_with_constant_mask() -> vector<4xi1> {
     // A constant mask over vector<4x1x1xi1> is shape_cast to vector<4xi1>.
     // Expected output: a single vector.constant_mask [1] of the rank-1 type and
     // no shape_cast.
2677 func.func @fold_shape_cast_with_constant_mask() -> vector<4xi1>{
2678 // CHECK-NOT: vector.shape_cast
2679 // CHECK:           %[[VAL_0:.*]] = vector.constant_mask [1] : vector<4xi1>
2680 // CHECK:           return %[[VAL_0]] : vector<4xi1>
2681   %1 = vector.constant_mask [1, 1, 1] : vector<4x1x1xi1>
2682   %2 = vector.shape_cast %1 : vector<4x1x1xi1> to vector<4xi1>
2683   return %2 : vector<4xi1>
2684 }
2685
2686 // -----
2687
2688 // TODO: This IR could be canonicalized but the canonicalization pattern is not
2689 // smart enough. For now, just make sure that we do not crash.
2690
2691 // CHECK-LABEL: func.func @load_store_forwarding_rank_mismatch(
2692 //       CHECK:   vector.transfer_write
2693 //       CHECK:   vector.transfer_read
     // The write (3-D vector) and the read (4-D vector) use different permutation
     // maps and vector ranks; both ops are expected to remain in the output.
2694 func.func @load_store_forwarding_rank_mismatch(%v0: vector<4x1x1xf32>, %arg0: tensor<4x4x4xf32>) -> (vector<1x100x4x5xf32>) {
2695   %c0 = arith.constant 0 : index
2696   %cf0 = arith.constant 0.0 : f32
2697   // d0 is explicitly written.
2698   %w0 = vector.transfer_write %v0, %arg0[%c0, %c0, %c0]
2699       {in_bounds = [true, true, true],
2700       permutation_map = affine_map<(d0, d1, d2) -> (d2, d1, d0)>} :
2701       vector<4x1x1xf32>, tensor<4x4x4xf32>
2702   // d0 is implicitly read (rank-reduction of unit dim).
2703   %r = vector.transfer_read %w0[%c0, %c0, %c0], %cf0
2704       {in_bounds = [true, true, true, true],
2705       permutation_map = affine_map<(d0, d1, d2) -> (d1, 0, d2, 0)>} :
2706       tensor<4x4x4xf32>, vector<1x100x4x5xf32>
2707   return %r : vector<1x100x4x5xf32>
2708 }
2709
2710 // -----
2711
2712 // CHECK-LABEL: func.func @rank_0_shuffle_to_interleave(
2713 //  CHECK-SAME:     %[[LHS:.*]]: vector<f64>, %[[RHS:.*]]: vector<f64>)
     // Shuffling two 0-D vectors with mask [0, 1] is expected to be rewritten as
     // a vector.interleave of the two operands.
2714 func.func @rank_0_shuffle_to_interleave(%arg0: vector<f64>, %arg1: vector<f64>) -> vector<2xf64> {
2715   // CHECK: %[[ZIP:.*]] = vector.interleave %[[LHS]], %[[RHS]] : vector<f64> -> vector<2xf64>
2716   // CHECK: return %[[ZIP]]
2717   %0 = vector.shuffle %arg0, %arg1 [0, 1] : vector<f64>, vector<f64>
2718   return %0 : vector<2xf64>
2719 }
2720
2721 // -----
2722
2723 // CHECK-LABEL: func.func @rank_1_shuffle_to_interleave(
2724 //  CHECK-SAME:     %[[LHS:.*]]: vector<6xi32>, %[[RHS:.*]]: vector<6xi32>)
     // The shuffle mask alternates element i of the first operand (index i) with
     // element i of the second operand (index 6 + i); the expected output is a
     // single vector.interleave.
2725 func.func @rank_1_shuffle_to_interleave(%arg0: vector<6xi32>, %arg1: vector<6xi32>) -> vector<12xi32> {
2726   // CHECK: %[[ZIP:.*]] = vector.interleave %[[LHS]], %[[RHS]] : vector<6xi32> -> vector<12xi32>
2727   // CHECK: return %[[ZIP]]
2728   %0 = vector.shuffle %arg0, %arg1 [0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11] : vector<6xi32>, vector<6xi32>
2729   return %0 : vector<12xi32>
2730 }
2731
2732 // -----
2733
2734 // CHECK-LABEL: func @extract_from_0d_splat_broadcast_regression(
2735 //  CHECK-SAME:     %[[a:.*]]: f32, %[[b:.*]]: vector<f32>, %[[c:.*]]: vector<2xf32>)
     // Extracts from splat/broadcast results. Per the expected return line:
     //   * scalar extracts of a splat/broadcast of %a yield %a itself,
     //   * the scalar extract of a broadcast 0-D vector becomes an extractelement
     //     on %b,
     //   * sub-vector extracts become a broadcast of %a to the smaller type.
     // %c is declared but not used by the body.
2736 func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %b: vector<f32>, %c: vector<2xf32>) -> (f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>) {
2737   // Splat scalar to 0D and extract scalar.
2738   %0 = vector.splat %a : vector<f32>
2739   %1 = vector.extract %0[] : f32 from vector<f32>
2740
2741   // Broadcast scalar to 0D and extract scalar.
2742   %2 = vector.broadcast %a : f32 to vector<f32>
2743   %3 = vector.extract %2[] : f32 from vector<f32>
2744
2745   // Broadcast 0D to 3D and extract scalar.
2746   // CHECK: %[[extract1:.*]] = vector.extractelement %[[b]][] : vector<f32>
2747   %4 = vector.broadcast %b : vector<f32> to vector<1x2x4xf32>
2748   %5 = vector.extract %4[0, 0, 1] : f32 from vector<1x2x4xf32>
2749
2750   // Splat scalar to 2D and extract scalar.
2751   %6 = vector.splat %a : vector<2x3xf32>
2752   %7 = vector.extract %6[0, 1] : f32 from vector<2x3xf32>
2753
2754   // Broadcast scalar to 3D and extract scalar.
2755   %8 = vector.broadcast %a : f32 to vector<5x6x7xf32>
2756   %9 = vector.extract %8[2, 1, 5] : f32 from vector<5x6x7xf32>
2757
2758   // Extract 2D from 3D that was broadcasted from a scalar.
2759   // CHECK: %[[extract2:.*]] = vector.broadcast %[[a]] : f32 to vector<6x7xf32>
2760   %10 = vector.extract %8[2] : vector<6x7xf32> from vector<5x6x7xf32>
2761
2762   // Extract 1D from 2D that was splat'ed from a scalar.
2763   // CHECK: %[[extract3:.*]] = vector.broadcast %[[a]] : f32 to vector<3xf32>
2764   %11 = vector.extract %6[1] : vector<3xf32> from vector<2x3xf32>
2765
2766   // CHECK:   return %[[a]], %[[a]], %[[extract1]], %[[a]], %[[a]], %[[extract2]], %[[extract3]]
2767   return %1, %3, %5, %7, %9, %10, %11 : f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>
2768 }
2769
2770 // -----
2771
2772 // CHECK-LABEL: func @extract_scalar_from_from_elements(
2773 //  CHECK-SAME:     %[[a:.*]]: f32, %[[b:.*]]: f32)
     // Each scalar extract from a vector.from_elements result is expected to be
     // replaced by the operand at the extracted position, so the function ends
     // up returning only %a / %b.
2774 func.func @extract_scalar_from_from_elements(%a: f32, %b: f32) -> (f32, f32, f32, f32, f32, f32, f32) {
2775   // Extract from 0D.
2776   %0 = vector.from_elements %a : vector<f32>
2777   %1 = vector.extract %0[] : f32 from vector<f32>
2778
2779   // Extract from 1D.
2780   %2 = vector.from_elements %a : vector<1xf32>
2781   %3 = vector.extract %2[0] : f32 from vector<1xf32>
2782   %4 = vector.from_elements %a, %b, %a, %a, %b : vector<5xf32>
2783   %5 = vector.extract %4[4] : f32 from vector<5xf32>
2784
2785   // Extract from 2D.
2786   %6 = vector.from_elements %a, %a, %a, %b, %b, %b : vector<2x3xf32>
2787   %7 = vector.extract %6[0, 0] : f32 from vector<2x3xf32>
2788   %8 = vector.extract %6[0, 1] : f32 from vector<2x3xf32>
2789   %9 = vector.extract %6[1, 1] : f32 from vector<2x3xf32>
2790   %10 = vector.extract %6[1, 2] : f32 from vector<2x3xf32>
2791
2792   // CHECK: return %[[a]], %[[a]], %[[b]], %[[a]], %[[a]], %[[b]], %[[b]]
2793   return %1, %3, %5, %7, %8, %9, %10 : f32, f32, f32, f32, f32, f32, f32
2794 }
2795
2796 // -----
2797
2798 // CHECK-LABEL: func @extract_1d_from_from_elements(
2799 //  CHECK-SAME:     %[[a:.*]]: f32, %[[b:.*]]: f32)
     // Row 0 of the 2x3 source holds only %a and row 1 only %b, so each extracted
     // row is expected to become a vector.splat of the corresponding scalar.
2800 func.func @extract_1d_from_from_elements(%a: f32, %b: f32) -> (vector<3xf32>, vector<3xf32>) {
2801   %0 = vector.from_elements %a, %a, %a, %b, %b, %b : vector<2x3xf32>
2802   // CHECK: %[[splat1:.*]] = vector.splat %[[a]] : vector<3xf32>
2803   %1 = vector.extract %0[0] : vector<3xf32> from vector<2x3xf32>
2804   // CHECK: %[[splat2:.*]] = vector.splat %[[b]] : vector<3xf32>
2805   %2 = vector.extract %0[1] : vector<3xf32> from vector<2x3xf32>
2806   // CHECK: return %[[splat1]], %[[splat2]]
2807   return %1, %2 : vector<3xf32>, vector<3xf32>
2808 }
2809
2810 // -----
2811
2812 // CHECK-LABEL: func @extract_2d_from_from_elements(
2813 //  CHECK-SAME:     %[[a:.*]]: f32, %[[b:.*]]: f32)
     // Extracting a 2x2 slice from a 3x2x2 from_elements is expected to produce a
     // smaller from_elements built from that slice's four operands. Note that the
     // captures below are named "splat" although they bind from_elements results.
2814 func.func @extract_2d_from_from_elements(%a: f32, %b: f32) -> (vector<2x2xf32>, vector<2x2xf32>) {
2815   %0 = vector.from_elements %a, %a, %a, %b, %b, %b, %b, %a, %b, %a, %a, %b : vector<3x2x2xf32>
2816   // CHECK: %[[splat1:.*]] = vector.from_elements %[[a]], %[[a]], %[[a]], %[[b]] : vector<2x2xf32>
2817   %1 = vector.extract %0[0] : vector<2x2xf32> from vector<3x2x2xf32>
2818   // CHECK: %[[splat2:.*]] = vector.from_elements %[[b]], %[[b]], %[[b]], %[[a]] : vector<2x2xf32>
2819   %2 = vector.extract %0[1] : vector<2x2xf32> from vector<3x2x2xf32>
2820   // CHECK: return %[[splat1]], %[[splat2]]
2821   return %1, %2 : vector<2x2xf32>, vector<2x2xf32>
2822 }
2823
2824 // -----
2825
2826 // CHECK-LABEL: func @from_elements_to_splat(
2827 //  CHECK-SAME:     %[[a:.*]]: f32, %[[b:.*]]: f32)
     // A from_elements whose operands are all the same value is expected to become
     // a vector.splat (including the 0-D, single-operand case); one with mixed
     // operands is expected to stay a from_elements.
2828 func.func @from_elements_to_splat(%a: f32, %b: f32) -> (vector<2x3xf32>, vector<2x3xf32>, vector<f32>) {
2829   // CHECK: %[[splat:.*]] = vector.splat %[[a]] : vector<2x3xf32>
2830   %0 = vector.from_elements %a, %a, %a, %a, %a, %a : vector<2x3xf32>
2831   // CHECK: %[[from_el:.*]] = vector.from_elements {{.*}} : vector<2x3xf32>
2832   %1 = vector.from_elements %a, %a, %a, %a, %b, %a : vector<2x3xf32>
2833   // CHECK: %[[splat2:.*]] = vector.splat %[[a]] : vector<f32>
2834   %2 = vector.from_elements %a : vector<f32>
2835   // CHECK: return %[[splat]], %[[from_el]], %[[splat2]]
2836   return %0, %1, %2 : vector<2x3xf32>, vector<2x3xf32>, vector<f32>
2837 }
2838
2839 // -----
2840
2841 // CHECK-LABEL: func @vector_insert_const_regression(
2842 //       CHECK:   llvm.mlir.undef
2843 //       CHECK:   vector.insert
     // Inserts a scalar into a vector defined by llvm.mlir.undef. Both the undef
     // and the insert are expected to still be present in the output.
2844 func.func @vector_insert_const_regression(%arg0: i8) -> vector<4xi8> {
2845   %0 = llvm.mlir.undef : vector<4xi8>
2846   %1 = vector.insert %arg0, %0 [0] : i8 into vector<4xi8>
2847   return %1 : vector<4xi8>
2848 }
2849
2850 // -----
2851
2852 // CHECK-LABEL: @insert_scalar_poison_idx
     // Scalar insert whose first static position is -1 (the "poison idx" of the
     // test name). Expected output: no vector.insert, just a ub.poison of the
     // destination vector type.
2853 func.func @insert_scalar_poison_idx(%a: vector<4x5xf32>, %b: f32)
2854     -> vector<4x5xf32> {
2855   //  CHECK-NOT: vector.insert
2856   // CHECK-NEXT: ub.poison : vector<4x5xf32>
2857   %0 = vector.insert %b, %a[-1, 0] : f32 into vector<4x5xf32>
2858   return %0 : vector<4x5xf32>
2859 }
2860
2861 // -----
2862
2863 // CHECK-LABEL: @insert_vector_poison_idx
     // Sub-vector insert at static position -1 (the "poison idx" of the test
     // name). Expected output: no vector.insert, just a ub.poison of the
     // destination vector type.
2864 func.func @insert_vector_poison_idx(%a: vector<4x5xf32>, %b: vector<5xf32>)
2865     -> vector<4x5xf32> {
2866   //  CHECK-NOT: vector.insert
2867   // CHECK-NEXT: ub.poison : vector<4x5xf32>
2868   %0 = vector.insert %b, %a[-1] : vector<5xf32> into vector<4x5xf32>
2869   return %0 : vector<4x5xf32>
2870 }
2871
2872 // -----
2873
2874 // CHECK-LABEL: @insert_multiple_poison_idx
     // Same as the single-index cases but with every static position set to -1.
     // Expected output: no vector.insert, just a ub.poison of the destination
     // vector type.
2875 func.func @insert_multiple_poison_idx(%a: vector<4x5x8xf32>, %b: vector<8xf32>)
2876     -> vector<4x5x8xf32> {
2877   //  CHECK-NOT: vector.insert
2878   // CHECK-NEXT: ub.poison : vector<4x5x8xf32>
2879   %0 = vector.insert %b, %a[-1, -1] : vector<8xf32> into vector<4x5x8xf32>
2880   return %0 : vector<4x5x8xf32>
2881 }
2882
2883 // -----
2884
2885 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract
2886 // CHECK:        %[[EXTRACT:.+]] = vector.extract {{.*}}[0, 0, 0, 0, 0] : vector<4xi32> from vector<8x1x2x1x1x4xi32>
2887 // CHECK-NEXT:   return %[[EXTRACT]] :  vector<4xi32>
     // The slice takes one element in every outer dim and the full innermost dim
     // (size 4), then is shape_cast to vector<4xi32>. Expected output: a single
     // vector.extract at position [0, 0, 0, 0, 0].
2888 func.func @contiguous_extract_strided_slices_to_extract(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<4xi32> {
2889   %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1, 4], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x4xi32>
2890   %2 = vector.shape_cast %1 : vector<1x1x1x1x1x4xi32> to vector<4xi32>
2891   return %2 : vector<4xi32>
2892 }
2893
2894 // -----
2895
2896 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_shorter_size_list
2897 // CHECK:        %[[EXTRACT:.+]] = vector.extract {{.*}}[0, 0, 0, 0] : vector<1x4xi32> from vector<8x1x2x1x1x4xi32>
2898 // CHECK-NEXT:   return %[[EXTRACT]] :  vector<1x4xi32>
     // Variant where offsets/sizes/strides list only 5 entries for the rank-6
     // source and the result is shape_cast to vector<1x4xi32>. Expected output: a
     // single vector.extract at position [0, 0, 0, 0].
2899 func.func @contiguous_extract_strided_slices_to_extract_shorter_size_list(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<1x4xi32> {
2900   %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1], strides = [1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x4xi32>
2901   %2 = vector.shape_cast %1 : vector<1x1x1x1x1x4xi32> to vector<1x4xi32>
2902   return %2 : vector<1x4xi32>
2903 }
2904
2905 // -----
2906
2907 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_unit_outer_size
2908 // CHECK-NEXT:   vector.extract_strided_slice
     // Negative case: the outermost slice size is 8 rather than 1, and the
     // extract_strided_slice is expected to be left in place.
2909 func.func @contiguous_extract_strided_slices_to_extract_failure_non_unit_outer_size(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<8x1x1x1x1x4xi32> {
2910   %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [8, 1, 1, 1, 1, 4], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<8x1x1x1x1x4xi32>
2911   return %1 : vector<8x1x1x1x1x4xi32>
2912 }
2913
2914 // -----
2915
2916 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_full_size
2917 // CHECK-NEXT:   vector.extract_strided_slice
     // Negative case: the innermost slice size is 2 while the source dim is 4, and
     // the extract_strided_slice is expected to be left in place.
2918 func.func @contiguous_extract_strided_slices_to_extract_failure_non_full_size(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<1x1x1x1x1x2xi32> {
2919   %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 1, 1, 1, 2], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x1x1x1x2xi32>
2920   return %1 : vector<1x1x1x1x1x2xi32>
2921 }
2922
2923 // -----
2924
2925 // CHECK-LABEL: @contiguous_extract_strided_slices_to_extract_failure_non_full_inner_size
2926 // CHECK-NEXT:    vector.extract_strided_slice
     // Negative case: dim 2 is taken in full (size 2) but the innermost dim is
     // sliced to 1 of 4, and the extract_strided_slice is expected to be left in
     // place.
2927 func.func @contiguous_extract_strided_slices_to_extract_failure_non_full_inner_size(%arg0 : vector<8x1x2x1x1x4xi32>) -> vector<1x1x2x1x1x1xi32> {
2928   %1 = vector.extract_strided_slice %arg0 {offsets = [0, 0, 0, 0, 0, 0], sizes = [1, 1, 2, 1, 1, 1], strides = [1, 1, 1, 1, 1, 1]} : vector<8x1x2x1x1x4xi32> to vector<1x1x2x1x1x1xi32>
2929   return %1 : vector<1x1x2x1x1x1xi32>
2930 }
2931
2932 // -----
2933
2934 // CHECK-LABEL: @contiguous_gather
2935 //  CHECK-SAME:   (%[[BASE:.*]]: memref<?xf32>, %[[MASK:.*]]: vector<16xi1>, %[[PASSTHRU:.*]]: vector<16xf32>)
2936 //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
2937 //       CHECK:   %[[R:.*]] = vector.maskedload %[[BASE]][%[[C0]]], %[[MASK]], %[[PASSTHRU]] : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
2938 //       CHECK:   return %[[R]]
     // The gather indices are the constant sequence 0..15, so the expected output
     // is a vector.maskedload from the same base offset with the same mask and
     // pass-through.
2939 func.func @contiguous_gather(%base: memref<?xf32>,
2940                              %mask: vector<16xi1>, %passthru: vector<16xf32>) -> vector<16xf32> {
2941   %c0 = arith.constant 0 : index
2942   %indices = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
2943   %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
2944     memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
2945   return %1 : vector<16xf32>
2946 }
2947
2948 // -----
2949
2950 // CHECK-LABEL: @contiguous_gather_non_zero_start(
2951 //  TODO: Non-zero start is not supported yet.
2952 //       CHECK:   %[[R:.*]] = vector.gather
2953 //       CHECK:   return %[[R]]
     // The indices are consecutive but start at 1 (1..16); the gather is expected
     // to be left unchanged.
2954 func.func @contiguous_gather_non_zero_start(%base: memref<?xf32>,
2955                                             %mask: vector<16xi1>,
2956                                             %passthru: vector<16xf32>) -> vector<16xf32> {
2957   %c0 = arith.constant 0 : index
2958   %indices = arith.constant dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : vector<16xi32>
2959   %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
2960     memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
2961   return %1 : vector<16xf32>
2962 }
2963
2964 // -----
2965
2966 // CHECK-LABEL: @contiguous_gather_2d(
2967 // TODO: Only 1D vectors are supported.
2968 //       CHECK:   %[[R:.*]] = vector.gather
2969 //       CHECK:   return %[[R]]
     // 2-D variant: a 4x4 index vector holding 0..15 in row-major order over a
     // rank-2 memref; the gather is expected to be left unchanged.
2970 func.func @contiguous_gather_2d(%base: memref<?x?xf32>,
2971                                 %mask: vector<4x4xi1>, %passthru: vector<4x4xf32>) -> vector<4x4xf32> {
2972   %c0 = arith.constant 0 : index
2973   %indices = arith.constant dense<[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]> : vector<4x4xi32>
2974   %1 = vector.gather %base[%c0, %c0][%indices], %mask, %passthru :
2975     memref<?x?xf32>, vector<4x4xi32>, vector<4x4xi1>, vector<4x4xf32> into vector<4x4xf32>
2976   return %1 : vector<4x4xf32>
2977 }
2978
2979 // -----
2980
2981 // CHECK-LABEL: @contiguous_gather_const_mask
2982 //  CHECK-SAME:   (%[[BASE:.*]]: memref<?xf32>, %[[PASSTHRU:.*]]: vector<16xf32>)
2983 //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
2984 //       CHECK:   %[[R:.*]] = vector.load %[[BASE]][%[[C0]]] : memref<?xf32>, vector<16xf32>
2985 //       CHECK:   return %[[R]]
     // Indices 0..15 combined with an all-true constant mask: the expected output
     // is a plain vector.load, with neither the mask nor the pass-through used.
2986 func.func @contiguous_gather_const_mask(%base: memref<?xf32>,
2987                                         %passthru: vector<16xf32>) -> vector<16xf32> {
2988   %c0 = arith.constant 0 : index
2989   %indices = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
2990   %mask = arith.constant dense<true> : vector<16xi1>
2991   %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
2992     memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
2993   return %1 : vector<16xf32>
2994 }
2995
2996 // -----
2997
2998 // CHECK-LABEL: @contiguous_gather_step
2999 //  CHECK-SAME:   (%[[BASE:.*]]: memref<?xf32>, %[[MASK:.*]]: vector<16xi1>, %[[PASSTHRU:.*]]: vector<16xf32>)
3000 //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
3001 //       CHECK:   %[[R:.*]] = vector.maskedload %[[BASE]][%[[C0]]], %[[MASK]], %[[PASSTHRU]] : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
3002 //       CHECK:   return %[[R]]
     // Here the indices come from vector.step rather than a dense constant; the
     // expected output is a vector.maskedload.
3003 func.func @contiguous_gather_step(%base: memref<?xf32>,
3004                                   %mask: vector<16xi1>, %passthru: vector<16xf32>) -> vector<16xf32> {
3005   %c0 = arith.constant 0 : index
3006   %indices = vector.step : vector<16xindex>
3007   %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
3008     memref<?xf32>, vector<16xindex>, vector<16xi1>, vector<16xf32> into vector<16xf32>
3009   return %1 : vector<16xf32>
3010 }
3011
3012 // -----
3013
3014 // CHECK-LABEL: @gather_broadcast(
3015 // TODO: Broadcast is not supported yet
3016 //       CHECK:   %[[R:.*]] = vector.gather
3017 //       CHECK:   return %[[R]]
     // All 16 indices are 0, i.e. every lane reads the same element; the gather
     // is expected to be left unchanged.
3018 func.func @gather_broadcast(%base: memref<?xf32>,
3019                              %mask: vector<16xi1>, %passthru: vector<16xf32>) -> vector<16xf32> {
3020   %c0 = arith.constant 0 : index
3021   %indices = arith.constant dense<0> : vector<16xi32>
3022   %1 = vector.gather %base[%c0][%indices], %mask, %passthru :
3023     memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
3024   return %1 : vector<16xf32>
3025 }
3026
3027 // -----
3028
3029 // CHECK-LABEL: @contiguous_scatter
3030 //  CHECK-SAME:   (%[[BASE:.*]]: memref<?xf32>, %[[MASK:.*]]: vector<16xi1>, %[[VALUE:.*]]: vector<16xf32>)
3031 //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
3032 //       CHECK:   vector.maskedstore %[[BASE]][%[[C0]]], %[[MASK]], %[[VALUE]] : memref<?xf32>, vector<16xi1>, vector<16xf32>
     // The scatter indices are the constant sequence 0..15, so the expected output
     // is a vector.maskedstore to the same base offset with the same mask.
3033 func.func @contiguous_scatter(%base: memref<?xf32>,
3034                               %mask: vector<16xi1>, %value: vector<16xf32>) {
3035   %c0 = arith.constant 0 : index
3036   %indices = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
3037   vector.scatter %base[%c0][%indices], %mask, %value :
3038     memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
3039   return
3040 }
3041
3042 // -----
3043
3044 // CHECK-LABEL: @contiguous_scatter_const_mask
3045 //  CHECK-SAME:   (%[[BASE:.*]]: memref<?xf32>, %[[VALUE:.*]]: vector<16xf32>)
3046 //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
3047 //       CHECK:   vector.store %[[VALUE]], %[[BASE]][%[[C0]]] : memref<?xf32>, vector<16xf32>
     // Indices 0..15 combined with a constant mask covering all 16 lanes: the
     // expected output is a plain, unmasked vector.store.
3048 func.func @contiguous_scatter_const_mask(%base: memref<?xf32>,
3049                                          %value: vector<16xf32>) {
3050   %c0 = arith.constant 0 : index
3051   %indices = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : vector<16xi32>
3052   %mask = vector.constant_mask [16] : vector<16xi1>
3053   vector.scatter %base[%c0][%indices], %mask, %value :
3054     memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
3055   return
3056 }
3057
3058 // -----
3059
3060 // CHECK-LABEL: @contiguous_scatter_step
3061 //  CHECK-SAME:   (%[[BASE:.*]]: memref<?xf32>, %[[MASK:.*]]: vector<16xi1>, %[[VALUE:.*]]: vector<16xf32>)
3062 //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
3063 //       CHECK:   vector.maskedstore %[[BASE]][%[[C0]]], %[[MASK]], %[[VALUE]] : memref<?xf32>, vector<16xi1>, vector<16xf32>
     // Here the indices come from vector.step rather than a dense constant; the
     // expected output is a vector.maskedstore.
3064 func.func @contiguous_scatter_step(%base: memref<?xf32>,
3065                                    %mask: vector<16xi1>, %value: vector<16xf32>) {
3066   %c0 = arith.constant 0 : index
3067   %indices = vector.step : vector<16xindex>
3068   vector.scatter %base[%c0][%indices], %mask, %value :
3069     memref<?xf32>, vector<16xindex>, vector<16xi1>, vector<16xf32>
3070   return
3071 }