mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir

   1 // RUN: mlir-opt --split-input-file --tosa-to-linalg-named %s -verify-diagnostics -o -| FileCheck %s
   2
   3 // CHECK-LABEL: @matmul
     // Static f32 tosa.matmul. The directives below expect a zero constant, a
     // linalg.init_tensor with the result shape [1, 5, 6], a linalg.fill of that
     // tensor with the constant, and a linalg.batch_matmul that takes both
     // function arguments as inputs and the filled tensor as its output operand.
   4 func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) {
   5   // CHECK: [[C0:%.+]] = arith.constant 0
   6   // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6]
   7   // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32>
   8   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
   9   %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>)  -> (tensor<1x5x6xf32>)
  10   return %0 : tensor<1x5x6xf32>
  11 }
  12
  13 // -----
  14
  15
  16 // CHECK-LABEL: @matmul_quantized
     // i8 tosa.matmul carrying quantization_info (a_zp = 1, b_zp = 2) with an i32
     // result. The directives expect linalg.quantized_batch_matmul, which receives
     // the two zero points as additional i32 inputs (constants 1 and 2) and writes
     // into an i32 tensor that was filled with the constant 0.
  17 func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) {
  18   // CHECK: [[C0:%.+]] = arith.constant 0
  19   // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6]
  20   // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : i32, tensor<1x5x6xi32> -> tensor<1x5x6xi32>
  21   // CHECK: [[ONE:%.+]] = arith.constant 1
  22   // CHECK: [[TWO:%.+]] = arith.constant 2
  23   // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32>
  24   %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = {a_zp = 1 : i32, b_zp = 2 : i32}} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>)
  25   return %0 : tensor<1x5x6xi32>
  26 }
  27
  28 // -----
  29
  30 // CHECK-LABEL: @matmul_dyn_batch
     // tosa.matmul whose leading dimension is dynamic on both operands and on the
     // result. The directives expect a tensor.dim on dimension 0 of %arg0 and expect
     // its result to size the linalg.init_tensor. Two separate zero constants are
     // matched: C0 is the dimension index passed to tensor.dim, C0_0 is the value
     // passed to linalg.fill.
  31 func @matmul_dyn_batch(%arg0: tensor<?x5x3xf32>, %arg1: tensor<?x3x6xf32>) -> (tensor<?x5x6xf32>) {
  32   // CHECK: %[[C0:.+]] = arith.constant 0
  33   // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
  34   // CHECK: %[[C0_0:.+]] = arith.constant 0
  35   // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 5, 6]
  36   // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0_0]], %[[INIT]]) : f32, tensor<?x5x6xf32> -> tensor<?x5x6xf32>
  37   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) outs(%[[FILLED]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32>
  38   %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<?x5x3xf32>, tensor<?x3x6xf32>)  -> (tensor<?x5x6xf32>)
  39   return %0 : tensor<?x5x6xf32>
  40 }
  41
  42 // -----
  43
  44 // CHECK-LABEL: @matmul_dyn_independent_dim
     // tosa.matmul where only the last dimension of %arg1 (and of the result) is
     // dynamic. The directives expect a tensor.dim on dimension 2 of %arg1 and
     // expect its result to be used as the last extent of the linalg.init_tensor.
  45 func @matmul_dyn_independent_dim(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) {
  46   // CHECK: %[[C2:.+]] = arith.constant 2
  47   // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]]
  48   // CHECK: %[[C0:.+]] = arith.constant 0
  49   // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, %[[DIM]]]
  50   // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x?xf32> -> tensor<1x5x?xf32>
  51   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32>
  52   %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>)  -> (tensor<1x5x?xf32>)
  53   return %0 : tensor<1x5x?xf32>
  54 }
  55
  56 // -----
  57
  58 // CHECK-LABEL: @matmul_dyn_independent_dim
     // tosa.matmul where the dynamic extent is the last dimension of %arg0 and the
     // middle dimension of %arg1. That extent does not appear in the result type,
     // so the directives expect a fully static linalg.init_tensor [1, 5, 6] and no
     // tensor.dim.
     // NOTE(review): this function reuses the name of the preceding test case even
     // though the dynamic dimension here is the one shared by both operands; the
     // cases live in separate split-input-file sections so the names do not clash,
     // but a distinct name would make failures easier to attribute.
  59 func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) {
  60   // CHECK: %[[C0:.+]] = arith.constant 0
  61   // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, 6]
  62   // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32>
  63   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
  64   %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>)  -> (tensor<1x5x6xf32>)
  65   return %0 : tensor<1x5x6xf32>
  66 }
  67
  68 // -----
  69
  70 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
  71 // CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
  72
  73 // CHECK-LABEL: @fully_connected
     // Static f32 tosa.fully_connected. The directives expect:
     //   * a [5, 6] tensor filled with the constant 0 as the matmul output operand,
     //   * the weight %arg1 transposed by tosa.transpose with permutation [1, 0],
     //   * linalg.matmul of %arg0 with the transposed weight,
     //   * a linalg.generic that reads the bias %arg2 through MAP1 (indexed by d1
     //     only) and the matmul result through MAP2, and yields their arith.addf
     //     into a second, separate [5, 6] init_tensor.
  74 func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) {
  75   // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6]
  76   // CHECK: [[ZERO:%.+]] = arith.constant 0
  77   // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]])
  78   // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]>
  79   // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]])
  80   // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]
  81   // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32>
  82   // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) {
  83   // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
  84   // CHECK:   [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32
  85   // CHECK:   linalg.yield [[ADD]] : f32
  86
  87   %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<5x3xf32>, tensor<6x3xf32>, tensor<6xf32>)  -> (tensor<5x6xf32>)
  88   return %0 : tensor<5x6xf32>
  89 }
  90
  91 // -----
  92
  93 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
  94 // CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
  95
  96 // CHECK-LABEL: @quantized_fully_connected
     // i8 tosa.fully_connected carrying quantization_info (input_zp = 1,
     // weight_zp = 2) with an i32 bias and result. Same expected structure as the
     // f32 case, except that the product is linalg.quantized_matmul, which takes
     // the constants 1 and 2 as extra i32 inputs, and the bias is added with
     // arith.addi. The block-argument names of the generic body are captured rather
     // than spelled out.
  97 func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) {
  98   // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6]
  99   // CHECK: [[ZERO:%.+]] = arith.constant 0
 100   // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]])
 101   // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]>
 102   // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]])
 103   // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]
 104   // CHECK: [[ONE:%.+]] = arith.constant 1
 105   // CHECK: [[TWO:%.+]] = arith.constant 2
 106   // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32>
 107   // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]]
 108   // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32):
 109   // CHECK:   [[ADD:%.+]] = arith.addi
 110   // CHECK:   linalg.yield [[ADD]] : i32
 111   %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) {quantization_info = {input_zp = 1:i32, weight_zp = 2:i32}} : (tensor<5x3xi8>, tensor<6x3xi8>, tensor<6xi32>)  -> (tensor<5x6xi32>)
 112   return %0 : tensor<5x6xi32>
 113 }
 114
 115 // -----
 116
 117 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
 118 // CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
 119
 120 // CHECK-LABEL: @fully_connected_dyn
     // f32 tosa.fully_connected whose input %arg0 and result have a dynamic first
     // dimension. The directives expect one tensor.dim on dimension 0 of %arg0 and
     // expect its result to size both linalg.init_tensor ops (the matmul output and
     // the bias-add output). The rest mirrors the static case: transpose of the
     // weight, linalg.matmul, then a linalg.generic adding the bias with arith.addf.
 121 func @fully_connected_dyn(%arg0: tensor<?x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<?x6xf32>) {
 122   // CHECK: %[[C0:.+]] = arith.constant 0
 123   // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
 124   // CHECK: %[[INITT:.+]] = linalg.init_tensor [%[[DIM]], 6]
 125   // CHECK: %[[ZERO:.+]] = arith.constant 0
 126   // CHECK: %[[FILL:.+]] = linalg.fill(%[[ZERO]], %[[INITT]])
 127   // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]>
 128   // CHECK: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERM]])
 129   // CHECK: %[[INITB:.+]] = linalg.init_tensor [%[[DIM]], 6]
 130   // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor<?x3xf32>, tensor<3x6xf32>) outs(%[[FILL]] : tensor<?x6xf32>) -> tensor<?x6xf32>
 131   // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor<?x6xf32>) outs(%[[INITB]] : tensor<?x6xf32>) {
 132   // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
 133   // CHECK:   %[[ADD:.+]] = arith.addf %arg3, %arg4 : f32
 134   // CHECK:   linalg.yield %[[ADD]] : f32
 135
 136   %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<?x3xf32>, tensor<6x3xf32>, tensor<6xf32>)  -> (tensor<?x6xf32>)
 137   return %0 : tensor<?x6xf32>
 138 }
 139
 140 // -----
 141
 142 // CHECK-LABEL: @max_pool
     // f32 tosa.max_pool2d with a 3x3 kernel, stride 1 and no padding. The
     // directives expect linalg.pooling_nhwc_max reading %arg0 directly, with a
     // [3, 3] init_tensor as the window operand and an output tensor filled with
     // the constant -3.40282347E+38. The set-up ops are matched with the DAG
     // variant of the directive, so their relative order is not constrained.
 143 func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () {
 144   // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38
 145   // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 32, 62]
 146   // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[CONST]], [[INIT]])
 147   // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3]
 148   // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>)
 149   %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>)  -> (tensor<1x4x32x62xf32>)
 150   return
 151 }
 152
 153 // CHECK-LABEL: @max_pool_padded
     // f32 tosa.max_pool2d with pad = [0, 0, 0, 1]. The directives expect a
     // linalg.pad_tensor on %arg0 with high[0, 0, 1, 0] that yields the constant
     // -3.40282347E+38, and expect the pooling op to read the padded tensor
     // (1x6x35x62) instead of %arg0. The output tensor is filled from a separately
     // matched constant of the same value.
 154 func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () {
 155   // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 : f32
 156   // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0]
 157   // CHECK-DAG:   linalg.yield [[CONST]]
 158   // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32
 159   // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62]
 160   // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]])
 161   // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3]
 162   // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>)
 163   %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>)  -> (tensor<1x4x33x62xf32>)
 164   return
 165 }
 166
 167 // CHECK-LABEL: @max_pool_i8
     // i8 tosa.max_pool2d. Only two things are verified: a constant -128 (the
     // smallest signed 8-bit value) is emitted, followed by
     // linalg.pooling_nhwc_max. Operands and types of the pooling op are not
     // matched here.
 168 func @max_pool_i8(%arg0: tensor<1x6x34x62xi8>) -> () {
 169   // CHECK: arith.constant -128
 170   // CHECK: linalg.pooling_nhwc_max
 171   %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi8>)  -> (tensor<1x4x32x62xi8>)
 172   return
 173 }
 174
 175 // CHECK-LABEL: @max_pool_i16
     // i16 tosa.max_pool2d. Only two things are verified: a constant -32768 (the
     // smallest signed 16-bit value) is emitted, followed by
     // linalg.pooling_nhwc_max. Operands and types of the pooling op are not
     // matched here.
 176 func @max_pool_i16(%arg0: tensor<1x6x34x62xi16>) -> () {
 177   // CHECK: arith.constant -32768
 178   // CHECK: linalg.pooling_nhwc_max
 179   %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi16>)  -> (tensor<1x4x32x62xi16>)
 180   return
 181 }
 182
 183 // CHECK-LABEL: @max_pool_i32
     // i32 tosa.max_pool2d. Only two things are verified: a constant -2147483648
     // (the smallest signed 32-bit value) is emitted, followed by
     // linalg.pooling_nhwc_max. Operands and types of the pooling op are not
     // matched here.
 184 func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () {
 185   // CHECK: arith.constant -2147483648
 186   // CHECK: linalg.pooling_nhwc_max
 187   %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi32>)  -> (tensor<1x4x32x62xi32>)
 188   return
 189 }
 190 // -----
 191
 192 // CHECK-LABEL: @avg_pool
     // f32 tosa.avg_pool2d with a 4x4 kernel, stride 1 and pad = [1, 1, 1, 1].
     // The expected lowering has two stages:
     //   1. %arg0 is padded (low/high [0, 1, 1, 0]) and summed over each window with
     //      linalg.pooling_nhwc_sum into a zero-filled 1x5x33x62 tensor.
     //   2. A linalg.generic divides every sum by the number of window elements
     //      that fall inside the unpadded input. That count is rebuilt per output
     //      position from linalg.index 1 / linalg.index 2, clamped to at least 1
     //      along each axis, multiplied, converted with arith.index_cast and
     //      arith.sitofp, and used as the divisor of arith.divf.
     // CONST is captured twice; the second capture (after the pad) is the one
     // referenced by the linalg.fill directive.
 193 func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) {
 194   // Initial piece computes the sum of the pooling region, with appropriate padding.
 195   // CHECK: [[CONST:%.+]] = arith.constant 0
 196   // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
 197   // CHECK: [[CONST:%.+]] = arith.constant 0
 198   // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
 199   // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]])
 200   // CHECK: [[KERNEL:%.+]] = linalg.init_tensor [4, 4]
 201   // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>)
 202   // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
 203   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] : tensor<1x5x33x62xf32>) outs([[INIT]] : tensor<1x5x33x62xf32>)
 204   // CHECK:   [[ZERO:%.+]] = arith.constant 0
 205   // CHECK:   [[ONE:%.+]] = arith.constant 1
 206   // CHECK:   [[HEIGHT:%.+]] = arith.constant 4
 207   // CHECK:   [[WIDTH:%.+]] = arith.constant 32
 208   // CHECK:   [[IDX1:%.+]] = linalg.index 1
 209   // CHECK:   [[IDX2:%.+]] = linalg.index 2
 210
 211   // The large block below computes what portion of the kernel is within non-padded input.
 212   // CHECK:   [[NY:%.+]] = arith.subi [[HEIGHT]], [[IDX1]]
 213   // CHECK:   [[NX:%.+]] = arith.subi [[WIDTH]], [[IDX2]]
 214   // CHECK:   [[KH:%.+]] = arith.constant 4
 215   // CHECK:   [[PAD0:%.+]] = arith.constant 1
 216   // CHECK:   [[SUBP0:%.+]] = arith.subi [[IDX1]], [[PAD0]]
 217   // CHECK:   [[P0CMP:%.+]] = arith.cmpi slt, [[SUBP0]], [[ZERO]]
 218   // CHECK:   [[SELP0:%.+]] = select [[P0CMP]], [[SUBP0]], [[ZERO]]
 219   // CHECK:   [[ADDP0:%.+]] = arith.addi [[KH]], [[SELP0]]
 220   // CHECK:   [[PAD1:%.+]] = arith.constant 1
 221   // CHECK:   [[SUBP1:%.+]] = arith.subi [[NY]], [[PAD1]]
 222   // CHECK:   [[P1CMP:%.+]] = arith.cmpi slt, [[SUBP1]], [[ZERO]]
 223   // CHECK:   [[SELP1:%.+]] = select [[P1CMP]], [[SUBP1]], [[ZERO]]
 224   // CHECK:   [[ADDP1:%.+]] = arith.addi [[ADDP0]], [[SELP1]]
 225   // CHECK:   [[YCMP:%.+]] = arith.cmpi slt, [[ADDP1]], [[ONE]]
 226   // CHECK:   [[YSEL:%.+]] = select [[YCMP]], [[ONE]], [[ADDP1]]
 227   // CHECK:   [[KW:%.+]] = arith.constant 4 : index
 228   // CHECK:   [[PAD2:%.+]] = arith.constant 1 : index
 229   // CHECK:   [[SUBP2:%.+]] = arith.subi [[IDX2]], [[PAD2]]
 230   // CHECK:   [[P2CMP:%.+]] = arith.cmpi slt, [[SUBP2]], [[ZERO]]
 231   // CHECK:   [[SELP2:%.+]] = select [[P2CMP]], [[SUBP2]], [[ZERO]]
 232   // CHECK:   [[ADDP2:%.+]] = arith.addi [[KW]], [[SELP2]]
 233   // CHECK:   [[PAD3:%.+]] = arith.constant 1 : index
 234   // CHECK:   [[SUBP3:%.+]] = arith.subi [[NX]], [[PAD3]]
 235   // CHECK:   [[P3CMP:%.+]] = arith.cmpi slt, [[SUBP3]], [[ZERO]]
 236   // CHECK:   [[SELP3:%.+]] = select [[P3CMP]], [[SUBP3]], [[ZERO]]
 237   // CHECK:   [[ADDP3:%.+]] = arith.addi [[ADDP2]], [[SELP3]]
 238   // CHECK:   [[XCMP:%.+]] = arith.cmpi slt, [[ADDP3]], [[ONE]]
 239   // CHECK:   [[XSEL:%.+]] = select [[XCMP]], [[ONE]], [[ADDP3]]
 240
 241   // Given the valid coverage of the pooling region, normalize the summation.
 242   // CHECK:   [[C:%.+]] = arith.muli [[YSEL]], [[XSEL]]
 243   // CHECK:   [[CI:%.+]] = arith.index_cast [[C]]
 244   // CHECK:   [[CF:%.+]] = arith.sitofp [[CI]]
 245   // CHECK:   [[RESULT:%.+]] = arith.divf %arg1, [[CF]]
 246   // CHECK:   linalg.yield [[RESULT]]
 247   %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<1x6x34x62xf32>)  -> (tensor<1x5x33x62xf32>)
 248   return %0 : tensor<1x5x33x62xf32>
 249 }
 250
 251 // -----
 252
 253 // CHECK-LABEL: @avg_pool_i8
     // Quantized i8 tosa.avg_pool2d (4x4 kernel, stride 4, no padding, input_zp and
     // output_zp both -128). After linalg.pooling_nhwc_sum, the body of the
     // linalg.generic is expected to:
     //   * multiply some value by the input zero point and subtract the product
     //     from the block argument %arg1,
     //   * derive a multiplier by unsigned-dividing the constant 1073741825 by some
     //     value and rescale with tosa.apply_scale (shift 30, double_round = false),
     //   * add the output zero point,
     //   * clamp to [-128, 127] with two cmpi/select pairs,
     //   * truncate with arith.trunci and yield.
     // Operands matched as %{{.+}} are intentionally left unconstrained.
 254 func @avg_pool_i8(%arg0 : tensor<1x128x128x2xi8>) -> () {
 255
 256   // CHECK: linalg.pooling_nhwc_sum
 257   // CHECK: linalg.generic
 258
 259   // CHECK: %[[INZP:.+]] = arith.constant -128
 260   // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]]
 261   // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]]
 262   // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825
 263   // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}}
 264   // CHECK: %[[SHIFT:.+]] = arith.constant 30
 265   // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false}
 266   // CHECK: %[[OUTZP:.+]] = arith.constant -128
 267   // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]]
 268   // CHECK: %[[MIN:.+]] = arith.constant -128
 269   // CHECK: %[[MAX:.+]] = arith.constant 127
 270   // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]]
 271   // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]]
 272   // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]]
 273   // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]]
 274   // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]]
 275   // CHECK: linalg.yield %[[TRUNC]]
 276   %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi8>) -> tensor<1x32x32x2xi8>
 277   return
 278 }
 279
 280 // -----
 281
 282 // CHECK-LABEL: @avg_pool_i16
     // Quantized i16 tosa.avg_pool2d (4x4 kernel, stride 4, no padding, input_zp
     // and output_zp both -128). The expected generic body has the same shape as
     // in the i8 case: zero-point offset subtraction, tosa.apply_scale with a
     // multiplier derived from 1073741825 and shift 30, output zero-point addition,
     // clamp, arith.trunci, yield. The only difference in the directives is the
     // clamp range, which is [-32768, 32767] here.
 283 func @avg_pool_i16(%arg0 : tensor<1x128x128x2xi16>) -> () {
 284
 285   // CHECK: linalg.pooling_nhwc_sum
 286   // CHECK: linalg.generic
 287
 288   // CHECK: %[[INZP:.+]] = arith.constant -128
 289   // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]]
 290   // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]]
 291   // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825
 292   // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}}
 293   // CHECK: %[[SHIFT:.+]] = arith.constant 30
 294   // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false}
 295   // CHECK: %[[OUTZP:.+]] = arith.constant -128
 296   // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]]
 297   // CHECK: %[[MIN:.+]] = arith.constant -32768
 298   // CHECK: %[[MAX:.+]] = arith.constant 32767
 299   // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]]
 300   // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]]
 301   // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]]
 302   // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]]
 303   // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]]
 304   // CHECK: linalg.yield %[[TRUNC]]
 305   %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi16>) -> tensor<1x32x32x2xi16>
 306   return
 307 }
 308
 309 // -----
 310
 311 // CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)>
 312 // CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 313
 314 // CHECK-LABEL: @conv2d_f32
     // f32 tosa.conv2d with no padding, stride [1, 1] and dilation [2, 1]. The
     // directives expect:
     //   * the weights (%arg1) transposed with permutation [1, 2, 3, 0], giving a
     //     3x3x27x28 tensor,
     //   * linalg.conv_2d_nhwc_hwcf carrying dilations [2, 1] and strides 1, with a
     //     filled 1x45x40x28 tensor as output operand,
     //   * a linalg.generic that reads the bias (%arg2) through MAP1 (indexed by d3
     //     only) and the convolution result through MAP2, and contains arith.addf.
 315 func @conv2d_f32(%input: tensor<1x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () {
 316   // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>
 317   // CHECK: %[[W:.+]] = "tosa.transpose"(%arg1, %[[PERM]])
 318   // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, 45, 40, 28]
 319   // CHECK: %[[CST:.+]] = arith.constant 0
 320   // CHECK: %[[FILL:.+]] = linalg.fill
 321   // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, 45, 40, 28]
 322   // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>)
 323   // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>)
 324   // CHECK:   arith.addf
 325   // CHECK:   linalg.yield
 326   %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>)  -> (tensor<1x45x40x28xf32>)
 327   return
 328 }
 329
 330 // -----
 331
 332 // CHECK-LABEL: @conv2d_padded_f32
     // f32 tosa.conv2d with pad = [1, 1, 1, 1]. The directives only verify the
     // padding step: a linalg.pad_tensor on %arg0 with low/high [0, 1, 1, 0] that
     // yields a constant 0, followed by linalg.conv_2d_nhwc_hwcf.
 333 func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () {
 334   // CHECK: %[[C0:.+]] = arith.constant 0
 335   // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
 336   // CHECK:   linalg.yield %[[C0]]
 337   // CHECK: linalg.conv_2d_nhwc_hwcf
 338   %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>)  -> (tensor<1x45x40x28xf32>)
 339   return
 340 }
 341
 342 // -----
 343
 344 // CHECK-LABEL: @conv2d_quant
     // Quantized i8 tosa.conv2d with pad = [1, 1, 1, 1], input_zp = -22 and
     // weight_zp = 42. The directives verify that the linalg.pad_tensor yields the
     // constant -22 (the value of input_zp) rather than 0, and that the
     // convolution is emitted as linalg.conv_2d_nhwc_hwcf_q.
 345 func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () {
 346   // CHECK:   %[[C22:.+]] = arith.constant -22
 347   // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
 348   // CHECK:   linalg.yield %[[C22]]
 349   // CHECK: linalg.conv_2d_nhwc_hwcf_q
 350   %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32>
 351   return
 352 }
 353
 354 // -----
 355
 356 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
 357 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 358
 359 // CHECK-LABEL: @depthwise_conv
     // f32 tosa.depthwise_conv2d with no padding, stride 1 and dilation 1. The
     // directives expect:
     //   * linalg.depthwise_conv_2d_nhwc_hwcm writing a 5-D 1x5x5x3x11 tensor that
     //     was filled with the constant 0,
     //   * a tosa.reshape of that result to [1, 5, 5, 33],
     //   * a linalg.generic that reads the bias (%arg2) through MAP0 and the
     //     reshaped result through MAP1, and yields their arith.addf into a
     //     separate [1, 5, 5, 33] init_tensor.
 360 func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () {
 361   // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11]
 362   // CHECK: [[CST0:%.+]] = arith.constant 0
 363   // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
 364   // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33]
 365   // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
 366   // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]}
 367   // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
 368   // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):  // no predecessors
 369   // CHECK:   [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32
 370   // CHECK:   linalg.yield [[ADD]] : f32
 371   // CHECK: } -> tensor<1x5x5x33xf32>
 372   %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [1, 1], dilation = [1, 1] } : (tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>)  -> (tensor<1x5x5x33xf32>)
 373   return
 374 }
 375
 376 // -----
 377
 378 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
 379 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 380
 381 // CHECK-LABEL: @depthwise_conv_strides
     // f32 tosa.depthwise_conv2d with stride = [2, 2] on a 1x11x9x3 input. The
     // expected structure matches the unit-stride depthwise case (5-D depthwise
     // result, tosa.reshape to [1, 5, 5, 33], bias add in a linalg.generic); the
     // point of this case is that the stride is forwarded to the named op as
     // strides = dense<2>.
 382 func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () {
 383   // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11]
 384   // CHECK: [[CST0:%.+]] = arith.constant 0
 385   // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
 386   // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33]
 387   // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
 388   // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]}
 389   // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
 390   // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):  // no predecessors
 391   // CHECK:   [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32
 392   // CHECK:   linalg.yield [[ADD]] : f32
 393   // CHECK: } -> tensor<1x5x5x33xf32>
 394   %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [2, 2], dilation = [1, 1] } : (tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>)  -> (tensor<1x5x5x33xf32>)
 395   return
 396 }
 397
 398 // -----
 399
 400 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
 401 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 402
 403 // CHECK-LABEL: @depthwise_conv_quant
     // Quantized i8 tosa.depthwise_conv2d with pad = [1, 1, 1, 1], input_zp = -128
     // and weight_zp = 42. The directives expect:
     //   * a linalg.pad_tensor on %arg0 that yields the constant -128 (the value of
     //     input_zp),
     //   * linalg.depthwise_conv_2d_nhwc_hwcm_q reading the padded 1x14x14x4 tensor
     //     and taking the constants -128 and 42 as extra i32 inputs,
     //   * a tosa.reshape of the 5-D result to [1, 12, 12, 512],
     //   * a linalg.generic adding the i32 bias (%arg2) with arith.addi.
 404 func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () {
 405   // CHECK: [[PADV:%.+]] = arith.constant -128
 406   // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
 407   // CHECK:   linalg.yield [[PADV]]
 408
 409   // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128]
 410   // CHECK: [[CST0:%.+]] = arith.constant 0
 411   // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
 412   // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 12, 12, 512]
 413   // CHECK: [[C128:%.+]] = arith.constant -128
 414   // CHECK: [[C42:%.+]] = arith.constant 42
 415   // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>)
 416   // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 12, 12, 512]}
 417   // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) {
 418   // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32):  // no predecessors
 419   // CHECK:   [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32
 420   // CHECK:   linalg.yield [[ADD]] : i32
 421   // CHECK: } -> tensor<1x12x12x512xi32>
 422   %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 1, 1, 1], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [1, 1] } : (tensor<1x12x12x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>)  -> tensor<1x12x12x512xi32>
 423   return
 424 }
 425
 426 // -----
 427
 428 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
 429 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 430
 431 // CHECK-LABEL: @depthwise_conv_quant_dilations
     // Quantized i8 tosa.depthwise_conv2d with dilation = [2, 2] and no padding
     // (input_zp = -128, weight_zp = 42). No pad_tensor is matched; the named op
     // reads %arg0 directly and is expected to carry dilations = dense<2>, giving a
     // 1x10x10x4x128 result that is reshaped to [1, 10, 10, 512] before the i32
     // bias is added with arith.addi.
 432 func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () {
 433   // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 10, 10, 4, 128]
 434   // CHECK: [[CST0:%.+]] = arith.constant 0
 435   // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
 436   // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 10, 10, 512]
 437   // CHECK: [[C128:%.+]] = arith.constant -128
 438   // CHECK: [[C42:%.+]] = arith.constant 42
 439   // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>)
 440   // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 10, 10, 512]}
 441   // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) {
 442   // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32):  // no predecessors
 443   // CHECK:   [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32
 444   // CHECK:   linalg.yield [[ADD]] : i32
 445   // CHECK: } -> tensor<1x10x10x512xi32>
 446   %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [2, 2] } : (tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>)  -> tensor<1x10x10x512xi32>
 447   return
 448 }