mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir

   1 // RUN: mlir-opt --split-input-file -pass-pipeline="builtin.module(func.func(tosa-to-linalg))" %s -verify-diagnostics -o -| FileCheck %s
   2
   3 // CHECK: #[[$MAP0:.*]] = affine_map<() -> ()>
   4
   5 // CHECK-LABEL: @test_abs_scalar
   6 // CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]]
     // Rank-0 tosa.abs: the expected output is a linalg.generic with no
     // iterators whose body applies math.absf to the input element.
   7 func.func @test_abs_scalar(%arg0: tensor<f32>) -> tensor<f32> {
   8   // CHECK: [[EMPTY:%.+]] = tensor.empty() : tensor<f32>
   9   // CHECK: [[RESULT:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins([[ARG0]] : tensor<f32>) outs([[EMPTY]] : tensor<f32>) {
  10   // CHECK:   ^bb0([[IN0:%.*]]: f32, [[OUT0:%.*]]: f32):
  11   // CHECK:   [[ABS:%.*]] = math.absf [[IN0]] : f32
  12   // CHECK:   linalg.yield [[ABS]] : f32
  13   // CHECK: } -> tensor<f32>
  14   %0 = tosa.abs %arg0 : (tensor<f32>) -> tensor<f32>
  15
  16   // CHECK: return [[RESULT]] : tensor<f32>
  17   return %0 : tensor<f32>
  18 }
  19
  20 // -----
  21
  22 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
  23 // CHECK-LABEL: @test_abs_1d_cast_static_to_dynamic
  24 // CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]]
     // tosa.abs with a static tensor<5xf32> operand and a dynamic tensor<?xf32>
     // result type: the expected output computes on the static shape and then
     // converts the result with tensor.cast.
  25 func.func @test_abs_1d_cast_static_to_dynamic(%arg0: tensor<5xf32>) -> tensor<?xf32> {
  26   // CHECK: [[EMPTY:%.+]] = tensor.empty() : tensor<5xf32>
  27   // CHECK: [[RESULT:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]] : tensor<5xf32>) outs([[EMPTY]] : tensor<5xf32>) {
  28   // CHECK: ^bb0([[IN0:%.+]]: f32, [[OUT0:%.+]]: f32):
  29   // CHECK:   [[ABS:%.+]] = math.absf [[IN0]] : f32
  30   // CHECK:   linalg.yield [[ABS]] : f32
  31   // CHECK: } -> tensor<5xf32>
  32   // CHECK: [[CAST_RESULT:%.+]] = tensor.cast [[RESULT]] : tensor<5xf32> to tensor<?xf32>
  33   %0 = "tosa.abs"(%arg0) : (tensor<5xf32>) -> tensor<?xf32>
  34
  35   // CHECK: return [[CAST_RESULT]] : tensor<?xf32>
  36   return %0 : tensor<?xf32>
  37 }
  38
  39 // -----
  40
  41 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
  42 // CHECK-LABEL: @test_abs_1d_cast_dynamic_to_static
  43 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
     // tosa.abs with a dynamic tensor<?xf32> operand and a static tensor<5xf32>
     // result type: the expected output sizes the init tensor from tensor.dim of
     // the operand, computes on tensor<?xf32>, then casts to tensor<5xf32>.
  44 func.func @test_abs_1d_cast_dynamic_to_static(%arg0: tensor<?xf32>) -> tensor<5xf32> {
  45   // CHECK: %[[ZERO:.*]] = arith.constant 0 : index
  46   // CHECK: %[[DIM_SIZE:.*]] = tensor.dim %[[ARG0]], %[[ZERO]] : tensor<?xf32>
  47   // CHECK: %[[EMPTY:.*]] = tensor.empty(%[[DIM_SIZE]]) : tensor<?xf32>
  48   // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<?xf32>) outs(%[[EMPTY]] : tensor<?xf32>) {
  49   // CHECK: ^bb0(%[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: f32):
  50   // CHECK:   %[[VAL_2:.*]] = math.absf %[[VAL_0]] : f32
  51   // CHECK:   linalg.yield %[[VAL_2]] : f32
  52   // CHECK: } -> tensor<?xf32>
  53   // CHECK: %[[CAST_RESULT:.*]] = tensor.cast %[[RESULT]] : tensor<?xf32> to tensor<5xf32>
  54   %0 = "tosa.abs"(%arg0) : (tensor<?xf32>) -> tensor<5xf32>
  55
  56   // CHECK: return %[[CAST_RESULT]] : tensor<5xf32>
  57   return %0 : tensor<5xf32>
  58 }
  59
  60 // -----
  61
  62 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
  63 // CHECK-LABEL: @test_abs_1d_dynamic
  64 // CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]]
     // tosa.abs on a fully dynamic 1-D tensor: the expected output sizes the
     // init tensor from tensor.dim of the operand and needs no trailing cast.
     // The generic line refers to the captured map alias and argument name
     // instead of the literal printer names, so the test does not depend on
     // how mlir-opt happens to spell them.
  65 func.func @test_abs_1d_dynamic(%arg0: tensor<?xf32>) -> tensor<?xf32> {
  66
  67   // CHECK: [[ZERO:%.+]] = arith.constant 0 : index
  68   // CHECK: [[DIM:%.+]] = tensor.dim [[ARG0]], [[ZERO]] : tensor<?xf32>
  69   // CHECK: [[EMPTY:%.+]] = tensor.empty([[DIM]]) : tensor<?xf32>
  70   // CHECK: [[RESULT:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]] : tensor<?xf32>) outs([[EMPTY]] : tensor<?xf32>) {
  71   // CHECK: ^bb0([[IN0:%.+]]: f32, [[OUT0:%.+]]: f32):
  72   // CHECK:   [[ABSF:%.+]] = math.absf [[IN0]] : f32
  73   // CHECK:   linalg.yield [[ABSF]] : f32
  74   // CHECK: } -> tensor<?xf32>
  75   %0 = tosa.abs %arg0 : (tensor<?xf32>) -> tensor<?xf32>
  76
  77   // CHECK: return [[RESULT]] : tensor<?xf32>
  78   return %0 : tensor<?xf32>
  79 }
  80
  81 // -----
  82
  83 // CHECK: #[[$MAP0:.*]] = affine_map<() -> ()>
  84 // CHECK-LABEL: @test_add_0d
  85 // CHECK-SAME: [[ARG0:%[0-9a-zA-Z_]*]]:
  86 // CHECK-SAME: [[ARG1:%[0-9a-zA-Z_]*]]:
     // tosa.add on two rank-0 tensors: the expected output is a single
     // linalg.generic with no iterators whose body is arith.addf. All three
     // indexing maps refer to the captured alias rather than the literal
     // "#map" name chosen by the printer.
  87 func.func @test_add_0d(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
  88
  89   // CHECK: [[EMPTY:%.+]] = tensor.empty() : tensor<f32>
  90   // CHECK: [[RESULT:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins([[ARG0]], [[ARG1]] : tensor<f32>, tensor<f32>) outs([[EMPTY]] : tensor<f32>) {
  91   // CHECK: ^bb0([[IN0:%.+]]: f32, [[IN1:%.+]]: f32, [[OUT0:%.+]]: f32):
  92   // CHECK:   [[ADDF:%.+]] = arith.addf [[IN0]], [[IN1]] : f32
  93   // CHECK:   linalg.yield [[ADDF]] : f32
  94   // CHECK: } -> tensor<f32>
  95   %0 = tosa.add %arg0, %arg1 : (tensor<f32>, tensor<f32>) -> tensor<f32>
  96
  97   // CHECK: return [[RESULT]] : tensor<f32>
  98   return %0 : tensor<f32>
  99 }
 100
 101 // -----
 102
 103 // CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (0)>
 104 // CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (d0)>
 105 // CHECK-LABEL: @test_add_1d_all_dynamic
 106 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
 107 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
     // tosa.add on two dynamic 1-D tensors. Expected output: the target size is
     // the arith.maxui of both operand sizes; each operand is then guarded by
     // an scf.if that broadcasts it through the (d0) -> (0) map when its size
     // equals 1 and passes it through otherwise; the final generic adds the
     // two (possibly broadcast) operands.
 108 func.func @test_add_1d_all_dynamic(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
 109
 110   // CHECK: %[[CONST0:.*]] = arith.constant 0 : index
 111   // CHECK: %[[ARG0_DIM0:.*]] = tensor.dim %[[ARG0]], %[[CONST0]] : tensor<?xf32>
 112   // CHECK: %[[ARG1_DIM0:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?xf32>
 113   // CHECK: %[[MAX_DIM0:.*]] = arith.maxui %[[ARG0_DIM0]], %[[ARG1_DIM0]] : index
 114   // CHECK: %[[CONST1:.*]] = arith.constant 1 : index
 115   // CHECK: %[[VAL_0:.*]] = tensor.dim %[[ARG0]], %[[CONST0]] : tensor<?xf32>
 116   // CHECK: %[[VAL_1:.*]] = arith.cmpi eq, %[[VAL_0]], %[[CONST1]] : index
 117   // CHECK: %[[ARG0_DIM0_BROADCAST:.*]] = scf.if %[[VAL_1]] -> (tensor<?xf32>) {
 118   // CHECK:   %[[VAL_2:.*]] = tensor.empty(%[[MAX_DIM0]]) : tensor<?xf32>
 119   // CHECK:   %[[VAL_3:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<?xf32>) outs(%[[VAL_2]] : tensor<?xf32>) {
 120   // CHECK:   ^bb0(%[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: f32):
 121   // CHECK:     linalg.yield %[[VAL_4]] : f32
 122   // CHECK:   } -> tensor<?xf32>
 123   // CHECK:   scf.yield %[[VAL_3]] : tensor<?xf32>
 124   // CHECK: } else {
 125   // CHECK:   scf.yield %[[ARG0]] : tensor<?xf32>
 126   // CHECK: }
 127   // CHECK: %[[VAL_6:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?xf32>
 128   // CHECK: %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_6]], %[[CONST1]] : index
 129   // CHECK: %[[ARG1_DIM0_BROADCAST:.*]] = scf.if %[[VAL_7]] -> (tensor<?xf32>) {
 130   // CHECK:   %[[VAL_8:.*]] = tensor.empty(%[[MAX_DIM0]]) : tensor<?xf32>
 131   // CHECK:   %[[VAL_9:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG1]] : tensor<?xf32>) outs(%[[VAL_8]] : tensor<?xf32>) {
 132   // CHECK:   ^bb0(%[[VAL_10:.*]]: f32, %[[VAL_11:.*]]: f32):
 133   // CHECK:     linalg.yield %[[VAL_10]] : f32
 134   // CHECK:   } -> tensor<?xf32>
 135   // CHECK:   scf.yield %[[VAL_9]] : tensor<?xf32>
 136   // CHECK: } else {
 137   // CHECK:   scf.yield %[[ARG1]] : tensor<?xf32>
 138   // CHECK: }
 139   // CHECK: %[[VAL_12:.*]] = tensor.empty(%[[MAX_DIM0]]) : tensor<?xf32>
 140   // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG0_DIM0_BROADCAST]], %[[ARG1_DIM0_BROADCAST]] : tensor<?xf32>, tensor<?xf32>) outs(%[[VAL_12]] : tensor<?xf32>) {
 141   // CHECK: ^bb0(%[[VAL_13:.*]]: f32, %[[VAL_14:.*]]: f32, %[[VAL_15:.*]]: f32):
 142   // CHECK:   %[[VAL_16:.*]] = arith.addf %[[VAL_13]], %[[VAL_14]] : f32
 143   // CHECK:   linalg.yield %[[VAL_16]] : f32
 144   // CHECK: } -> tensor<?xf32>
 145   %0 = tosa.add %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
 146
 147   // CHECK: return %[[RESULT]] : tensor<?xf32>
 148   return %0 : tensor<?xf32>
 149 }
 150
 151 // -----
 152
 153 // CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (0)>
 154 // CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (d0)>
 155 // CHECK-LABEL: @test_add_1d_broadcast_dynamic_to_static
 156 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
 157 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
     // tosa.add of a static tensor<5xf32> and a dynamic tensor<?xf32> with a
     // static tensor<5xf32> result. Expected output: only the dynamic operand
     // gets a runtime size-equals-1 test; the broadcast branch fills a
     // tensor<5xf32> and casts it back to tensor<?xf32> so both scf.if
     // branches yield the same type.
 158 func.func @test_add_1d_broadcast_dynamic_to_static(%arg0: tensor<5xf32>, %arg1: tensor<?xf32>) -> tensor<5xf32> {
 159
 160   // CHECK: %[[CONST1:.*]] = arith.constant 1 : index
 161   // CHECK: %[[CONST0:.*]] = arith.constant 0 : index
 162   // CHECK: %[[ARG1_DIM0:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?xf32>
 163   // CHECK: %[[VAL_0:.*]] = arith.cmpi eq, %[[ARG1_DIM0]], %[[CONST1]] : index
 164   // CHECK: %[[ARG1_DIM0_BROADCAST:.*]] = scf.if %[[VAL_0]] -> (tensor<?xf32>) {
 165   // CHECK:   %[[VAL_1:.*]] = tensor.empty() : tensor<5xf32>
 166   // CHECK:   %[[VAL_2:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG1]] : tensor<?xf32>) outs(%[[VAL_1]] : tensor<5xf32>) {
 167   // CHECK:   ^bb0(%[[VAL_3:.*]]: f32, %[[VAL_4:.*]]: f32):
 168   // CHECK:     linalg.yield %[[VAL_3]] : f32
 169   // CHECK:   } -> tensor<5xf32>
 170   // CHECK:   %[[VAL_5:.*]] = tensor.cast %[[VAL_2]] : tensor<5xf32> to tensor<?xf32>
 171   // CHECK:   scf.yield %[[VAL_5]] : tensor<?xf32>
 172   // CHECK: } else {
 173   // CHECK:   scf.yield %[[ARG1]] : tensor<?xf32>
 174   // CHECK: }
 175   // CHECK: %[[VAL_6:.*]] = tensor.empty() : tensor<5xf32>
 176   // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG0]], %[[ARG1_DIM0_BROADCAST]] : tensor<5xf32>, tensor<?xf32>) outs(%[[VAL_6]] : tensor<5xf32>) {
 177   // CHECK: ^bb0(%[[VAL_7:.*]]: f32, %[[VAL_8:.*]]: f32, %[[VAL_9:.*]]: f32):
 178   // CHECK:   %[[VAL_10:.*]] = arith.addf %[[VAL_7]], %[[VAL_8]] : f32
 179   // CHECK:   linalg.yield %[[VAL_10]] : f32
 180   // CHECK: } -> tensor<5xf32>
 181   %0 = tosa.add %arg0, %arg1 : (tensor<5xf32>, tensor<?xf32>) -> tensor<5xf32>
 182
 183   // CHECK: return %[[RESULT]] : tensor<5xf32>
 184   return %0 : tensor<5xf32>
 185 }
 186
 187 // -----
 188
 189 // CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (0)>
 190 // CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (d0)>
 191 // CHECK-LABEL: @test_add_1d_broadcast_static_to_dynamic
 192 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
 193 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
     // tosa.add of a static tensor<1xf32> and a dynamic tensor<?xf32>. The
     // expected output contains no scf.if: the size-1 operand is read through
     // the (d0) -> (0) map directly in the final generic, and the result size
     // comes from tensor.dim of the dynamic operand.
 194 func.func @test_add_1d_broadcast_static_to_dynamic(%arg0: tensor<1xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
 195
 196   // CHECK: %[[CONST0:.*]] = arith.constant 0 : index
 197   // CHECK: %[[ARG1_DIM0:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?xf32>
 198   // CHECK: %[[VAL_0:.*]] = tensor.empty(%[[ARG1_DIM0]]) : tensor<?xf32>
 199   // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG0]], %[[ARG1]] : tensor<1xf32>, tensor<?xf32>) outs(%[[VAL_0]] : tensor<?xf32>) {
 200   // CHECK: ^bb0(%[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: f32, %[[VAL_3:.*]]: f32):
 201   // CHECK:   %[[VAL_4:.*]] = arith.addf %[[VAL_1]], %[[VAL_2]] : f32
 202   // CHECK:   linalg.yield %[[VAL_4]] : f32
 203   // CHECK: } -> tensor<?xf32>
 204   %0 = tosa.add %arg0, %arg1 : (tensor<1xf32>, tensor<?xf32>) -> tensor<?xf32>
 205
 206   // CHECK: return %[[RESULT]] : tensor<?xf32>
 207   return %0 : tensor<?xf32>
 208 }
 209
 210 // -----
 211
 212 // CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (0)>
 213 // CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (d0)>
 214 // CHECK-LABEL: @test_add_1d_broadcast_static_to_static
 215 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
 216 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
     // tosa.add of tensor<1xf32> and tensor<3xf32>, all shapes static. The
     // expected output is a single generic in which the size-1 operand uses
     // the (d0) -> (0) map; no runtime size test is expected.
 217 func.func @test_add_1d_broadcast_static_to_static(%arg0: tensor<1xf32>, %arg1: tensor<3xf32>) -> tensor<3xf32> {
 218
 219   // CHECK: %[[VAL_0:.*]] = tensor.empty() : tensor<3xf32>
 220   // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG0]], %[[ARG1]] : tensor<1xf32>, tensor<3xf32>) outs(%[[VAL_0]] : tensor<3xf32>) {
 221   // CHECK: ^bb0(%[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: f32, %[[VAL_3:.*]]: f32):
 222   // CHECK:   %[[VAL_4:.*]] = arith.addf %[[VAL_1]], %[[VAL_2]] : f32
 223   // CHECK:   linalg.yield %[[VAL_4]] : f32
 224   // CHECK: } -> tensor<3xf32>
 225   %0 = tosa.add %arg0, %arg1 : (tensor<1xf32>, tensor<3xf32>) -> tensor<3xf32>
 226
 227   // CHECK: return %[[RESULT]] : tensor<3xf32>
 228   return %0 : tensor<3xf32>
 229 }
 230
 231 // -----
 232
 233 // CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)>
 234 // CHECK-LABEL: @test_add_1d_matching_static
 235 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
 236 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
     // tosa.add of two tensor<3xf32> operands with identical static shapes:
     // the expected output is one generic whose three indexing maps are all
     // the identity map.
 237 func.func @test_add_1d_matching_static(%arg0: tensor<3xf32>, %arg1: tensor<3xf32>) -> tensor<3xf32> {
 238
 239   // CHECK: %[[VAL_0:.*]] = tensor.empty() : tensor<3xf32>
 240   // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], %[[ARG1]] : tensor<3xf32>, tensor<3xf32>) outs(%[[VAL_0]] : tensor<3xf32>) {
 241   // CHECK: ^bb0(%[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: f32, %[[VAL_3:.*]]: f32):
 242   // CHECK:   %[[VAL_4:.*]] = arith.addf %[[VAL_1]], %[[VAL_2]] : f32
 243   // CHECK:   linalg.yield %[[VAL_4]] : f32
 244   // CHECK: } -> tensor<3xf32>
 245   %0 = tosa.add %arg0, %arg1 : (tensor<3xf32>, tensor<3xf32>) -> tensor<3xf32>
 246
 247   // CHECK: return %[[RESULT]] : tensor<3xf32>
 248   return %0 : tensor<3xf32>
 249 }
 250
 251 // -----
 252
 253 // CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1) -> (0, d1)>
 254 // CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1) -> (d0, d1)>
 255 // CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1) -> (d0, 0)>
 256 // CHECK-LABEL: @test_add_2d_all_dynamic
 257 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
 258 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
     // tosa.add on two fully dynamic 2-D tensors. Expected output, in order:
     //   1. the target size of each dimension as arith.maxui of both operands;
     //   2. for %arg0, a size-equals-1 guarded broadcast along dim 0, then
     //      along dim 1 (the second one operates on the result of the first);
     //   3. the same two guarded broadcasts for %arg1;
     //   4. the final generic that adds the two broadcast results.
     // LOCAL_CONST0 / LOCAL_CONST1 are re-captured inside each scf.if region.
 259 func.func @test_add_2d_all_dynamic(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
 260
 261   // CHECK: %[[CONST0:.*]] = arith.constant 0 : index
 262   // CHECK: %[[ARG0_DIM0:.*]] = tensor.dim %[[ARG0]], %[[CONST0]] : tensor<?x?xf32>
 263   // CHECK: %[[ARG1_DIM0:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?x?xf32>
 264   // CHECK: %[[MAX_DIM0:.*]] = arith.maxui %[[ARG0_DIM0]], %[[ARG1_DIM0]] : index
 265   // CHECK: %[[CONST1:.*]] = arith.constant 1 : index
 266   // CHECK: %[[ARG0_DIM1:.*]] = tensor.dim %[[ARG0]], %[[CONST1]] : tensor<?x?xf32>
 267   // CHECK: %[[ARG1_DIM1:.*]] = tensor.dim %[[ARG1]], %[[CONST1]] : tensor<?x?xf32>
 268   // CHECK: %[[MAX_DIM1:.*]] = arith.maxui %[[ARG0_DIM1]], %[[ARG1_DIM1]] : index
 269
 270   // CHECK: %[[VAL_0:.*]] = tensor.dim %[[ARG0]], %[[CONST0]] : tensor<?x?xf32>
 271   // CHECK: %[[VAL_1:.*]] = arith.cmpi eq, %[[VAL_0]], %[[CONST1]] : index
 272   // CHECK: %[[ARG0_DIM0_BROADCAST:.*]] = scf.if %[[VAL_1]] -> (tensor<?x?xf32>) {
 273   // CHECK:   %[[LOCAL_CONST1:.*]] = arith.constant 1 : index
 274   // CHECK:   %[[VAL_2:.*]] = tensor.dim %[[ARG0]], %[[LOCAL_CONST1]] : tensor<?x?xf32>
 275   // CHECK:   %[[VAL_3:.*]] = tensor.empty(%[[MAX_DIM0]], %[[VAL_2]]) : tensor<?x?xf32>
 276   // CHECK:   %[[VAL_4:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x?xf32>) outs(%[[VAL_3]] : tensor<?x?xf32>) {
 277   // CHECK:   ^bb0(%[[VAL_5:.*]]: f32, %[[VAL_6:.*]]: f32):
 278   // CHECK:     linalg.yield %[[VAL_5]] : f32
 279   // CHECK:   } -> tensor<?x?xf32>
 280   // CHECK:   scf.yield %[[VAL_4]] : tensor<?x?xf32>
 281   // CHECK: } else {
 282   // CHECK:   scf.yield %[[ARG0]] : tensor<?x?xf32>
 283   // CHECK: }
 284
 285   // CHECK: %[[VAL_7:.*]] = tensor.dim %[[ARG0_DIM0_BROADCAST]], %[[CONST1]] : tensor<?x?xf32>
 286   // CHECK: %[[VAL_8:.*]] = arith.cmpi eq, %[[VAL_7]], %[[CONST1]] : index
 287   // CHECK: %[[ARG0_DIM1_BROADCAST:.*]] = scf.if %[[VAL_8]] -> (tensor<?x?xf32>) {
 288   // CHECK:   %[[LOCAL_CONST0:.*]] = arith.constant 0 : index
 289   // CHECK:   %[[VAL_9:.*]] = tensor.dim %[[ARG0_DIM0_BROADCAST]], %[[LOCAL_CONST0]] : tensor<?x?xf32>
 290   // CHECK:   %[[VAL_10:.*]] = tensor.empty(%[[VAL_9]], %[[MAX_DIM1]]) : tensor<?x?xf32>
 291   // CHECK:   %[[VAL_11:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0_DIM0_BROADCAST]] : tensor<?x?xf32>) outs(%[[VAL_10]] : tensor<?x?xf32>) {
 292   // CHECK:   ^bb0(%[[VAL_12:.*]]: f32, %[[VAL_13:.*]]: f32):
 293   // CHECK:     linalg.yield %[[VAL_12]] : f32
 294   // CHECK:   } -> tensor<?x?xf32>
 295   // CHECK:   scf.yield %[[VAL_11]] : tensor<?x?xf32>
 296   // CHECK: } else {
 297   // CHECK:   scf.yield %[[ARG0_DIM0_BROADCAST]] : tensor<?x?xf32>
 298   // CHECK: }
 299
 300   // CHECK: %[[VAL_14:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?x?xf32>
 301   // CHECK: %[[VAL_15:.*]] = arith.cmpi eq, %[[VAL_14]], %[[CONST1]] : index
 302   // CHECK: %[[ARG1_DIM0_BROADCAST:.*]] = scf.if %[[VAL_15]] -> (tensor<?x?xf32>) {
 303   // CHECK:   %[[LOCAL_CONST1:.*]] = arith.constant 1 : index
 304   // CHECK:   %[[VAL_16:.*]] = tensor.dim %[[ARG1]], %[[LOCAL_CONST1]] : tensor<?x?xf32>
 305   // CHECK:   %[[VAL_17:.*]] = tensor.empty(%[[MAX_DIM0]], %[[VAL_16]]) : tensor<?x?xf32>
 306   // CHECK:   %[[VAL_18:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG1]] : tensor<?x?xf32>) outs(%[[VAL_17]] : tensor<?x?xf32>) {
 307   // CHECK:   ^bb0(%[[VAL_19:.*]]: f32, %[[VAL_20:.*]]: f32):
 308   // CHECK:     linalg.yield %[[VAL_19]] : f32
 309   // CHECK:   } -> tensor<?x?xf32>
 310   // CHECK:   scf.yield %[[VAL_18]] : tensor<?x?xf32>
 311   // CHECK: } else {
 312   // CHECK:   scf.yield %[[ARG1]] : tensor<?x?xf32>
 313   // CHECK: }
 314
 315   // CHECK: %[[VAL_21:.*]] = tensor.dim %[[ARG1_DIM0_BROADCAST]], %[[CONST1]] : tensor<?x?xf32>
 316   // CHECK: %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_21]], %[[CONST1]] : index
 317   // CHECK: %[[ARG1_DIM1_BROADCAST:.*]] = scf.if %[[VAL_22]] -> (tensor<?x?xf32>) {
 318   // CHECK:   %[[LOCAL_CONST0:.*]] = arith.constant 0 : index
 319   // CHECK:   %[[VAL_23:.*]] = tensor.dim %[[ARG1_DIM0_BROADCAST]], %[[LOCAL_CONST0]] : tensor<?x?xf32>
 320   // CHECK:   %[[VAL_24:.*]] = tensor.empty(%[[VAL_23]], %[[MAX_DIM1]]) : tensor<?x?xf32>
 321   // CHECK:   %[[VAL_25:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG1_DIM0_BROADCAST]] : tensor<?x?xf32>) outs(%[[VAL_24]] : tensor<?x?xf32>) {
 322   // CHECK:   ^bb0(%[[VAL_26:.*]]: f32, %[[VAL_27:.*]]: f32):
 323   // CHECK:     linalg.yield %[[VAL_26]] : f32
 324   // CHECK:   } -> tensor<?x?xf32>
 325   // CHECK:   scf.yield %[[VAL_25]] : tensor<?x?xf32>
 326   // CHECK: } else {
 327   // CHECK:   scf.yield %[[ARG1_DIM0_BROADCAST]] : tensor<?x?xf32>
 328   // CHECK: }
 329
 330   // CHECK: %[[VAL_28:.*]] = tensor.empty(%[[MAX_DIM0]], %[[MAX_DIM1]]) : tensor<?x?xf32>
 331   // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0_DIM1_BROADCAST]], %[[ARG1_DIM1_BROADCAST]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[VAL_28]] : tensor<?x?xf32>) {
 332   // CHECK: ^bb0(%[[VAL_29:.*]]: f32, %[[VAL_30:.*]]: f32, %[[VAL_31:.*]]: f32):
 333   // CHECK:   %[[VAL_32:.*]] = arith.addf %[[VAL_29]], %[[VAL_30]] : f32
 334   // CHECK:   linalg.yield %[[VAL_32]] : f32
 335   // CHECK: } -> tensor<?x?xf32>
 336   %0 = tosa.add %arg0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
 337
 338   // CHECK: return %[[RESULT]] : tensor<?x?xf32>
 339   return %0 : tensor<?x?xf32>
 340 }
 341
 342 // -----
 343
 344 // CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2) -> (0, d1, d2)>
 345 // CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
 346 // CHECK-LABEL: @test_add_2d_different_ranks
 347 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
 348 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
     // tosa.add of a rank-2 tensor<3x4xf32> and a rank-3 tensor<2x3x4xf32>.
     // Expected output: the lower-rank operand is first reshaped to 1x3x4 with
     // tensor.expand_shape and then read through the (0, d1, d2) map in the
     // final generic.
 349 func.func @test_add_2d_different_ranks(%arg0: tensor<3x4xf32>, %arg1: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
 350
 351   // CHECK: %[[ARG0_EXPANDED:.*]] = tensor.expand_shape %[[ARG0]] {{\[\[}}0, 1], [2]] output_shape [1, 3, 4] : tensor<3x4xf32> into tensor<1x3x4xf32>
 352   // CHECK: %[[VAL_0:.*]] = tensor.empty() : tensor<2x3x4xf32>
 353   // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG0_EXPANDED]], %[[ARG1]] : tensor<1x3x4xf32>, tensor<2x3x4xf32>) outs(%[[VAL_0]] : tensor<2x3x4xf32>) {
 354   // CHECK: ^bb0(%[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: f32, %[[VAL_3:.*]]: f32):
 355   // CHECK:   %[[VAL_4:.*]] = arith.addf %[[VAL_1]], %[[VAL_2]] : f32
 356   // CHECK:   linalg.yield %[[VAL_4]] : f32
 357   // CHECK: } -> tensor<2x3x4xf32>
 358   %0 = tosa.add %arg0, %arg1 : (tensor<3x4xf32>, tensor<2x3x4xf32>) -> tensor<2x3x4xf32>
 359
 360   // CHECK: return %[[RESULT]] : tensor<2x3x4xf32>
 361   return %0 : tensor<2x3x4xf32>
 362 }
 363
 364 // -----
 365
 366 // CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1) -> (d0, 0)>
 367 // CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1) -> (d0, d1)>
 368 // CHECK-LABEL: @test_select_2d_one_dynamic
 369 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
 370 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
 371 // CHECK-SAME: %[[ARG2:[0-9a-zA-Z_]*]]:
     // Three-operand tosa.select on 2x? tensors (dim 0 static, dim 1 dynamic).
     // Expected output: the dim-1 target size is the arith.maxui over all
     // three operands; each operand gets a size-equals-1 guarded broadcast
     // along dim 1; the final generic applies arith.select element-wise.
 372 func.func @test_select_2d_one_dynamic(%arg0: tensor<2x?xi1>, %arg1: tensor<2x?xf32>, %arg2: tensor<2x?xf32>) -> tensor<2x?xf32> {
 373
 374   // CHECK: %[[CONST1:.*]] = arith.constant 1 : index
 375   // CHECK: %[[ARG0_DIM1:.*]] = tensor.dim %[[ARG0]], %[[CONST1]] : tensor<2x?xi1>
 376   // CHECK: %[[ARG1_DIM1:.*]] = tensor.dim %[[ARG1]], %[[CONST1]] : tensor<2x?xf32>
 377   // CHECK: %[[VAL_0:.*]] = arith.maxui %[[ARG0_DIM1]], %[[ARG1_DIM1]] : index
 378   // CHECK: %[[ARG2_DIM1:.*]] = tensor.dim %[[ARG2]], %[[CONST1]] : tensor<2x?xf32>
 379   // CHECK: %[[MAX_DIM1:.*]] = arith.maxui %[[VAL_0]], %[[ARG2_DIM1]] : index
 380
 381   // CHECK: %[[VAL_1:.*]] = tensor.dim %[[ARG0]], %[[CONST1]] : tensor<2x?xi1>
 382   // CHECK: %[[VAL_2:.*]] = arith.cmpi eq, %[[VAL_1]], %[[CONST1]] : index
 383   // CHECK: %[[ARG0_BROADCAST:.*]] = scf.if %[[VAL_2]] -> (tensor<2x?xi1>) {
 384   // CHECK:   %[[VAL_3:.*]] = tensor.empty(%[[MAX_DIM1]]) : tensor<2x?xi1>
 385   // CHECK:   %[[VAL_4:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x?xi1>) outs(%[[VAL_3]] : tensor<2x?xi1>) {
 386   // CHECK:   ^bb0(%[[VAL_5:.*]]: i1, %[[VAL_6:.*]]: i1):
 387   // CHECK:     linalg.yield %[[VAL_5]] : i1
 388   // CHECK:   } -> tensor<2x?xi1>
 389   // CHECK:   scf.yield %[[VAL_4]] : tensor<2x?xi1>
 390   // CHECK: } else {
 391   // CHECK:   scf.yield %[[ARG0]] : tensor<2x?xi1>
 392   // CHECK: }
 393
 394   // CHECK: %[[VAL_7:.*]] = tensor.dim %[[ARG1]], %[[CONST1]] : tensor<2x?xf32>
 395   // CHECK: %[[VAL_8:.*]] = arith.cmpi eq, %[[VAL_7]], %[[CONST1]] : index
 396   // CHECK: %[[ARG1_BROADCAST:.*]] = scf.if %[[VAL_8]] -> (tensor<2x?xf32>) {
 397   // CHECK:   %[[VAL_9:.*]] = tensor.empty(%[[MAX_DIM1]]) : tensor<2x?xf32>
 398   // CHECK:   %[[VAL_10:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x?xf32>) outs(%[[VAL_9]] : tensor<2x?xf32>) {
 399   // CHECK:   ^bb0(%[[VAL_11:.*]]: f32, %[[VAL_12:.*]]: f32):
 400   // CHECK:     linalg.yield %[[VAL_11]] : f32
 401   // CHECK:   } -> tensor<2x?xf32>
 402   // CHECK:   scf.yield %[[VAL_10]] : tensor<2x?xf32>
 403   // CHECK: } else {
 404   // CHECK:   scf.yield %[[ARG1]] : tensor<2x?xf32>
 405   // CHECK: }
 406
 407   // CHECK: %[[VAL_13:.*]] = tensor.dim %[[ARG2]], %[[CONST1]] : tensor<2x?xf32>
 408   // CHECK: %[[VAL_14:.*]] = arith.cmpi eq, %[[VAL_13]], %[[CONST1]] : index
 409   // CHECK: %[[ARG2_BROADCAST:.*]] = scf.if %[[VAL_14]] -> (tensor<2x?xf32>) {
 410   // CHECK:   %[[VAL_15:.*]] = tensor.empty(%[[MAX_DIM1]]) : tensor<2x?xf32>
 411   // CHECK:   %[[VAL_16:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG2]] : tensor<2x?xf32>) outs(%[[VAL_15]] : tensor<2x?xf32>) {
 412   // CHECK:   ^bb0(%[[VAL_17:.*]]: f32, %[[VAL_18:.*]]: f32):
 413   // CHECK:     linalg.yield %[[VAL_17]] : f32
 414   // CHECK:   } -> tensor<2x?xf32>
 415   // CHECK:   scf.yield %[[VAL_16]] : tensor<2x?xf32>
 416   // CHECK: } else {
 417   // CHECK:   scf.yield %[[ARG2]] : tensor<2x?xf32>
 418   // CHECK: }
 419
 420   // CHECK: %[[VAL_19:.*]] = tensor.empty(%[[MAX_DIM1]]) : tensor<2x?xf32>
 421   // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0_BROADCAST]], %[[ARG1_BROADCAST]], %[[ARG2_BROADCAST]] : tensor<2x?xi1>, tensor<2x?xf32>, tensor<2x?xf32>) outs(%[[VAL_19]] : tensor<2x?xf32>) {
 422   // CHECK: ^bb0(%[[VAL_20:.*]]: i1, %[[VAL_21:.*]]: f32, %[[VAL_22:.*]]: f32, %[[VAL_23:.*]]: f32):
 423   // CHECK:   %[[VAL_24:.*]] = arith.select %[[VAL_20]], %[[VAL_21]], %[[VAL_22]] : f32
 424   // CHECK:   linalg.yield %[[VAL_24]] : f32
 425   // CHECK: } -> tensor<2x?xf32>
 426   %0 = tosa.select %arg0, %arg1, %arg2 : (tensor<2x?xi1>, tensor<2x?xf32>, tensor<2x?xf32>) -> tensor<2x?xf32>
 427
 428   // CHECK: return %[[RESULT]] : tensor<2x?xf32>
 429   return %0 : tensor<2x?xf32>
 430 }
 431
 432 // -----
 433
 434 // CHECK-LABEL: @test_simple_f32
     // Smoke test for element-wise TOSA ops on tensor<1xf32>. For each op the
     // expected output is a linalg.generic followed by the named arith/math
     // op(s); the patterns are loose substring matches, so only the presence
     // and relative order of the ops is verified. The f32 -> i32 cast is the
     // exception: its round / clamp / convert / select sequence is matched
     // value by value.
 435 func.func @test_simple_f32(%arg0: tensor<1xf32>) -> () {
 436   // CHECK: linalg.generic
 437   // CHECK: tanh
 438   %0 = tosa.tanh %arg0 : (tensor<1xf32>) -> tensor<1xf32>
 439
 440   // CHECK: linalg.generic
 441   // CHECK: math.absf
 442   %1 = tosa.abs %arg0 : (tensor<1xf32>) -> tensor<1xf32>
 443
 444   // CHECK: linalg.generic
 445   // CHECK: arith.addf
 446   %2 = tosa.add %0, %0 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
 447
 448   // CHECK: linalg.generic
 449   // CHECK: arith.subf
 450   %3 = tosa.sub %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
 451
 452   // CHECK: linalg.generic
 453   // CHECK: arith.mulf
 454   %4 = tosa.mul %0, %1 {shift = 0 : i8} : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
 455
 456   // CHECK: linalg.generic
 457   // CHECK: arith.negf
 458   %5 = tosa.negate %0 : (tensor<1xf32>) -> tensor<1xf32>
 459
 460   // CHECK: linalg.generic
 461   // CHECK: pow
 462   %6 = tosa.pow %1, %2 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
 463
 464   // CHECK: linalg.generic
 465   // CHECK: rsqrt
 466   %7 = tosa.rsqrt %1 : (tensor<1xf32>) -> tensor<1xf32>
 467
 468   // CHECK: linalg.generic
 469   // CHECK: log
 470   %8 = tosa.log %arg0 : (tensor<1xf32>) -> tensor<1xf32>
 471
 472   // CHECK: linalg.generic
 473   // CHECK: exp
 474   %9 = tosa.exp %arg0 : (tensor<1xf32>) -> tensor<1xf32>
 475
 476   // CHECK: linalg.generic
 477   // CHECK: arith.cmpf
 478   %10 = tosa.greater %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xi1>
 479
 480   // CHECK: linalg.generic
 481   // CHECK: arith.cmpf
 482   %11 = tosa.greater_equal %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xi1>
 483
 484   // CHECK: linalg.generic
 485   // CHECK: arith.cmpf
 486   %12 = tosa.equal %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xi1>
 487
 488   // CHECK: linalg.generic
 489   // CHECK: select
 490   %13 = tosa.select %10, %0, %1 : (tensor<1xi1>, tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
 491
 492   // CHECK: linalg.generic
 493   // CHECK: arith.maximumf
 494   %14 = tosa.maximum %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
 495
 496   // CHECK: linalg.generic
 497   // CHECK: arith.minimumf
 498   %15 = tosa.minimum %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
 499
 500   // CHECK: linalg.generic
 501   // CHECK: ceil
 502   %16 = tosa.ceil %0 : (tensor<1xf32>) -> tensor<1xf32>
 503
 504   // CHECK: linalg.generic
 505   // CHECK: floor
 506   %17 = tosa.floor %0 : (tensor<1xf32>) -> tensor<1xf32>
 507
 508   // CHECK: linalg.generic
 509   // CHECK: arith.minimumf
 510   // CHECK: arith.maximumf
 511   %18 = tosa.clamp %0 {min_int = 1 : i64, max_int = 5 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xf32>) -> tensor<1xf32>
 512
 513   // CHECK: linalg.generic
 514   // CHECK: arith.negf
 515   // CHECK: exp
 516   // CHECK: arith.addf
 517   // CHECK: arith.divf
 518   %19 = tosa.sigmoid %0 : (tensor<1xf32>) -> tensor<1xf32>
 519
 520   // CHECK: linalg.generic
 521   // CHECK: [[ROUND:%.+]] = math.roundeven {{%.+}} : f32
 522   // CHECK: [[CSTMIN:%.+]] = arith.constant -2.14748365E+9 : f32
 523   // CHECK: [[CSTMAXP1:%.+]] = arith.constant 2.14748365E+9 : f32
 524   // CHECK: [[CSTMAX:%.+]] = arith.constant 2147483647 : i32
 525   // CHECK: [[MAX:%.+]] = arith.maximumf [[ROUND]], [[CSTMIN]] : f32
 526   // CHECK: [[CONV:%.+]] = arith.fptosi [[MAX]] : f32 to i32
 527   // CHECK: [[CMP:%.+]] = arith.cmpf uge, [[ROUND]], [[CSTMAXP1]] : f32
 528   // CHECK: arith.select [[CMP]], [[CSTMAX]], [[CONV]] : i32
 529   %20 = tosa.cast %0 : (tensor<1xf32>) -> tensor<1xi32>
 530
 531   // CHECK: linalg.generic
 532   // CHECK: arith.constant 0
 533   // CHECK: arith.cmpf
 534   %21 = tosa.cast %0 : (tensor<1xf32>) -> tensor<1xi1>
 535
 536   // CHECK: linalg.generic
 537   // CHECK: arith.truncf
 538   %22 = tosa.cast %0 : (tensor<1xf32>) -> tensor<1xf16>
 539
 540   // CHECK: linalg.generic
 541   // CHECK: arith.divf
 542   %23 = tosa.reciprocal %0 : (tensor<1xf32>) -> tensor<1xf32>
 543
 544   // CHECK: linalg.generic
 545   // CHECK: math.erf
 546   %24 = tosa.erf %0 : (tensor<1xf32>) -> tensor<1xf32>
 547
 548   // CHECK: linalg.generic
 549   // CHECK: math.sin
 550   %25 = tosa.sin %arg0 : (tensor<1xf32>) -> tensor<1xf32>
 551
 552   // CHECK: linalg.generic
 553   // CHECK: math.cos
 554   %26 = tosa.cos %arg0 : (tensor<1xf32>) -> tensor<1xf32>
 555
 556   return
 557 }
 558
 559 // -----
 560
 561 // CHECK-LABEL: @test_simple_f16
     // tosa.cast from f16 to three destination types. Expected output:
     //   - to f32: arith.extf;
     //   - to i8: round-to-even, clamp to [-128, 127] in f16, then fptosi;
     //   - to i32: round-to-even and fptosi, then selects that substitute the
     //     i32 max/min constants when the rounded value compares equal to
     //     the f16 +inf (0x7C00) / -inf (0xFC00) constants.
 562 func.func @test_simple_f16(%arg0: tensor<1xf16>) -> () {
 563
 564   // CHECK: linalg.generic
 565   // CHECK: arith.extf
 566   %0 = tosa.cast %arg0 : (tensor<1xf16>) -> tensor<1xf32>
 567
 568   // CHECK: linalg.generic
 569   // CHECK: [[ROUND:%.+]] = math.roundeven {{%.+}} : f16
 570   // CHECK: [[CSTMIN:%.+]] = arith.constant -1.280000e+02 : f16
 571   // CHECK: [[CSTMAX:%.+]] = arith.constant 1.270000e+02 : f16
 572   // CHECK: [[MIN:%.+]] = arith.minimumf [[ROUND]], [[CSTMAX]] : f16
 573   // CHECK: [[CLAMP:%.+]] = arith.maximumf [[MIN]], [[CSTMIN]] : f16
 574   // CHECK: arith.fptosi [[CLAMP]] : f16 to i8
 575   %1 = "tosa.cast"(%arg0) : (tensor<1xf16>) -> tensor<1xi8>
 576
 577   // CHECK: linalg.generic
 578   // CHECK: [[ROUND:%.+]] = math.roundeven {{%[a-z0-9_]+}} : f16
 579   // CHECK: [[CONV:%.+]] = arith.fptosi [[ROUND]] : f16 to i32
 580   // CHECK: [[POSINF:%.+]] = arith.constant 0x7C00 : f16
 581   // CHECK: [[NEGINF:%.+]] = arith.constant 0xFC00 : f16
 582   // CHECK: [[OVERFLOW:%.+]] = arith.cmpf ueq, [[ROUND]], [[POSINF]] : f16
 583   // CHECK: [[UNDERFLOW:%.+]] = arith.cmpf ueq, [[ROUND]], [[NEGINF]] : f16
 584   // CHECK: [[MININT:%.+]] = arith.constant -2147483648 : i32
 585   // CHECK: [[MAXINT:%.+]] = arith.constant 2147483647 : i32
 586   // CHECK: [[CLAMPPOSINF:%.+]] = arith.select [[OVERFLOW]], [[MAXINT]], [[CONV]] : i32
 587   // CHECK: arith.select [[UNDERFLOW]], [[MININT]], [[CLAMPPOSINF]] : i32
 588   %2 = "tosa.cast"(%arg0) : (tensor<1xf16>) -> tensor<1xi32>
 589   return
 590 }
 591
 592 // -----
 593
 594 // CHECK-LABEL: @test_simple_i16
     // i16 x i16 -> i32 multiplication with shift = 0: both operands are
     // expected to be sign-extended (two arith.extsi) before the arith.muli.
 595 func.func @test_simple_i16(%arg0: tensor<1xi16>) -> () {
 596   // CHECK: linalg.generic
 597   // CHECK: arith.extsi
 598   // CHECK: arith.extsi
 599   // CHECK: arith.muli
 600   %0 = tosa.mul %arg0, %arg0 {shift = 0 : i8} : (tensor<1xi16>, tensor<1xi16>) -> tensor<1xi32>
 601
 602   return
 603 }
 604
 605 // -----
 606
 607 // CHECK-LABEL: @test_simple_ui8
     // Casting an unsigned 8-bit tensor to f32 is expected to use the unsigned
     // conversion arith.uitofp. Only that op is matched; the enclosing
     // linalg.generic is not verified here.
 608 func.func @test_simple_ui8(%arg0: tensor<1xui8>) -> () {
 609   // CHECK: arith.uitofp
 610   %0 = tosa.cast %arg0 : (tensor<1xui8>) -> tensor<1xf32>
 611   return
 612 }
 613
 614 // -----
 615
 616 // CHECK-LABEL: @test_simple_i32
     // Smoke test for the integer elementwise lowerings. Each tosa op below is
     // paired with the arith / math op(s) expected in the generated IR; most
     // cases first match the enclosing linalg.generic. %unsigned and
     // %unsigned64 feed the unsigned clamp cases.
 617 func.func @test_simple_i32(%arg0: tensor<1xi32>, %unsigned: tensor<1xui32>, %unsigned64: tensor<1xui64>) -> () {
 618   // CHECK: linalg.generic
 619   // CHECK: arith.addi
 620   %0 = tosa.add %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 621
 622   // CHECK: linalg.generic
 623   // CHECK: arith.subi
 624   %1 = tosa.sub %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 625
 626   // CHECK: linalg.generic
 627   // CHECK: arith.muli
 628   %2 = tosa.mul %arg0, %arg0 {shift = 0 : i8} : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 629
       // With a non-zero shift the multiply is expected to go through
       // apply_scale, with the shift amount (2) appearing as a constant.
 630   // CHECK: linalg.generic
 631   // CHECK: arith.constant 2
 632   // CHECK: apply_scale
 633   %3 = tosa.mul %arg0, %arg0 {shift = 2 : i8} : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 634
 635   // CHECK: linalg.generic
 636   // CHECK: arith.divsi
 637   %40 = tosa.int_div %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 638
       // Non-quantized integer negate is matched as 0 - x.
 639   // CHECK: linalg.generic
 640   // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32):
 641   // CHECK: [[ZERO:%.+]] = arith.constant 0
 642   // CHECK: arith.subi [[ZERO]], %[[ARG1]]
 643   %5 = tosa.negate %arg0 : (tensor<1xi32>) -> tensor<1xi32>
 644
 645   // CHECK: linalg.generic
 646   // CHECK: and
 647   %6 = tosa.bitwise_and %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 648
 649   // CHECK: linalg.generic
 650   // CHECK: or
 651   %7 = tosa.bitwise_or %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 652
 653   // CHECK: linalg.generic
 654   // CHECK: arith.xori
 655   %8 = tosa.bitwise_xor %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 656
 657   // CHECK: linalg.generic
 658   // CHECK: arith.shli
 659   %9 = tosa.logical_left_shift %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 660
 661   // CHECK: linalg.generic
 662   // CHECK: arith.shrui
 663   %10 = tosa.logical_right_shift %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 664
       // round = 0: a plain arithmetic shift right.
 665   // CHECK: linalg.generic
 666   // CHECK: arith.shrsi
 667   %11 = tosa.arithmetic_right_shift %arg0, %arg0 {round = 0 : i1} : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 668
       // round = 1: the rounding variant is expected to expand to the longer
       // constant / compare / shift / mask / extend / add sequence below.
 669   // CHECK: linalg.generic
 670   // CHECK: arith.constant 1
 671   // CHECK: arith.constant 0
 672   // CHECK: arith.constant true
 673   // CHECK: arith.cmpi
 674   // CHECK: arith.subi
 675   // CHECK: arith.shrsi
 676   // CHECK: arith.trunci
 677   // CHECK: and
 678   // CHECK: and
 679   // CHECK: arith.extui
 680   // CHECK: arith.addi
 681   %12 = tosa.arithmetic_right_shift %arg0, %arg0 {round = 1 : i1} : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 682
       // Only the math.ctlz op is matched for clz; the enclosing generic is
       // not verified for this case.
 683   // CHECK: math.ctlz
 684   %13 = tosa.clz %arg0 : (tensor<1xi32>) -> tensor<1xi32>
 685
 686   // CHECK: linalg.generic
 687   // CHECK: arith.cmpi
 688   %14 = tosa.greater %0, %1 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi1>
 689
 690   // CHECK: linalg.generic
 691   // CHECK: arith.cmpi
 692   %15 = tosa.greater_equal %0, %1 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi1>
 693
 694   // CHECK: linalg.generic
 695   // CHECK: select
 696   %16 = tosa.select %14, %0, %1 : (tensor<1xi1>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 697
 698   // CHECK: linalg.generic
 699   // CHECK: arith.maxsi
 700   %17 = tosa.maximum %0, %1 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 701
 702   // CHECK: linalg.generic
 703   // CHECK: arith.minsi
 704   %18 = tosa.minimum %0, %1 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
 705
       // Signed clamp: a maxsi / minsi pair, accepted in either order.
 706   // CHECK: linalg.generic
 707   // CHECK-DAG: arith.maxsi
 708   // CHECK-DAG: arith.minsi
 709   %19 = tosa.clamp %0 {min_int = 1 : i64, max_int = 5 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xi32>) -> tensor<1xi32>
 710
       // Unsigned clamp, bounds in range: the unsigned maxui / minui ops are
       // expected with the bounds 4 and 32 unchanged.
 711   // CHECK: linalg.generic
 712   // CHECK-DAG: %[[LB:.*]] = arith.constant 4 : i32
 713   // CHECK-DAG: %[[UB:.*]] = arith.constant 32 : i32
 714   // CHECK-DAG: arith.maxui %[[LB]],
 715   // CHECK-DAG: arith.minui %[[UB]],
 716   %u0 = tosa.clamp %unsigned {min_int = 4 : i64, max_int = 32 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xui32>) -> tensor<1xui32>
 717
       // Unsigned clamp on ui32 with both bounds at 9223372036854775807: both
       // constants are expected as -1 : i32 (the all-ones i32 bit pattern).
 718   // CHECK: linalg.generic
 719   // CHECK-DAG: %[[LB:.*]] = arith.constant -1 : i32
 720   // CHECK-DAG: %[[UB:.*]] = arith.constant -1 : i32
 721   // CHECK-DAG: arith.maxui %[[LB]],
 722   // CHECK-DAG: arith.minui %[[UB]],
 723   %u1 = tosa.clamp %unsigned {min_int = 9223372036854775807 : i64, max_int = 9223372036854775807 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xui32>) -> tensor<1xui32>
 724
       // Unsigned clamp with negative bounds (-3, -2): both constants are
       // expected as 0.
 725   // CHECK: linalg.generic
 726   // CHECK-DAG: %[[LB:.*]] = arith.constant 0 : i32
 727   // CHECK-DAG: %[[UB:.*]] = arith.constant 0 : i32
 728   // CHECK-DAG: arith.maxui %[[LB]],
 729   // CHECK-DAG: arith.minui %[[UB]],
 730   %u2 = tosa.clamp %unsigned {min_int = -3 : i64, max_int = -2 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xui32>) -> tensor<1xui32>
 731
       // Unsigned clamp on ui64: the negative lower bound is expected as 0 and
       // the upper bound 9223372036854775807 is expected unchanged.
 732   // CHECK: linalg.generic
 733   // CHECK-DAG: %[[LB:.*]] = arith.constant 0 : i64
 734   // CHECK-DAG: %[[UB:.*]] = arith.constant 9223372036854775807 : i64
 735   // CHECK-DAG: arith.maxui %[[LB]],
 736   // CHECK-DAG: arith.minui %[[UB]],
 737   %u3 = tosa.clamp %unsigned64 {min_int = -3 : i64, max_int = 9223372036854775807 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xui64>) -> tensor<1xui64>
 738
       // i32 casts: narrowing truncates, widening sign-extends, the cast to i1
       // is a compare involving a 0 constant, the cast to f32 is a signed
       // int-to-float conversion.
 739   // CHECK: linalg.generic
 740   // CHECK: arith.trunci
 741   %20 = tosa.cast %0 : (tensor<1xi32>) -> tensor<1xi16>
 742
 743   // CHECK: linalg.generic
 744   // CHECK: arith.extsi
 745   %21 = tosa.cast %0 : (tensor<1xi32>) -> tensor<1xi64>
 746
 747   // CHECK: linalg.generic
 748   // CHECK: arith.constant 0
 749   // CHECK: arith.cmpi
 750   %22 = tosa.cast %0 : (tensor<1xi32>) -> tensor<1xi1>
 751
 752   // CHECK: linalg.generic
 753   // CHECK: arith.sitofp
 754   %23 = tosa.cast %0 : (tensor<1xi32>) -> tensor<1xf32>
 755
       // Integer abs is matched as a 0 constant, a subi and a maxsi.
 756   // CHECK: linalg.generic
 757   // CHECK: arith.constant 0
 758   // CHECK: arith.subi
 759   // CHECK: arith.maxsi
 760   %24 = tosa.abs %arg0 : (tensor<1xi32>) -> tensor<1xi32>
 761
 762   return
 763 }
 764
 765 // -----
 766
 767 // CHECK-LABEL: @test_simple_i8
     // Casting a signed 8-bit tensor to f32 is expected to use the signed
     // conversion arith.sitofp inside a linalg.generic. The symbol is named
     // for the signed i8 operand it actually takes.
 768 func.func @test_simple_i8(%arg0: tensor<1xi8>) -> () {
 769
 770   // CHECK: linalg.generic
 771   // CHECK: arith.sitofp
 772   %0 = tosa.cast %arg0 : (tensor<1xi8>) -> tensor<1xf32>
 773
 774   return
 775 }
 776
 777 // -----
 778
 779 // CHECK-LABEL: @test_i8
     // Signed clamp on i8 with two sets of bounds: one inside the i8 range and
     // one outside it.
 780 func.func @test_i8(%arg0: tensor<1xi8>) -> () {
       // Bounds -127 / 126 fit in i8 and are expected unchanged.
 781   // CHECK: linalg.generic
 782   // CHECK: ^bb0(%[[ARG1:.+]]: i8,
 783   // CHECK-DAG: %[[C127:.+]] = arith.constant -127
 784   // CHECK-DAG: %[[C126:.+]] = arith.constant 126
 785   // CHECK-DAG: %[[LOWER:.+]] = arith.maxsi %[[C127]], %[[ARG1]]
 786   // CHECK-DAG: %[[CLAMPED:.+]] = arith.minsi %[[C126]], %[[LOWER]]
 787   %0 = tosa.clamp %arg0 {min_int = -127 : i64, max_int = 126 : i64, min_fp = 0.0 : f32, max_fp = 0.0 : f32} : (tensor<1xi8>) -> tensor<1xi8>
 788
       // Bounds -130 / 130 do not fit in i8; the constants are expected as
       // -128 / 127 instead.
 789   // CHECK: linalg.generic
 790   // CHECK: ^bb0(%[[ARG1:.+]]: i8,
 791   // CHECK-DAG: %[[C128:.+]] = arith.constant -128
 792   // CHECK-DAG: %[[C127:.+]] = arith.constant 127
 793   // CHECK-DAG: %[[LOWER:.+]] = arith.maxsi %[[C128]], %[[ARG1]]
 794   // CHECK-DAG: %[[CLAMPED:.+]] = arith.minsi %[[C127]], %[[LOWER]]
 795   %1 = tosa.clamp %arg0 {min_int = -130 : i64, max_int = 130 : i64, min_fp = 0.0 : f32, max_fp = 0.0 : f32} : (tensor<1xi8>) -> tensor<1xi8>
 796
 797   return
 798 }
 799
 800 // -----
 801
 802 // CHECK-LABEL: @test_i64
     // Signed clamp on i64 using the extreme i64 values as bounds: both
     // constants are expected verbatim, feeding a maxsi followed by a minsi.
 803 func.func @test_i64(%arg0: tensor<1xi64>) -> () {
 804   // CHECK: linalg.generic
 805   // CHECK: ^bb0(%[[IN:.+]]: i64,
 806   // CHECK-DAG: %[[I64_MIN:.+]] = arith.constant -9223372036854775808
 807   // CHECK-DAG: %[[I64_MAX:.+]] = arith.constant 9223372036854775807
 808   // CHECK-DAG: %[[FLOOR:.+]] = arith.maxsi %[[I64_MIN]], %[[IN]]
 809   // CHECK-DAG: %[[RESULT:.+]] = arith.minsi %[[I64_MAX]], %[[FLOOR]]
 810   %0 = tosa.clamp %arg0 {min_int = -9223372036854775808 : i64, max_int = 9223372036854775807 : i64, min_fp = 0.0 : f32, max_fp = 0.0 : f32} : (tensor<1xi64>) -> tensor<1xi64>
 811
 812   return
 813 }
 814
 815 // -----
 816
 817 // CHECK-LABEL: @test_clamp_f16
     // Floating-point clamp on f16 with min_fp = 0.0 and max_fp = 6.0: the
     // input is expected to go through minimumf with 6.0 and then maximumf
     // with 0.0.
 818 func.func @test_clamp_f16(%arg0: tensor<1xf16>) -> () {
 819   // CHECK: linalg.generic
 820   // CHECK: ^bb0(%[[ARG1:.+]]: f16,
 821   // CHECK-DAG: %[[C0:.+]] = arith.constant 0.0
 822   // CHECK-DAG: %[[C6:.+]] = arith.constant 6.0
 823   // CHECK-DAG: %[[MIN:.+]] = arith.minimumf %[[ARG1]], %[[C6]]
 824   // CHECK-DAG: %[[MAX:.+]] = arith.maximumf %[[MIN]], %[[C0]]
 825   %0 = tosa.clamp %arg0 {min_int = 0 : i64, max_int = 0 : i64, min_fp = 0.0 : f32, max_fp = 6.0 : f32} : (tensor<1xf16>) -> tensor<1xf16>
 826
 827   return
 828 }
 829
 830 // -----
 831
 832 // CHECK-LABEL: @test_bool
     // Logical ops on i1 tensors. The and / or patterns are deliberately loose
     // substrings; xor is matched as arith.xori.
 833 func.func @test_bool(%arg0: tensor<1xi1>, %arg1: tensor<1xi1>) -> () {
 834   // CHECK: linalg.generic
 835   // CHECK: and
 836   %0 = tosa.logical_and %arg0, %arg1 : (tensor<1xi1>, tensor<1xi1>) -> tensor<1xi1>
 837
 838   // CHECK: linalg.generic
 839   // CHECK: or
 840   %1 = tosa.logical_or %arg0, %arg1 : (tensor<1xi1>, tensor<1xi1>) -> tensor<1xi1>
 841
 842   // CHECK: linalg.generic
 843   // CHECK: arith.xori
 844   %2 = tosa.logical_xor %arg0, %arg1 : (tensor<1xi1>, tensor<1xi1>) -> tensor<1xi1>
 845
       // logical_not is matched as a `true` constant followed by an xori.
 846   // CHECK: linalg.generic
 847   // CHECK: arith.constant true
 848   // CHECK: arith.xori
 849   %3 = tosa.logical_not %arg0 : (tensor<1xi1>) -> tensor<1xi1>
 850
 851   return
 852 }
 853
 854 // -----
 855
 856 // CHECK-LABEL: @test_negate_quantized
     // tosa.negate on i8 with quantization info. The four cases vary the zero
     // points and verify the width of the intermediate integer type
     // (presumably chosen so the zero-point arithmetic cannot overflow --
     // confirm against the lowering).
 857 func.func @test_negate_quantized(%arg0: tensor<1xi8>) -> () {
       // input_zp = 0, output_zp = 7: extend to i16, compute 7 - x, clamp to
       // [-128, 127], truncate and yield.
 858   // CHECK: linalg.generic
 859   // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
 860   // CHECK: [[CNST:%.+]] = arith.constant 7
 861   // CHECK: [[EXT:%.+]] = arith.extsi %[[BBARG0]] : i8 to i16
 862   // CHECK: [[SUB:%.+]] = arith.subi [[CNST]], [[EXT]]
 863   // CHECK: [[MIN:%.+]] = arith.constant -128
 864   // CHECK: [[MAX:%.+]] = arith.constant 127
 865   // CHECK: [[LBOUND:%.+]] = arith.maxsi [[MIN]], [[SUB]]
 866   // CHECK: [[UBOUND:%.+]] = arith.minsi [[MAX]], [[LBOUND]]
 867   // CHECK: [[TRUNC:%.+]] = arith.trunci [[UBOUND]]
 868   // CHECK: linalg.yield [[TRUNC]]
 869   %0 = tosa.negate %arg0 {quantization_info = #tosa.unary_quant<input_zp = 0, output_zp = 7>} : (tensor<1xi8>) -> tensor<1xi8>
 870
       // input_zp = 32639: the intermediate type is still expected to be i16.
 871   // CHECK: linalg.generic
 872   // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
 873   // CHECK: [[EXT:%.+]] = arith.extsi %[[BBARG0]] : i8 to i16
 874   %1 = tosa.negate %arg0 {quantization_info = #tosa.unary_quant<input_zp = 32639, output_zp = 0>} : (tensor<1xi8>) -> tensor<1xi8>
 875
       // input_zp = 32640 (one more than above): the intermediate type is
       // expected to widen to i32.
 876   // CHECK: linalg.generic
 877   // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
 878   // CHECK: [[EXT:%.+]] = arith.extsi %[[BBARG0]] : i8 to i32
 879   %2 = tosa.negate %arg0 {quantization_info = #tosa.unary_quant<input_zp = 32640, output_zp = 0>} : (tensor<1xi8>) -> tensor<1xi8>
 880
       // Both zero points are 0: expected to be a plain 0 - x whose result is
       // yielded directly.
 881   // CHECK: linalg.generic
 882   // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
 883   // CHECK: [[ZERO:%.+]] = arith.constant 0
 884   // CHECK: [[SUB:%.+]] = arith.subi [[ZERO]],
 885   // CHECK: linalg.yield [[SUB]]
 886   %3 = tosa.negate %arg0 {quantization_info = #tosa.unary_quant<input_zp = 0, output_zp = 0>} : (tensor<1xi8>) -> tensor<1xi8>
 887
 888   return
 889 }
 890
 891 // -----
 892
 893 // CHECK-LABEL: @test_identity
     // tosa.identity is expected to disappear: the function must return its
     // two arguments directly.
 894 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]: tensor<1xf32>,
 895 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: tensor<1xi32>
 896 func.func @test_identity(%arg0: tensor<1xf32>, %arg1: tensor<1xi32>) -> (tensor<1xf32>, tensor<1xi32>) {
 897   %0 = tosa.identity %arg0 : (tensor<1xf32>) -> tensor<1xf32>
 898   %1 = tosa.identity %arg1 : (tensor<1xi32>) -> tensor<1xi32>
 899
 900   // CHECK: return %[[ARG0]], %[[ARG1]]
 901   return %0, %1 : tensor<1xf32>, tensor<1xi32>
 902 }
 903
 904 // -----
 905
 906 // CHECK-LABEL: @reduce_float
     // Static-shape f32 reductions. Each one is expected to become
     // tensor.empty + linalg.fill (with the reduction's start value) +
     // linalg.reduce over the axis, followed by a tensor.expand_shape that
     // restores the reduced dimension with size 1.
 907 // CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xf32>
 908 func.func @reduce_float(%arg0: tensor<5x4xf32>) -> () {
       // reduce_sum over axis 0: start value 0.0, combiner arith.addf.
 909   // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<4xf32>
 910   // CHECK: [[CST0:%.+]] = arith.constant 0.0
 911   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
 912   // CHECK: [[REDUCE:%.+]] = linalg.reduce ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<4xf32>) dimensions = [0]
 913   // CHECK:  (%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) {
 914   // CHECK:   [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32
 915   // CHECK:   linalg.yield [[RES]] : f32
 916   // CHECK:  }
 917   // CHECK: tensor.expand_shape [[REDUCE]] {{\[}}[0, 1]] output_shape [1, 4] : tensor<4xf32> into tensor<1x4xf32>
 918   %0 = tosa.reduce_sum %arg0 {axis = 0 : i32} : (tensor<5x4xf32>) -> tensor<1x4xf32>
 919
       // reduce_sum over axis 1: same structure, reducing dimension 1.
 920   // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5xf32>
 921   // CHECK: [[CST0:%.+]] = arith.constant 0.0
 922   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
 923   // CHECK: [[REDUCE:%.+]] = linalg.reduce ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<5xf32>) dimensions = [1]
 924   // CHECK:  (%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) {
 925   // CHECK:   [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32
 926   // CHECK:   linalg.yield [[RES]] : f32
 927   // CHECK:  }
 928   // CHECK: tensor.expand_shape [[REDUCE]] {{\[}}[0, 1]] output_shape [5, 1] : tensor<5xf32> into tensor<5x1xf32>
 929   %1 = tosa.reduce_sum %arg0 {axis = 1 : i32} : (tensor<5x4xf32>) -> tensor<5x1xf32>
 930
       // reduce_prod: start value 1.0, combiner arith.mulf.
 931   // CHECK: arith.constant 1.0
 932   // CHECK: linalg.fill
 933   // CHECK: linalg.reduce
 934   // CHECK: arith.mulf
 935   %2 = tosa.reduce_prod %arg0 {axis = 0 : i32} : (tensor<5x4xf32>) -> tensor<1x4xf32>
 936
       // reduce_min: start value 3.40282347E+38, combiner arith.minimumf.
 937   // CHECK: arith.constant 3.40282347E+38 : f32
 938   // CHECK: linalg.fill
 939   // CHECK: linalg.reduce
 940   // CHECK: arith.minimumf
 941   %3 = tosa.reduce_min %arg0 {axis = 0 : i32} : (tensor<5x4xf32>) -> tensor<1x4xf32>
 942
       // reduce_max: start value -3.40282347E+38, combiner arith.maximumf.
 943   // CHECK: arith.constant -3.40282347E+38 : f32
 944   // CHECK: linalg.fill
 945   // CHECK: linalg.reduce
 946   // CHECK: arith.maximumf
 947   %4 = tosa.reduce_max %arg0 {axis = 0 : i32} : (tensor<5x4xf32>) -> tensor<1x4xf32>
 948   return
 949 }
 950
 951 // -----
 952
 953 // CHECK-LABEL: @reduce_float_dyn
     // reduce_sum over a static axis (1) of a tensor whose dimension 0 is
     // dynamic. The dynamic size is read with tensor.dim to size the init
     // tensor, and read again from the reduce result to build the
     // output_shape of the final tensor.expand_shape.
 954 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]: tensor<?x5x4xf32>
 955 func.func @reduce_float_dyn(%arg0: tensor<?x5x4xf32>) -> () {
 956   // CHECK: %[[C0:.+]] = arith.constant 0
 957   // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]]
 958   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<?x4xf32>
 959   // CHECK: %[[CST0:.+]] = arith.constant 0.0
 960   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]]
 961   // CHECK: %[[REDUCE:.+]] = linalg.reduce ins(%[[ARG0]] : tensor<?x5x4xf32>) outs(%[[FILL]] : tensor<?x4xf32>) dimensions = [1]
 962   // CHECK:  (%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) {
 963   // CHECK:   %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32
 964   // CHECK:   linalg.yield %[[RES]] : f32
 965   // CHECK:  }
 966   // CHECK: %[[C0_0:.+]] = arith.constant 0 : index
 967   // CHECK: %[[DIM_1:.+]] = tensor.dim %[[REDUCE]], %[[C0_0]] : tensor<?x4xf32>
 968   // CHECK: %[[C1:.+]] = arith.constant 1 : index
 969   // CHECK: tensor.expand_shape %[[REDUCE]] {{\[}}[0], [1, 2]] output_shape [%[[DIM_1]], 1, 4] : tensor<?x4xf32> into tensor<?x1x4xf32>
 970   %0 = tosa.reduce_sum %arg0 {axis = 1 : i32} : (tensor<?x5x4xf32>) -> tensor<?x1x4xf32>
 971   return
 972 }
 973
 974 // -----
 975
 976 // CHECK-LABEL: @reduce_float_dyn_rank_1
     // reduce_sum over the only (dynamic) dimension of a rank-1 tensor: the
     // reduction writes into a rank-0 tensor<f32>, which is then expanded with
     // an empty reassociation list into tensor<1xf32>.
 977 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]: tensor<?xf32>
 978 func.func @reduce_float_dyn_rank_1(%arg0: tensor<?xf32>) -> () {
 979   // CHECK-DAG: %[[INIT:.+]] = tensor.empty() : tensor<f32>
 980   // CHECK-DAG: %[[CST0:.+]] = arith.constant 0.0
 981   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]]
 982   // CHECK: %[[REDUCE:.+]] = linalg.reduce ins(%[[ARG0]] : tensor<?xf32>) outs(%[[FILL]] : tensor<f32>) dimensions = [0]
 983   // CHECK:  (%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) {
 984   // CHECK:   %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32
 985   // CHECK:   linalg.yield %[[RES]] : f32
 986   // CHECK:  }
 987   // CHECK: tensor.expand_shape %[[REDUCE]] {{\[}}] output_shape [1] : tensor<f32> into tensor<1xf32>
 988   %0 = tosa.reduce_sum %arg0 {axis = 0 : i32} : (tensor<?xf32>) -> tensor<1xf32>
 989   return
 990 }
 991
 992 // -----
 993
 994 // CHECK-LABEL: @reduce_float_dyn_nonzero_batch
     // reduce_prod over the last axis (2) when the dynamic dimension is at
     // index 1 rather than 0: tensor.dim must query index 1 both for the init
     // tensor and for the expand_shape output_shape.
 995 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
 996 func.func @reduce_float_dyn_nonzero_batch(%arg0: tensor<5x?x4xf32>) -> () {
 997   // CHECK: %[[C1:.+]] = arith.constant 1
 998   // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C1]]
 999   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<5x?xf32>
1000   // CHECK: %[[CST1:.+]] = arith.constant 1.0
1001   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST1]]{{.*}}outs(%[[INIT]]
1002   // CHECK: %[[REDUCE:.+]] = linalg.reduce ins(%[[ARG0]] : tensor<5x?x4xf32>) outs(%[[FILL]] : tensor<5x?xf32>) dimensions = [2]
1003   // CHECK:  (%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) {
1004   // CHECK:   %[[RES:.+]] = arith.mulf %[[ARG1]], %[[ARG2]] : f32
1005   // CHECK:   linalg.yield %[[RES]] : f32
1006   // CHECK:  }
1007   // CHECK: %[[C1_0:.+]] = arith.constant 1 : index
1008   // CHECK: %[[DIM_1:.+]] = tensor.dim %[[REDUCE]], %[[C1_0]] : tensor<5x?xf32>
1009   // CHECK: %[[C1_2:.+]] = arith.constant 1 : index
1010   // CHECK: tensor.expand_shape %[[REDUCE]] {{\[}}[0], [1, 2]] output_shape [5, %[[DIM_1]], 1] : tensor<5x?xf32> into tensor<5x?x1xf32>
1011   %0 = tosa.reduce_prod %arg0 {axis = 2 : i32} : (tensor<5x?x4xf32>) -> tensor<5x?x1xf32>
1012   return
1013 }
1014
1015 // -----
1016
1017 // CHECK-LABEL: @reduce_float_dyn_multiple
     // reduce_max over axis 1 of a tensor whose two dimensions are both
     // dynamic: only the surviving dimension (index 0) is expected to be
     // queried with tensor.dim, and the start value is -3.40282347E+38.
1018 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1019 func.func @reduce_float_dyn_multiple(%arg0: tensor<?x?xf32>) -> () {
1020   // CHECK: %[[C0:.+]] = arith.constant 0
1021   // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]]
1022   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
1023   // CHECK: %[[CMIN:.+]] = arith.constant -3.40282347E+38
1024   // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CMIN]]{{.*}}outs(%[[INIT]]
1025   // CHECK: %[[REDUCE:.+]] = linalg.reduce ins(%[[ARG0]] : tensor<?x?xf32>) outs(%[[FILL]] : tensor<?xf32>) dimensions = [1]
1026   // CHECK:  (%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) {
1027   // CHECK:   %[[MAX:.+]] = arith.maximumf %[[ARG1]], %[[ARG2]] : f32
1028   // CHECK:   linalg.yield %[[MAX]] : f32
1029   // CHECK:  }
1030   // CHECK: %[[C0_0:.+]] = arith.constant 0 : index
1031   // CHECK: %[[DIM_1:.+]] = tensor.dim %[[REDUCE]], %[[C0_0]] : tensor<?xf32>
1032   // CHECK: %[[C1_2:.+]] = arith.constant 1 : index
1033   // CHECK: tensor.expand_shape %[[REDUCE]] {{\[}}[0, 1]] output_shape [%[[DIM_1]], 1] : tensor<?xf32> into tensor<?x1xf32>
1034   %0 = tosa.reduce_max %arg0 {axis = 1 : i32} : (tensor<?x?xf32>) -> tensor<?x1xf32>
1035   return
1036 }
1037
1038 // -----
1039
1040 // CHECK-LABEL: @reduce_int
     // Static-shape i32 reductions, mirroring the floating-point structure:
     // tensor.empty + linalg.fill + linalg.reduce + tensor.expand_shape, with
     // integer start values and combiners.
1041 // CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xi32>
1042 func.func @reduce_int(%arg0: tensor<5x4xi32>) -> () {
       // reduce_sum over axis 0: start value 0, combiner arith.addi.
1043   // CHECK: [[INIT:%.+]] = tensor.empty()
1044   // CHECK: [[CST0:%.+]] = arith.constant 0
1045   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
1046   // CHECK: [[REDUCE:%.+]] = linalg.reduce ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<4xi32>) dimensions = [0]
1047   // CHECK:  (%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) {
1048   // CHECK:   [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32
1049   // CHECK:   linalg.yield [[RES]] : i32
1050   // CHECK:  }
1051   // CHECK: tensor.expand_shape [[REDUCE]] {{\[}}[0, 1]] output_shape [1, 4] : tensor<4xi32> into tensor<1x4xi32>
1052   %0 = tosa.reduce_sum %arg0 {axis = 0 : i32} : (tensor<5x4xi32>) -> tensor<1x4xi32>
1053
       // reduce_sum over axis 1.
1054   // CHECK: [[INIT:%.+]] = tensor.empty()
1055   // CHECK: [[CST0:%.+]] = arith.constant 0
1056   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
1057   // CHECK: [[REDUCE:%.+]] = linalg.reduce ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<5xi32>) dimensions = [1]
1058   // CHECK:  (%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) {
1059   // CHECK:   [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32
1060   // CHECK:   linalg.yield [[RES]] : i32
1061   // CHECK:  }
1062   // CHECK: tensor.expand_shape [[REDUCE]] {{\[}}[0, 1]] output_shape [5, 1] : tensor<5xi32> into tensor<5x1xi32>
1063   %1 = tosa.reduce_sum %arg0 {axis = 1 : i32} : (tensor<5x4xi32>) -> tensor<5x1xi32>
1064
       // reduce_prod: start value 1, combiner arith.muli.
1065   // CHECK: arith.constant 1
1066   // CHECK: linalg.fill
1067   // CHECK: linalg.reduce
1068   // CHECK: arith.muli
1069   %2 = tosa.reduce_prod %arg0 {axis = 0 : i32} : (tensor<5x4xi32>) -> tensor<1x4xi32>
1070
       // reduce_min: start value 2147483647, combiner arith.minsi.
1071   // CHECK: arith.constant 2147483647 : i32
1072   // CHECK: linalg.fill
1073   // CHECK: linalg.reduce
1074   // CHECK: arith.minsi
1075   %3 = tosa.reduce_min %arg0 {axis = 0 : i32} : (tensor<5x4xi32>) -> tensor<1x4xi32>
1076
       // reduce_max: start value -2147483648, combiner arith.maxsi.
1077   // CHECK: arith.constant -2147483648 : i32
1078   // CHECK: linalg.fill
1079   // CHECK: linalg.reduce
1080   // CHECK: arith.maxsi
1081   %4 = tosa.reduce_max %arg0 {axis = 0 : i32} : (tensor<5x4xi32>) -> tensor<1x4xi32>
1082   return
1083 }
1084
1085 // -----
1086
1087 // CHECK-LABEL: @reduce_bool
     // Boolean reductions on i1: reduce_all starts from `true` and combines
     // with arith.andi; reduce_any starts from `false` and is matched loosely
     // on the substring `or`.
1088 // CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xi1>
1089 func.func @reduce_bool(%arg0: tensor<5x4xi1>) -> () {
1090   // CHECK: [[INIT:%.+]] = tensor.empty()
1091   // CHECK: [[CST0:%.+]] = arith.constant true
1092   // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
1093   // CHECK: [[REDUCE:%.+]] = linalg.reduce ins([[ARG0]] : tensor<5x4xi1>) outs([[FILL]] : tensor<4xi1>) dimensions = [0]
1094   // CHECK:  (%[[ARG1:[0-9a-zA-Z_]+]]: i1, %[[ARG2:[0-9a-zA-Z_]+]]: i1) {
1095   // CHECK:   [[RES:%.+]] = arith.andi %[[ARG1]], %[[ARG2]] : i1
1096   // CHECK:   linalg.yield [[RES]] : i1
1097   // CHECK:  }
1098   // CHECK: tensor.expand_shape [[REDUCE]] {{\[}}[0, 1]] output_shape [1, 4] : tensor<4xi1> into tensor<1x4xi1>
1099   %0 = tosa.reduce_all %arg0 {axis = 0 : i32} : (tensor<5x4xi1>) -> tensor<1x4xi1>
1100
1101   // CHECK: arith.constant false
1102   // CHECK: linalg.fill
1103   // CHECK: linalg.reduce
1104   // CHECK: or
1105   %1 = tosa.reduce_any %arg0 {axis = 0 : i32} : (tensor<5x4xi1>) -> tensor<1x4xi1>
1106
1107   return
1108 }
1109
1110 // -----
1111 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
1112
1113 // CHECK-LABEL: @rescale_i8
     // Baseline per-tensor rescale i8 -> i8. The multiplier (19689) and shift
     // (15) are expected as constants ahead of the generic; inside the body
     // the input is sign-extended, the input zero point (17) subtracted,
     // tosa.apply_scale applied, the output zero point (22) added, and the
     // result clamped to [-128, 127] before truncation.
1114 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1115 func.func @rescale_i8(%arg0 : tensor<2xi8>) -> () {
1116   // CHECK: [[C0:%.+]] = arith.constant 19689
1117   // CHECK: [[C1:%.+]] = arith.constant 15
1118   // CHECK: [[INIT:%.+]] = tensor.empty()
1119   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
1120   // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
1121   // CHECK: [[C17:%.+]] = arith.constant 17
1122   // CHECK: [[C22:%.+]] = arith.constant 22
1123   // CHECK-DAG: [[IN32:%.+]] = arith.extsi [[IN]]
1124   // CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN32]], [[C17]]
1125   // CHECK-DAG: [[SCALED:%.+]] = tosa.apply_scale [[IN_ZEROED]], [[C0]], [[C1]] {double_round = false}
1126   // CHECK-DAG: [[SCALED_ZEROED:%.+]] = arith.addi [[SCALED]], [[C22]]
1127   // CHECK-DAG: [[CMIN:%.+]] = arith.constant -128
1128   // CHECK-DAG: [[CMAX:%.+]] = arith.constant 127
1129   // CHECK-DAG: [[LOWER:%.+]] = arith.maxsi [[CMIN]], [[SCALED_ZEROED]]
1130   // CHECK-DAG: [[BOUNDED:%.+]] = arith.minsi [[CMAX]], [[LOWER]]
1131   // CHECK-DAG: [[TRUNC:%.+]] = arith.trunci [[BOUNDED]]
1132   // CHECK-DAG: linalg.yield [[TRUNC]]
1133   %0 = tosa.rescale %arg0 {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array<i32: 19689>, shift = array<i8: 15>, scale32 = false, double_round = false, per_channel = false} : (tensor<2xi8>) -> tensor<2xi8>
1134
1135   // CHECK: return
1136   return
1137 }
1138
1139 // -----
1140 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
1141
1142 // CHECK-LABEL: @rescale_i8_unsigned_output
     // Same rescale as the signed baseline but with output_unsigned = true:
     // the clamp bounds are expected to be 0 and 255 instead of -128 and 127.
1143 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1144 func.func @rescale_i8_unsigned_output(%arg0 : tensor<2xi8>) -> () {
1145   // CHECK: [[C0:%.+]] = arith.constant 19689
1146   // CHECK: [[C1:%.+]] = arith.constant 15
1147   // CHECK: [[INIT:%.+]] = tensor.empty()
1148   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
1149   // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
1150   // CHECK: [[C17:%.+]] = arith.constant 17
1151   // CHECK: [[C22:%.+]] = arith.constant 22
1152   // CHECK-DAG: [[IN32:%.+]] = arith.extsi [[IN]]
1153   // CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN32]], [[C17]]
1154   // CHECK-DAG: [[SCALED:%.+]] = tosa.apply_scale [[IN_ZEROED]], [[C0]], [[C1]] {double_round = false}
1155   // CHECK-DAG: [[SCALED_ZEROED:%.+]] = arith.addi [[SCALED]], [[C22]]
1156   // CHECK-DAG: [[CMIN:%.+]] = arith.constant 0
1157   // CHECK-DAG: [[CMAX:%.+]] = arith.constant 255
1158   // CHECK-DAG: [[LOWER:%.+]] = arith.maxsi [[CMIN]], [[SCALED_ZEROED]]
1159   // CHECK-DAG: [[BOUNDED:%.+]] = arith.minsi [[CMAX]], [[LOWER]]
1160   // CHECK-DAG: [[TRUNC:%.+]] = arith.trunci [[BOUNDED]]
1161   // CHECK: linalg.yield [[TRUNC]]
1162   %1 = tosa.rescale %arg0 {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array<i32: 19689>, shift = array<i8: 15>, scale32 = false, double_round = false, per_channel = false, output_unsigned = true} : (tensor<2xi8>) -> tensor<2xi8>
1163
1164   // CHECK: return
1165   return
1166 }
1167
1168 // -----
1169
1170 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1171
1172 // CHECK-LABEL: @rescale_i8_dyn_batch
     // Rescale with a dynamic leading dimension, once with the default
     // signedness and once with output_unsigned = true. Only the set-up is
     // verified: the dynamic size is read with tensor.dim and passed to
     // tensor.empty, which becomes the generic's output operand.
1173 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1174 func.func @rescale_i8_dyn_batch(%arg0 : tensor<?x2xi8>) -> () {
1175   // CHECK: %[[C0:.+]] = arith.constant 0
1176   // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
1177   // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor<?x2xi8>
1178   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x2xi8>) outs(%[[INIT]] : tensor<?x2xi8>)
1179   %0 = tosa.rescale %arg0 {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array<i32: 19689>, shift = array<i8: 15>, scale32 = false, double_round = false, per_channel = false} : (tensor<?x2xi8>) -> tensor<?x2xi8>
1180
1181   // CHECK: %[[C0:.+]] = arith.constant 0
1182   // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
1183   // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor<?x2xi8>
1184   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x2xi8>) outs(%[[INIT]] : tensor<?x2xi8>)
1185   %1 = tosa.rescale %arg0 {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array<i32: 19689>, shift = array<i8: 15>, scale32 = false, double_round = false, per_channel = false, output_unsigned = true} : (tensor<?x2xi8>) -> tensor<?x2xi8>
1186
1187   return
1188 }
1189
1190 // -----
1191
1192 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
1193
1194 // CHECK-LABEL: @rescale_dyn
     // i32 -> i8 rescale on a rank-4 tensor with two dynamic inner dimensions:
     // both sizes are read with tensor.dim (indices 1 and 2) and forwarded to
     // tensor.empty for the i8 result.
1195 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1196 func.func @rescale_dyn(%arg0 : tensor<1x?x?x32xi32>) -> () {
1197   // CHECK: %[[C1:.+]] = arith.constant 1
1198   // CHECK: %[[DIM1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
1199   // CHECK: %[[C2:.+]] = arith.constant 2
1200   // CHECK: %[[DIM2:.+]] = tensor.dim %[[ARG0]], %[[C2]]
1201   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM2]])
1202   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x?x32xi32>) outs(%[[INIT]] : tensor<1x?x?x32xi8>)
1203   %0 = tosa.rescale %arg0 {double_round = true, input_zp = 0 : i32, multiplier = array<i32: 1376784203>, output_zp = 0 : i32, per_channel = false, scale32 = true, shift = array<i8: 38>} : (tensor<1x?x?x32xi32>) -> tensor<1x?x?x32xi8>
1204   return
1205 }
1206
1207 // -----
1208
1209 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
1210
1211 // CHECK-LABEL: @rescale_i8_unsigned_input
     // Same rescale as the signed baseline but with input_unsigned = true: the
     // input is expected to be zero-extended (arith.extui) rather than
     // sign-extended, while the output clamp stays at [-128, 127].
1212 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1213 func.func @rescale_i8_unsigned_input(%arg0 : tensor<2xi8>) -> () {
1214   // CHECK: [[C0:%.+]] = arith.constant 19689
1215   // CHECK: [[C1:%.+]] = arith.constant 15
1216   // CHECK: [[INIT:%.+]] = tensor.empty()
1217   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
1218   // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
1219   // CHECK: [[C17:%.+]] = arith.constant 17
1220   // CHECK: [[C22:%.+]] = arith.constant 22
1221   // CHECK-DAG: [[IN32:%.+]] = arith.extui [[IN]]
1222   // CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN32]], [[C17]]
1223   // CHECK-DAG: [[SCALED:%.+]] = tosa.apply_scale [[IN_ZEROED]], [[C0]], [[C1]] {double_round = false}
1224   // CHECK-DAG: [[SCALED_ZEROED:%.+]] = arith.addi [[SCALED]], [[C22]]
1225   // CHECK-DAG: [[CMIN:%.+]] = arith.constant -128
1226   // CHECK-DAG: [[CMAX:%.+]] = arith.constant 127
1227   // CHECK-DAG: [[LOWER:%.+]] = arith.maxsi [[CMIN]], [[SCALED_ZEROED]]
1228   // CHECK-DAG: [[BOUNDED:%.+]] = arith.minsi [[CMAX]], [[LOWER]]
1229   // CHECK-DAG: [[TRUNC:%.+]] = arith.trunci [[BOUNDED]]
1230   // CHECK: linalg.yield [[TRUNC]]
1231   %0 = tosa.rescale %arg0 {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array<i32: 19689>, shift = array<i8: 15>, scale32 = false, double_round = false, per_channel = false, input_unsigned = true} : (tensor<2xi8>) -> tensor<2xi8>
1232
1233   return
1234 }
1235
1236 // -----
1237
1238 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
1239
1240 // CHECK-LABEL: @rescale_per_channel
     // Rescale with one multiplier / shift per element of the 3-element input.
     // The multipliers and shifts are expected as dense constant tensors fed
     // to the generic as extra inputs, so the body receives them as block
     // arguments. The third pair (multiplier 44, shift 64) is expected to show
     // up as 0 / 0 in those constants -- presumably because a shift of 64 is
     // out of range; confirm against the lowering.
     // NOTE(review): the op below is written with per_channel = false although
     // three multipliers / shifts are supplied and the per-channel form is
     // expected -- confirm this is intentional.
1241 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1242 func.func @rescale_per_channel(%arg0 : tensor<3xi8>) -> (tensor<3xi8>) {
1243   // CHECK: [[MULTIPLIERS:%.+]] = arith.constant dense<[42, 43, 0]>
1244   // CHECK: [[SHIFTS:%.+]] = arith.constant dense<[14, 15, 0]>
1245   // CHECK: [[INIT:%.+]] = tensor.empty()
1246   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) outs([[INIT]] : tensor<3xi8>)
1247   // CHECK: ^bb0([[IN:%.+]]: i8, [[MULTIPLIER:%.+]]: i32, [[SHIFT:%.+]]: i8, [[UNUSED:%.+]]: i8):
1248   // CHECK: [[C243:%.+]] = arith.constant 243
1249   // CHECK: [[C252:%.+]] = arith.constant 252
1250
1251   // CHECK-DAG: [[IN32:%.+]] = arith.extsi [[IN]]
1252   // CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN32]], [[C243]]
1253   // CHECK-DAG: [[SCALED:%.+]] = tosa.apply_scale [[IN_ZEROED]], [[MULTIPLIER]], [[SHIFT]] {double_round = false}
1254   // CHECK-DAG: [[SCALED_ZEROED:%.+]] = arith.addi [[SCALED]], [[C252]]
1255   // CHECK-DAG: [[CMIN:%.+]] = arith.constant -128
1256   // CHECK-DAG: [[CMAX:%.+]] = arith.constant 127
1257   // CHECK-DAG: [[LOWER:%.+]] = arith.maxsi [[CMIN]], [[SCALED_ZEROED]]
1258   // CHECK-DAG: [[BOUNDED:%.+]] = arith.minsi [[CMAX]], [[LOWER]]
1259   // CHECK-DAG: [[TRUNC:%.+]] = arith.trunci [[BOUNDED]]
1260   // CHECK-DAG: linalg.yield [[TRUNC]]
1261   %0 = tosa.rescale %arg0 {input_zp = 243 : i32, output_zp = 252 : i32, multiplier = array<i32: 42, 43, 44>, shift = array<i8: 14, 15, 64>, scale32 = false, double_round = false, per_channel = false} : (tensor<3xi8>) -> tensor<3xi8>
1262
1263   // CHECK: return [[GENERIC]]
1264   return %0 : tensor<3xi8>
1265 }
1266
1267 // -----
1268
     // tosa.rescale requesting double_round = true with a shift of 33: the
     // generated tosa.apply_scale is expected to keep double_round = true.
1269 // CHECK-LABEL: @rescaleDoubleRound
1270 func.func @rescaleDoubleRound(%arg0 : tensor<2xi8>) -> (tensor<2xi8>) {
1271   // CHECK: linalg.generic
1272   // CHECK: tosa.apply_scale
1273   // CHECK-SAME:  {double_round = true}
1274   %0 = tosa.rescale %arg0 {input_zp = 243 : i32, output_zp = 252 : i32, multiplier = array<i32: 19689>, shift = array<i8: 33>, scale32 = true, double_round = true, per_channel = false} : (tensor<2xi8>) -> tensor<2xi8>
1275   return %0 : tensor<2xi8>
1276 }
1277
     // tosa.rescale requesting double_round = true with a shift of only 15: the
     // generated tosa.apply_scale is expected to carry double_round = false.
1278 // CHECK-LABEL: @rescaleUnnecessaryDoubleRound
1279 func.func @rescaleUnnecessaryDoubleRound(%arg0 : tensor<2xi8>) -> (tensor<2xi8>) {
1280   // CHECK: linalg.generic
1281   // CHECK: tosa.apply_scale
1282   // CHECK-SAME:  {double_round = false}
1283   %0 = tosa.rescale %arg0 {input_zp = 243 : i32, output_zp = 252 : i32, multiplier = array<i32: 19689>, shift = array<i8: 15>, scale32 = true, double_round = true, per_channel = false} : (tensor<2xi8>) -> tensor<2xi8>
1284   return %0 : tensor<2xi8>
1285 }
1286
1287 // -----
1288
1289 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1290
     // tosa.reverse on a static 5x4 tensor, once along axis 0 and once along
     // axis 1. Each is expected to become an output-only linalg.generic that
     // reads the input with tensor.extract at (dim - 1 - index) on the reversed
     // axis. The extract operand is matched through the captured ARG0 rather
     // than a literal SSA name.
1291 // CHECK-LABEL: @reverse
1292 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1293 func.func @reverse(%arg0: tensor<5x4xi32>) -> () {
1294   // CHECK: %[[C0:.+]] = arith.constant 0
1295   // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C0]]
1296   // CHECK: %[[INIT:.+]] = tensor.empty()
1297   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>)
1298   // CHECK-DAG:   %[[I0:.+]] = linalg.index 0
1299   // CHECK-DAG:   %[[I1:.+]] = linalg.index 1
1300   // CHECK-DAG:   %[[SUB1:.+]] = arith.constant 1
1301   // CHECK-DAG:   %[[RDIM_MINUS_C1:.+]] = arith.subi %[[RDIM]], %[[SUB1]]
1302   // CHECK-DAG:   %[[READ_DIM:.+]] = arith.subi %[[RDIM_MINUS_C1]], %[[I0]]
1303   // CHECK-DAG:   %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[READ_DIM]], %[[I1]]] : tensor<5x4xi32>
1304   // CHECK:   linalg.yield %[[EXTRACT]]
1305   %0 = tosa.reverse %arg0 {axis = 0 : i32} : (tensor<5x4xi32>) -> tensor<5x4xi32>
1306
1307   // CHECK: %[[C1:.+]] = arith.constant 1
1308   // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C1]]
1309   // CHECK: %[[INIT:.+]] = tensor.empty()
1310   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>)
1311   // CHECK-DAG:   %[[I0:.+]] = linalg.index 0
1312   // CHECK-DAG:   %[[I1:.+]] = linalg.index 1
1313   // CHECK-DAG:   %[[SUB1:.+]] = arith.constant 1
1314   // CHECK-DAG:   %[[RDIM_MINUS_C1:.+]] = arith.subi %[[RDIM]], %[[SUB1]]
1315   // CHECK-DAG:   %[[READ_DIM:.+]] = arith.subi %[[RDIM_MINUS_C1]], %[[I1]]
1316   // CHECK-DAG:   %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[I0]], %[[READ_DIM]]] : tensor<5x4xi32>
1317   // CHECK:   linalg.yield %[[EXTRACT]]
1318   %1 = tosa.reverse %arg0 {axis = 1 : i32} : (tensor<5x4xi32>) -> tensor<5x4xi32>
1319   return
1320 }
1321
1322 // -----
1323
1324 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
1325
     // tosa.reverse on a 1-D tensor of dynamic size. Two tensor.dim reads are
     // expected: the first sizes tensor.empty, the second feeds the
     // (dim - 1 - index) computation. The extract operand is matched through
     // the captured ARG0 rather than a literal SSA name.
1326 // CHECK-LABEL: @reverse_dyn
1327 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1328 func.func @reverse_dyn(%arg0: tensor<?xi32>) -> () {
1329   // CHECK: %[[C0_1:.+]] = arith.constant 0
1330   // CHECK: %[[D0_1:.+]] = tensor.dim %[[ARG0]], %[[C0_1]]
1331   // CHECK: %[[C0_2:.+]] = arith.constant 0
1332   // CHECK: %[[D0_2:.+]] = tensor.dim %[[ARG0]], %[[C0_2]]
1333   // CHECK: %[[INIT:.+]] = tensor.empty(%[[D0_1]])
1334   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} outs(%[[INIT]] : tensor<?xi32>)
1335   // CHECK-DAG:   %[[I0:.+]] = linalg.index 0
1336   // CHECK-DAG:   %[[SUB1:.+]] = arith.constant 1
1337   // CHECK-DAG:   %[[RDIM_MINUS_C1:.+]] = arith.subi %[[D0_2]], %[[SUB1]]
1338   // CHECK-DAG:   %[[READ_DIM:.+]] = arith.subi %[[RDIM_MINUS_C1]], %[[I0]]
1339   // CHECK-DAG:   %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[READ_DIM]]] : tensor<?xi32>
1340   // CHECK:   linalg.yield %[[EXTRACT]]
1341   %0 = tosa.reverse %arg0 {axis = 0 : i32} : (tensor<?xi32>) -> tensor<?xi32>
1342   return
1343 }
1344
1345 // -----
1346
1347 // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)>
1348 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
1349
     // Static tosa.tile on a 2x3 input with multiples {2, 1}, {1, 2} and {5, 7}.
     // Each is expected to become a rank-4 linalg.generic that yields its input
     // element unchanged, followed by a tosa.reshape to the tiled 2-D shape.
1350 // CHECK-LABEL: @tile
1351 // CHECK-SAME: %[[ARG0:.+]]: tensor<2x3xi8>
1352 func.func @tile(%arg0 : tensor<2x3xi8>) -> () {
1353   // CHECK: [[INIT:%.+]] = tensor.empty()
1354   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<2x2x1x3xi8>)
1355   // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8
1356   // CHECK:   linalg.yield %[[ARG1]] : i8
1357   // CHECK: tosa.reshape [[GENERIC]] {new_shape = array<i64: 4, 3>}
1358   %0 = tosa.tile %arg0 {multiples = array<i64: 2, 1>} : (tensor<2x3xi8>) -> tensor<4x3xi8>
1359
1360   // CHECK: [[INIT:%.+]] = tensor.empty()
1361   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<1x2x2x3xi8>)
1362   // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8
1363   // CHECK:   linalg.yield %[[ARG1]] : i8
1364   // CHECK: tosa.reshape [[GENERIC]] {new_shape = array<i64: 2, 6>}
1365   %1 = tosa.tile %arg0 {multiples = array<i64: 1, 2>} : (tensor<2x3xi8>) -> tensor<2x6xi8>
1366
1367   // CHECK: [[INIT:%.+]] = tensor.empty()
1368   // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<5x2x7x3xi8>)
1369   // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8
1370   // CHECK:   linalg.yield %[[ARG1]] : i8
1371   // CHECK: tosa.reshape [[GENERIC]] {new_shape = array<i64: 10, 21>}
1372   %2 = tosa.tile %arg0 {multiples = array<i64: 5, 7>} : (tensor<2x3xi8>)  -> tensor<10x21xi8>
1373
1374   return
1375 }
1376
1377 // -----
1378
1379 // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)>
1380 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
1381
     // tosa.tile on an input whose leading dimension is dynamic. The dynamic
     // extent is expected to be read with tensor.dim and passed to tensor.empty;
     // the dynamic entry of the reshape's new_shape prints as
     // -9223372036854775808.
1382 // CHECK-LABEL: @tile_dyn_input
1383 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1384 func.func @tile_dyn_input(%arg0 : tensor<?x3xi8>) -> () {
1385   // CHECK: %[[CST0:.+]] = arith.constant 0
1386   // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]] : tensor<?x3xi8>
1387   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
1388   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x3xi8>) outs(%[[INIT]] : tensor<2x?x1x3xi8>)
1389   // CHECK: ^bb0(%[[ARG1:.+]]: i8,
1390   // CHECK:   linalg.yield %[[ARG1]] : i8
1391   // CHECK: tosa.reshape %[[GENERIC]] {new_shape = array<i64: -9223372036854775808, 3>}
1392   %0 = tosa.tile %arg0 {multiples = array<i64: 2, 1>} : (tensor<?x3xi8>)  -> tensor<?x3xi8>
1393
1394   return
1395 }
1396
1397 // -----
1398
1399 // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)>
1400 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
1401
     // tosa.tile on a static 2x3 input where the multiple for dimension 1 is -1.
     // The expectations show a tensor.dim of the input's dimension 1 feeding
     // tensor.empty, a dynamic third dimension in the generic's output type, and
     // a dynamic second entry in the reshape's new_shape.
1402 // CHECK-LABEL: @tile_dyn_multiples
1403 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1404 func.func @tile_dyn_multiples(%arg0 : tensor<2x3xi8>) -> () {
1405   // CHECK: %[[CST1:.+]] = arith.constant 1
1406   // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]] : tensor<2x3xi8>
1407   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
1408   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs(%[[INIT]] : tensor<2x2x?x3xi8>)
1409   // CHECK: ^bb0(%[[ARG1:.+]]: i8,
1410   // CHECK:   linalg.yield %[[ARG1]] : i8
1411   // CHECK: tosa.reshape %[[GENERIC]] {new_shape = array<i64: 2, -9223372036854775808>}
1412   %0 = tosa.tile %arg0 {multiples = array<i64: 2, -1>} : (tensor<2x3xi8>)  -> tensor<2x?xi8>
1413
1414   return
1415 }
1416
1417 // -----
1418
1419 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1420 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
1421 // CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0)>
1422 // CHECK: #[[$MAP3:.*]] = affine_map<(d0) -> (d0)>
1423 // CHECK: #[[$MAP4:.*]] = affine_map<(d0) -> ()>
     // tosa.argmax on a static 3x2 i32 tensor along axis 0 and axis 1, then on
     // a 1-D f32 tensor. The label and signature capture below bind ARG0 inside
     // this test instead of relying on a capture left over from an earlier
     // test, and the axis-1 generic refers to the captured map aliases instead
     // of literal alias names.
1424 // CHECK-LABEL: @argmax
     // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1425 func.func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () {
1426   // CHECK: [[IDX_INIT:%.+]] = tensor.empty()
1427   // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32
1428   // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]]
1429   // CHECK: [[VAL_INIT:%.+]] = tensor.empty()
1430   // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648
1431   // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]]
1432   // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>)
1433   // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32
1434   // CHECK:   [[IDX:%.+]] = linalg.index 0
1435   // CHECK:   [[CAST:%.+]] = arith.index_cast [[IDX]]
1436   // CHECK:   [[CMP:%.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]]
1437   // CHECK:   [[SELECT_VAL:%.+]] = arith.select [[CMP]], %[[ARG1]], %[[ARG3]]
1438   // CHECK:   [[SELECT_IDX:%.+]] = arith.select [[CMP]], [[CAST]], %[[ARG2]]
1439   // CHECK:   linalg.yield [[SELECT_IDX]], [[SELECT_VAL]]
1440   %0 = tosa.argmax %arg0 { axis = 0 : i32} : (tensor<3x2xi32>)  -> tensor<2xi32>
1441
1442   // CHECK: [[IDX_INIT:%.+]] = tensor.empty()
1443   // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32
1444   // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]]
1445   // CHECK: [[VAL_INIT:%.+]] = tensor.empty()
1446   // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648
1447   // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]]
1448   // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
1449   // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32
1450   // CHECK:   [[IDX:%.+]] = linalg.index 1
1451   // CHECK:   [[CAST:%.+]] = arith.index_cast [[IDX]]
1452   // CHECK:   [[CMP:%.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]]
1453   // CHECK:   [[SELECT_VAL:%.+]] = arith.select [[CMP]], %[[ARG1]], %[[ARG3]]
1454   // CHECK:   [[SELECT_IDX:%.+]] = arith.select [[CMP]], [[CAST]], %[[ARG2]]
1455   // CHECK:   linalg.yield [[SELECT_IDX]], [[SELECT_VAL]]
1456   %1 = tosa.argmax %arg0 { axis = 1 : i32} : (tensor<3x2xi32>)  -> tensor<3xi32>
1457
1458   // CHECK: arith.constant -3.40282347E+38 : f32
1459   // CHECK: linalg.index
1460   // CHECK: arith.index_cast
1461   // CHECK: arith.cmpf ogt
1462   // CHECK: select
1463   // CHECK: select
1464   // CHECK: linalg.yield
1465   %2 = tosa.argmax %arg1 { axis = 0 : i32} : (tensor<6xf32>)  -> tensor<i32>
1466
1467   return
1468 }
1469
1470 // -----
1471
1472 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1473 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
     // tosa.argmax along axis 0 of a 3x? tensor, so the dynamic dimension is
     // the one that survives. Its extent is expected to be read once with
     // tensor.dim and used for both tensor.empty ops. The label and signature
     // capture below bind ARG0 inside this test instead of relying on a capture
     // left over from an earlier test.
1474 // CHECK-LABEL: @argmax_dyn_non_axis
     // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1475 func.func @argmax_dyn_non_axis(%arg0 : tensor<3x?xi32>) -> () {
1476   // CHECK: %[[CST1:.+]] = arith.constant 1
1477   // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]]
1478   // CHECK: %[[IDX_INIT:.+]] = tensor.empty(%[[DYN]])
1479   // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32
1480   // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]]
1481   // CHECK: %[[VAL_INIT:.+]] = tensor.empty(%[[DYN]])
1482   // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648
1483   // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]]
1484   // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<?xi32>, tensor<?xi32>)
1485   // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32
1486   // CHECK:   %[[IDX:.+]] = linalg.index 0
1487   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[IDX]]
1488   // CHECK:   %[[CMP:.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]]
1489   // CHECK:   %[[SELECT_VAL:.+]] = arith.select %[[CMP]], %[[ARG1]], %[[ARG3]]
1490   // CHECK:   %[[SELECT_IDX:.+]] = arith.select %[[CMP]], %[[CAST]], %[[ARG2]]
1491   // CHECK:   linalg.yield %[[SELECT_IDX]], %[[SELECT_VAL]]
1492   %0 = tosa.argmax %arg0 { axis = 0 : i32} : (tensor<3x?xi32>)  -> tensor<?xi32>
1493   return
1494 }
1495
1496 // -----
1497
1498 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1499 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0)>
     // tosa.argmax along axis 1 of a 3x? tensor, so the dynamic dimension is
     // the one being reduced and both init tensors are static. The label,
     // signature capture and ^bb0 capture below bind ARG0..ARG3 inside this test
     // instead of relying on captures left over from earlier tests.
1500 // CHECK-LABEL: @argmax_dyn_axis
     // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1501 func.func @argmax_dyn_axis(%arg0 : tensor<3x?xi32>) -> () {
1502   // CHECK: %[[IDX_INIT:.+]] = tensor.empty()
1503   // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32
1504   // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]]
1505   // CHECK: %[[VAL_INIT:.+]] = tensor.empty()
1506   // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648
1507   // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]]
1508   // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
       // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32
1509   // CHECK:   %[[IDX:.+]] = linalg.index 1
1510   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[IDX]]
1511   // CHECK:   %[[CMP:.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]]
1512   // CHECK:   %[[SELECT_VAL:.+]] = arith.select %[[CMP]], %[[ARG1]], %[[ARG3]]
1513   // CHECK:   %[[SELECT_IDX:.+]] = arith.select %[[CMP]], %[[CAST]], %[[ARG2]]
1514   // CHECK:   linalg.yield %[[SELECT_IDX]], %[[SELECT_VAL]]
1515   %0 = tosa.argmax %arg0 { axis = 1 : i32} : (tensor<3x?xi32>)  -> tensor<3xi32>
1516   return
1517 }
1518
1519 // -----
1520
     // tosa.gather with static f32 values (2x3x2) and i32 indices (2x3). The
     // expected linalg.generic iterates over the index tensor and extracts from
     // the values operand at [linalg.index 0, index_cast(index element),
     // linalg.index 2].
1521 // CHECK-LABEL: @gather_float
1522 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
1523 // CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]
1524 func.func @gather_float(%arg0: tensor<2x3x2xf32>, %arg1: tensor<2x3xi32>) -> () {
1525   // CHECK: %[[INIT:.+]] = tensor.empty()
1526   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xf32>)
1527   // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32)
1528   // CHECK:   %[[IDX0:.+]] = linalg.index 0
1529   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[BBARG0]]
1530   // CHECK:   %[[IDX2:.+]] = linalg.index 2
1531   // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<2x3x2xf32>
1532   // CHECK:   linalg.yield %[[EXTRACT]]
1533   %0 = tosa.gather %arg0, %arg1 : (tensor<2x3x2xf32>, tensor<2x3xi32>)  -> tensor<2x3x2xf32>
1534   return
1535 }
1536
1537 // -----
1538
     // tosa.gather where the leading dimension of both operands is dynamic.
     // That extent is expected to be read from the values operand with
     // tensor.dim and passed to tensor.empty; the generic body is the same
     // extract pattern as in the static case.
1539 // CHECK-LABEL: @gather_float_dyn
1540 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
1541 // CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]
1542 func.func @gather_float_dyn(%arg0: tensor<?x3x2xf32>, %arg1: tensor<?x3xi32>) -> () {
1543   // CHECK: %[[C0:.+]] = arith.constant 0
1544   // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
1545   // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
1546   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<?x3xi32>) outs(%[[INIT]] : tensor<?x3x2xf32>)
1547   // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32)
1548   // CHECK:   %[[IDX0:.+]] = linalg.index 0
1549   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[BBARG0]]
1550   // CHECK:   %[[IDX2:.+]] = linalg.index 2
1551   // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<?x3x2xf32>
1552   // CHECK:   linalg.yield %[[EXTRACT]]
1553   %0 = tosa.gather %arg0, %arg1 : (tensor<?x3x2xf32>, tensor<?x3xi32>)  -> tensor<?x3x2xf32>
1554   return
1555 }
1556
1557 // -----
1558
     // tosa.gather with every dimension dynamic. The three output extents are
     // expected to come from tensor.dim: dimensions 0 and 2 of the values
     // operand and dimension 1 of the indices operand.
1559 // CHECK-LABEL: @gather_float_all_dynamic
1560 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
1561 // CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]
1562 func.func @gather_float_all_dynamic(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?xi32>) -> () {
1563   // CHECK: %[[C0:.+]] = arith.constant 0
1564   // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
1565   // CHECK: %[[C1:.+]] = arith.constant 1
1566   // CHECK: %[[INDEX:.+]] = tensor.dim %[[ARG1]], %[[C1]]
1567   // CHECK: %[[C2:.+]] = arith.constant 2
1568   // CHECK: %[[CHANNEL:.+]] = tensor.dim %[[ARG0]], %[[C2]]
1569   // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]], %[[INDEX]], %[[CHANNEL]])
1570   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<?x?xi32>) outs(%[[INIT]] : tensor<?x?x?xf32>)
1571   // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32)
1572   // CHECK:   %[[IDX0:.+]] = linalg.index 0
1573   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[BBARG0]]
1574   // CHECK:   %[[IDX2:.+]] = linalg.index 2
1575   // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<?x?x?xf32>
1576   // CHECK:   linalg.yield %[[EXTRACT]]
1577   %0 = tosa.gather %arg0, %arg1 : (tensor<?x?x?xf32>, tensor<?x?xi32>)  -> tensor<?x?x?xf32>
1578   return
1579 }
1580
1581 // -----
1582
     // tosa.gather with static i32 values (2x3x2) and i32 indices (2x3). The
     // expected lowering has the same structure as the f32 variant, with i32
     // element types throughout.
1583 // CHECK-LABEL: @gather_int
1584 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
1585 // CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]
1586 func.func @gather_int(%arg0: tensor<2x3x2xi32>, %arg1: tensor<2x3xi32>) -> () {
1587   // CHECK: %[[INIT:.+]] = tensor.empty()
1588   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xi32>)
1589   // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: i32)
1590   // CHECK:   %[[IDX0:.+]] = linalg.index 0
1591   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[BBARG0]]
1592   // CHECK:   %[[IDX2:.+]] = linalg.index 2
1593   // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<2x3x2xi32>
1594   // CHECK:   linalg.yield %[[EXTRACT]]
1595   %0 = tosa.gather %arg0, %arg1 : (tensor<2x3x2xi32>, tensor<2x3xi32>)  -> tensor<2x3x2xi32>
1596   return
1597 }
1598
1599 // -----
1600
     // tosa.table with an i8 input and a 512-entry i8 table. The expected body
     // casts the input element to index, adds 128, and extracts that entry from
     // the table operand.
1601 // CHECK-LABEL: @table8
1602 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1603 // CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]:
1604 func.func @table8(%arg0: tensor<6xi8>, %arg1: tensor<512xi8>) -> () {
1605   // CHECK: %[[INIT:.+]] = tensor.empty()
1606   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
1607   // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
1608   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
1609   // CHECK:   %[[OFFSET:.+]] = arith.constant 128
1610   // CHECK:   %[[ADD:.+]] = arith.addi %[[CAST]], %[[OFFSET]]
1611   // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG1]][%[[ADD]]]
1612   // CHECK:   linalg.yield %[[EXTRACT]]
1613   %0 = tosa.table %arg0, %arg1 : (tensor<6xi8>, tensor<512xi8>)  -> tensor<6xi8>
1614   return
1615 }
1616
1617 // -----
1618
     // tosa.table with an i16 input, a 513-entry i16 table and an i32 result.
     // The expected body offsets the sign-extended input by 32768, splits it
     // into an index (shift right by 7) and a fraction (mask with 127), reads
     // the table at index and index + 1, and yields
     // (base << 7) + (next - base) * fraction.
1619 // CHECK-LABEL: @table16
1620 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1621 // CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]:
1622 func.func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () {
1623   // CHECK: %[[INIT:.+]] = tensor.empty()
1624   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi16>) outs(%[[INIT]] : tensor<6xi32>)
1625   // CHECK: ^bb0(%[[ARG2:.*]]: i16, %[[ARG3:.*]]: i32)
1626   // CHECK: %[[EXT_IN:.+]] = arith.extsi %[[ARG2]]
1627   // CHECK: %[[C32768:.+]] = arith.constant 32768
1628   // CHECK: %[[C7:.+]] = arith.constant 7
1629   // CHECK: %[[C1:.+]] = arith.constant 1
1630   // CHECK: %[[C127:.+]] = arith.constant 127
1631   // CHECK: %[[INADD:.+]] = arith.addi %[[EXT_IN]], %[[C32768]]
1632   // CHECK: %[[IDX:.+]] = arith.shrui %[[INADD]], %[[C7]]
1633   // CHECK: %[[FRACTION:.+]] = arith.andi %[[INADD]], %[[C127]]
1634   // CHECK: %[[IDXPLUS1:.+]] = arith.addi %[[IDX]], %[[C1]]
1635   // CHECK: %[[IDX_CAST:.+]] = arith.index_cast %[[IDX]]
1636   // CHECK: %[[IDXPLUS1_CAST:.+]] = arith.index_cast %[[IDXPLUS1]]
1637   // CHECK: %[[BASE:.+]] = tensor.extract %[[ARG1]][%[[IDX_CAST]]]
1638   // CHECK: %[[NEXT:.+]] = tensor.extract %[[ARG1]][%[[IDXPLUS1_CAST]]]
1639   // CHECK: %[[BASE_EXT:.+]] = arith.extsi %[[BASE]]
1640   // CHECK: %[[NEXT_EXT:.+]] = arith.extsi %[[NEXT]]
1641   // CHECK: %[[BASE_MUL:.+]] = arith.shli %[[BASE_EXT]], %[[C7]]
1642   // CHECK: %[[DIFF:.+]] = arith.subi %[[NEXT_EXT]], %[[BASE_EXT]]
1643   // CHECK: %[[DIFF_MUL:.+]] = arith.muli %[[DIFF]], %[[FRACTION]]
1644   // CHECK: %[[RESULT:.+]] = arith.addi %[[BASE_MUL]], %[[DIFF_MUL]]
1645   // CHECK: linalg.yield %[[RESULT]]
1646   %0 = tosa.table %arg0, %arg1 : (tensor<6xi16>, tensor<513xi16>)  -> tensor<6xi32>
1647   return
1648 }
1649
1650 // -----
1651
     // tosa.table with a dynamically sized i8 input and a static 512-entry
     // table. The input extent is expected to be read with tensor.dim and passed
     // to tensor.empty; the body is the same index_cast + 128 lookup as in the
     // static i8 case.
1652 // CHECK-LABEL: @table8_dyn
1653 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1654 // CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]:
1655 func.func @table8_dyn(%arg0: tensor<?xi8>, %arg1: tensor<512xi8>) -> () {
1656   // CHECK: %[[CST0:.+]] = arith.constant 0
1657   // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]]
1658   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
1659   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<?xi8>) outs(%[[INIT]] : tensor<?xi8>)
1660   // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
1661   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
1662   // CHECK:   %[[OFFSET:.+]] = arith.constant 128
1663   // CHECK:   %[[ADD:.+]] = arith.addi %[[CAST]], %[[OFFSET]]
1664   // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG1]][%[[ADD]]]
1665   // CHECK:   linalg.yield %[[EXTRACT]]
1666   %0 = tosa.table %arg0, %arg1 : (tensor<?xi8>, tensor<512xi8>)  -> tensor<?xi8>
1667   return
1668 }
1669
1670 // -----
1671
     // tosa.table with a static i8 input and a dynamically sized i8 table. The
     // expected lowering is the same index_cast + 128 lookup as with a static
     // table; no tensor.dim is expected since the output shape is static.
1672 // CHECK-LABEL: @table8_dyn_table
1673 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1674 // CHECK-SAME:  %[[ARG1:[0-9a-zA-Z_]*]]:
1675 func.func @table8_dyn_table(%arg0: tensor<6xi8>, %arg1: tensor<?xi8>) -> () {
1676   // CHECK: %[[INIT:.+]] = tensor.empty()
1677   // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
1678   // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
1679   // CHECK:   %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
1680   // CHECK:   %[[OFFSET:.+]] = arith.constant 128
1681   // CHECK:   %[[ADD:.+]] = arith.addi %[[CAST]], %[[OFFSET]]
1682   // CHECK:   %[[EXTRACT:.+]] = tensor.extract %[[ARG1]][%[[ADD]]]
1683   // CHECK:   linalg.yield %[[EXTRACT]]
1684   %0 = tosa.table %arg0, %arg1 : (tensor<6xi8>, tensor<?xi8>)  -> tensor<6xi8>
1685   return
1686 }
1687
1688 // -----
1689 // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
1690 // CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>
1691 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
1692
1693 // CHECK-LABEL:   func.func @test_static_rfft2d(
1694 // CHECK-SAME:                                  %[[VAL_0:.*]]: tensor<5x5x8xf32>) -> (tensor<5x5x5xf32>, tensor<5x5x5xf32>) {
1695 // CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
1696 // CHECK:           %[[VAL_2:.*]] = arith.constant 2 : index
1697 // CHECK:           %[[VAL_3:.*]] = arith.constant 8 : index
1698 // CHECK:           %[[VAL_4:.*]] = arith.constant 4 : index
1699 // CHECK:           %[[VAL_5:.*]] = arith.constant 5 : index
1700 // CHECK:           %[[VAL_6:.*]] = tensor.empty() : tensor<5x5x5xf32>
1701 // CHECK:           %[[VAL_7:.*]] = arith.constant 0.000000e+00 : f32
1702 // CHECK:           %[[VAL_8:.*]] = linalg.fill ins(%[[VAL_7]] : f32) outs(%[[VAL_6]] : tensor<5x5x5xf32>) -> tensor<5x5x5xf32>
1703 // CHECK:           %[[VAL_9:.*]] = tensor.empty() : tensor<5x5x5xf32>
1704 // CHECK:           %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
1705 // CHECK:           %[[VAL_11:.*]] = linalg.fill ins(%[[VAL_10]] : f32) outs(%[[VAL_9]] : tensor<5x5x5xf32>) -> tensor<5x5x5xf32>
1706 // CHECK:           %[[VAL_12:.*]] = arith.constant 1 : index
1707 // CHECK:           %[[VAL_13:.*]] = arith.constant 5 : index
1708 // CHECK:           %[[VAL_14:.*]] = arith.constant 2 : index
1709 // CHECK:           %[[VAL_15:.*]] = arith.constant 8 : index
1710 // CHECK:           %[[VAL_16:.*]] = arith.constant 6.28318548 : f32
1711 // CHECK:           %[[VAL_17:.*]] = arith.index_castui %[[VAL_13]] : index to i32
1712 // CHECK:           %[[VAL_18:.*]] = arith.uitofp %[[VAL_17]] : i32 to f32
1713 // CHECK:           %[[VAL_19:.*]] = arith.index_castui %[[VAL_15]] : index to i32
1714 // CHECK:           %[[VAL_20:.*]] = arith.uitofp %[[VAL_19]] : i32 to f32
1715 // CHECK:           %[[VAL_21:.*]]:2 = linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]]], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%[[VAL_0]] : tensor<5x5x8xf32>) outs(%[[VAL_8]], %[[VAL_11]] : tensor<5x5x5xf32>, tensor<5x5x5xf32>) {
1716 // CHECK:           ^bb0(%[[VAL_22:.*]]: f32, %[[VAL_23:.*]]: f32, %[[VAL_24:.*]]: f32):
1717 // CHECK:             %[[VAL_25:.*]] = linalg.index 1 : index
1718 // CHECK:             %[[VAL_26:.*]] = linalg.index 2 : index
1719 // CHECK:             %[[VAL_27:.*]] = linalg.index 3 : index
1720 // CHECK:             %[[VAL_28:.*]] = linalg.index 4 : index
1721 // CHECK:             %[[VAL_29:.*]] = index.mul %[[VAL_27]], %[[VAL_25]]
1722 // CHECK:             %[[VAL_30:.*]] = index.mul %[[VAL_28]], %[[VAL_26]]
1723 // CHECK:             %[[VAL_31:.*]] = index.remu %[[VAL_29]], %[[VAL_13]]
1724 // CHECK:             %[[VAL_32:.*]] = index.remu %[[VAL_30]], %[[VAL_15]]
1725 // CHECK:             %[[VAL_33:.*]] = arith.index_castui %[[VAL_31]] : index to i32
1726 // CHECK:             %[[VAL_34:.*]] = arith.uitofp %[[VAL_33]] : i32 to f32
1727 // CHECK:             %[[VAL_35:.*]] = arith.index_castui %[[VAL_32]] : index to i32
1728 // CHECK:             %[[VAL_36:.*]] = arith.uitofp %[[VAL_35]] : i32 to f32
1729 // CHECK:             %[[VAL_37:.*]] = arith.divf %[[VAL_34]], %[[VAL_18]] : f32
1730 // CHECK:             %[[VAL_38:.*]] = arith.divf %[[VAL_36]], %[[VAL_20]] : f32
1731 // CHECK:             %[[VAL_39:.*]] = arith.addf %[[VAL_37]], %[[VAL_38]] : f32
1732 // CHECK:             %[[VAL_40:.*]] = arith.mulf %[[VAL_16]], %[[VAL_39]] : f32
1733 // CHECK:             %[[VAL_41:.*]] = math.cos %[[VAL_40]] : f32
1734 // CHECK:             %[[VAL_42:.*]] = math.sin %[[VAL_40]] : f32
1735 // CHECK:             %[[VAL_43:.*]] = arith.mulf %[[VAL_22]], %[[VAL_41]] : f32
1736 // CHECK:             %[[VAL_44:.*]] = arith.mulf %[[VAL_22]], %[[VAL_42]] : f32
1737 // CHECK:             %[[VAL_45:.*]] = arith.addf %[[VAL_23]], %[[VAL_43]] : f32
1738 // CHECK:             %[[VAL_46:.*]] = arith.subf %[[VAL_24]], %[[VAL_44]] : f32
1739 // CHECK:             linalg.yield %[[VAL_45]], %[[VAL_46]] : f32, f32
1740 // CHECK:           } -> (tensor<5x5x5xf32>, tensor<5x5x5xf32>)
1741 // CHECK:           return %[[VAL_21]]#0, %[[VAL_21]]#1 : tensor<5x5x5xf32>, tensor<5x5x5xf32>
1742 // CHECK:         }
     // tosa.rfft2d on a fully static 5x5x8 input producing two 5x5x5 results.
     // The return expectation above reuses the capture of the linalg.generic
     // results, so the test verifies that the generic's two results are what
     // the function returns.
1743 func.func @test_static_rfft2d(%arg0: tensor<5x5x8xf32>) -> (tensor<5x5x5xf32>, tensor<5x5x5xf32>) {
1744   %output_real, %output_imag = "tosa.rfft2d"(%arg0) {} : (tensor<5x5x8xf32>) -> (tensor<5x5x5xf32>, tensor<5x5x5xf32>)
1745   return %output_real, %output_imag : tensor<5x5x5xf32>, tensor<5x5x5xf32>
1746 }
1747
1748 // -----
1749 // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
1750 // CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>
1751 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
1752
1753 // CHECK-LABEL:   func.func @test_dynamic_rfft2d(
1754 // CHECK-SAME:                                   %[[VAL_0:.*]]: tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
1755 // CHECK:           %[[VAL_1:.*]] = arith.constant 0 : index
1756 // CHECK:           %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<?x?x?xf32>
1757 // CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
1758 // CHECK:           %[[VAL_4:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?x?xf32>
1759 // CHECK:           %[[VAL_5:.*]] = arith.constant 2 : index
1760 // CHECK:           %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor<?x?x?xf32>
1761 // CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
1762 // CHECK:           %[[VAL_8:.*]] = arith.constant 2 : index
1763 // CHECK:           %[[VAL_9:.*]] = arith.divui %[[VAL_6]], %[[VAL_8]] : index
1764 // CHECK:           %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_7]] : index
1765 // CHECK:           %[[VAL_11:.*]] = tensor.empty(%[[VAL_2]], %[[VAL_4]], %[[VAL_10]]) : tensor<?x?x?xf32>
1766 // CHECK:           %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32
1767 // CHECK:           %[[VAL_13:.*]] = linalg.fill ins(%[[VAL_12]] : f32) outs(%[[VAL_11]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
1768 // CHECK:           %[[VAL_14:.*]] = tensor.empty(%[[VAL_2]], %[[VAL_4]], %[[VAL_10]]) : tensor<?x?x?xf32>
1769 // CHECK:           %[[VAL_15:.*]] = arith.constant 0.000000e+00 : f32
1770 // CHECK:           %[[VAL_16:.*]] = linalg.fill ins(%[[VAL_15]] : f32) outs(%[[VAL_14]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
1771 // CHECK:           %[[VAL_17:.*]] = arith.constant 1 : index
1772 // CHECK:           %[[VAL_18:.*]] = tensor.dim %[[VAL_0]], %[[VAL_17]] : tensor<?x?x?xf32>
1773 // CHECK:           %[[VAL_19:.*]] = arith.constant 2 : index
1774 // CHECK:           %[[VAL_20:.*]] = tensor.dim %[[VAL_0]], %[[VAL_19]] : tensor<?x?x?xf32>
1775 // CHECK:           %[[VAL_21:.*]] = arith.constant 6.28318548 : f32
1776 // CHECK:           %[[VAL_22:.*]] = arith.index_castui %[[VAL_18]] : index to i32
1777 // CHECK:           %[[VAL_23:.*]] = arith.uitofp %[[VAL_22]] : i32 to f32
1778 // CHECK:           %[[VAL_24:.*]] = arith.index_castui %[[VAL_20]] : index to i32
1779 // CHECK:           %[[VAL_25:.*]] = arith.uitofp %[[VAL_24]] : i32 to f32
1780 // CHECK:           %[[VAL_26:.*]]:2 = linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]]], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%[[VAL_0]] : tensor<?x?x?xf32>) outs(%[[VAL_13]], %[[VAL_16]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
1781 // CHECK:           ^bb0(%[[VAL_27:.*]]: f32, %[[VAL_28:.*]]: f32, %[[VAL_29:.*]]: f32):
1782 // CHECK:             %[[VAL_30:.*]] = linalg.index 1 : index
1783 // CHECK:             %[[VAL_31:.*]] = linalg.index 2 : index
1784 // CHECK:             %[[VAL_32:.*]] = linalg.index 3 : index
1785 // CHECK:             %[[VAL_33:.*]] = linalg.index 4 : index
1786 // CHECK:             %[[VAL_34:.*]] = index.mul %[[VAL_32]], %[[VAL_30]]
1787 // CHECK:             %[[VAL_35:.*]] = index.mul %[[VAL_33]], %[[VAL_31]]
1788 // CHECK:             %[[VAL_36:.*]] = index.remu %[[VAL_34]], %[[VAL_18]]
1789 // CHECK:             %[[VAL_37:.*]] = index.remu %[[VAL_35]], %[[VAL_20]]
1790 // CHECK:             %[[VAL_38:.*]] = arith.index_castui %[[VAL_36]] : index to i32
1791 // CHECK:             %[[VAL_39:.*]] = arith.uitofp %[[VAL_38]] : i32 to f32
1792 // CHECK:             %[[VAL_40:.*]] = arith.index_castui %[[VAL_37]] : index to i32
1793 // CHECK:             %[[VAL_41:.*]] = arith.uitofp %[[VAL_40]] : i32 to f32
1794 // CHECK:             %[[VAL_42:.*]] = arith.divf %[[VAL_39]], %[[VAL_23]] : f32
1795 // CHECK:             %[[VAL_43:.*]] = arith.divf %[[VAL_41]], %[[VAL_25]] : f32
1796 // CHECK:             %[[VAL_44:.*]] = arith.addf %[[VAL_42]], %[[VAL_43]] : f32
1797 // CHECK:             %[[VAL_45:.*]] = arith.mulf %[[VAL_21]], %[[VAL_44]] : f32
1798 // CHECK:             %[[VAL_46:.*]] = math.cos %[[VAL_45]] : f32
1799 // CHECK:             %[[VAL_47:.*]] = math.sin %[[VAL_45]] : f32
1800 // CHECK:             %[[VAL_48:.*]] = arith.mulf %[[VAL_27]], %[[VAL_46]] : f32
1801 // CHECK:             %[[VAL_49:.*]] = arith.mulf %[[VAL_27]], %[[VAL_47]] : f32
1802 // CHECK:             %[[VAL_50:.*]] = arith.addf %[[VAL_28]], %[[VAL_48]] : f32
1803 // CHECK:             %[[VAL_51:.*]] = arith.subf %[[VAL_29]], %[[VAL_49]] : f32
1804 // CHECK:             linalg.yield %[[VAL_50]], %[[VAL_51]] : f32, f32
1805 // CHECK:           } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
1806 // CHECK:           return %[[VAL_52:.*]]#0, %[[VAL_52]]#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
1807 // CHECK:         }
1808 func.func @test_dynamic_rfft2d(%arg0: tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {  // Test input: tosa.rfft2d on a rank-3 f32 tensor whose dimensions are all dynamic.
1809   %fft:2 = "tosa.rfft2d"(%arg0) : (tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)  // Generic op form; result #0 is the real output, #1 the imaginary output.
1810   return %fft#0, %fft#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
1811 }
1812
1813 // -----
1814 // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
1815 // CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>
1816 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
1817 // Lowering of tosa.fft2d with inverse=false on static 8x8x8 f32 inputs. The final `return` assertion is hand-tightened to reuse the linalg.generic capture (VAL_17) rather than binding a fresh wildcard, so it verifies that the generic's two results are what the function returns.
1818 // CHECK-LABEL:   func.func @test_static_fft2d(
1819 // CHECK-SAME:                                 %[[VAL_0:.*]]: tensor<8x8x8xf32>,
1820 // CHECK-SAME:                                 %[[VAL_1:.*]]: tensor<8x8x8xf32>) -> (tensor<8x8x8xf32>, tensor<8x8x8xf32>) {
1821 // CHECK:           %[[VAL_2:.*]] = tensor.empty() : tensor<8x8x8xf32>
1822 // CHECK:           %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32
1823 // CHECK:           %[[VAL_4:.*]] = linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_2]] : tensor<8x8x8xf32>) -> tensor<8x8x8xf32>
1824 // CHECK:           %[[VAL_5:.*]] = tensor.empty() : tensor<8x8x8xf32>
1825 // CHECK:           %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
1826 // CHECK:           %[[VAL_7:.*]] = linalg.fill ins(%[[VAL_6]] : f32) outs(%[[VAL_5]] : tensor<8x8x8xf32>) -> tensor<8x8x8xf32>
1827 // CHECK:           %[[VAL_8:.*]] = arith.constant 1 : index
1828 // CHECK:           %[[VAL_9:.*]] = arith.constant 8 : index
1829 // CHECK:           %[[VAL_10:.*]] = arith.constant 2 : index
1830 // CHECK:           %[[VAL_11:.*]] = arith.constant 8 : index
1831 // CHECK:           %[[VAL_12:.*]] = arith.constant 6.28318548 : f32
1832 // CHECK:           %[[VAL_13:.*]] = arith.index_castui %[[VAL_9]] : index to i32
1833 // CHECK:           %[[VAL_14:.*]] = arith.uitofp %[[VAL_13]] : i32 to f32
1834 // CHECK:           %[[VAL_15:.*]] = arith.index_castui %[[VAL_11]] : index to i32
1835 // CHECK:           %[[VAL_16:.*]] = arith.uitofp %[[VAL_15]] : i32 to f32
1836 // CHECK:           %[[VAL_17:.*]]:2 = linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]]], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%[[VAL_0]], %[[VAL_1]] : tensor<8x8x8xf32>, tensor<8x8x8xf32>) outs(%[[VAL_4]], %[[VAL_7]] : tensor<8x8x8xf32>, tensor<8x8x8xf32>) {
1837 // CHECK:           ^bb0(%[[VAL_18:.*]]: f32, %[[VAL_19:.*]]: f32, %[[VAL_20:.*]]: f32, %[[VAL_21:.*]]: f32):
1838 // CHECK:             %[[VAL_22:.*]] = linalg.index 1 : index
1839 // CHECK:             %[[VAL_23:.*]] = linalg.index 2 : index
1840 // CHECK:             %[[VAL_24:.*]] = linalg.index 3 : index
1841 // CHECK:             %[[VAL_25:.*]] = linalg.index 4 : index
1842 // CHECK:             %[[VAL_26:.*]] = index.mul %[[VAL_24]], %[[VAL_22]]
1843 // CHECK:             %[[VAL_27:.*]] = index.mul %[[VAL_25]], %[[VAL_23]]
1844 // CHECK:             %[[VAL_28:.*]] = index.remu %[[VAL_26]], %[[VAL_9]]
1845 // CHECK:             %[[VAL_29:.*]] = index.remu %[[VAL_27]], %[[VAL_11]]
1846 // CHECK:             %[[VAL_30:.*]] = arith.index_castui %[[VAL_28]] : index to i32
1847 // CHECK:             %[[VAL_31:.*]] = arith.uitofp %[[VAL_30]] : i32 to f32
1848 // CHECK:             %[[VAL_32:.*]] = arith.index_castui %[[VAL_29]] : index to i32
1849 // CHECK:             %[[VAL_33:.*]] = arith.uitofp %[[VAL_32]] : i32 to f32
1850 // CHECK:             %[[VAL_34:.*]] = arith.divf %[[VAL_31]], %[[VAL_14]] : f32
1851 // CHECK:             %[[VAL_35:.*]] = arith.divf %[[VAL_33]], %[[VAL_16]] : f32
1852 // CHECK:             %[[VAL_36:.*]] = arith.addf %[[VAL_34]], %[[VAL_35]] : f32
1853 // CHECK:             %[[VAL_37:.*]] = arith.mulf %[[VAL_12]], %[[VAL_36]] : f32
1854 // CHECK:             %[[VAL_38:.*]] = math.cos %[[VAL_37]] : f32
1855 // CHECK:             %[[VAL_39:.*]] = math.sin %[[VAL_37]] : f32
1856 // CHECK:             %[[VAL_40:.*]] = arith.mulf %[[VAL_18]], %[[VAL_38]] : f32
1857 // CHECK:             %[[VAL_41:.*]] = arith.mulf %[[VAL_19]], %[[VAL_39]] : f32
1858 // CHECK:             %[[VAL_42:.*]] = arith.addf %[[VAL_40]], %[[VAL_41]] : f32
1859 // CHECK:             %[[VAL_43:.*]] = arith.mulf %[[VAL_19]], %[[VAL_38]] : f32
1860 // CHECK:             %[[VAL_44:.*]] = arith.mulf %[[VAL_18]], %[[VAL_39]] : f32
1861 // CHECK:             %[[VAL_45:.*]] = arith.subf %[[VAL_43]], %[[VAL_44]] : f32
1862 // CHECK:             %[[VAL_46:.*]] = arith.addf %[[VAL_20]], %[[VAL_42]] : f32
1863 // CHECK:             %[[VAL_47:.*]] = arith.addf %[[VAL_21]], %[[VAL_45]] : f32
1864 // CHECK:             linalg.yield %[[VAL_46]], %[[VAL_47]] : f32, f32
1865 // CHECK:           } -> (tensor<8x8x8xf32>, tensor<8x8x8xf32>)
1866 // CHECK:           return %[[VAL_17]]#0, %[[VAL_17]]#1 : tensor<8x8x8xf32>, tensor<8x8x8xf32>
1867 // CHECK:         }
1868 func.func @test_static_fft2d(%arg0: tensor<8x8x8xf32>, %arg1: tensor<8x8x8xf32>) -> (tensor<8x8x8xf32>, tensor<8x8x8xf32>) {
1869   %output_real, %output_imag = "tosa.fft2d"(%arg0, %arg1) {inverse=false} : (tensor<8x8x8xf32>, tensor<8x8x8xf32>) -> (tensor<8x8x8xf32>, tensor<8x8x8xf32>)
1870   return %output_real, %output_imag : tensor<8x8x8xf32>, tensor<8x8x8xf32>
1871 }
1872
1873 // -----
1874 // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
1875 // CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>
1876 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
1877 // Lowering of tosa.fft2d with inverse = true on fully dynamic rank-3 f32 inputs; the expected body multiplies the angle by -1.0 before cos/sin. The final `return` assertion is hand-tightened to reuse the linalg.generic capture (VAL_23) rather than binding a fresh wildcard, so it verifies that the generic's two results are what the function returns.
1878 // CHECK-LABEL:   func.func @test_dynamic_fft2d(
1879 // CHECK-SAME:                                  %[[VAL_0:.*]]: tensor<?x?x?xf32>,
1880 // CHECK-SAME:                                  %[[VAL_1:.*]]: tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
1881 // CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
1882 // CHECK:           %[[VAL_3:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?x?xf32>
1883 // CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
1884 // CHECK:           %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?x?xf32>
1885 // CHECK:           %[[VAL_6:.*]] = arith.constant 2 : index
1886 // CHECK:           %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?x?xf32>
1887 // CHECK:           %[[VAL_8:.*]] = tensor.empty(%[[VAL_3]], %[[VAL_5]], %[[VAL_7]]) : tensor<?x?x?xf32>
1888 // CHECK:           %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
1889 // CHECK:           %[[VAL_10:.*]] = linalg.fill ins(%[[VAL_9]] : f32) outs(%[[VAL_8]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
1890 // CHECK:           %[[VAL_11:.*]] = tensor.empty(%[[VAL_3]], %[[VAL_5]], %[[VAL_7]]) : tensor<?x?x?xf32>
1891 // CHECK:           %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32
1892 // CHECK:           %[[VAL_13:.*]] = linalg.fill ins(%[[VAL_12]] : f32) outs(%[[VAL_11]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
1893 // CHECK:           %[[VAL_14:.*]] = arith.constant 1 : index
1894 // CHECK:           %[[VAL_15:.*]] = tensor.dim %[[VAL_0]], %[[VAL_14]] : tensor<?x?x?xf32>
1895 // CHECK:           %[[VAL_16:.*]] = arith.constant 2 : index
1896 // CHECK:           %[[VAL_17:.*]] = tensor.dim %[[VAL_0]], %[[VAL_16]] : tensor<?x?x?xf32>
1897 // CHECK:           %[[VAL_18:.*]] = arith.constant 6.28318548 : f32
1898 // CHECK:           %[[VAL_19:.*]] = arith.index_castui %[[VAL_15]] : index to i32
1899 // CHECK:           %[[VAL_20:.*]] = arith.uitofp %[[VAL_19]] : i32 to f32
1900 // CHECK:           %[[VAL_21:.*]] = arith.index_castui %[[VAL_17]] : index to i32
1901 // CHECK:           %[[VAL_22:.*]] = arith.uitofp %[[VAL_21]] : i32 to f32
1902 // CHECK:           %[[VAL_23:.*]]:2 = linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]]], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%[[VAL_0]], %[[VAL_1]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%[[VAL_10]], %[[VAL_13]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
1903 // CHECK:           ^bb0(%[[VAL_24:.*]]: f32, %[[VAL_25:.*]]: f32, %[[VAL_26:.*]]: f32, %[[VAL_27:.*]]: f32):
1904 // CHECK:             %[[VAL_28:.*]] = linalg.index 1 : index
1905 // CHECK:             %[[VAL_29:.*]] = linalg.index 2 : index
1906 // CHECK:             %[[VAL_30:.*]] = linalg.index 3 : index
1907 // CHECK:             %[[VAL_31:.*]] = linalg.index 4 : index
1908 // CHECK:             %[[VAL_32:.*]] = index.mul %[[VAL_30]], %[[VAL_28]]
1909 // CHECK:             %[[VAL_33:.*]] = index.mul %[[VAL_31]], %[[VAL_29]]
1910 // CHECK:             %[[VAL_34:.*]] = index.remu %[[VAL_32]], %[[VAL_15]]
1911 // CHECK:             %[[VAL_35:.*]] = index.remu %[[VAL_33]], %[[VAL_17]]
1912 // CHECK:             %[[VAL_36:.*]] = arith.index_castui %[[VAL_34]] : index to i32
1913 // CHECK:             %[[VAL_37:.*]] = arith.uitofp %[[VAL_36]] : i32 to f32
1914 // CHECK:             %[[VAL_38:.*]] = arith.index_castui %[[VAL_35]] : index to i32
1915 // CHECK:             %[[VAL_39:.*]] = arith.uitofp %[[VAL_38]] : i32 to f32
1916 // CHECK:             %[[VAL_40:.*]] = arith.divf %[[VAL_37]], %[[VAL_20]] : f32
1917 // CHECK:             %[[VAL_41:.*]] = arith.divf %[[VAL_39]], %[[VAL_22]] : f32
1918 // CHECK:             %[[VAL_42:.*]] = arith.addf %[[VAL_40]], %[[VAL_41]] : f32
1919 // CHECK:             %[[VAL_43:.*]] = arith.mulf %[[VAL_18]], %[[VAL_42]] : f32
1920 // CHECK:             %[[VAL_44:.*]] = arith.constant -1.000000e+00 : f32
1921 // CHECK:             %[[VAL_45:.*]] = arith.mulf %[[VAL_43]], %[[VAL_44]] : f32
1922 // CHECK:             %[[VAL_46:.*]] = math.cos %[[VAL_45]] : f32
1923 // CHECK:             %[[VAL_47:.*]] = math.sin %[[VAL_45]] : f32
1924 // CHECK:             %[[VAL_48:.*]] = arith.mulf %[[VAL_24]], %[[VAL_46]] : f32
1925 // CHECK:             %[[VAL_49:.*]] = arith.mulf %[[VAL_25]], %[[VAL_47]] : f32
1926 // CHECK:             %[[VAL_50:.*]] = arith.addf %[[VAL_48]], %[[VAL_49]] : f32
1927 // CHECK:             %[[VAL_51:.*]] = arith.mulf %[[VAL_25]], %[[VAL_46]] : f32
1928 // CHECK:             %[[VAL_52:.*]] = arith.mulf %[[VAL_24]], %[[VAL_47]] : f32
1929 // CHECK:             %[[VAL_53:.*]] = arith.subf %[[VAL_51]], %[[VAL_52]] : f32
1930 // CHECK:             %[[VAL_54:.*]] = arith.addf %[[VAL_26]], %[[VAL_50]] : f32
1931 // CHECK:             %[[VAL_55:.*]] = arith.addf %[[VAL_27]], %[[VAL_53]] : f32
1932 // CHECK:             linalg.yield %[[VAL_54]], %[[VAL_55]] : f32, f32
1933 // CHECK:           } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
1934 // CHECK:           return %[[VAL_23]]#0, %[[VAL_23]]#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
1935 // CHECK:         }
1936 func.func @test_dynamic_fft2d(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
1937   %output_real, %output_imag = "tosa.fft2d"(%arg0, %arg1) {inverse = true} : (tensor<?x?x?xf32>, tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
1938   return %output_real, %output_imag : tensor<?x?x?xf32>, tensor<?x?x?xf32>
1939 }
1940
1941
1942 // -----
1943
1944 // CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (0)>
1945 // CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (d0)>
1946
1947 // CHECK-LABEL:   func.func @test_cast_fp32_i64(
1948 // CHECK-SAME:                                  %[[ARG0:.*]]: tensor<1xf32>) -> tensor<1xi64> {
1949 // CHECK:           %[[EMPTY_TENSOR:.*]] = tensor.empty() : tensor<1xi64>
1950 // CHECK:           %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<1xf32>) outs(%[[EMPTY_TENSOR]] : tensor<1xi64>) {
1951 // CHECK:           ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: i64):
1952 // CHECK:             %[[ROUND_EVEN:.*]] = math.roundeven %[[IN]] : f32
1953 // CHECK:             %[[FP_INT_MIN:.*]] = arith.constant -9.22337203E+18 : f32
1954 // CHECK:             %[[FP_INT_MAX_PLUS_ONE:.*]] = arith.constant 9.22337203E+18 : f32
1955 // CHECK:             %[[INT_MAX:.*]] = arith.constant 9223372036854775807 : i64
1956 // CHECK:             %[[MAX:.*]] = arith.maximumf %[[ROUND_EVEN]], %[[FP_INT_MIN]] : f32
1957 // CHECK:             %[[FPTOSI:.*]] = arith.fptosi %[[MAX]] : f32 to i64
1958 // CHECK:             %[[CMPF:.*]] = arith.cmpf uge, %[[ROUND_EVEN]], %[[FP_INT_MAX_PLUS_ONE]] : f32
1959 // CHECK:             %[[SELECT:.*]] = arith.select %[[CMPF]], %[[INT_MAX]], %[[FPTOSI]] : i64
1960 // CHECK:             linalg.yield %[[SELECT]] : i64
1961 // CHECK:           } -> tensor<1xi64>
1962 // CHECK:           return %[[RESULT]] : tensor<1xi64>
1963 func.func @test_cast_fp32_i64(%arg0: tensor<1xf32>) -> tensor<1xi64> {  // Test input: tosa.cast from a one-element f32 tensor to a one-element i64 tensor.
1964   %as_i64 = tosa.cast %arg0 : (tensor<1xf32>) -> tensor<1xi64>
1965   return %as_i64 : tensor<1xi64>
1966 }