1 // RUN: mlir-opt --split-input-file --tosa-to-linalg-named %s -verify-diagnostics -o -| FileCheck %s
// Static-shape f32 tosa.matmul. The patterns below require a zero constant,
// a [1, 5, 6] init tensor filled with that constant, and a
// linalg.batch_matmul that accumulates into the filled tensor.
3 // CHECK-LABEL: @matmul
4 func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) {
5 // CHECK: [[C0:%.+]] = arith.constant 0
6 // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6]
7 // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32>
8 // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
9 %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>)
10 return %0 : tensor<1x5x6xf32>
// Quantized i8 tosa.matmul with a_zp = 1 and b_zp = 2. The zero points must
// show up as constants passed as two extra i32 inputs to
// linalg.quantized_batch_matmul, which accumulates into a zero-filled i32
// tensor.
16 // CHECK-LABEL: @matmul_quantized
17 func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) {
18 // CHECK: [[C0:%.+]] = arith.constant 0
19 // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6]
20 // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : i32, tensor<1x5x6xi32> -> tensor<1x5x6xi32>
21 // CHECK: [[ONE:%.+]] = arith.constant 1
22 // CHECK: [[TWO:%.+]] = arith.constant 2
23 // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32>
24 %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = {a_zp = 1 : i32, b_zp = 2 : i32}} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>)
25 return %0 : tensor<1x5x6xi32>
// tosa.matmul with a dynamic batch dimension. The leading size of the init
// tensor must be taken from tensor.dim on %arg0 at index 0; the remaining
// sizes stay static.
30 // CHECK-LABEL: @matmul_dyn_batch
31 func @matmul_dyn_batch(%arg0: tensor<?x5x3xf32>, %arg1: tensor<?x3x6xf32>) -> (tensor<?x5x6xf32>) {
32 // CHECK: %[[C0:.+]] = arith.constant 0
33 // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
34 // CHECK: %[[C0_0:.+]] = arith.constant 0
35 // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 5, 6]
36 // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0_0]], %[[INIT]]) : f32, tensor<?x5x6xf32> -> tensor<?x5x6xf32>
37 // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) outs(%[[FILLED]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32>
38 %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<?x5x3xf32>, tensor<?x3x6xf32>) -> (tensor<?x5x6xf32>)
39 return %0 : tensor<?x5x6xf32>
// tosa.matmul whose only dynamic dimension is the last dimension of the
// right-hand operand (and of the result). That size must be read with
// tensor.dim on %arg1 at index 2 and used as the last init tensor size.
44 // CHECK-LABEL: @matmul_dyn_independent_dim
45 func @matmul_dyn_independent_dim(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) {
46 // CHECK: %[[C2:.+]] = arith.constant 2
47 // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]]
48 // CHECK: %[[C0:.+]] = arith.constant 0
49 // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, %[[DIM]]]
50 // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x?xf32> -> tensor<1x5x?xf32>
51 // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32>
52 %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>)
53 return %0 : tensor<1x5x?xf32>
// tosa.matmul whose only dynamic dimension is the contraction dimension
// (last dim of %arg0, middle dim of %arg1). The result type is fully static,
// so the init tensor must use the static sizes [1, 5, 6].
// The function carries its own name (it previously reused
// @matmul_dyn_independent_dim) so a failure here is not confused with the
// dynamic-output-dimension case.
58 // CHECK-LABEL: @matmul_dyn_reduction_dim
59 func @matmul_dyn_reduction_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) {
60 // CHECK: %[[C0:.+]] = arith.constant 0
61 // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, 6]
62 // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32>
63 // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
64 %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>)
65 return %0 : tensor<1x5x6xf32>
// Static-shape f32 tosa.fully_connected. The weight must be transposed with
// permutation [1, 0], multiplied with linalg.matmul into a zero-filled
// [5, 6] tensor, and the bias added by a linalg.generic that broadcasts
// %arg2 along d1.
// Block arguments of the generic are captured rather than spelled as
// %arg3/%arg4/%arg5, so the test does not depend on SSA value numbering.
70 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
71 // CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
73 // CHECK-LABEL: @fully_connected
74 func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) {
75 // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6]
76 // CHECK: [[ZERO:%.+]] = arith.constant 0
77 // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]])
78 // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]>
79 // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]])
80 // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]
81 // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32>
82 // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) {
83 // CHECK: ^bb0([[IN1:%.+]]: f32, [[IN2:%.+]]: f32, [[UNUSED:%.+]]: f32):
84 // CHECK: [[ADD:%.+]] = arith.addf [[IN1]], [[IN2]] : f32
85 // CHECK: linalg.yield [[ADD]] : f32
87 %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<5x3xf32>, tensor<6x3xf32>, tensor<6xf32>) -> (tensor<5x6xf32>)
88 return %0 : tensor<5x6xf32>
// Quantized i8 tosa.fully_connected with input_zp = 1 and weight_zp = 2.
// The zero points must be passed as i32 constants to
// linalg.quantized_matmul, and the i32 bias is added with arith.addi inside
// a linalg.generic.
// NOTE(review): IN1/IN2 are captured on the block-argument line but the
// arith.addi pattern does not check its operands -- confirm whether that
// looseness is intentional.
93 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
94 // CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
96 // CHECK-LABEL: @quantized_fully_connected
97 func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) {
98 // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6]
99 // CHECK: [[ZERO:%.+]] = arith.constant 0
100 // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]])
101 // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]>
102 // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]])
103 // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6]
104 // CHECK: [[ONE:%.+]] = arith.constant 1
105 // CHECK: [[TWO:%.+]] = arith.constant 2
106 // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32>
107 // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]]
108 // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32):
109 // CHECK: [[ADD:%.+]] = arith.addi
110 // CHECK: linalg.yield [[ADD]] : i32
111 %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) {quantization_info = {input_zp = 1:i32, weight_zp = 2:i32}} : (tensor<5x3xi8>, tensor<6x3xi8>, tensor<6xi32>) -> (tensor<5x6xi32>)
112 return %0 : tensor<5x6xi32>
// f32 tosa.fully_connected with a dynamic leading (row) dimension on the
// input. The row count must be read once with tensor.dim on %arg0 at index 0
// and reused for both init tensors (matmul accumulator and bias output).
// Block arguments of the generic are captured rather than spelled as
// %arg3/%arg4/%arg5, so the test does not depend on SSA value numbering.
117 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
118 // CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
120 // CHECK-LABEL: @fully_connected_dyn
121 func @fully_connected_dyn(%arg0: tensor<?x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<?x6xf32>) {
122 // CHECK: %[[C0:.+]] = arith.constant 0
123 // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]]
124 // CHECK: %[[INITT:.+]] = linalg.init_tensor [%[[DIM]], 6]
125 // CHECK: %[[ZERO:.+]] = arith.constant 0
126 // CHECK: %[[FILL:.+]] = linalg.fill(%[[ZERO]], %[[INITT]])
127 // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]>
128 // CHECK: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERM]])
129 // CHECK: %[[INITB:.+]] = linalg.init_tensor [%[[DIM]], 6]
130 // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor<?x3xf32>, tensor<3x6xf32>) outs(%[[FILL]] : tensor<?x6xf32>) -> tensor<?x6xf32>
131 // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor<?x6xf32>) outs(%[[INITB]] : tensor<?x6xf32>) {
132 // CHECK: ^bb0(%[[IN1:.+]]: f32, %[[IN2:.+]]: f32, %[[UNUSED:.+]]: f32):
133 // CHECK: %[[ADD:.+]] = arith.addf %[[IN1]], %[[IN2]] : f32
134 // CHECK: linalg.yield %[[ADD]] : f32
136 %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<?x3xf32>, tensor<6x3xf32>, tensor<6xf32>) -> (tensor<?x6xf32>)
137 return %0 : tensor<?x6xf32>
// f32 tosa.max_pool2d with a 3x3 kernel, stride 1 and no padding. The
// accumulator must be filled with -3.40282347E+38 and a [3, 3] init tensor is
// passed as the kernel-shape operand of linalg.pooling_nhwc_max. The setup
// ops are matched with the -DAG form, i.e. in any order.
142 // CHECK-LABEL: @max_pool
143 func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () {
144 // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38
145 // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 32, 62]
146 // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[CONST]], [[INIT]])
147 // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3]
148 // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>)
149 %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>)
// f32 tosa.max_pool2d with pad = [0, 0, 0, 1]. The input must first go
// through linalg.pad_tensor with high padding of 1 on dimension 2 (34 -> 35),
// yielding the same -3.40282347E+38 value that initializes the accumulator,
// and the pooling then consumes the padded tensor.
153 // CHECK-LABEL: @max_pool_padded
154 func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () {
155 // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 : f32
156 // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0]
157 // CHECK-DAG: linalg.yield [[CONST]]
158 // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32
159 // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62]
160 // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]])
161 // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3]
162 // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>)
163 %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>)
// i8 tosa.max_pool2d: only checks that a -128 constant (the smallest signed
// 8-bit value) is emitted before linalg.pooling_nhwc_max.
167 // CHECK-LABEL: @max_pool_i8
168 func @max_pool_i8(%arg0: tensor<1x6x34x62xi8>) -> () {
169 // CHECK: arith.constant -128
170 // CHECK: linalg.pooling_nhwc_max
171 %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi8>) -> (tensor<1x4x32x62xi8>)
// i16 tosa.max_pool2d: only checks that a -32768 constant (the smallest
// signed 16-bit value) is emitted before linalg.pooling_nhwc_max.
175 // CHECK-LABEL: @max_pool_i16
176 func @max_pool_i16(%arg0: tensor<1x6x34x62xi16>) -> () {
177 // CHECK: arith.constant -32768
178 // CHECK: linalg.pooling_nhwc_max
179 %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi16>) -> (tensor<1x4x32x62xi16>)
// i32 tosa.max_pool2d: only checks that a -2147483648 constant (the smallest
// signed 32-bit value) is emitted before linalg.pooling_nhwc_max.
183 // CHECK-LABEL: @max_pool_i32
184 func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () {
185 // CHECK: arith.constant -2147483648
186 // CHECK: linalg.pooling_nhwc_max
187 %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi32>) -> (tensor<1x4x32x62xi32>)
// f32 tosa.avg_pool2d with a 4x4 kernel, stride 1 and padding 1 on every
// side. The lowering is checked in three stages: a padded
// linalg.pooling_nhwc_sum, index arithmetic that counts how many kernel
// elements overlap non-padded input, and a division of the sum by that count.
// The ZERO capture uses the same `%.+` pattern as every other capture (it
// previously read `%.0`, which only matched a three-character value name
// ending in 0), and the affine-map alias is captured instead of being spelled
// as the printer-chosen name.
192 // CHECK-LABEL: @avg_pool
193 func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) {
194 // Initial piece computes the sum of the pooling region, with appropriate padding.
195 // CHECK: [[CONST:%.+]] = arith.constant 0
196 // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
197 // CHECK: [[CONST:%.+]] = arith.constant 0
198 // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
199 // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]])
200 // CHECK: [[KERNEL:%.+]] = linalg.init_tensor [4, 4]
201 // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>)
202 // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
203 // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[MAP:.+]], #[[MAP]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] : tensor<1x5x33x62xf32>) outs([[INIT]] : tensor<1x5x33x62xf32>)
204 // CHECK: [[ZERO:%.+]] = arith.constant 0
205 // CHECK: [[ONE:%.+]] = arith.constant 1
206 // CHECK: [[HEIGHT:%.+]] = arith.constant 4
207 // CHECK: [[WIDTH:%.+]] = arith.constant 32
208 // CHECK: [[IDX1:%.+]] = linalg.index 1
209 // CHECK: [[IDX2:%.+]] = linalg.index 2
211 // The large block below computes what portion of the kernel is within non-padded input.
212 // CHECK: [[NY:%.+]] = arith.subi [[HEIGHT]], [[IDX1]]
213 // CHECK: [[NX:%.+]] = arith.subi [[WIDTH]], [[IDX2]]
214 // CHECK: [[KH:%.+]] = arith.constant 4
215 // CHECK: [[PAD0:%.+]] = arith.constant 1
216 // CHECK: [[SUBP0:%.+]] = arith.subi [[IDX1]], [[PAD0]]
217 // CHECK: [[P0CMP:%.+]] = arith.cmpi slt, [[SUBP0]], [[ZERO]]
218 // CHECK: [[SELP0:%.+]] = select [[P0CMP]], [[SUBP0]], [[ZERO]]
219 // CHECK: [[ADDP0:%.+]] = arith.addi [[KH]], [[SELP0]]
220 // CHECK: [[PAD1:%.+]] = arith.constant 1
221 // CHECK: [[SUBP1:%.+]] = arith.subi [[NY]], [[PAD1]]
222 // CHECK: [[P1CMP:%.+]] = arith.cmpi slt, [[SUBP1]], [[ZERO]]
223 // CHECK: [[SELP1:%.+]] = select [[P1CMP]], [[SUBP1]], [[ZERO]]
224 // CHECK: [[ADDP1:%.+]] = arith.addi [[ADDP0]], [[SELP1]]
225 // CHECK: [[YCMP:%.+]] = arith.cmpi slt, [[ADDP1]], [[ONE]]
226 // CHECK: [[YSEL:%.+]] = select [[YCMP]], [[ONE]], [[ADDP1]]
227 // CHECK: [[KW:%.+]] = arith.constant 4 : index
228 // CHECK: [[PAD2:%.+]] = arith.constant 1 : index
229 // CHECK: [[SUBP2:%.+]] = arith.subi [[IDX2]], [[PAD2]]
230 // CHECK: [[P2CMP:%.+]] = arith.cmpi slt, [[SUBP2]], [[ZERO]]
231 // CHECK: [[SELP2:%.+]] = select [[P2CMP]], [[SUBP2]], [[ZERO]]
232 // CHECK: [[ADDP2:%.+]] = arith.addi [[KW]], [[SELP2]]
233 // CHECK: [[PAD3:%.+]] = arith.constant 1 : index
234 // CHECK: [[SUBP3:%.+]] = arith.subi [[NX]], [[PAD3]]
235 // CHECK: [[P3CMP:%.+]] = arith.cmpi slt, [[SUBP3]], [[ZERO]]
236 // CHECK: [[SELP3:%.+]] = select [[P3CMP]], [[SUBP3]], [[ZERO]]
237 // CHECK: [[ADDP3:%.+]] = arith.addi [[ADDP2]], [[SELP3]]
238 // CHECK: [[XCMP:%.+]] = arith.cmpi slt, [[ADDP3]], [[ONE]]
239 // CHECK: [[XSEL:%.+]] = select [[XCMP]], [[ONE]], [[ADDP3]]
241 // Given the valid coverage of the pooling region, normalize the summation.
242 // CHECK: [[C:%.+]] = arith.muli [[YSEL]], [[XSEL]]
243 // CHECK: [[CI:%.+]] = arith.index_cast [[C]]
244 // CHECK: [[CF:%.+]] = arith.sitofp [[CI]]
245 // CHECK: [[RESULT:%.+]] = arith.divf %arg1, [[CF]]
246 // CHECK: linalg.yield [[RESULT]]
247 %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>)
248 return %0 : tensor<1x5x33x62xf32>
// Quantized i8 tosa.avg_pool2d (4x4 kernel, stride 4, input_zp = output_zp =
// -128). After the sum pooling, the generic body must subtract the scaled
// input zero point, rescale through tosa.apply_scale with a multiplier
// derived from 1073741825 and shift 30, add the output zero point, clamp to
// [-128, 127] and truncate.
// NOTE(review): OFFSETED is captured but the first tosa.apply_scale operand
// is matched with a wildcard -- confirm whether it should be pinned.
253 // CHECK-LABEL: @avg_pool_i8
254 func @avg_pool_i8(%arg0 : tensor<1x128x128x2xi8>) -> () {
256 // CHECK: linalg.pooling_nhwc_sum
257 // CHECK: linalg.generic
259 // CHECK: %[[INZP:.+]] = arith.constant -128
260 // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]]
261 // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]]
262 // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825
263 // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}}
264 // CHECK: %[[SHIFT:.+]] = arith.constant 30
265 // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false}
266 // CHECK: %[[OUTZP:.+]] = arith.constant -128
267 // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]]
268 // CHECK: %[[MIN:.+]] = arith.constant -128
269 // CHECK: %[[MAX:.+]] = arith.constant 127
270 // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]]
271 // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]]
272 // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]]
273 // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]]
274 // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]]
275 // CHECK: linalg.yield %[[TRUNC]]
276 %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi8>) -> tensor<1x32x32x2xi8>
// Quantized i16 tosa.avg_pool2d (4x4 kernel, stride 4, input_zp = output_zp =
// -128). Same sequence as the i8 case, except that the clamp bounds are the
// 16-bit limits [-32768, 32767].
// NOTE(review): OFFSETED is captured but the first tosa.apply_scale operand
// is matched with a wildcard -- confirm whether it should be pinned.
282 // CHECK-LABEL: @avg_pool_i16
283 func @avg_pool_i16(%arg0 : tensor<1x128x128x2xi16>) -> () {
285 // CHECK: linalg.pooling_nhwc_sum
286 // CHECK: linalg.generic
288 // CHECK: %[[INZP:.+]] = arith.constant -128
289 // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]]
290 // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]]
291 // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825
292 // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}}
293 // CHECK: %[[SHIFT:.+]] = arith.constant 30
294 // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false}
295 // CHECK: %[[OUTZP:.+]] = arith.constant -128
296 // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]]
297 // CHECK: %[[MIN:.+]] = arith.constant -32768
298 // CHECK: %[[MAX:.+]] = arith.constant 32767
299 // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]]
300 // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]]
301 // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]]
302 // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]]
303 // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]]
304 // CHECK: linalg.yield %[[TRUNC]]
305 %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi16>) -> tensor<1x32x32x2xi16>
// f32 tosa.conv2d without padding, stride 1, dilation [2, 1]. The weights
// must be transposed with permutation [1, 2, 3, 0] (28x3x3x27 -> 3x3x27x28)
// before linalg.conv_2d_nhwc_hwcf, and the bias is added afterwards by a
// linalg.generic that broadcasts %arg2 along d3.
311 // CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)>
312 // CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
314 // CHECK-LABEL: @conv2d_f32
315 func @conv2d_f32(%input: tensor<1x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () {
316 // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>
317 // CHECK: %[[W:.+]] = "tosa.transpose"(%arg1, %[[PERM]])
318 // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, 45, 40, 28]
319 // CHECK: %[[CST:.+]] = arith.constant 0
320 // CHECK: %[[FILL:.+]] = linalg.fill
321 // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, 45, 40, 28]
322 // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>)
323 // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>)
325 // CHECK: linalg.yield
326 %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>)
// f32 tosa.conv2d with pad = [1, 1, 1, 1]. The input must be padded by
// linalg.pad_tensor (1 low and 1 high on dimensions 1 and 2) with the zero
// constant as the padding value before the convolution.
332 // CHECK-LABEL: @conv2d_padded_f32
333 func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () {
334 // CHECK: %[[C0:.+]] = arith.constant 0
335 // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
336 // CHECK: linalg.yield %[[C0]]
337 // CHECK: linalg.conv_2d_nhwc_hwcf
338 %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>)
// Quantized i8 tosa.conv2d with pad = [1, 1, 1, 1] and input_zp = -22. The
// padding value yielded by linalg.pad_tensor must be the input zero point
// (-22), and the quantized named op linalg.conv_2d_nhwc_hwcf_q must be used.
344 // CHECK-LABEL: @conv2d_quant
345 func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () {
346 // CHECK: %[[C22:.+]] = arith.constant -22
347 // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
348 // CHECK: linalg.yield %[[C22]]
349 // CHECK: linalg.conv_2d_nhwc_hwcf_q
350 %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32>
// f32 tosa.depthwise_conv2d, no padding, stride 1, dilation 1. The named op
// linalg.depthwise_conv_2d_nhwc_hwcm must produce a 5-D result
// (1x5x5x3x11) that is collapsed to 1x5x5x33 with tosa.reshape, after which
// the bias is added by a linalg.generic.
// The block-argument pattern captures the argument names and does not match
// the printer's trailing block comment, so it does not depend on SSA
// numbering or on how the printer annotates entry blocks.
356 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
357 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
359 // CHECK-LABEL: @depthwise_conv
360 func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () {
361 // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11]
362 // CHECK: [[CST0:%.+]] = arith.constant 0
363 // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
364 // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33]
365 // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
366 // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]}
367 // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
368 // CHECK: ^bb0([[IN1:%.+]]: f32, [[IN2:%.+]]: f32, [[UNUSED:%.+]]: f32):
369 // CHECK: [[ADD:%.+]] = arith.addf [[IN1]], [[IN2]] : f32
370 // CHECK: linalg.yield [[ADD]] : f32
371 // CHECK: } -> tensor<1x5x5x33xf32>
372 %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [1, 1], dilation = [1, 1] } : (tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>)
// f32 tosa.depthwise_conv2d with stride = [2, 2]. The stride must be carried
// onto linalg.depthwise_conv_2d_nhwc_hwcm as strides = dense<2>; the rest of
// the sequence (5-D result, tosa.reshape collapse, bias generic) is checked
// as in the unit-stride case.
// The block-argument pattern captures the argument names and does not match
// the printer's trailing block comment, so it does not depend on SSA
// numbering or on how the printer annotates entry blocks.
378 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
379 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
381 // CHECK-LABEL: @depthwise_conv_strides
382 func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () {
383 // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11]
384 // CHECK: [[CST0:%.+]] = arith.constant 0
385 // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
386 // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33]
387 // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>)
388 // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]}
389 // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) {
390 // CHECK: ^bb0([[IN1:%.+]]: f32, [[IN2:%.+]]: f32, [[UNUSED:%.+]]: f32):
391 // CHECK: [[ADD:%.+]] = arith.addf [[IN1]], [[IN2]] : f32
392 // CHECK: linalg.yield [[ADD]] : f32
393 // CHECK: } -> tensor<1x5x5x33xf32>
394 %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [2, 2], dilation = [1, 1] } : (tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>)
// Quantized i8 tosa.depthwise_conv2d with pad = [1, 1, 1, 1], input_zp = -128
// and weight_zp = 42. The input must be padded with the input zero point,
// both zero points are passed as i32 operands to
// linalg.depthwise_conv_2d_nhwc_hwcm_q, and the i32 bias is added after the
// tosa.reshape collapse.
// The block-argument pattern captures the argument names and does not match
// the printer's trailing block comment, so it does not depend on SSA
// numbering or on how the printer annotates entry blocks.
400 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
401 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
403 // CHECK-LABEL: @depthwise_conv_quant
404 func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () {
405 // CHECK: [[PADV:%.+]] = arith.constant -128
406 // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
407 // CHECK: linalg.yield [[PADV]]
409 // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128]
410 // CHECK: [[CST0:%.+]] = arith.constant 0
411 // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
412 // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 12, 12, 512]
413 // CHECK: [[C128:%.+]] = arith.constant -128
414 // CHECK: [[C42:%.+]] = arith.constant 42
415 // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>)
416 // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 12, 12, 512]}
417 // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) {
418 // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32):
419 // CHECK: [[ADD:%.+]] = arith.addi [[IN1]], [[IN2]] : i32
420 // CHECK: linalg.yield [[ADD]] : i32
421 // CHECK: } -> tensor<1x12x12x512xi32>
422 %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 1, 1, 1], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [1, 1] } : (tensor<1x12x12x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x12x12x512xi32>
// Quantized i8 tosa.depthwise_conv2d with dilation = [2, 2] and no padding.
// The dilation must be carried onto linalg.depthwise_conv_2d_nhwc_hwcm_q as
// dilations = dense<2>, giving a 1x10x10x4x128 result that is collapsed to
// 1x10x10x512 before the bias add.
// The block-argument pattern captures the argument names and does not match
// the printer's trailing block comment, so it does not depend on SSA
// numbering or on how the printer annotates entry blocks.
428 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
429 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
431 // CHECK-LABEL: @depthwise_conv_quant_dilations
432 func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () {
433 // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 10, 10, 4, 128]
434 // CHECK: [[CST0:%.+]] = arith.constant 0
435 // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]])
436 // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 10, 10, 512]
437 // CHECK: [[C128:%.+]] = arith.constant -128
438 // CHECK: [[C42:%.+]] = arith.constant 42
439 // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>)
440 // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 10, 10, 512]}
441 // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) {
442 // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32):
443 // CHECK: [[ADD:%.+]] = arith.addi [[IN1]], [[IN2]] : i32
444 // CHECK: linalg.yield [[ADD]] : i32
445 // CHECK: } -> tensor<1x10x10x512xi32>
446 %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [2, 2] } : (tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x10x10x512xi32>