1 // RUN: mlir-opt --split-input-file -pass-pipeline="builtin.module(func.func(tosa-to-linalg))" %s -verify-diagnostics -o -| FileCheck %s
// Rank-0 (scalar) tensor: tosa.abs lowers to a linalg.generic with an empty
// iteration space (no loops) whose payload applies math.absf.
3 // CHECK: #[[$MAP0:.*]] = affine_map<() -> ()>
5 // CHECK-LABEL: @test_abs_scalar
6 // CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]]
7 func.func @test_abs_scalar(%arg0: tensor<f32>) -> tensor<f32> {
8 // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<f32>
9 // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins([[ARG0]] : tensor<f32>) outs([[INIT]] : tensor<f32>) {
10 // CHECK: ^bb0([[ARG1:%.*]]: f32, [[ARG2:%.*]]: f32):
11 // CHECK: [[ELEMENT:%.*]] = math.absf [[ARG1]] : f32
12 // CHECK: linalg.yield [[ELEMENT]] : f32
13 // CHECK: } -> tensor<f32>
14 %0 = tosa.abs %arg0 : (tensor<f32>) -> tensor<f32>
16 // CHECK: return [[GENERIC]] : tensor<f32>
17 return %0 : tensor<f32>
// Static input, dynamic result type: the abs is computed on the static
// 5-element shape and the value is tensor.cast to tensor<?xf32> afterwards.
22 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
23 // CHECK-LABEL: @test_abs_1d_cast_static_to_dynamic
24 // CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]]
25 func.func @test_abs_1d_cast_static_to_dynamic(%arg0: tensor<5xf32>) -> tensor<?xf32> {
26 // CHECK: [[EMPTY:%.+]] = tensor.empty() : tensor<5xf32>
27 // CHECK: [[RESULT:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]] : tensor<5xf32>) outs([[EMPTY]] : tensor<5xf32>) {
28 // CHECK: ^bb0([[IN0:%.+]]: f32, [[OUT0:%.+]]: f32):
29 // CHECK: [[ABS:%.+]] = math.absf [[IN0]] : f32
30 // CHECK: linalg.yield [[ABS]] : f32
31 // CHECK: } -> tensor<5xf32>
32 // CHECK: [[CAST_RESULT:%.+]] = tensor.cast [[RESULT]] : tensor<5xf32> to tensor<?xf32>
33 %0 = "tosa.abs"(%arg0) : (tensor<5xf32>) -> tensor<?xf32>
35 // CHECK: return [[CAST_RESULT]] : tensor<?xf32>
36 return %0 : tensor<?xf32>
// Dynamic input, static result type: the computation stays dynamic (the size
// is read with tensor.dim) and the result is tensor.cast to the static type.
41 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
42 // CHECK-LABEL: @test_abs_1d_cast_dynamic_to_static
43 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
44 func.func @test_abs_1d_cast_dynamic_to_static(%arg0: tensor<?xf32>) -> tensor<5xf32> {
45 // CHECK: %[[ZERO:.*]] = arith.constant 0 : index
46 // CHECK: %[[DIM_SIZE:.*]] = tensor.dim %[[ARG0]], %[[ZERO]] : tensor<?xf32>
47 // CHECK: %[[EMPTY:.*]] = tensor.empty(%[[DIM_SIZE]]) : tensor<?xf32>
48 // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<?xf32>) outs(%[[EMPTY]] : tensor<?xf32>) {
49 // CHECK: ^bb0(%[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: f32):
50 // CHECK: %[[VAL_2:.*]] = math.absf %[[VAL_0]] : f32
51 // CHECK: linalg.yield %[[VAL_2]] : f32
52 // CHECK: } -> tensor<?xf32>
53 // CHECK: %[[CAST_RESULT:.*]] = tensor.cast %[[RESULT]] : tensor<?xf32> to tensor<5xf32>
54 %0 = "tosa.abs"(%arg0) : (tensor<?xf32>) -> tensor<5xf32>
56 // CHECK: return %[[CAST_RESULT]] : tensor<5xf32>
57 return %0 : tensor<5xf32>
// Fully dynamic 1-D tensor: the output size is read from the input with
// tensor.dim before materializing the empty init tensor.
// Match via the captured names ([[$MAP0]] / [[ARG0]]) rather than the literal
// printer aliases `#map`/`%arg0`, so the test does not break when MLIR
// renumbers attribute aliases or value names; this also matches the style of
// the sibling tests in this file.
62 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
63 // CHECK-LABEL: @test_abs_1d_dynamic
64 // CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]]
65 func.func @test_abs_1d_dynamic(%arg0: tensor<?xf32>) -> tensor<?xf32> {
67 // CHECK: [[ZERO:%.+]] = arith.constant 0 : index
68 // CHECK: [[DIM:%.+]] = tensor.dim [[ARG0]], [[ZERO]] : tensor<?xf32>
69 // CHECK: [[EMPTY:%.+]] = tensor.empty([[DIM]]) : tensor<?xf32>
70 // CHECK: [[RESULT:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]] : tensor<?xf32>) outs([[EMPTY]] : tensor<?xf32>) {
71 // CHECK: ^bb0([[IN0:%.+]]: f32, [[OUT0:%.+]]: f32):
72 // CHECK: [[ABSF:%.+]] = math.absf [[IN0]] : f32
73 // CHECK: linalg.yield [[ABSF]] : f32
74 // CHECK: } -> tensor<?xf32>
75 %0 = tosa.abs %arg0 : (tensor<?xf32>) -> tensor<?xf32>
77 // CHECK: return [[RESULT]] : tensor<?xf32>
78 return %0 : tensor<?xf32>
// tosa.add on two rank-0 tensors: a zero-dimensional linalg.generic with an
// arith.addf payload.
// Match via the captured map name [[$MAP0]] rather than the literal printer
// alias `#map`, so the test is robust against attribute-alias renumbering and
// consistent with the other tests in this file.
83 // CHECK: #[[$MAP0:.*]] = affine_map<() -> ()>
84 // CHECK-LABEL: @test_add_0d
85 // CHECK-SAME: [[ARG0:%[0-9a-zA-Z_]*]]:
86 // CHECK-SAME: [[ARG1:%[0-9a-zA-Z_]*]]:
87 func.func @test_add_0d(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
89 // CHECK: [[EMPTY:%.+]] = tensor.empty() : tensor<f32>
90 // CHECK: [[RESULT:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins([[ARG0]], [[ARG1]] : tensor<f32>, tensor<f32>) outs([[EMPTY]] : tensor<f32>) {
91 // CHECK: ^bb0([[IN0:%.+]]: f32, [[IN1:%.+]]: f32, [[OUT0:%.+]]: f32):
92 // CHECK: [[ADDF:%.+]] = arith.addf [[IN0]], [[IN1]] : f32
93 // CHECK: linalg.yield [[ADDF]] : f32
94 // CHECK: } -> tensor<f32>
95 %0 = tosa.add %arg0, %arg1 : (tensor<f32>, tensor<f32>) -> tensor<f32>
97 // CHECK: return [[RESULT]] : tensor<f32>
98 return %0 : tensor<f32>
// Both addends are dynamic 1-D tensors: the lowering takes the runtime
// maximum of the two sizes, conditionally broadcasts each operand via scf.if
// (taken when that operand's runtime size is 1), then adds elementwise.
// Capture names fixed for accuracy: the maxui result is the max over BOTH
// operands (MAX_DIM0, matching @test_add_2d_all_dynamic), and the second
// scf.if broadcasts ARG1's dim 0, not a second dim of ARG0.
103 // CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (0)>
104 // CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (d0)>
105 // CHECK-LABEL: @test_add_1d_all_dynamic
106 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
107 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
108 func.func @test_add_1d_all_dynamic(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
110 // CHECK: %[[CONST0:.*]] = arith.constant 0 : index
111 // CHECK: %[[ARG0_DIM0:.*]] = tensor.dim %[[ARG0]], %[[CONST0]] : tensor<?xf32>
112 // CHECK: %[[ARG1_DIM0:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?xf32>
113 // CHECK: %[[MAX_DIM0:.*]] = arith.maxui %[[ARG0_DIM0]], %[[ARG1_DIM0]] : index
114 // CHECK: %[[CONST1:.*]] = arith.constant 1 : index
115 // CHECK: %[[VAL_0:.*]] = tensor.dim %[[ARG0]], %[[CONST0]] : tensor<?xf32>
116 // CHECK: %[[VAL_1:.*]] = arith.cmpi eq, %[[VAL_0]], %[[CONST1]] : index
117 // CHECK: %[[ARG0_DIM0_BROADCAST:.*]] = scf.if %[[VAL_1]] -> (tensor<?xf32>) {
118 // CHECK: %[[VAL_2:.*]] = tensor.empty(%[[MAX_DIM0]]) : tensor<?xf32>
119 // CHECK: %[[VAL_3:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<?xf32>) outs(%[[VAL_2]] : tensor<?xf32>) {
120 // CHECK: ^bb0(%[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: f32):
121 // CHECK: linalg.yield %[[VAL_4]] : f32
122 // CHECK: } -> tensor<?xf32>
123 // CHECK: scf.yield %[[VAL_3]] : tensor<?xf32>
125 // CHECK: scf.yield %[[ARG0]] : tensor<?xf32>
127 // CHECK: %[[VAL_6:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?xf32>
128 // CHECK: %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_6]], %[[CONST1]] : index
129 // CHECK: %[[ARG1_DIM0_BROADCAST:.*]] = scf.if %[[VAL_7]] -> (tensor<?xf32>) {
130 // CHECK: %[[VAL_8:.*]] = tensor.empty(%[[MAX_DIM0]]) : tensor<?xf32>
131 // CHECK: %[[VAL_9:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG1]] : tensor<?xf32>) outs(%[[VAL_8]] : tensor<?xf32>) {
132 // CHECK: ^bb0(%[[VAL_10:.*]]: f32, %[[VAL_11:.*]]: f32):
133 // CHECK: linalg.yield %[[VAL_10]] : f32
134 // CHECK: } -> tensor<?xf32>
135 // CHECK: scf.yield %[[VAL_9]] : tensor<?xf32>
137 // CHECK: scf.yield %[[ARG1]] : tensor<?xf32>
139 // CHECK: %[[VAL_12:.*]] = tensor.empty(%[[MAX_DIM0]]) : tensor<?xf32>
140 // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG0_DIM0_BROADCAST]], %[[ARG1_DIM0_BROADCAST]] : tensor<?xf32>, tensor<?xf32>) outs(%[[VAL_12]] : tensor<?xf32>) {
141 // CHECK: ^bb0(%[[VAL_13:.*]]: f32, %[[VAL_14:.*]]: f32, %[[VAL_15:.*]]: f32):
142 // CHECK: %[[VAL_16:.*]] = arith.addf %[[VAL_13]], %[[VAL_14]] : f32
143 // CHECK: linalg.yield %[[VAL_16]] : f32
144 // CHECK: } -> tensor<?xf32>
145 %0 = tosa.add %arg0, %arg1 : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
147 // CHECK: return %[[RESULT]] : tensor<?xf32>
148 return %0 : tensor<?xf32>
// Static LHS (5), dynamic RHS: only the dynamic operand may need
// broadcasting. When its runtime size is 1 it is broadcast to the static
// size 5, then tensor.cast back to tensor<?xf32> so both scf.if branches
// yield the same type.
153 // CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (0)>
154 // CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (d0)>
155 // CHECK-LABEL: @test_add_1d_broadcast_dynamic_to_static
156 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
157 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
158 func.func @test_add_1d_broadcast_dynamic_to_static(%arg0: tensor<5xf32>, %arg1: tensor<?xf32>) -> tensor<5xf32> {
160 // CHECK: %[[CONST1:.*]] = arith.constant 1 : index
161 // CHECK: %[[CONST0:.*]] = arith.constant 0 : index
162 // CHECK: %[[ARG1_DIM0:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?xf32>
163 // CHECK: %[[VAL_0:.*]] = arith.cmpi eq, %[[ARG1_DIM0]], %[[CONST1]] : index
164 // CHECK: %[[ARG1_DIM0_BROADCAST:.*]] = scf.if %[[VAL_0]] -> (tensor<?xf32>) {
165 // CHECK: %[[VAL_1:.*]] = tensor.empty() : tensor<5xf32>
166 // CHECK: %[[VAL_2:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG1]] : tensor<?xf32>) outs(%[[VAL_1]] : tensor<5xf32>) {
167 // CHECK: ^bb0(%[[VAL_3:.*]]: f32, %[[VAL_4:.*]]: f32):
168 // CHECK: linalg.yield %[[VAL_3]] : f32
169 // CHECK: } -> tensor<5xf32>
170 // CHECK: %[[VAL_5:.*]] = tensor.cast %[[VAL_2]] : tensor<5xf32> to tensor<?xf32>
171 // CHECK: scf.yield %[[VAL_5]] : tensor<?xf32>
173 // CHECK: scf.yield %[[ARG1]] : tensor<?xf32>
175 // CHECK: %[[VAL_6:.*]] = tensor.empty() : tensor<5xf32>
176 // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG0]], %[[ARG1_DIM0_BROADCAST]] : tensor<5xf32>, tensor<?xf32>) outs(%[[VAL_6]] : tensor<5xf32>) {
177 // CHECK: ^bb0(%[[VAL_7:.*]]: f32, %[[VAL_8:.*]]: f32, %[[VAL_9:.*]]: f32):
178 // CHECK: %[[VAL_10:.*]] = arith.addf %[[VAL_7]], %[[VAL_8]] : f32
179 // CHECK: linalg.yield %[[VAL_10]] : f32
180 // CHECK: } -> tensor<5xf32>
181 %0 = tosa.add %arg0, %arg1 : (tensor<5xf32>, tensor<?xf32>) -> tensor<5xf32>
183 // CHECK: return %[[RESULT]] : tensor<5xf32>
184 return %0 : tensor<5xf32>
// Statically known size-1 LHS against a dynamic RHS: no scf.if is needed —
// the size-1 operand is broadcast directly through the (d0) -> (0) map.
189 // CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (0)>
190 // CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (d0)>
191 // CHECK-LABEL: @test_add_1d_broadcast_static_to_dynamic
192 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
193 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
194 func.func @test_add_1d_broadcast_static_to_dynamic(%arg0: tensor<1xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
196 // CHECK: %[[CONST0:.*]] = arith.constant 0 : index
197 // CHECK: %[[ARG1_DIM0:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?xf32>
198 // CHECK: %[[VAL_0:.*]] = tensor.empty(%[[ARG1_DIM0]]) : tensor<?xf32>
199 // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG0]], %[[ARG1]] : tensor<1xf32>, tensor<?xf32>) outs(%[[VAL_0]] : tensor<?xf32>) {
200 // CHECK: ^bb0(%[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: f32, %[[VAL_3:.*]]: f32):
201 // CHECK: %[[VAL_4:.*]] = arith.addf %[[VAL_1]], %[[VAL_2]] : f32
202 // CHECK: linalg.yield %[[VAL_4]] : f32
203 // CHECK: } -> tensor<?xf32>
204 %0 = tosa.add %arg0, %arg1 : (tensor<1xf32>, tensor<?xf32>) -> tensor<?xf32>
206 // CHECK: return %[[RESULT]] : tensor<?xf32>
207 return %0 : tensor<?xf32>
// Fully static broadcast (1 -> 3): handled purely via the (d0) -> (0)
// indexing map, with no runtime checks.
212 // CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (0)>
213 // CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (d0)>
214 // CHECK-LABEL: @test_add_1d_broadcast_static_to_static
215 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
216 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
217 func.func @test_add_1d_broadcast_static_to_static(%arg0: tensor<1xf32>, %arg1: tensor<3xf32>) -> tensor<3xf32> {
219 // CHECK: %[[VAL_0:.*]] = tensor.empty() : tensor<3xf32>
220 // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG0]], %[[ARG1]] : tensor<1xf32>, tensor<3xf32>) outs(%[[VAL_0]] : tensor<3xf32>) {
221 // CHECK: ^bb0(%[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: f32, %[[VAL_3:.*]]: f32):
222 // CHECK: %[[VAL_4:.*]] = arith.addf %[[VAL_1]], %[[VAL_2]] : f32
223 // CHECK: linalg.yield %[[VAL_4]] : f32
224 // CHECK: } -> tensor<3xf32>
225 %0 = tosa.add %arg0, %arg1 : (tensor<1xf32>, tensor<3xf32>) -> tensor<3xf32>
227 // CHECK: return %[[RESULT]] : tensor<3xf32>
228 return %0 : tensor<3xf32>
// Matching static shapes: no broadcasting at all — identity indexing maps on
// every operand.
233 // CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)>
234 // CHECK-LABEL: @test_add_1d_matching_static
235 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
236 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
237 func.func @test_add_1d_matching_static(%arg0: tensor<3xf32>, %arg1: tensor<3xf32>) -> tensor<3xf32> {
239 // CHECK: %[[VAL_0:.*]] = tensor.empty() : tensor<3xf32>
240 // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], %[[ARG1]] : tensor<3xf32>, tensor<3xf32>) outs(%[[VAL_0]] : tensor<3xf32>) {
241 // CHECK: ^bb0(%[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: f32, %[[VAL_3:.*]]: f32):
242 // CHECK: %[[VAL_4:.*]] = arith.addf %[[VAL_1]], %[[VAL_2]] : f32
243 // CHECK: linalg.yield %[[VAL_4]] : f32
244 // CHECK: } -> tensor<3xf32>
245 %0 = tosa.add %arg0, %arg1 : (tensor<3xf32>, tensor<3xf32>) -> tensor<3xf32>
247 // CHECK: return %[[RESULT]] : tensor<3xf32>
248 return %0 : tensor<3xf32>
// 2-D, both operands fully dynamic: a per-dimension runtime maximum is
// computed first, then up to four conditional scf.if broadcasts run in
// sequence (dim 0 then dim 1 of each operand, each feeding the next) before
// the final elementwise add on the fully-broadcast operands.
253 // CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1) -> (0, d1)>
254 // CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1) -> (d0, d1)>
255 // CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1) -> (d0, 0)>
256 // CHECK-LABEL: @test_add_2d_all_dynamic
257 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
258 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
259 func.func @test_add_2d_all_dynamic(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
261 // CHECK: %[[CONST0:.*]] = arith.constant 0 : index
262 // CHECK: %[[ARG0_DIM0:.*]] = tensor.dim %[[ARG0]], %[[CONST0]] : tensor<?x?xf32>
263 // CHECK: %[[ARG1_DIM0:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?x?xf32>
264 // CHECK: %[[MAX_DIM0:.*]] = arith.maxui %[[ARG0_DIM0]], %[[ARG1_DIM0]] : index
265 // CHECK: %[[CONST1:.*]] = arith.constant 1 : index
266 // CHECK: %[[ARG0_DIM1:.*]] = tensor.dim %[[ARG0]], %[[CONST1]] : tensor<?x?xf32>
267 // CHECK: %[[ARG1_DIM1:.*]] = tensor.dim %[[ARG1]], %[[CONST1]] : tensor<?x?xf32>
268 // CHECK: %[[MAX_DIM1:.*]] = arith.maxui %[[ARG0_DIM1]], %[[ARG1_DIM1]] : index
270 // CHECK: %[[VAL_0:.*]] = tensor.dim %[[ARG0]], %[[CONST0]] : tensor<?x?xf32>
271 // CHECK: %[[VAL_1:.*]] = arith.cmpi eq, %[[VAL_0]], %[[CONST1]] : index
272 // CHECK: %[[ARG0_DIM0_BROADCAST:.*]] = scf.if %[[VAL_1]] -> (tensor<?x?xf32>) {
273 // CHECK: %[[LOCAL_CONST1:.*]] = arith.constant 1 : index
274 // CHECK: %[[VAL_2:.*]] = tensor.dim %[[ARG0]], %[[LOCAL_CONST1]] : tensor<?x?xf32>
275 // CHECK: %[[VAL_3:.*]] = tensor.empty(%[[MAX_DIM0]], %[[VAL_2]]) : tensor<?x?xf32>
276 // CHECK: %[[VAL_4:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x?xf32>) outs(%[[VAL_3]] : tensor<?x?xf32>) {
277 // CHECK: ^bb0(%[[VAL_5:.*]]: f32, %[[VAL_6:.*]]: f32):
278 // CHECK: linalg.yield %[[VAL_5]] : f32
279 // CHECK: } -> tensor<?x?xf32>
280 // CHECK: scf.yield %[[VAL_4]] : tensor<?x?xf32>
282 // CHECK: scf.yield %[[ARG0]] : tensor<?x?xf32>
285 // CHECK: %[[VAL_7:.*]] = tensor.dim %[[ARG0_DIM0_BROADCAST]], %[[CONST1]] : tensor<?x?xf32>
286 // CHECK: %[[VAL_8:.*]] = arith.cmpi eq, %[[VAL_7]], %[[CONST1]] : index
287 // CHECK: %[[ARG0_DIM1_BROADCAST:.*]] = scf.if %[[VAL_8]] -> (tensor<?x?xf32>) {
288 // CHECK: %[[LOCAL_CONST0:.*]] = arith.constant 0 : index
289 // CHECK: %[[VAL_9:.*]] = tensor.dim %[[ARG0_DIM0_BROADCAST]], %[[LOCAL_CONST0]] : tensor<?x?xf32>
290 // CHECK: %[[VAL_10:.*]] = tensor.empty(%[[VAL_9]], %[[MAX_DIM1]]) : tensor<?x?xf32>
291 // CHECK: %[[VAL_11:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0_DIM0_BROADCAST]] : tensor<?x?xf32>) outs(%[[VAL_10]] : tensor<?x?xf32>) {
292 // CHECK: ^bb0(%[[VAL_12:.*]]: f32, %[[VAL_13:.*]]: f32):
293 // CHECK: linalg.yield %[[VAL_12]] : f32
294 // CHECK: } -> tensor<?x?xf32>
295 // CHECK: scf.yield %[[VAL_11]] : tensor<?x?xf32>
297 // CHECK: scf.yield %[[ARG0_DIM0_BROADCAST]] : tensor<?x?xf32>
300 // CHECK: %[[VAL_14:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?x?xf32>
301 // CHECK: %[[VAL_15:.*]] = arith.cmpi eq, %[[VAL_14]], %[[CONST1]] : index
302 // CHECK: %[[ARG1_DIM0_BROADCAST:.*]] = scf.if %[[VAL_15]] -> (tensor<?x?xf32>) {
303 // CHECK: %[[LOCAL_CONST1:.*]] = arith.constant 1 : index
304 // CHECK: %[[VAL_16:.*]] = tensor.dim %[[ARG1]], %[[LOCAL_CONST1]] : tensor<?x?xf32>
305 // CHECK: %[[VAL_17:.*]] = tensor.empty(%[[MAX_DIM0]], %[[VAL_16]]) : tensor<?x?xf32>
306 // CHECK: %[[VAL_18:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG1]] : tensor<?x?xf32>) outs(%[[VAL_17]] : tensor<?x?xf32>) {
307 // CHECK: ^bb0(%[[VAL_19:.*]]: f32, %[[VAL_20:.*]]: f32):
308 // CHECK: linalg.yield %[[VAL_19]] : f32
309 // CHECK: } -> tensor<?x?xf32>
310 // CHECK: scf.yield %[[VAL_18]] : tensor<?x?xf32>
312 // CHECK: scf.yield %[[ARG1]] : tensor<?x?xf32>
315 // CHECK: %[[VAL_21:.*]] = tensor.dim %[[ARG1_DIM0_BROADCAST]], %[[CONST1]] : tensor<?x?xf32>
316 // CHECK: %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_21]], %[[CONST1]] : index
317 // CHECK: %[[ARG1_DIM1_BROADCAST:.*]] = scf.if %[[VAL_22]] -> (tensor<?x?xf32>) {
318 // CHECK: %[[LOCAL_CONST0:.*]] = arith.constant 0 : index
319 // CHECK: %[[VAL_23:.*]] = tensor.dim %[[ARG1_DIM0_BROADCAST]], %[[LOCAL_CONST0]] : tensor<?x?xf32>
320 // CHECK: %[[VAL_24:.*]] = tensor.empty(%[[VAL_23]], %[[MAX_DIM1]]) : tensor<?x?xf32>
321 // CHECK: %[[VAL_25:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG1_DIM0_BROADCAST]] : tensor<?x?xf32>) outs(%[[VAL_24]] : tensor<?x?xf32>) {
322 // CHECK: ^bb0(%[[VAL_26:.*]]: f32, %[[VAL_27:.*]]: f32):
323 // CHECK: linalg.yield %[[VAL_26]] : f32
324 // CHECK: } -> tensor<?x?xf32>
325 // CHECK: scf.yield %[[VAL_25]] : tensor<?x?xf32>
327 // CHECK: scf.yield %[[ARG1_DIM0_BROADCAST]] : tensor<?x?xf32>
330 // CHECK: %[[VAL_28:.*]] = tensor.empty(%[[MAX_DIM0]], %[[MAX_DIM1]]) : tensor<?x?xf32>
331 // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0_DIM1_BROADCAST]], %[[ARG1_DIM1_BROADCAST]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[VAL_28]] : tensor<?x?xf32>) {
332 // CHECK: ^bb0(%[[VAL_29:.*]]: f32, %[[VAL_30:.*]]: f32, %[[VAL_31:.*]]: f32):
333 // CHECK: %[[VAL_32:.*]] = arith.addf %[[VAL_29]], %[[VAL_30]] : f32
334 // CHECK: linalg.yield %[[VAL_32]] : f32
335 // CHECK: } -> tensor<?x?xf32>
336 %0 = tosa.add %arg0, %arg1 : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
338 // CHECK: return %[[RESULT]] : tensor<?x?xf32>
339 return %0 : tensor<?x?xf32>
// Rank broadcast: the lower-rank operand is first rank-expanded to 1x3x4 via
// tensor.expand_shape, then broadcast along the new leading dim through the
// (0, d1, d2) indexing map.
344 // CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2) -> (0, d1, d2)>
345 // CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
346 // CHECK-LABEL: @test_add_2d_different_ranks
347 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
348 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
349 func.func @test_add_2d_different_ranks(%arg0: tensor<3x4xf32>, %arg1: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
351 // CHECK: %[[ARG0_EXPANDED:.*]] = tensor.expand_shape %[[ARG0]] {{\[\[}}0, 1], [2]] output_shape [1, 3, 4] : tensor<3x4xf32> into tensor<1x3x4xf32>
352 // CHECK: %[[VAL_0:.*]] = tensor.empty() : tensor<2x3x4xf32>
353 // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG0_EXPANDED]], %[[ARG1]] : tensor<1x3x4xf32>, tensor<2x3x4xf32>) outs(%[[VAL_0]] : tensor<2x3x4xf32>) {
354 // CHECK: ^bb0(%[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: f32, %[[VAL_3:.*]]: f32):
355 // CHECK: %[[VAL_4:.*]] = arith.addf %[[VAL_1]], %[[VAL_2]] : f32
356 // CHECK: linalg.yield %[[VAL_4]] : f32
357 // CHECK: } -> tensor<2x3x4xf32>
358 %0 = tosa.add %arg0, %arg1 : (tensor<3x4xf32>, tensor<2x3x4xf32>) -> tensor<2x3x4xf32>
360 // CHECK: return %[[RESULT]] : tensor<2x3x4xf32>
361 return %0 : tensor<2x3x4xf32>
// tosa.select with a dynamic dim 1: the runtime maximum is folded across all
// three operands (condition, then, else), each operand gets its own scf.if
// broadcast along dim 1, and the final generic applies arith.select.
366 // CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1) -> (d0, 0)>
367 // CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1) -> (d0, d1)>
368 // CHECK-LABEL: @test_select_2d_one_dynamic
369 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]:
370 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
371 // CHECK-SAME: %[[ARG2:[0-9a-zA-Z_]*]]:
372 func.func @test_select_2d_one_dynamic(%arg0: tensor<2x?xi1>, %arg1: tensor<2x?xf32>, %arg2: tensor<2x?xf32>) -> tensor<2x?xf32> {
374 // CHECK: %[[CONST1:.*]] = arith.constant 1 : index
375 // CHECK: %[[ARG0_DIM1:.*]] = tensor.dim %[[ARG0]], %[[CONST1]] : tensor<2x?xi1>
376 // CHECK: %[[ARG1_DIM1:.*]] = tensor.dim %[[ARG1]], %[[CONST1]] : tensor<2x?xf32>
377 // CHECK: %[[VAL_0:.*]] = arith.maxui %[[ARG0_DIM1]], %[[ARG1_DIM1]] : index
378 // CHECK: %[[ARG2_DIM1:.*]] = tensor.dim %[[ARG2]], %[[CONST1]] : tensor<2x?xf32>
379 // CHECK: %[[MAX_DIM1:.*]] = arith.maxui %[[VAL_0]], %[[ARG2_DIM1]] : index
381 // CHECK: %[[VAL_1:.*]] = tensor.dim %[[ARG0]], %[[CONST1]] : tensor<2x?xi1>
382 // CHECK: %[[VAL_2:.*]] = arith.cmpi eq, %[[VAL_1]], %[[CONST1]] : index
383 // CHECK: %[[ARG0_BROADCAST:.*]] = scf.if %[[VAL_2]] -> (tensor<2x?xi1>) {
384 // CHECK: %[[VAL_3:.*]] = tensor.empty(%[[MAX_DIM1]]) : tensor<2x?xi1>
385 // CHECK: %[[VAL_4:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x?xi1>) outs(%[[VAL_3]] : tensor<2x?xi1>) {
386 // CHECK: ^bb0(%[[VAL_5:.*]]: i1, %[[VAL_6:.*]]: i1):
387 // CHECK: linalg.yield %[[VAL_5]] : i1
388 // CHECK: } -> tensor<2x?xi1>
389 // CHECK: scf.yield %[[VAL_4]] : tensor<2x?xi1>
391 // CHECK: scf.yield %[[ARG0]] : tensor<2x?xi1>
394 // CHECK: %[[VAL_7:.*]] = tensor.dim %[[ARG1]], %[[CONST1]] : tensor<2x?xf32>
395 // CHECK: %[[VAL_8:.*]] = arith.cmpi eq, %[[VAL_7]], %[[CONST1]] : index
396 // CHECK: %[[ARG1_BROADCAST:.*]] = scf.if %[[VAL_8]] -> (tensor<2x?xf32>) {
397 // CHECK: %[[VAL_9:.*]] = tensor.empty(%[[MAX_DIM1]]) : tensor<2x?xf32>
398 // CHECK: %[[VAL_10:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x?xf32>) outs(%[[VAL_9]] : tensor<2x?xf32>) {
399 // CHECK: ^bb0(%[[VAL_11:.*]]: f32, %[[VAL_12:.*]]: f32):
400 // CHECK: linalg.yield %[[VAL_11]] : f32
401 // CHECK: } -> tensor<2x?xf32>
402 // CHECK: scf.yield %[[VAL_10]] : tensor<2x?xf32>
404 // CHECK: scf.yield %[[ARG1]] : tensor<2x?xf32>
407 // CHECK: %[[VAL_13:.*]] = tensor.dim %[[ARG2]], %[[CONST1]] : tensor<2x?xf32>
408 // CHECK: %[[VAL_14:.*]] = arith.cmpi eq, %[[VAL_13]], %[[CONST1]] : index
409 // CHECK: %[[ARG2_BROADCAST:.*]] = scf.if %[[VAL_14]] -> (tensor<2x?xf32>) {
410 // CHECK: %[[VAL_15:.*]] = tensor.empty(%[[MAX_DIM1]]) : tensor<2x?xf32>
411 // CHECK: %[[VAL_16:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG2]] : tensor<2x?xf32>) outs(%[[VAL_15]] : tensor<2x?xf32>) {
412 // CHECK: ^bb0(%[[VAL_17:.*]]: f32, %[[VAL_18:.*]]: f32):
413 // CHECK: linalg.yield %[[VAL_17]] : f32
414 // CHECK: } -> tensor<2x?xf32>
415 // CHECK: scf.yield %[[VAL_16]] : tensor<2x?xf32>
417 // CHECK: scf.yield %[[ARG2]] : tensor<2x?xf32>
420 // CHECK: %[[VAL_19:.*]] = tensor.empty(%[[MAX_DIM1]]) : tensor<2x?xf32>
421 // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0_BROADCAST]], %[[ARG1_BROADCAST]], %[[ARG2_BROADCAST]] : tensor<2x?xi1>, tensor<2x?xf32>, tensor<2x?xf32>) outs(%[[VAL_19]] : tensor<2x?xf32>) {
422 // CHECK: ^bb0(%[[VAL_20:.*]]: i1, %[[VAL_21:.*]]: f32, %[[VAL_22:.*]]: f32, %[[VAL_23:.*]]: f32):
423 // CHECK: %[[VAL_24:.*]] = arith.select %[[VAL_20]], %[[VAL_21]], %[[VAL_22]] : f32
424 // CHECK: linalg.yield %[[VAL_24]] : f32
425 // CHECK: } -> tensor<2x?xf32>
426 %0 = tosa.select %arg0, %arg1, %arg2 : (tensor<2x?xi1>, tensor<2x?xf32>, tensor<2x?xf32>) -> tensor<2x?xf32>
428 // CHECK: return %[[RESULT]] : tensor<2x?xf32>
429 return %0 : tensor<2x?xf32>
// Smoke test: each floating-point elementwise TOSA op lowers to its own
// linalg.generic; only the presence of the generic (and a few key payload
// ops) is matched, not the full payload.
434 // CHECK-LABEL: @test_simple_f32
435 func.func @test_simple_f32(%arg0: tensor<1xf32>) -> () {
436 // CHECK: linalg.generic
438 %0 = tosa.tanh %arg0 : (tensor<1xf32>) -> tensor<1xf32>
440 // CHECK: linalg.generic
442 %1 = tosa.abs %arg0 : (tensor<1xf32>) -> tensor<1xf32>
444 // CHECK: linalg.generic
446 %2 = tosa.add %0, %0 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
448 // CHECK: linalg.generic
450 %3 = tosa.sub %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
452 // CHECK: linalg.generic
454 %4 = tosa.mul %0, %1 {shift = 0 : i8} : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
456 // CHECK: linalg.generic
458 %5 = tosa.negate %0 : (tensor<1xf32>) -> tensor<1xf32>
460 // CHECK: linalg.generic
462 %6 = tosa.pow %1, %2 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
464 // CHECK: linalg.generic
466 %7 = tosa.rsqrt %1 : (tensor<1xf32>) -> tensor<1xf32>
468 // CHECK: linalg.generic
470 %8 = tosa.log %arg0 : (tensor<1xf32>) -> tensor<1xf32>
472 // CHECK: linalg.generic
474 %9 = tosa.exp %arg0 : (tensor<1xf32>) -> tensor<1xf32>
476 // CHECK: linalg.generic
478 %10 = tosa.greater %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xi1>
480 // CHECK: linalg.generic
482 %11 = tosa.greater_equal %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xi1>
484 // CHECK: linalg.generic
486 %12 = tosa.equal %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xi1>
488 // CHECK: linalg.generic
490 %13 = tosa.select %10, %0, %1 : (tensor<1xi1>, tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
492 // CHECK: linalg.generic
493 // CHECK: arith.maximumf
494 %14 = tosa.maximum %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
496 // CHECK: linalg.generic
497 // CHECK: arith.minimumf
498 %15 = tosa.minimum %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
500 // CHECK: linalg.generic
502 %16 = tosa.ceil %0 : (tensor<1xf32>) -> tensor<1xf32>
504 // CHECK: linalg.generic
506 %17 = tosa.floor %0 : (tensor<1xf32>) -> tensor<1xf32>
508 // CHECK: linalg.generic
509 // CHECK: arith.minimumf
510 // CHECK: arith.maximumf
511 %18 = tosa.clamp %0 {min_int = 1 : i64, max_int = 5 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xf32>) -> tensor<1xf32>
513 // CHECK: linalg.generic
518 %19 = tosa.sigmoid %0 : (tensor<1xf32>) -> tensor<1xf32>
// f32 -> i32 cast: round-to-even, then saturate — values at or above the
// largest exactly-representable bound select the integer max instead.
520 // CHECK: linalg.generic
521 // CHECK: [[ROUND:%.+]] = math.roundeven {{%.+}} : f32
522 // CHECK: [[CSTMIN:%.+]] = arith.constant -2.14748365E+9 : f32
523 // CHECK: [[CSTMAXP1:%.+]] = arith.constant 2.14748365E+9 : f32
524 // CHECK: [[CSTMAX:%.+]] = arith.constant 2147483647 : i32
525 // CHECK: [[MAX:%.+]] = arith.maximumf [[ROUND]], [[CSTMIN]] : f32
526 // CHECK: [[CONV:%.+]] = arith.fptosi [[MAX]] : f32 to i32
527 // CHECK: [[CMP:%.+]] = arith.cmpf uge, [[ROUND]], [[CSTMAXP1]] : f32
528 // CHECK: arith.select [[CMP]], [[CSTMAX]], [[CONV]] : i32
529 %20 = tosa.cast %0 : (tensor<1xf32>) -> tensor<1xi32>
531 // CHECK: linalg.generic
532 // CHECK: arith.constant 0
534 %21 = tosa.cast %0 : (tensor<1xf32>) -> tensor<1xi1>
536 // CHECK: linalg.generic
537 // CHECK: arith.truncf
538 %22 = tosa.cast %0 : (tensor<1xf32>) -> tensor<1xf16>
540 // CHECK: linalg.generic
542 %23 = tosa.reciprocal %0 : (tensor<1xf32>) -> tensor<1xf32>
544 // CHECK: linalg.generic
546 %24 = tosa.erf %0 : (tensor<1xf32>) -> tensor<1xf32>
548 // CHECK: linalg.generic
550 %25 = tosa.sin %arg0 : (tensor<1xf32>) -> tensor<1xf32>
552 // CHECK: linalg.generic
554 %26 = tosa.cos %arg0 : (tensor<1xf32>) -> tensor<1xf32>
// f16 casts: widening to f32, and saturating fp-to-int conversions. For i8
// the whole i8 range is representable in f16, so a min/max clamp suffices;
// for i32 the lowering instead selects the integer min/max when the rounded
// value compares equal to +/-infinity (0x7C00 / 0xFC00 are the f16 infinity
// bit patterns).
561 // CHECK-LABEL: @test_simple_f16
562 func.func @test_simple_f16(%arg0: tensor<1xf16>) -> () {
564 // CHECK: linalg.generic
566 %0 = tosa.cast %arg0 : (tensor<1xf16>) -> tensor<1xf32>
568 // CHECK: linalg.generic
569 // CHECK: [[ROUND:%.+]] = math.roundeven {{%.+}} : f16
570 // CHECK: [[CSTMIN:%.+]] = arith.constant -1.280000e+02 : f16
571 // CHECK: [[CSTMAX:%.+]] = arith.constant 1.270000e+02 : f16
572 // CHECK: [[MIN:%.+]] = arith.minimumf [[ROUND]], [[CSTMAX]] : f16
573 // CHECK: [[CLAMP:%.+]] = arith.maximumf [[MIN]], [[CSTMIN]] : f16
574 // CHECK: arith.fptosi [[CLAMP]] : f16 to i8
575 %1 = "tosa.cast"(%arg0) : (tensor<1xf16>) -> tensor<1xi8>
577 // CHECK: linalg.generic
578 // CHECK: [[ROUND:%.+]] = math.roundeven {{%[a-z0-9_]+}} : f16
579 // CHECK: [[CONV:%.+]] = arith.fptosi [[ROUND]] : f16 to i32
580 // CHECK: [[POSINF:%.+]] = arith.constant 0x7C00 : f16
581 // CHECK: [[NEGINF:%.+]] = arith.constant 0xFC00 : f16
582 // CHECK: [[OVERFLOW:%.+]] = arith.cmpf ueq, [[ROUND]], [[POSINF]] : f16
583 // CHECK: [[UNDERFLOW:%.+]] = arith.cmpf ueq, [[ROUND]], [[NEGINF]] : f16
584 // CHECK: [[MININT:%.+]] = arith.constant -2147483648 : i32
585 // CHECK: [[MAXINT:%.+]] = arith.constant 2147483647 : i32
586 // CHECK: [[CLAMPPOSINF:%.+]] = arith.select [[OVERFLOW]], [[MAXINT]], [[CONV]] : i32
587 // CHECK: arith.select [[UNDERFLOW]], [[MININT]], [[CLAMPPOSINF]] : i32
588 %2 = "tosa.cast"(%arg0) : (tensor<1xf16>) -> tensor<1xi32>
// i16 x i16 multiply producing i32: both operands are sign-extended to i32
// before the multiplication.
594 // CHECK-LABEL: @test_simple_i16
595 func.func @test_simple_i16(%arg0: tensor<1xi16>) -> () {
596 // CHECK: linalg.generic
597 // CHECK: arith.extsi
598 // CHECK: arith.extsi
600 %0 = tosa.mul %arg0, %arg0 {shift = 0 : i8} : (tensor<1xi16>, tensor<1xi16>) -> tensor<1xi32>
// Unsigned integer source: the cast to float uses arith.uitofp (not sitofp).
607 // CHECK-LABEL: @test_simple_ui8
608 func.func @test_simple_ui8(%arg0: tensor<1xui8>) -> () {
609 // CHECK: arith.uitofp
610 %0 = tosa.cast %arg0 : (tensor<1xui8>) -> tensor<1xf32>
// Smoke test for i32 (and unsigned) elementwise ops: arithmetic, shifted
// multiply via apply_scale, bitwise/shift ops, comparisons, clamps (note the
// unsigned clamps saturate out-of-range i64 bounds to the storage type's
// unsigned range), and integer casts.
// NOTE(review): this function continues beyond the visible portion of the
// file; the trailing ops are not shown here.
616 // CHECK-LABEL: @test_simple_i32
617 func.func @test_simple_i32(%arg0: tensor<1xi32>, %unsigned: tensor<1xui32>, %unsigned64: tensor<1xui64>) -> () {
618 // CHECK: linalg.generic
620 %0 = tosa.add %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
622 // CHECK: linalg.generic
624 %1 = tosa.sub %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
626 // CHECK: linalg.generic
628 %2 = tosa.mul %arg0, %arg0 {shift = 0 : i8} : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
630 // CHECK: linalg.generic
631 // CHECK: arith.constant 2
632 // CHECK: apply_scale
633 %3 = tosa.mul %arg0, %arg0 {shift = 2 : i8} : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
635 // CHECK: linalg.generic
636 // CHECK: arith.divsi
637 %40 = tosa.int_div %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
639 // CHECK: linalg.generic
640 // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32):
641 // CHECK: [[ZERO:%.+]] = arith.constant 0
642 // CHECK: arith.subi [[ZERO]], %[[ARG1]]
643 %5 = tosa.negate %arg0 : (tensor<1xi32>) -> tensor<1xi32>
645 // CHECK: linalg.generic
647 %6 = tosa.bitwise_and %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
649 // CHECK: linalg.generic
651 %7 = tosa.bitwise_or %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
653 // CHECK: linalg.generic
655 %8 = tosa.bitwise_xor %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
657 // CHECK: linalg.generic
659 %9 = tosa.logical_left_shift %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
661 // CHECK: linalg.generic
662 // CHECK: arith.shrui
663 %10 = tosa.logical_right_shift %arg0, %arg0 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
665 // CHECK: linalg.generic
666 // CHECK: arith.shrsi
667 %11 = tosa.arithmetic_right_shift %arg0, %arg0 {round = 0 : i1} : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
669 // CHECK: linalg.generic
670 // CHECK: arith.constant 1
671 // CHECK: arith.constant 0
672 // CHECK: arith.constant true
675 // CHECK: arith.shrsi
676 // CHECK: arith.trunci
679 // CHECK: arith.extui
681 %12 = tosa.arithmetic_right_shift %arg0, %arg0 {round = 1 : i1} : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
684 %13 = tosa.clz %arg0 : (tensor<1xi32>) -> tensor<1xi32>
686 // CHECK: linalg.generic
688 %14 = tosa.greater %0, %1 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi1>
690 // CHECK: linalg.generic
692 %15 = tosa.greater_equal %0, %1 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi1>
694 // CHECK: linalg.generic
696 %16 = tosa.select %14, %0, %1 : (tensor<1xi1>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
698 // CHECK: linalg.generic
699 // CHECK: arith.maxsi
700 %17 = tosa.maximum %0, %1 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
702 // CHECK: linalg.generic
703 // CHECK: arith.minsi
704 %18 = tosa.minimum %0, %1 : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32>
706 // CHECK: linalg.generic
707 // CHECK-DAG: arith.maxsi
708 // CHECK-DAG: arith.minsi
709 %19 = tosa.clamp %0 {min_int = 1 : i64, max_int = 5 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xi32>) -> tensor<1xi32>
711 // CHECK: linalg.generic
712 // CHECK-DAG: %[[LB:.*]] = arith.constant 4 : i32
713 // CHECK-DAG: %[[UB:.*]] = arith.constant 32 : i32
714 // CHECK-DAG: arith.maxui %[[LB]],
715 // CHECK-DAG: arith.minui %[[UB]],
716 %u0 = tosa.clamp %unsigned {min_int = 4 : i64, max_int = 32 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xui32>) -> tensor<1xui32>
// Bounds above the ui32 range saturate to 0xFFFFFFFF (printed as -1 : i32).
718 // CHECK: linalg.generic
719 // CHECK-DAG: %[[LB:.*]] = arith.constant -1 : i32
720 // CHECK-DAG: %[[UB:.*]] = arith.constant -1 : i32
721 // CHECK-DAG: arith.maxui %[[LB]],
722 // CHECK-DAG: arith.minui %[[UB]],
723 %u1 = tosa.clamp %unsigned {min_int = 9223372036854775807 : i64, max_int = 9223372036854775807 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xui32>) -> tensor<1xui32>
725 // CHECK: linalg.generic
726 // CHECK-DAG: %[[LB:.*]] = arith.constant 0 : i32
727 // CHECK-DAG: %[[UB:.*]] = arith.constant 0 : i32
728 // CHECK-DAG: arith.maxui %[[LB]],
729 // CHECK-DAG: arith.minui %[[UB]],
730 %u2 = tosa.clamp %unsigned {min_int = -3 : i64, max_int = -2 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xui32>) -> tensor<1xui32>
732 // CHECK: linalg.generic
733 // CHECK-DAG: %[[LB:.*]] = arith.constant 0 : i64
734 // CHECK-DAG: %[[UB:.*]] = arith.constant 9223372036854775807 : i64
735 // CHECK-DAG: arith.maxui %[[LB]],
736 // CHECK-DAG: arith.minui %[[UB]],
737 %u3 = tosa.clamp %unsigned64 {min_int = -3 : i64, max_int = 9223372036854775807 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xui64>) -> tensor<1xui64>
739 // CHECK: linalg.generic
740 // CHECK: arith.trunci
741 %20 = tosa.cast %0 : (tensor<1xi32>) -> tensor<1xi16>
743 // CHECK: linalg.generic
744 // CHECK: arith.extsi
745 %21 = tosa.cast %0 : (tensor<1xi32>) -> tensor<1xi64>
747 // CHECK: linalg.generic
748 // CHECK: arith.constant 0
750 %22 = tosa.cast %0 : (tensor<1xi32>) -> tensor<1xi1>
752 // CHECK: linalg.generic
753 // CHECK: arith.sitofp
754 %23 = tosa.cast %0 : (tensor<1xi32>) -> tensor<1xf32>
756 // CHECK: linalg.generic
757 // CHECK: arith.constant 0
759 // CHECK: arith.maxsi
760 %24 = tosa.abs %arg0 : (tensor<1xi32>) -> tensor<1xi32>
// test_simple_ui8: tosa.cast from an 8-bit integer tensor to f32 lowers to an
// elementwise linalg.generic.
767 // CHECK-LABEL: @test_simple_ui8
768 func.func @test_simple_ui8(%arg0: tensor<1xi8>) -> () {
770 // CHECK: linalg.generic
772 %0 = tosa.cast %arg0 : (tensor<1xi8>) -> tensor<1xf32>
// test_i8: tosa.clamp on i8 lowers to arith.maxsi/arith.minsi against the
// requested integer bounds.
779 // CHECK-LABEL: @test_i8
780 func.func @test_i8(%arg0: tensor<1xi8>) -> () {
781 // CHECK: linalg.generic
782 // CHECK: ^bb0(%[[ARG1:.+]]: i8,
783 // CHECK-DAG: %[[C127:.+]] = arith.constant -127
784 // CHECK-DAG: %[[C126:.+]] = arith.constant 126
785 // CHECK-DAG: %[[LOWER:.+]] = arith.maxsi %[[C127]], %[[ARG1]]
786 // CHECK-DAG: %[[CLAMPED:.+]] = arith.minsi %[[C126]], %[[LOWER]]
787 %0 = tosa.clamp %arg0 {min_int = -127 : i64, max_int = 126 : i64, min_fp = 0.0 : f32, max_fp = 0.0 : f32} : (tensor<1xi8>) -> tensor<1xi8>
// Bounds that fall outside the i8 value range (-130 / 130) are expected to be
// tightened to the representable range [-128, 127].
789 // CHECK: linalg.generic
790 // CHECK: ^bb0(%[[ARG1:.+]]: i8,
791 // CHECK-DAG: %[[C128:.+]] = arith.constant -128
792 // CHECK-DAG: %[[C127:.+]] = arith.constant 127
793 // CHECK-DAG: %[[LOWER:.+]] = arith.maxsi %[[C128]], %[[ARG1]]
794 // CHECK-DAG: %[[CLAMPED:.+]] = arith.minsi %[[C127]], %[[LOWER]]
795 %1 = tosa.clamp %arg0 {min_int = -130 : i64, max_int = 130 : i64, min_fp = 0.0 : f32, max_fp = 0.0 : f32} : (tensor<1xi8>) -> tensor<1xi8>
// test_i64: tosa.clamp spanning the full i64 range still materializes both
// bound constants and the maxsi/minsi pair.
802 // CHECK-LABEL: @test_i64
803 func.func @test_i64(%arg0: tensor<1xi64>) -> () {
804 // CHECK: linalg.generic
805 // CHECK: ^bb0(%[[ARG1:.+]]: i64,
806 // CHECK-DAG: %[[C127:.+]] = arith.constant -9223372036854775808
807 // CHECK-DAG: %[[C126:.+]] = arith.constant 9223372036854775807
808 // CHECK-DAG: %[[LOWER:.+]] = arith.maxsi %[[C127]], %[[ARG1]]
809 // CHECK-DAG: %[[CLAMPED:.+]] = arith.minsi %[[C126]], %[[LOWER]]
810 %0 = tosa.clamp %arg0 {min_int = -9223372036854775808 : i64, max_int = 9223372036854775807 : i64, min_fp = 0.0 : f32, max_fp = 0.0 : f32} : (tensor<1xi64>) -> tensor<1xi64>
// test_clamp_f16: for a floating-point operand the min_fp/max_fp attributes
// drive the lowering, producing arith.minimumf followed by arith.maximumf.
817 // CHECK-LABEL: @test_clamp_f16
818 func.func @test_clamp_f16(%arg0: tensor<1xf16>) -> () {
819 // CHECK: linalg.generic
820 // CHECK: ^bb0(%[[ARG1:.+]]: f16,
821 // CHECK-DAG: %[[C0:.+]] = arith.constant 0.0
822 // CHECK-DAG: %[[C6:.+]] = arith.constant 6.0
823 // CHECK-DAG: %[[MIN:.+]] = arith.minimumf %[[ARG1]], %[[C6]]
824 // CHECK-DAG: %[[MAX:.+]] = arith.maximumf %[[MIN]], %[[C0]]
825 %0 = tosa.clamp %arg0 {min_int = 0 : i64, max_int = 0 : i64, min_fp = 0.0 : f32, max_fp = 6.0 : f32} : (tensor<1xf16>) -> tensor<1xf16>
// test_bool: boolean (i1) logical ops each lower to an elementwise
// linalg.generic; logical_not is implemented via xor with constant true.
832 // CHECK-LABEL: @test_bool
833 func.func @test_bool(%arg0: tensor<1xi1>, %arg1: tensor<1xi1>) -> () {
834 // CHECK: linalg.generic
836 %0 = tosa.logical_and %arg0, %arg1 : (tensor<1xi1>, tensor<1xi1>) -> tensor<1xi1>
838 // CHECK: linalg.generic
840 %1 = tosa.logical_or %arg0, %arg1 : (tensor<1xi1>, tensor<1xi1>) -> tensor<1xi1>
842 // CHECK: linalg.generic
844 %2 = tosa.logical_xor %arg0, %arg1 : (tensor<1xi1>, tensor<1xi1>) -> tensor<1xi1>
846 // CHECK: linalg.generic
847 // CHECK: arith.constant true
849 %3 = tosa.logical_not %arg0 : (tensor<1xi1>) -> tensor<1xi1>
// test_negate_quantized: quantized tosa.negate computes (in_zp + out_zp) - x
// in a widened integer type, clamps to the storage range, then truncates back
// to i8. The width of the intermediate depends on the zero-point magnitudes.
856 // CHECK-LABEL: @test_negate_quantized
857 func.func @test_negate_quantized(%arg0: tensor<1xi8>) -> () {
858 // CHECK: linalg.generic
859 // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
860 // CHECK: [[CNST:%.+]] = arith.constant 7
861 // CHECK: [[EXT:%.+]] = arith.extsi %[[BBARG0]] : i8 to i16
862 // CHECK: [[SUB:%.+]] = arith.subi [[CNST]], [[EXT]]
863 // CHECK: [[MIN:%.+]] = arith.constant -128
864 // CHECK: [[MAX:%.+]] = arith.constant 127
865 // CHECK: [[LBOUND:%.+]] = arith.maxsi [[MIN]], [[SUB]]
866 // CHECK: [[UBOUND:%.+]] = arith.minsi [[MAX]], [[LBOUND]]
867 // CHECK: [[TRUNC:%.+]] = arith.trunci [[UBOUND]]
868 // CHECK: linalg.yield [[TRUNC]]
869 %0 = tosa.negate %arg0 {quantization_info = #tosa.unary_quant<input_zp = 0, output_zp = 7>} : (tensor<1xi8>) -> tensor<1xi8>
// input_zp = 32639 still fits the i16 intermediate ...
871 // CHECK: linalg.generic
872 // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
873 // CHECK: [[EXT:%.+]] = arith.extsi %[[BBARG0]] : i8 to i16
874 %1 = tosa.negate %arg0 {quantization_info = #tosa.unary_quant<input_zp = 32639, output_zp = 0>} : (tensor<1xi8>) -> tensor<1xi8>
// ... while input_zp = 32640 forces widening to i32.
876 // CHECK: linalg.generic
877 // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
878 // CHECK: [[EXT:%.+]] = arith.extsi %[[BBARG0]] : i8 to i32
879 %2 = tosa.negate %arg0 {quantization_info = #tosa.unary_quant<input_zp = 32640, output_zp = 0>} : (tensor<1xi8>) -> tensor<1xi8>
// Both zero-points at 0 degenerate to a plain subtraction from zero with no
// widening or clamping.
881 // CHECK: linalg.generic
882 // CHECK: ^bb0(%[[BBARG0:.+]]: i8,
883 // CHECK: [[ZERO:%.+]] = arith.constant 0
884 // CHECK: [[SUB:%.+]] = arith.subi [[ZERO]],
885 // CHECK: linalg.yield [[SUB]]
886 %3 = tosa.negate %arg0 {quantization_info = #tosa.unary_quant<input_zp = 0, output_zp = 0>} : (tensor<1xi8>) -> tensor<1xi8>
// test_identity: tosa.identity generates no ops at all — the conversion
// forwards the operands, so the function returns its arguments directly.
893 // CHECK-LABEL: @test_identity
894 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]: tensor<1xf32>,
895 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: tensor<1xi32>
896 func.func @test_identity(%arg0: tensor<1xf32>, %arg1: tensor<1xi32>) -> (tensor<1xf32>, tensor<1xi32>) {
897 %0 = tosa.identity %arg0 : (tensor<1xf32>) -> tensor<1xf32>
898 %1 = tosa.identity %arg1 : (tensor<1xi32>) -> tensor<1xi32>
900 // CHECK: return %[[ARG0]], %[[ARG1]]
901 return %0, %1 : tensor<1xf32>, tensor<1xi32>
// reduce_float: float reductions lower to fill (with the reduction's identity
// element) + linalg.reduce, then tensor.expand_shape restores the kept
// unit dimension on the reduced axis.
906 // CHECK-LABEL: @reduce_float
907 // CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xf32>
908 func.func @reduce_float(%arg0: tensor<5x4xf32>) -> () {
909 // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<4xf32>
910 // CHECK: [[CST0:%.+]] = arith.constant 0.0
911 // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
912 // CHECK: [[REDUCE:%.+]] = linalg.reduce ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<4xf32>) dimensions = [0]
913 // CHECK: (%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) {
914 // CHECK: [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32
915 // CHECK: linalg.yield [[RES]] : f32
917 // CHECK: tensor.expand_shape [[REDUCE]] {{\[}}[0, 1]] output_shape [1, 4] : tensor<4xf32> into tensor<1x4xf32>
918 %0 = tosa.reduce_sum %arg0 {axis = 0 : i32} : (tensor<5x4xf32>) -> tensor<1x4xf32>
920 // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5xf32>
921 // CHECK: [[CST0:%.+]] = arith.constant 0.0
922 // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
923 // CHECK: [[REDUCE:%.+]] = linalg.reduce ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<5xf32>) dimensions = [1]
924 // CHECK: (%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) {
925 // CHECK: [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32
926 // CHECK: linalg.yield [[RES]] : f32
928 // CHECK: tensor.expand_shape [[REDUCE]] {{\[}}[0, 1]] output_shape [5, 1] : tensor<5xf32> into tensor<5x1xf32>
929 %1 = tosa.reduce_sum %arg0 {axis = 1 : i32} : (tensor<5x4xf32>) -> tensor<5x1xf32>
// Identity elements: 1.0 for product, +FLT_MAX for min, -FLT_MAX for max.
931 // CHECK: arith.constant 1.0
932 // CHECK: linalg.fill
933 // CHECK: linalg.reduce
935 %2 = tosa.reduce_prod %arg0 {axis = 0 : i32} : (tensor<5x4xf32>) -> tensor<1x4xf32>
937 // CHECK: arith.constant 3.40282347E+38 : f32
938 // CHECK: linalg.fill
939 // CHECK: linalg.reduce
940 // CHECK: arith.minimumf
941 %3 = tosa.reduce_min %arg0 {axis = 0 : i32} : (tensor<5x4xf32>) -> tensor<1x4xf32>
943 // CHECK: arith.constant -3.40282347E+38 : f32
944 // CHECK: linalg.fill
945 // CHECK: linalg.reduce
946 // CHECK: arith.maximumf
947 %4 = tosa.reduce_max %arg0 {axis = 0 : i32} : (tensor<5x4xf32>) -> tensor<1x4xf32>
// reduce_float_dyn: reducing a non-dynamic axis of a dynamically-shaped
// tensor; the dynamic batch extent is queried with tensor.dim and threaded
// into tensor.empty and the expand_shape output_shape.
953 // CHECK-LABEL: @reduce_float_dyn
954 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]: tensor<?x5x4xf32>
955 func.func @reduce_float_dyn(%arg0: tensor<?x5x4xf32>) -> () {
956 // CHECK: %[[C0:.+]] = arith.constant 0
957 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]]
958 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<?x4xf32>
959 // CHECK: %[[CST0:.+]] = arith.constant 0.0
960 // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]]
961 // CHECK: %[[REDUCE:.+]] = linalg.reduce ins(%[[ARG0]] : tensor<?x5x4xf32>) outs(%[[FILL]] : tensor<?x4xf32>) dimensions = [1]
962 // CHECK: (%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) {
963 // CHECK: %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32
964 // CHECK: linalg.yield %[[RES]] : f32
966 // CHECK: %[[C0_0:.+]] = arith.constant 0 : index
967 // CHECK: %[[DIM_1:.+]] = tensor.dim %[[REDUCE]], %[[C0_0]] : tensor<?x4xf32>
968 // CHECK: %[[C1:.+]] = arith.constant 1 : index
969 // CHECK: tensor.expand_shape %[[REDUCE]] {{\[}}[0], [1, 2]] output_shape [%[[DIM_1]], 1, 4] : tensor<?x4xf32> into tensor<?x1x4xf32>
970 %0 = tosa.reduce_sum %arg0 {axis = 1 : i32} : (tensor<?x5x4xf32>) -> tensor<?x1x4xf32>
// reduce_float_dyn_rank_1: reducing the only axis of a rank-1 dynamic tensor
// yields a rank-0 accumulator; the expand_shape has an empty reassociation
// and a static [1] output shape.
976 // CHECK-LABEL: @reduce_float_dyn_rank_1
977 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z_]*]]: tensor<?xf32>
978 func.func @reduce_float_dyn_rank_1(%arg0: tensor<?xf32>) -> () {
979 // CHECK-DAG: %[[INIT:.+]] = tensor.empty() : tensor<f32>
980 // CHECK-DAG: %[[CST0:.+]] = arith.constant 0.0
981 // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]]
982 // CHECK: %[[REDUCE:.+]] = linalg.reduce ins(%[[ARG0]] : tensor<?xf32>) outs(%[[FILL]] : tensor<f32>) dimensions = [0]
983 // CHECK: (%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) {
984 // CHECK: %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32
985 // CHECK: linalg.yield %[[RES]] : f32
987 // CHECK: tensor.expand_shape %[[REDUCE]] {{\[}}] output_shape [1] : tensor<f32> into tensor<1xf32>
988 %0 = tosa.reduce_sum %arg0 {axis = 0 : i32} : (tensor<?xf32>) -> tensor<1xf32>
// reduce_float_dyn_nonzero_batch: the dynamic dimension is in the middle
// (dim 1) rather than the batch; reduce_prod fills with 1.0 and multiplies.
994 // CHECK-LABEL: @reduce_float_dyn_nonzero_batch
995 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
996 func.func @reduce_float_dyn_nonzero_batch(%arg0: tensor<5x?x4xf32>) -> () {
997 // CHECK: %[[C1:.+]] = arith.constant 1
998 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C1]]
999 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<5x?xf32>
1000 // CHECK: %[[CST1:.+]] = arith.constant 1.0
1001 // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST1]]{{.*}}outs(%[[INIT]]
1002 // CHECK: %[[REDUCE:.+]] = linalg.reduce ins(%[[ARG0]] : tensor<5x?x4xf32>) outs(%[[FILL]] : tensor<5x?xf32>) dimensions = [2]
1003 // CHECK: (%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) {
1004 // CHECK: %[[RES:.+]] = arith.mulf %[[ARG1]], %[[ARG2]] : f32
1005 // CHECK: linalg.yield %[[RES]] : f32
1007 // CHECK: %[[C1_0:.+]] = arith.constant 1 : index
1008 // CHECK: %[[DIM_1:.+]] = tensor.dim %[[REDUCE]], %[[C1_0]] : tensor<5x?xf32>
1009 // CHECK: %[[C1_2:.+]] = arith.constant 1 : index
1010 // CHECK: tensor.expand_shape %[[REDUCE]] {{\[}}[0], [1, 2]] output_shape [5, %[[DIM_1]], 1] : tensor<5x?xf32> into tensor<5x?x1xf32>
1011 %0 = tosa.reduce_prod %arg0 {axis = 2 : i32} : (tensor<5x?x4xf32>) -> tensor<5x?x1xf32>
// reduce_float_dyn_multiple: both dimensions dynamic; reduce_max seeds the
// accumulator with -FLT_MAX and combines with arith.maximumf.
1017 // CHECK-LABEL: @reduce_float_dyn_multiple
1018 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1019 func.func @reduce_float_dyn_multiple(%arg0: tensor<?x?xf32>) -> () {
1020 // CHECK: %[[C0:.+]] = arith.constant 0
1021 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]]
1022 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
1023 // CHECK: %[[CMIN:.+]] = arith.constant -3.40282347E+38
1024 // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CMIN]]{{.*}}outs(%[[INIT]]
1025 // CHECK: %[[REDUCE:.+]] = linalg.reduce ins(%[[ARG0]] : tensor<?x?xf32>) outs(%[[FILL]] : tensor<?xf32>) dimensions = [1]
1026 // CHECK: (%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) {
1027 // CHECK: %[[MAX:.+]] = arith.maximumf %[[ARG1]], %[[ARG2]] : f32
1028 // CHECK: linalg.yield %[[MAX]] : f32
1030 // CHECK: %[[C0_0:.+]] = arith.constant 0 : index
1031 // CHECK: %[[DIM_1:.+]] = tensor.dim %[[REDUCE]], %[[C0_0]] : tensor<?xf32>
1032 // CHECK: %[[C1_2:.+]] = arith.constant 1 : index
1033 // CHECK: tensor.expand_shape %[[REDUCE]] {{\[}}[0, 1]] output_shape [%[[DIM_1]], 1] : tensor<?xf32> into tensor<?x1xf32>
1034 %0 = tosa.reduce_max %arg0 {axis = 1 : i32} : (tensor<?x?xf32>) -> tensor<?x1xf32>
// reduce_int: integer counterparts of the float reductions. Identity
// elements: 0 for sum, 1 for product, INT32_MAX for min, INT32_MIN for max.
1040 // CHECK-LABEL: @reduce_int
1041 // CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xi32>
1042 func.func @reduce_int(%arg0: tensor<5x4xi32>) -> () {
1043 // CHECK: [[INIT:%.+]] = tensor.empty()
1044 // CHECK: [[CST0:%.+]] = arith.constant 0
1045 // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
1046 // CHECK: [[REDUCE:%.+]] = linalg.reduce ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<4xi32>) dimensions = [0]
1047 // CHECK: (%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) {
1048 // CHECK: [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32
1049 // CHECK: linalg.yield [[RES]] : i32
1051 // CHECK: tensor.expand_shape [[REDUCE]] {{\[}}[0, 1]] output_shape [1, 4] : tensor<4xi32> into tensor<1x4xi32>
1052 %0 = tosa.reduce_sum %arg0 {axis = 0 : i32} : (tensor<5x4xi32>) -> tensor<1x4xi32>
1054 // CHECK: [[INIT:%.+]] = tensor.empty()
1055 // CHECK: [[CST0:%.+]] = arith.constant 0
1056 // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
1057 // CHECK: [[REDUCE:%.+]] = linalg.reduce ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<5xi32>) dimensions = [1]
1058 // CHECK: (%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) {
1059 // CHECK: [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32
1060 // CHECK: linalg.yield [[RES]] : i32
1062 // CHECK: tensor.expand_shape [[REDUCE]] {{\[}}[0, 1]] output_shape [5, 1] : tensor<5xi32> into tensor<5x1xi32>
1063 %1 = tosa.reduce_sum %arg0 {axis = 1 : i32} : (tensor<5x4xi32>) -> tensor<5x1xi32>
1065 // CHECK: arith.constant 1
1066 // CHECK: linalg.fill
1067 // CHECK: linalg.reduce
1068 // CHECK: arith.muli
1069 %2 = tosa.reduce_prod %arg0 {axis = 0 : i32} : (tensor<5x4xi32>) -> tensor<1x4xi32>
1071 // CHECK: arith.constant 2147483647 : i32
1072 // CHECK: linalg.fill
1073 // CHECK: linalg.reduce
1074 // CHECK: arith.minsi
1075 %3 = tosa.reduce_min %arg0 {axis = 0 : i32} : (tensor<5x4xi32>) -> tensor<1x4xi32>
1077 // CHECK: arith.constant -2147483648 : i32
1078 // CHECK: linalg.fill
1079 // CHECK: linalg.reduce
1080 // CHECK: arith.maxsi
1081 %4 = tosa.reduce_max %arg0 {axis = 0 : i32} : (tensor<5x4xi32>) -> tensor<1x4xi32>
// reduce_bool: reduce_all seeds with true and combines via arith.andi;
// reduce_any seeds with false.
1087 // CHECK-LABEL: @reduce_bool
1088 // CHECK-SAME: [[ARG0:%.+]]: tensor<5x4xi1>
1089 func.func @reduce_bool(%arg0: tensor<5x4xi1>) -> () {
1090 // CHECK: [[INIT:%.+]] = tensor.empty()
1091 // CHECK: [[CST0:%.+]] = arith.constant true
1092 // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
1093 // CHECK: [[REDUCE:%.+]] = linalg.reduce ins([[ARG0]] : tensor<5x4xi1>) outs([[FILL]] : tensor<4xi1>) dimensions = [0]
1094 // CHECK: (%[[ARG1:[0-9a-zA-Z_]+]]: i1, %[[ARG2:[0-9a-zA-Z_]+]]: i1) {
1095 // CHECK: [[RES:%.+]] = arith.andi %[[ARG1]], %[[ARG2]] : i1
1096 // CHECK: linalg.yield [[RES]] : i1
1098 // CHECK: tensor.expand_shape [[REDUCE]] {{\[}}[0, 1]] output_shape [1, 4] : tensor<4xi1> into tensor<1x4xi1>
1099 %0 = tosa.reduce_all %arg0 {axis = 0 : i32} : (tensor<5x4xi1>) -> tensor<1x4xi1>
1101 // CHECK: arith.constant false
1102 // CHECK: linalg.fill
1103 // CHECK: linalg.reduce
1105 %1 = tosa.reduce_any %arg0 {axis = 0 : i32} : (tensor<5x4xi1>) -> tensor<1x4xi1>
// rescale_i8: signed i8 -> i8 rescale. Pipeline inside the generic body:
// extend to i32, subtract input zero-point, tosa.apply_scale, add output
// zero-point, clamp to the signed i8 range [-128, 127], truncate back to i8.
1111 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
1113 // CHECK-LABEL: @rescale_i8
1114 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1115 func.func @rescale_i8(%arg0 : tensor<2xi8>) -> () {
1116 // CHECK: [[C0:%.+]] = arith.constant 19689
1117 // CHECK: [[C1:%.+]] = arith.constant 15
1118 // CHECK: [[INIT:%.+]] = tensor.empty()
1119 // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
1120 // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
1121 // CHECK: [[C17:%.+]] = arith.constant 17
1122 // CHECK: [[C22:%.+]] = arith.constant 22
1123 // CHECK-DAG: [[IN32:%.+]] = arith.extsi [[IN]]
1124 // CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN32]], [[C17]]
1125 // CHECK-DAG: [[SCALED:%.+]] = tosa.apply_scale [[IN_ZEROED]], [[C0]], [[C1]] {double_round = false}
1126 // CHECK-DAG: [[SCALED_ZEROED:%.+]] = arith.addi [[SCALED]], [[C22]]
1127 // CHECK-DAG: [[CMIN:%.+]] = arith.constant -128
1128 // CHECK-DAG: [[CMAX:%.+]] = arith.constant 127
1129 // CHECK-DAG: [[LOWER:%.+]] = arith.maxsi [[CMIN]], [[SCALED_ZEROED]]
1130 // CHECK-DAG: [[BOUNDED:%.+]] = arith.minsi [[CMAX]], [[LOWER]]
1131 // CHECK-DAG: [[TRUNC:%.+]] = arith.trunci [[BOUNDED]]
1132 // CHECK-DAG: linalg.yield [[TRUNC]]
1133 %0 = tosa.rescale %arg0 {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array<i32: 19689>, shift = array<i8: 15>, scale32 = false, double_round = false, per_channel = false} : (tensor<2xi8>) -> tensor<2xi8>
// rescale_i8_unsigned_output: same pipeline as rescale_i8, but with
// output_unsigned = true the clamp bounds become the unsigned range [0, 255].
1140 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
1142 // CHECK-LABEL: @rescale_i8_unsigned_output
1143 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1144 func.func @rescale_i8_unsigned_output(%arg0 : tensor<2xi8>) -> () {
1145 // CHECK: [[C0:%.+]] = arith.constant 19689
1146 // CHECK: [[C1:%.+]] = arith.constant 15
1147 // CHECK: [[INIT:%.+]] = tensor.empty()
1148 // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
1149 // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
1150 // CHECK: [[C17:%.+]] = arith.constant 17
1151 // CHECK: [[C22:%.+]] = arith.constant 22
1152 // CHECK-DAG: [[IN32:%.+]] = arith.extsi [[IN]]
1153 // CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN32]], [[C17]]
1154 // CHECK-DAG: [[SCALED:%.+]] = tosa.apply_scale [[IN_ZEROED]], [[C0]], [[C1]] {double_round = false}
1155 // CHECK-DAG: [[SCALED_ZEROED:%.+]] = arith.addi [[SCALED]], [[C22]]
1156 // CHECK-DAG: [[CMIN:%.+]] = arith.constant 0
1157 // CHECK-DAG: [[CMAX:%.+]] = arith.constant 255
1158 // CHECK-DAG: [[LOWER:%.+]] = arith.maxsi [[CMIN]], [[SCALED_ZEROED]]
1159 // CHECK-DAG: [[BOUNDED:%.+]] = arith.minsi [[CMAX]], [[LOWER]]
1160 // CHECK-DAG: [[TRUNC:%.+]] = arith.trunci [[BOUNDED]]
1161 // CHECK: linalg.yield [[TRUNC]]
1162 %1 = tosa.rescale %arg0 {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array<i32: 19689>, shift = array<i8: 15>, scale32 = false, double_round = false, per_channel = false, output_unsigned = true} : (tensor<2xi8>) -> tensor<2xi8>
// rescale_i8_dyn_batch: a dynamic batch dimension is queried via tensor.dim
// and forwarded to tensor.empty for both the signed and unsigned variants.
1170 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1172 // CHECK-LABEL: @rescale_i8_dyn_batch
1173 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1174 func.func @rescale_i8_dyn_batch(%arg0 : tensor<?x2xi8>) -> () {
1175 // CHECK: %[[C0:.+]] = arith.constant 0
1176 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
1177 // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor<?x2xi8>
1178 // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x2xi8>) outs(%[[INIT]] : tensor<?x2xi8>)
1179 %0 = tosa.rescale %arg0 {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array<i32: 19689>, shift = array<i8: 15>, scale32 = false, double_round = false, per_channel = false} : (tensor<?x2xi8>) -> tensor<?x2xi8>
1181 // CHECK: %[[C0:.+]] = arith.constant 0
1182 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
1183 // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor<?x2xi8>
1184 // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x2xi8>) outs(%[[INIT]] : tensor<?x2xi8>)
1185 %1 = tosa.rescale %arg0 {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array<i32: 19689>, shift = array<i8: 15>, scale32 = false, double_round = false, per_channel = false, output_unsigned = true} : (tensor<?x2xi8>) -> tensor<?x2xi8>
// rescale_dyn: rank-4 i32 -> i8 rescale with two dynamic dimensions; both
// extents feed tensor.empty and the generic iterates all four dims in
// parallel.
1192 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
1194 // CHECK-LABEL: @rescale_dyn
1195 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1196 func.func @rescale_dyn(%arg0 : tensor<1x?x?x32xi32>) -> () {
1197 // CHECK: %[[C1:.+]] = arith.constant 1
1198 // CHECK: %[[DIM1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
1199 // CHECK: %[[C2:.+]] = arith.constant 2
1200 // CHECK: %[[DIM2:.+]] = tensor.dim %[[ARG0]], %[[C2]]
1201 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM2]])
1202 // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x?x32xi32>) outs(%[[INIT]] : tensor<1x?x?x32xi8>)
1203 %0 = tosa.rescale %arg0 {double_round = true, input_zp = 0 : i32, multiplier = array<i32: 1376784203>, output_zp = 0 : i32, per_channel = false, scale32 = true, shift = array<i8: 38>} : (tensor<1x?x?x32xi32>) -> tensor<1x?x?x32xi8>
// rescale_i8_unsigned_input: with input_unsigned = true the widening uses
// arith.extui instead of extsi; the output clamp stays signed [-128, 127].
1209 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
1211 // CHECK-LABEL: @rescale_i8_unsigned_input
1212 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1213 func.func @rescale_i8_unsigned_input(%arg0 : tensor<2xi8>) -> () {
1214 // CHECK: [[C0:%.+]] = arith.constant 19689
1215 // CHECK: [[C1:%.+]] = arith.constant 15
1216 // CHECK: [[INIT:%.+]] = tensor.empty()
1217 // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
1218 // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
1219 // CHECK: [[C17:%.+]] = arith.constant 17
1220 // CHECK: [[C22:%.+]] = arith.constant 22
1221 // CHECK-DAG: [[IN32:%.+]] = arith.extui [[IN]]
1222 // CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN32]], [[C17]]
1223 // CHECK-DAG: [[SCALED:%.+]] = tosa.apply_scale [[IN_ZEROED]], [[C0]], [[C1]] {double_round = false}
1224 // CHECK-DAG: [[SCALED_ZEROED:%.+]] = arith.addi [[SCALED]], [[C22]]
1225 // CHECK-DAG: [[CMIN:%.+]] = arith.constant -128
1226 // CHECK-DAG: [[CMAX:%.+]] = arith.constant 127
1227 // CHECK-DAG: [[LOWER:%.+]] = arith.maxsi [[CMIN]], [[SCALED_ZEROED]]
1228 // CHECK-DAG: [[BOUNDED:%.+]] = arith.minsi [[CMAX]], [[LOWER]]
1229 // CHECK-DAG: [[TRUNC:%.+]] = arith.trunci [[BOUNDED]]
1230 // CHECK: linalg.yield [[TRUNC]]
1231 %0 = tosa.rescale %arg0 {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array<i32: 19689>, shift = array<i8: 15>, scale32 = false, double_round = false, per_channel = false, input_unsigned = true} : (tensor<2xi8>) -> tensor<2xi8>
// rescale_per_channel: multi-element multiplier/shift arrays are materialized
// as dense 1D constants fed to the generic as extra inputs. The third entry
// (multiplier 44, shift 64) appears as 0 in both expected constants because a
// shift larger than 63 zeroes that channel's scale.
// NOTE(review): the op below carries per_channel = false despite the
// 3-element arrays and the per-channel expectations — confirm against the
// pass whether the attribute should read per_channel = true.
1238 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
1240 // CHECK-LABEL: @rescale_per_channel
1241 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1242 func.func @rescale_per_channel(%arg0 : tensor<3xi8>) -> (tensor<3xi8>) {
1243 // CHECK: [[MULTIPLIERS:%.+]] = arith.constant dense<[42, 43, 0]>
1244 // CHECK: [[SHIFTS:%.+]] = arith.constant dense<[14, 15, 0]>
1245 // CHECK: [[INIT:%.+]] = tensor.empty()
1246 // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) outs([[INIT]] : tensor<3xi8>)
1247 // CHECK: ^bb0([[IN:%.+]]: i8, [[MULTIPLIER:%.+]]: i32, [[SHIFT:%.+]]: i8, [[UNUSED:%.+]]: i8):
1248 // CHECK: [[C243:%.+]] = arith.constant 243
1249 // CHECK: [[C252:%.+]] = arith.constant 252
1251 // CHECK-DAG: [[IN32:%.+]] = arith.extsi [[IN]]
1252 // CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN32]], [[C243]]
1253 // CHECK-DAG: [[SCALED:%.+]] = tosa.apply_scale [[IN_ZEROED]], [[MULTIPLIER]], [[SHIFT]] {double_round = false}
1254 // CHECK-DAG: [[SCALED_ZEROED:%.+]] = arith.addi [[SCALED]], [[C252]]
1255 // CHECK-DAG: [[CMIN:%.+]] = arith.constant -128
1256 // CHECK-DAG: [[CMAX:%.+]] = arith.constant 127
1257 // CHECK-DAG: [[LOWER:%.+]] = arith.maxsi [[CMIN]], [[SCALED_ZEROED]]
1258 // CHECK-DAG: [[BOUNDED:%.+]] = arith.minsi [[CMAX]], [[LOWER]]
1259 // CHECK-DAG: [[TRUNC:%.+]] = arith.trunci [[BOUNDED]]
1260 // CHECK-DAG: linalg.yield [[TRUNC]]
1261 %0 = tosa.rescale %arg0 {input_zp = 243 : i32, output_zp = 252 : i32, multiplier = array<i32: 42, 43, 44>, shift = array<i8: 14, 15, 64>, scale32 = false, double_round = false, per_channel = false} : (tensor<3xi8>) -> tensor<3xi8>
1263 // CHECK: return [[GENERIC]]
1264 return %0 : tensor<3xi8>
// rescaleDoubleRound: with scale32 and a shift of 33 (> 31), the
// double_round attribute is preserved on tosa.apply_scale.
1269 // CHECK-LABEL: @rescaleDoubleRound
1270 func.func @rescaleDoubleRound(%arg0 : tensor<2xi8>) -> (tensor<2xi8>) {
1271 // CHECK: linalg.generic
1272 // CHECK: tosa.apply_scale
1273 // CHECK-SAME: {double_round = true}
1274 %0 = tosa.rescale %arg0 {input_zp = 243 : i32, output_zp = 252 : i32, multiplier = array<i32: 19689>, shift = array<i8: 33>, scale32 = true, double_round = true, per_channel = false} : (tensor<2xi8>) -> tensor<2xi8>
1275 return %0 : tensor<2xi8>
// rescaleUnnecessaryDoubleRound: with a shift of only 15, double rounding has
// no effect, so the lowering is expected to drop it (double_round = false).
1278 // CHECK-LABEL: @rescaleUnnecessaryDoubleRound
1279 func.func @rescaleUnnecessaryDoubleRound(%arg0 : tensor<2xi8>) -> (tensor<2xi8>) {
1280 // CHECK: linalg.generic
1281 // CHECK: tosa.apply_scale
1282 // CHECK-SAME: {double_round = false}
1283 %0 = tosa.rescale %arg0 {input_zp = 243 : i32, output_zp = 252 : i32, multiplier = array<i32: 19689>, shift = array<i8: 15>, scale32 = true, double_round = true, per_channel = false} : (tensor<2xi8>) -> tensor<2xi8>
1284 return %0 : tensor<2xi8>
// reverse: tosa.reverse lowers to a generic with no tensor inputs; the body
// computes the mirrored index (dim - 1 - i) along the reversed axis and reads
// the source with tensor.extract.
1289 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1291 // CHECK-LABEL: @reverse
1292 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1293 func.func @reverse(%arg0: tensor<5x4xi32>) -> () {
1294 // CHECK: %[[C0:.+]] = arith.constant 0
1295 // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C0]]
1296 // CHECK: %[[INIT:.+]] = tensor.empty()
1297 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>)
1298 // CHECK-DAG: %[[I0:.+]] = linalg.index 0
1299 // CHECK-DAG: %[[I1:.+]] = linalg.index 1
1300 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1
1301 // CHECK-DAG: %[[RDIM_MINUS_C1:.+]] = arith.subi %[[RDIM]], %[[SUB1]]
1302 // CHECK-DAG: %[[READ_DIM:.+]] = arith.subi %[[RDIM_MINUS_C1]], %[[I0]]
1303 // CHECK-DAG: %[[EXTRACT:.+]] = tensor.extract %arg0[%[[READ_DIM]], %[[I1]]] : tensor<5x4xi32>
1304 // CHECK: linalg.yield %[[EXTRACT]]
1305 %0 = tosa.reverse %arg0 {axis = 0 : i32} : (tensor<5x4xi32>) -> tensor<5x4xi32>
// Same structure for axis = 1: only the second index is mirrored.
1307 // CHECK: %[[C1:.+]] = arith.constant 1
1308 // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C1]]
1309 // CHECK: %[[INIT:.+]] = tensor.empty()
1310 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>)
1311 // CHECK-DAG: %[[I0:.+]] = linalg.index 0
1312 // CHECK-DAG: %[[I1:.+]] = linalg.index 1
1313 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1
1314 // CHECK-DAG: %[[RDIM_MINUS_C1:.+]] = arith.subi %[[RDIM]], %[[SUB1]]
1315 // CHECK-DAG: %[[READ_DIM:.+]] = arith.subi %[[RDIM_MINUS_C1]], %[[I1]]
1316 // CHECK-DAG: %[[EXTRACT:.+]] = tensor.extract %arg0[%[[I0]], %[[READ_DIM]]] : tensor<5x4xi32>
1317 // CHECK: linalg.yield %[[EXTRACT]]
1318 %1 = tosa.reverse %arg0 {axis = 1 : i32} : (tensor<5x4xi32>) -> tensor<5x4xi32>
// reverse_dyn: dynamic-extent variant of reverse; the dimension is queried
// twice (once for tensor.empty, once for the mirrored index computation).
1324 // CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
1326 // CHECK-LABEL: @reverse_dyn
1327 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1328 func.func @reverse_dyn(%arg0: tensor<?xi32>) -> () {
1329 // CHECK: %[[C0_1:.+]] = arith.constant 0
1330 // CHECK: %[[D0_1:.+]] = tensor.dim %[[ARG0]], %[[C0_1]]
1331 // CHECK: %[[C0_2:.+]] = arith.constant 0
1332 // CHECK: %[[D0_2:.+]] = tensor.dim %[[ARG0]], %[[C0_2]]
1333 // CHECK: %[[INIT:.+]] = tensor.empty(%[[D0_1]])
1334 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} outs(%[[INIT]] : tensor<?xi32>)
1335 // CHECK-DAG: %[[I0:.+]] = linalg.index 0
1336 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1
1337 // CHECK-DAG: %[[RDIM_MINUS_C1:.+]] = arith.subi %[[D0_2]], %[[SUB1]]
1338 // CHECK-DAG: %[[READ_DIM:.+]] = arith.subi %[[RDIM_MINUS_C1]], %[[I0]]
1339 // CHECK-DAG: %[[EXTRACT:.+]] = tensor.extract %arg0[%[[READ_DIM]]] : tensor<?xi32>
1340 // CHECK: linalg.yield %[[EXTRACT]]
1341 %0 = tosa.reverse %arg0 {axis = 0 : i32} : (tensor<?xi32>) -> tensor<?xi32>
// tile: tosa.tile lowers to a rank-4 broadcasting generic (multiples
// interleaved with the source dims) followed by a tosa.reshape that collapses
// back to the tiled 2-D result shape.
1347 // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)>
1348 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
1350 // CHECK-LABEL: @tile
1351 // CHECK-SAME: %[[ARG0:.+]]: tensor<2x3xi8>
1352 func.func @tile(%arg0 : tensor<2x3xi8>) -> () {
1353 // CHECK: [[INIT:%.+]] = tensor.empty()
1354 // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<2x2x1x3xi8>)
1355 // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8
1356 // CHECK: linalg.yield %[[ARG1]] : i8
1357 // CHECK: tosa.reshape [[GENERIC]] {new_shape = array<i64: 4, 3>}
1358 %0 = tosa.tile %arg0 {multiples = array<i64: 2, 1>} : (tensor<2x3xi8>) -> tensor<4x3xi8>
1360 // CHECK: [[INIT:%.+]] = tensor.empty()
1361 // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<1x2x2x3xi8>)
1362 // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8
1363 // CHECK: linalg.yield %[[ARG1]] : i8
1364 // CHECK: tosa.reshape [[GENERIC]] {new_shape = array<i64: 2, 6>}
1365 %1 = tosa.tile %arg0 {multiples = array<i64: 1, 2>} : (tensor<2x3xi8>) -> tensor<2x6xi8>
1367 // CHECK: [[INIT:%.+]] = tensor.empty()
1368 // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<5x2x7x3xi8>)
1369 // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8
1370 // CHECK: linalg.yield %[[ARG1]] : i8
1371 // CHECK: tosa.reshape [[GENERIC]] {new_shape = array<i64: 10, 21>}
1372 %2 = tosa.tile %arg0 {multiples = array<i64: 5, 7>} : (tensor<2x3xi8>) -> tensor<10x21xi8>
// tile_dyn_input: a dynamic input extent flows through tensor.dim into
// tensor.empty; the trailing tosa.reshape encodes the dynamic size with the
// ShapedType sentinel (-9223372036854775808, i.e. kDynamic).
1379 // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)>
1380 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
1382 // CHECK-LABEL: @tile_dyn_input
1383 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1384 func.func @tile_dyn_input(%arg0 : tensor<?x3xi8>) -> () {
1385 // CHECK: %[[CST0:.+]] = arith.constant 0
1386 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]] : tensor<?x3xi8>
1387 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
1388 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x3xi8>) outs(%[[INIT]] : tensor<2x?x1x3xi8>)
1389 // CHECK: ^bb0(%[[ARG1:.+]]: i8,
1390 // CHECK: linalg.yield %[[ARG1]] : i8
1391 // CHECK: tosa.reshape %[[GENERIC]] {new_shape = array<i64: -9223372036854775808, 3>}
1392 %0 = tosa.tile %arg0 {multiples = array<i64: 2, 1>} : (tensor<?x3xi8>) -> tensor<?x3xi8>
1399 // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)>
1400 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
1402 // CHECK-LABEL: @tile_dyn_multiples
1403 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
// A dynamic multiple (-1) makes the corresponding tiled dimension dynamic:
// the expanded intermediate carries a ? in that position and the final
// tosa.reshape uses the kDynamic sentinel in new_shape.
1404 func.func @tile_dyn_multiples(%arg0 : tensor<2x3xi8>) -> () {
1405 // CHECK: %[[CST1:.+]] = arith.constant 1
1406 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]] : tensor<2x3xi8>
1407 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
1408 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs(%[[INIT]] : tensor<2x2x?x3xi8>)
1409 // CHECK: ^bb0(%[[ARG1:.+]]: i8,
1410 // CHECK: linalg.yield %[[ARG1]] : i8
1411 // CHECK: tosa.reshape %[[GENERIC]] {new_shape = array<i64: 2, -9223372036854775808>}
1412 %0 = tosa.tile %arg0 {multiples = array<i64: 2, -1>} : (tensor<2x3xi8>) -> tensor<2x?xi8>
1419 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1420 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
1421 // CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0)>
1422 // CHECK: #[[$MAP3:.*]] = affine_map<(d0) -> (d0)>
1423 // CHECK: #[[$MAP4:.*]] = affine_map<(d0) -> ()>
// Anchor the checks at this function and capture the entry argument so the
// %[[ARG0]] uses below bind to this function's argument instead of silently
// reusing a stale capture from an earlier split.
// CHECK-LABEL: @argmax
// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
1425 func.func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () {
// argmax lowers to a two-result linalg.generic: one accumulator holds the
// running best index (init 0), the other the running best value (init with
// the element type's minimum); the body compares and selects both together.
1426 // CHECK: [[IDX_INIT:%.+]] = tensor.empty()
1427 // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32
1428 // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]]
1429 // CHECK: [[VAL_INIT:%.+]] = tensor.empty()
1430 // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648
1431 // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]]
1432 // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>)
1433 // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32
1434 // CHECK: [[IDX:%.+]] = linalg.index 0
1435 // CHECK: [[CAST:%.+]] = arith.index_cast [[IDX]]
1436 // CHECK: [[CMP:%.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]]
1437 // CHECK: [[SELECT_VAL:%.+]] = arith.select [[CMP]], %[[ARG1]], %[[ARG3]]
1438 // CHECK: [[SELECT_IDX:%.+]] = arith.select [[CMP]], [[CAST]], %[[ARG2]]
1439 // CHECK: linalg.yield [[SELECT_IDX]], [[SELECT_VAL]]
1440 %0 = tosa.argmax %arg0 { axis = 0 : i32} : (tensor<3x2xi32>) -> tensor<2xi32>
// axis = 1: the reduction moves to the second iterator and the output maps
// become #[[$MAP2]] ((d0, d1) -> (d0)). Use the captured map variables rather
// than the printer's literal #map names so the test survives renumbering.
1442 // CHECK: [[IDX_INIT:%.+]] = tensor.empty()
1443 // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32
1444 // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]]
1445 // CHECK: [[VAL_INIT:%.+]] = tensor.empty()
1446 // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648
1447 // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]]
1448 // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
1449 // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32
1450 // CHECK: [[IDX:%.+]] = linalg.index 1
1451 // CHECK: [[CAST:%.+]] = arith.index_cast [[IDX]]
1452 // CHECK: [[CMP:%.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]]
1453 // CHECK: [[SELECT_VAL:%.+]] = arith.select [[CMP]], %[[ARG1]], %[[ARG3]]
1454 // CHECK: [[SELECT_IDX:%.+]] = arith.select [[CMP]], [[CAST]], %[[ARG2]]
1455 // CHECK: linalg.yield [[SELECT_IDX]], [[SELECT_VAL]]
1456 %1 = tosa.argmax %arg0 { axis = 1 : i32} : (tensor<3x2xi32>) -> tensor<3xi32>
// Float variant: the value accumulator starts at -FLT_MAX and the compare
// becomes arith.cmpf ogt.
1458 // CHECK: arith.constant -3.40282347E+38 : f32
1459 // CHECK: linalg.index
1460 // CHECK: arith.index_cast
1461 // CHECK: arith.cmpf ogt
1464 // CHECK: linalg.yield
1465 %2 = tosa.argmax %arg1 { axis = 0 : i32} : (tensor<6xf32>) -> tensor<i32>
1472 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1473 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)>
// Anchor at the function and capture %[[ARG0]] locally: without these lines
// the %[[ARG0]] uses below depend on a stale capture from a previous split.
// CHECK-LABEL: @argmax_dyn_non_axis
// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
// A dynamic non-reduced dimension flows into both accumulator tensors via
// tensor.dim + tensor.empty(%dyn).
1475 func.func @argmax_dyn_non_axis(%arg0 : tensor<3x?xi32>) -> () {
1476 // CHECK: %[[CST1:.+]] = arith.constant 1
1477 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]]
1478 // CHECK: %[[IDX_INIT:.+]] = tensor.empty(%[[DYN]])
1479 // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32
1480 // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]]
1481 // CHECK: %[[VAL_INIT:.+]] = tensor.empty(%[[DYN]])
1482 // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648
1483 // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]]
1484 // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<?xi32>, tensor<?xi32>)
1485 // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32
1486 // CHECK: %[[IDX:.+]] = linalg.index 0
1487 // CHECK: %[[CAST:.+]] = arith.index_cast %[[IDX]]
1488 // CHECK: %[[CMP:.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]]
1489 // CHECK: %[[SELECT_VAL:.+]] = arith.select %[[CMP]], %[[ARG1]], %[[ARG3]]
1490 // CHECK: %[[SELECT_IDX:.+]] = arith.select %[[CMP]], %[[CAST]], %[[ARG2]]
1491 // CHECK: linalg.yield %[[SELECT_IDX]], %[[SELECT_VAL]]
1492 %0 = tosa.argmax %arg0 { axis = 0 : i32} : (tensor<3x?xi32>) -> tensor<?xi32>
1498 // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
1499 // CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0)>
// Anchor at the function and capture %[[ARG0]] locally: without these lines
// the %[[ARG0]] uses below depend on a stale capture from a previous split.
// CHECK-LABEL: @argmax_dyn_axis
// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
// Reducing over the dynamic axis: the outputs stay static (tensor<3xi32>),
// so no tensor.dim is needed for the accumulators.
1501 func.func @argmax_dyn_axis(%arg0 : tensor<3x?xi32>) -> () {
1502 // CHECK: %[[IDX_INIT:.+]] = tensor.empty()
1503 // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32
1504 // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]]
1505 // CHECK: %[[VAL_INIT:.+]] = tensor.empty()
1506 // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648
1507 // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]]
1508 // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>)
// Bind the block arguments here; previously %[[ARG1]]/%[[ARG2]]/%[[ARG3]]
// below were never defined in this split and matched only by accident.
// CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32
1509 // CHECK: %[[IDX:.+]] = linalg.index 1
1510 // CHECK: %[[CAST:.+]] = arith.index_cast %[[IDX]]
1511 // CHECK: %[[CMP:.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]]
1512 // CHECK: %[[SELECT_VAL:.+]] = arith.select %[[CMP]], %[[ARG1]], %[[ARG3]]
1513 // CHECK: %[[SELECT_IDX:.+]] = arith.select %[[CMP]], %[[CAST]], %[[ARG2]]
1514 // CHECK: linalg.yield %[[SELECT_IDX]], %[[SELECT_VAL]]
1515 %0 = tosa.argmax %arg0 { axis = 1 : i32} : (tensor<3x?xi32>) -> tensor<3xi32>
1521 // CHECK-LABEL: @gather_float
1522 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
1523 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]
// gather lowers to a linalg.generic iterating over the output; each element
// is a tensor.extract from the values tensor at [batch, cast(index), channel].
1524 func.func @gather_float(%arg0: tensor<2x3x2xf32>, %arg1: tensor<2x3xi32>) -> () {
1525 // CHECK: %[[INIT:.+]] = tensor.empty()
1526 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xf32>)
1527 // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32)
1528 // CHECK: %[[IDX0:.+]] = linalg.index 0
1529 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]]
1530 // CHECK: %[[IDX2:.+]] = linalg.index 2
1531 // CHECK: %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<2x3x2xf32>
1532 // CHECK: linalg.yield %[[EXTRACT]]
1533 %0 = tosa.gather %arg0, %arg1 : (tensor<2x3x2xf32>, tensor<2x3xi32>) -> tensor<2x3x2xf32>
1539 // CHECK-LABEL: @gather_float_dyn
1540 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
1541 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]
// Dynamic batch dimension: the batch size is read with tensor.dim and fed to
// tensor.empty; the extract logic is unchanged from the static case.
1542 func.func @gather_float_dyn(%arg0: tensor<?x3x2xf32>, %arg1: tensor<?x3xi32>) -> () {
1543 // CHECK: %[[C0:.+]] = arith.constant 0
1544 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
1545 // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]])
1546 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<?x3xi32>) outs(%[[INIT]] : tensor<?x3x2xf32>)
1547 // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32)
1548 // CHECK: %[[IDX0:.+]] = linalg.index 0
1549 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]]
1550 // CHECK: %[[IDX2:.+]] = linalg.index 2
1551 // CHECK: %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<?x3x2xf32>
1552 // CHECK: linalg.yield %[[EXTRACT]]
1553 %0 = tosa.gather %arg0, %arg1 : (tensor<?x3x2xf32>, tensor<?x3xi32>) -> tensor<?x3x2xf32>
1559 // CHECK-LABEL: @gather_float_all_dynamic
1560 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
1561 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]
// Fully dynamic shapes: batch and channel come from the values tensor, while
// the index-count dimension comes from the indices tensor (dim 1 of %arg1).
1562 func.func @gather_float_all_dynamic(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?xi32>) -> () {
1563 // CHECK: %[[C0:.+]] = arith.constant 0
1564 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]]
1565 // CHECK: %[[C1:.+]] = arith.constant 1
1566 // CHECK: %[[INDEX:.+]] = tensor.dim %[[ARG1]], %[[C1]]
1567 // CHECK: %[[C2:.+]] = arith.constant 2
1568 // CHECK: %[[CHANNEL:.+]] = tensor.dim %[[ARG0]], %[[C2]]
1569 // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]], %[[INDEX]], %[[CHANNEL]])
1570 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<?x?xi32>) outs(%[[INIT]] : tensor<?x?x?xf32>)
1571 // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32)
1572 // CHECK: %[[IDX0:.+]] = linalg.index 0
1573 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]]
1574 // CHECK: %[[IDX2:.+]] = linalg.index 2
1575 // CHECK: %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<?x?x?xf32>
1576 // CHECK: linalg.yield %[[EXTRACT]]
1577 %0 = tosa.gather %arg0, %arg1 : (tensor<?x?x?xf32>, tensor<?x?xi32>) -> tensor<?x?x?xf32>
1583 // CHECK-LABEL: @gather_int
1584 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]
1585 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]
// Integer element type: identical lowering structure to @gather_float, only
// the extracted/yielded element type changes to i32.
1586 func.func @gather_int(%arg0: tensor<2x3x2xi32>, %arg1: tensor<2x3xi32>) -> () {
1587 // CHECK: %[[INIT:.+]] = tensor.empty()
1588 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xi32>)
1589 // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: i32)
1590 // CHECK: %[[IDX0:.+]] = linalg.index 0
1591 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]]
1592 // CHECK: %[[IDX2:.+]] = linalg.index 2
1593 // CHECK: %[[EXTRACT:.+]] = tensor.extract %[[ARG0]][%[[IDX0]], %[[CAST]], %[[IDX2]]] : tensor<2x3x2xi32>
1594 // CHECK: linalg.yield %[[EXTRACT]]
1595 %0 = tosa.gather %arg0, %arg1 : (tensor<2x3x2xi32>, tensor<2x3xi32>) -> tensor<2x3x2xi32>
1601 // CHECK-LABEL: @table8
1602 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1603 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
// 8-bit table lookup: the signed i8 input is rebased by +128 to index the
// 512-entry table directly, one tensor.extract per element.
1604 func.func @table8(%arg0: tensor<6xi8>, %arg1: tensor<512xi8>) -> () {
1605 // CHECK: %[[INIT:.+]] = tensor.empty()
1606 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
1607 // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
1608 // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
1609 // CHECK: %[[OFFSET:.+]] = arith.constant 128
1610 // CHECK: %[[ADD:.+]] = arith.addi %[[CAST]], %[[OFFSET]]
1611 // CHECK: %[[EXTRACT:.+]] = tensor.extract %[[ARG1]][%[[ADD]]]
1612 // CHECK: linalg.yield %[[EXTRACT]]
1613 %0 = tosa.table %arg0, %arg1 : (tensor<6xi8>, tensor<512xi8>) -> tensor<6xi8>
1619 // CHECK-LABEL: @table16
1620 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1621 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
// 16-bit table lookup interpolates between adjacent entries of the 513-entry
// table: the rebased input (+32768) is split into a 9-bit index (shrui 7) and
// a 7-bit fraction (andi 127); result = base << 7 + (next - base) * fraction.
1622 func.func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () {
1623 // CHECK: %[[INIT:.+]] = tensor.empty()
1624 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi16>) outs(%[[INIT]] : tensor<6xi32>)
1625 // CHECK: ^bb0(%[[ARG2:.*]]: i16, %[[ARG3:.*]]: i32)
1626 // CHECK: %[[EXT_IN:.+]] = arith.extsi %[[ARG2]]
1627 // CHECK: %[[C32768:.+]] = arith.constant 32768
1628 // CHECK: %[[C7:.+]] = arith.constant 7
1629 // CHECK: %[[C1:.+]] = arith.constant 1
1630 // CHECK: %[[C127:.+]] = arith.constant 127
1631 // CHECK: %[[INADD:.+]] = arith.addi %[[EXT_IN]], %[[C32768]]
1632 // CHECK: %[[IDX:.+]] = arith.shrui %[[INADD]], %[[C7]]
1633 // CHECK: %[[FRACTION:.+]] = arith.andi %[[INADD]], %[[C127]]
1634 // CHECK: %[[IDXPLUS1:.+]] = arith.addi %[[IDX]], %[[C1]]
1635 // CHECK: %[[IDX_CAST:.+]] = arith.index_cast %[[IDX]]
1636 // CHECK: %[[IDXPLUS1_CAST:.+]] = arith.index_cast %[[IDXPLUS1]]
1637 // CHECK: %[[BASE:.+]] = tensor.extract %[[ARG1]][%[[IDX_CAST]]]
1638 // CHECK: %[[NEXT:.+]] = tensor.extract %[[ARG1]][%[[IDXPLUS1_CAST]]]
1639 // CHECK: %[[BASE_EXT:.+]] = arith.extsi %[[BASE]]
1640 // CHECK: %[[NEXT_EXT:.+]] = arith.extsi %[[NEXT]]
1641 // CHECK: %[[BASE_MUL:.+]] = arith.shli %[[BASE_EXT]], %[[C7]]
1642 // CHECK: %[[DIFF:.+]] = arith.subi %[[NEXT_EXT]], %[[BASE_EXT]]
1643 // CHECK: %[[DIFF_MUL:.+]] = arith.muli %[[DIFF]], %[[FRACTION]]
1644 // CHECK: %[[RESULT:.+]] = arith.addi %[[BASE_MUL]], %[[DIFF_MUL]]
1645 // CHECK: linalg.yield %[[RESULT]]
1646 %0 = tosa.table %arg0, %arg1 : (tensor<6xi16>, tensor<513xi16>) -> tensor<6xi32>
1652 // CHECK-LABEL: @table8_dyn
1653 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1654 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
// Dynamic input length: same lookup body as @table8, with the output size
// obtained via tensor.dim and threaded into tensor.empty.
1655 func.func @table8_dyn(%arg0: tensor<?xi8>, %arg1: tensor<512xi8>) -> () {
1656 // CHECK: %[[CST0:.+]] = arith.constant 0
1657 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]]
1658 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]])
1659 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<?xi8>) outs(%[[INIT]] : tensor<?xi8>)
1660 // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
1661 // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
1662 // CHECK: %[[OFFSET:.+]] = arith.constant 128
1663 // CHECK: %[[ADD:.+]] = arith.addi %[[CAST]], %[[OFFSET]]
1664 // CHECK: %[[EXTRACT:.+]] = tensor.extract %[[ARG1]][%[[ADD]]]
1665 // CHECK: linalg.yield %[[EXTRACT]]
1666 %0 = tosa.table %arg0, %arg1 : (tensor<?xi8>, tensor<512xi8>) -> tensor<?xi8>
1672 // CHECK-LABEL: @table8_dyn_table
1673 // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
1674 // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]:
// Dynamically-sized table: the lowering is identical to the static-table
// case; no extra bounds handling is emitted for the dynamic table here.
1675 func.func @table8_dyn_table(%arg0: tensor<6xi8>, %arg1: tensor<?xi8>) -> () {
1676 // CHECK: %[[INIT:.+]] = tensor.empty()
1677 // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>)
1678 // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8)
1679 // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]]
1680 // CHECK: %[[OFFSET:.+]] = arith.constant 128
1681 // CHECK: %[[ADD:.+]] = arith.addi %[[CAST]], %[[OFFSET]]
1682 // CHECK: %[[EXTRACT:.+]] = tensor.extract %[[ARG1]][%[[ADD]]]
1683 // CHECK: linalg.yield %[[EXTRACT]]
1684 %0 = tosa.table %arg0, %arg1 : (tensor<6xi8>, tensor<?xi8>) -> tensor<6xi8>
1689 // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// rfft2d lowers to a single 5-D linalg.generic: three parallel output dims
// plus two reduction dims over the input rows/cols. Real and imaginary
// accumulators are zero-filled, and each input sample is multiplied by
// cos/sin of 2*pi*(ky*y/H + kx*x/W) and accumulated (add for real, sub for
// imaginary).
1690 // CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>
1691 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
1693 // CHECK-LABEL: func.func @test_static_rfft2d(
1694 // CHECK-SAME: %[[VAL_0:.*]]: tensor<5x5x8xf32>) -> (tensor<5x5x5xf32>, tensor<5x5x5xf32>) {
1695 // CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
1696 // CHECK: %[[VAL_2:.*]] = arith.constant 2 : index
1697 // CHECK: %[[VAL_3:.*]] = arith.constant 8 : index
1698 // CHECK: %[[VAL_4:.*]] = arith.constant 4 : index
1699 // CHECK: %[[VAL_5:.*]] = arith.constant 5 : index
1700 // CHECK: %[[VAL_6:.*]] = tensor.empty() : tensor<5x5x5xf32>
1701 // CHECK: %[[VAL_7:.*]] = arith.constant 0.000000e+00 : f32
1702 // CHECK: %[[VAL_8:.*]] = linalg.fill ins(%[[VAL_7]] : f32) outs(%[[VAL_6]] : tensor<5x5x5xf32>) -> tensor<5x5x5xf32>
1703 // CHECK: %[[VAL_9:.*]] = tensor.empty() : tensor<5x5x5xf32>
1704 // CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
1705 // CHECK: %[[VAL_11:.*]] = linalg.fill ins(%[[VAL_10]] : f32) outs(%[[VAL_9]] : tensor<5x5x5xf32>) -> tensor<5x5x5xf32>
1706 // CHECK: %[[VAL_12:.*]] = arith.constant 1 : index
1707 // CHECK: %[[VAL_13:.*]] = arith.constant 5 : index
1708 // CHECK: %[[VAL_14:.*]] = arith.constant 2 : index
1709 // CHECK: %[[VAL_15:.*]] = arith.constant 8 : index
1710 // CHECK: %[[VAL_16:.*]] = arith.constant 6.28318548 : f32
1711 // CHECK: %[[VAL_17:.*]] = arith.index_castui %[[VAL_13]] : index to i32
1712 // CHECK: %[[VAL_18:.*]] = arith.uitofp %[[VAL_17]] : i32 to f32
1713 // CHECK: %[[VAL_19:.*]] = arith.index_castui %[[VAL_15]] : index to i32
1714 // CHECK: %[[VAL_20:.*]] = arith.uitofp %[[VAL_19]] : i32 to f32
1715 // CHECK: %[[VAL_21:.*]]:2 = linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]]], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%[[VAL_0]] : tensor<5x5x8xf32>) outs(%[[VAL_8]], %[[VAL_11]] : tensor<5x5x5xf32>, tensor<5x5x5xf32>) {
1716 // CHECK: ^bb0(%[[VAL_22:.*]]: f32, %[[VAL_23:.*]]: f32, %[[VAL_24:.*]]: f32):
1717 // CHECK: %[[VAL_25:.*]] = linalg.index 1 : index
1718 // CHECK: %[[VAL_26:.*]] = linalg.index 2 : index
1719 // CHECK: %[[VAL_27:.*]] = linalg.index 3 : index
1720 // CHECK: %[[VAL_28:.*]] = linalg.index 4 : index
1721 // CHECK: %[[VAL_29:.*]] = index.mul %[[VAL_27]], %[[VAL_25]]
1722 // CHECK: %[[VAL_30:.*]] = index.mul %[[VAL_28]], %[[VAL_26]]
1723 // CHECK: %[[VAL_31:.*]] = index.remu %[[VAL_29]], %[[VAL_13]]
1724 // CHECK: %[[VAL_32:.*]] = index.remu %[[VAL_30]], %[[VAL_15]]
1725 // CHECK: %[[VAL_33:.*]] = arith.index_castui %[[VAL_31]] : index to i32
1726 // CHECK: %[[VAL_34:.*]] = arith.uitofp %[[VAL_33]] : i32 to f32
1727 // CHECK: %[[VAL_35:.*]] = arith.index_castui %[[VAL_32]] : index to i32
1728 // CHECK: %[[VAL_36:.*]] = arith.uitofp %[[VAL_35]] : i32 to f32
1729 // CHECK: %[[VAL_37:.*]] = arith.divf %[[VAL_34]], %[[VAL_18]] : f32
1730 // CHECK: %[[VAL_38:.*]] = arith.divf %[[VAL_36]], %[[VAL_20]] : f32
1731 // CHECK: %[[VAL_39:.*]] = arith.addf %[[VAL_37]], %[[VAL_38]] : f32
1732 // CHECK: %[[VAL_40:.*]] = arith.mulf %[[VAL_16]], %[[VAL_39]] : f32
1733 // CHECK: %[[VAL_41:.*]] = math.cos %[[VAL_40]] : f32
1734 // CHECK: %[[VAL_42:.*]] = math.sin %[[VAL_40]] : f32
1735 // CHECK: %[[VAL_43:.*]] = arith.mulf %[[VAL_22]], %[[VAL_41]] : f32
1736 // CHECK: %[[VAL_44:.*]] = arith.mulf %[[VAL_22]], %[[VAL_42]] : f32
1737 // CHECK: %[[VAL_45:.*]] = arith.addf %[[VAL_23]], %[[VAL_43]] : f32
1738 // CHECK: %[[VAL_46:.*]] = arith.subf %[[VAL_24]], %[[VAL_44]] : f32
1739 // CHECK: linalg.yield %[[VAL_45]], %[[VAL_46]] : f32, f32
1740 // CHECK: } -> (tensor<5x5x5xf32>, tensor<5x5x5xf32>)
1741 // CHECK: return %[[VAL_47:.*]]#0, %[[VAL_47]]#1 : tensor<5x5x5xf32>, tensor<5x5x5xf32>
1743 func.func @test_static_rfft2d(%arg0: tensor<5x5x8xf32>) -> (tensor<5x5x5xf32>, tensor<5x5x5xf32>) {
1744 %output_real, %output_imag = "tosa.rfft2d"(%arg0) {} : (tensor<5x5x8xf32>) -> (tensor<5x5x5xf32>, tensor<5x5x5xf32>)
1745 return %output_real, %output_imag : tensor<5x5x5xf32>, tensor<5x5x5xf32>
1749 // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// Dynamic rfft2d: same 5-D generic as the static case, but all sizes come
// from tensor.dim. The third output dimension is computed as W/2 + 1 (the
// one-sided spectrum width) before being passed to tensor.empty.
1750 // CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>
1751 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
1753 // CHECK-LABEL: func.func @test_dynamic_rfft2d(
1754 // CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
1755 // CHECK: %[[VAL_1:.*]] = arith.constant 0 : index
1756 // CHECK: %[[VAL_2:.*]] = tensor.dim %[[VAL_0]], %[[VAL_1]] : tensor<?x?x?xf32>
1757 // CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
1758 // CHECK: %[[VAL_4:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?x?xf32>
1759 // CHECK: %[[VAL_5:.*]] = arith.constant 2 : index
1760 // CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor<?x?x?xf32>
1761 // CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
1762 // CHECK: %[[VAL_8:.*]] = arith.constant 2 : index
1763 // CHECK: %[[VAL_9:.*]] = arith.divui %[[VAL_6]], %[[VAL_8]] : index
1764 // CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_7]] : index
1765 // CHECK: %[[VAL_11:.*]] = tensor.empty(%[[VAL_2]], %[[VAL_4]], %[[VAL_10]]) : tensor<?x?x?xf32>
1766 // CHECK: %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32
1767 // CHECK: %[[VAL_13:.*]] = linalg.fill ins(%[[VAL_12]] : f32) outs(%[[VAL_11]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
1768 // CHECK: %[[VAL_14:.*]] = tensor.empty(%[[VAL_2]], %[[VAL_4]], %[[VAL_10]]) : tensor<?x?x?xf32>
1769 // CHECK: %[[VAL_15:.*]] = arith.constant 0.000000e+00 : f32
1770 // CHECK: %[[VAL_16:.*]] = linalg.fill ins(%[[VAL_15]] : f32) outs(%[[VAL_14]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
1771 // CHECK: %[[VAL_17:.*]] = arith.constant 1 : index
1772 // CHECK: %[[VAL_18:.*]] = tensor.dim %[[VAL_0]], %[[VAL_17]] : tensor<?x?x?xf32>
1773 // CHECK: %[[VAL_19:.*]] = arith.constant 2 : index
1774 // CHECK: %[[VAL_20:.*]] = tensor.dim %[[VAL_0]], %[[VAL_19]] : tensor<?x?x?xf32>
1775 // CHECK: %[[VAL_21:.*]] = arith.constant 6.28318548 : f32
1776 // CHECK: %[[VAL_22:.*]] = arith.index_castui %[[VAL_18]] : index to i32
1777 // CHECK: %[[VAL_23:.*]] = arith.uitofp %[[VAL_22]] : i32 to f32
1778 // CHECK: %[[VAL_24:.*]] = arith.index_castui %[[VAL_20]] : index to i32
1779 // CHECK: %[[VAL_25:.*]] = arith.uitofp %[[VAL_24]] : i32 to f32
1780 // CHECK: %[[VAL_26:.*]]:2 = linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]]], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%[[VAL_0]] : tensor<?x?x?xf32>) outs(%[[VAL_13]], %[[VAL_16]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
1781 // CHECK: ^bb0(%[[VAL_27:.*]]: f32, %[[VAL_28:.*]]: f32, %[[VAL_29:.*]]: f32):
1782 // CHECK: %[[VAL_30:.*]] = linalg.index 1 : index
1783 // CHECK: %[[VAL_31:.*]] = linalg.index 2 : index
1784 // CHECK: %[[VAL_32:.*]] = linalg.index 3 : index
1785 // CHECK: %[[VAL_33:.*]] = linalg.index 4 : index
1786 // CHECK: %[[VAL_34:.*]] = index.mul %[[VAL_32]], %[[VAL_30]]
1787 // CHECK: %[[VAL_35:.*]] = index.mul %[[VAL_33]], %[[VAL_31]]
1788 // CHECK: %[[VAL_36:.*]] = index.remu %[[VAL_34]], %[[VAL_18]]
1789 // CHECK: %[[VAL_37:.*]] = index.remu %[[VAL_35]], %[[VAL_20]]
1790 // CHECK: %[[VAL_38:.*]] = arith.index_castui %[[VAL_36]] : index to i32
1791 // CHECK: %[[VAL_39:.*]] = arith.uitofp %[[VAL_38]] : i32 to f32
1792 // CHECK: %[[VAL_40:.*]] = arith.index_castui %[[VAL_37]] : index to i32
1793 // CHECK: %[[VAL_41:.*]] = arith.uitofp %[[VAL_40]] : i32 to f32
1794 // CHECK: %[[VAL_42:.*]] = arith.divf %[[VAL_39]], %[[VAL_23]] : f32
1795 // CHECK: %[[VAL_43:.*]] = arith.divf %[[VAL_41]], %[[VAL_25]] : f32
1796 // CHECK: %[[VAL_44:.*]] = arith.addf %[[VAL_42]], %[[VAL_43]] : f32
1797 // CHECK: %[[VAL_45:.*]] = arith.mulf %[[VAL_21]], %[[VAL_44]] : f32
1798 // CHECK: %[[VAL_46:.*]] = math.cos %[[VAL_45]] : f32
1799 // CHECK: %[[VAL_47:.*]] = math.sin %[[VAL_45]] : f32
1800 // CHECK: %[[VAL_48:.*]] = arith.mulf %[[VAL_27]], %[[VAL_46]] : f32
1801 // CHECK: %[[VAL_49:.*]] = arith.mulf %[[VAL_27]], %[[VAL_47]] : f32
1802 // CHECK: %[[VAL_50:.*]] = arith.addf %[[VAL_28]], %[[VAL_48]] : f32
1803 // CHECK: %[[VAL_51:.*]] = arith.subf %[[VAL_29]], %[[VAL_49]] : f32
1804 // CHECK: linalg.yield %[[VAL_50]], %[[VAL_51]] : f32, f32
1805 // CHECK: } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
1806 // CHECK: return %[[VAL_52:.*]]#0, %[[VAL_52]]#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
1808 func.func @test_dynamic_rfft2d(%arg0: tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
1809 %output_real, %output_imag = "tosa.rfft2d"(%arg0) {} : (tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
1810 return %output_real, %output_imag : tensor<?x?x?xf32>, tensor<?x?x?xf32>
1814 // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// Forward fft2d (inverse = false): complex input, so the generic takes both
// real and imaginary operands (two uses of #[[$ATTR_0]]) and accumulates
// real += a*cos + b*sin, imag += b*cos - a*sin. The angle is not negated —
// compare @test_dynamic_fft2d, where inverse = true multiplies it by -1.
1815 // CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>
1816 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
1818 // CHECK-LABEL: func.func @test_static_fft2d(
1819 // CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8x8xf32>,
1820 // CHECK-SAME: %[[VAL_1:.*]]: tensor<8x8x8xf32>) -> (tensor<8x8x8xf32>, tensor<8x8x8xf32>) {
1821 // CHECK: %[[VAL_2:.*]] = tensor.empty() : tensor<8x8x8xf32>
1822 // CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32
1823 // CHECK: %[[VAL_4:.*]] = linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_2]] : tensor<8x8x8xf32>) -> tensor<8x8x8xf32>
1824 // CHECK: %[[VAL_5:.*]] = tensor.empty() : tensor<8x8x8xf32>
1825 // CHECK: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
1826 // CHECK: %[[VAL_7:.*]] = linalg.fill ins(%[[VAL_6]] : f32) outs(%[[VAL_5]] : tensor<8x8x8xf32>) -> tensor<8x8x8xf32>
1827 // CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
1828 // CHECK: %[[VAL_9:.*]] = arith.constant 8 : index
1829 // CHECK: %[[VAL_10:.*]] = arith.constant 2 : index
1830 // CHECK: %[[VAL_11:.*]] = arith.constant 8 : index
1831 // CHECK: %[[VAL_12:.*]] = arith.constant 6.28318548 : f32
1832 // CHECK: %[[VAL_13:.*]] = arith.index_castui %[[VAL_9]] : index to i32
1833 // CHECK: %[[VAL_14:.*]] = arith.uitofp %[[VAL_13]] : i32 to f32
1834 // CHECK: %[[VAL_15:.*]] = arith.index_castui %[[VAL_11]] : index to i32
1835 // CHECK: %[[VAL_16:.*]] = arith.uitofp %[[VAL_15]] : i32 to f32
1836 // CHECK: %[[VAL_17:.*]]:2 = linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]]], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%[[VAL_0]], %[[VAL_1]] : tensor<8x8x8xf32>, tensor<8x8x8xf32>) outs(%[[VAL_4]], %[[VAL_7]] : tensor<8x8x8xf32>, tensor<8x8x8xf32>) {
1837 // CHECK: ^bb0(%[[VAL_18:.*]]: f32, %[[VAL_19:.*]]: f32, %[[VAL_20:.*]]: f32, %[[VAL_21:.*]]: f32):
1838 // CHECK: %[[VAL_22:.*]] = linalg.index 1 : index
1839 // CHECK: %[[VAL_23:.*]] = linalg.index 2 : index
1840 // CHECK: %[[VAL_24:.*]] = linalg.index 3 : index
1841 // CHECK: %[[VAL_25:.*]] = linalg.index 4 : index
1842 // CHECK: %[[VAL_26:.*]] = index.mul %[[VAL_24]], %[[VAL_22]]
1843 // CHECK: %[[VAL_27:.*]] = index.mul %[[VAL_25]], %[[VAL_23]]
1844 // CHECK: %[[VAL_28:.*]] = index.remu %[[VAL_26]], %[[VAL_9]]
1845 // CHECK: %[[VAL_29:.*]] = index.remu %[[VAL_27]], %[[VAL_11]]
1846 // CHECK: %[[VAL_30:.*]] = arith.index_castui %[[VAL_28]] : index to i32
1847 // CHECK: %[[VAL_31:.*]] = arith.uitofp %[[VAL_30]] : i32 to f32
1848 // CHECK: %[[VAL_32:.*]] = arith.index_castui %[[VAL_29]] : index to i32
1849 // CHECK: %[[VAL_33:.*]] = arith.uitofp %[[VAL_32]] : i32 to f32
1850 // CHECK: %[[VAL_34:.*]] = arith.divf %[[VAL_31]], %[[VAL_14]] : f32
1851 // CHECK: %[[VAL_35:.*]] = arith.divf %[[VAL_33]], %[[VAL_16]] : f32
1852 // CHECK: %[[VAL_36:.*]] = arith.addf %[[VAL_34]], %[[VAL_35]] : f32
1853 // CHECK: %[[VAL_37:.*]] = arith.mulf %[[VAL_12]], %[[VAL_36]] : f32
1854 // CHECK: %[[VAL_38:.*]] = math.cos %[[VAL_37]] : f32
1855 // CHECK: %[[VAL_39:.*]] = math.sin %[[VAL_37]] : f32
1856 // CHECK: %[[VAL_40:.*]] = arith.mulf %[[VAL_18]], %[[VAL_38]] : f32
1857 // CHECK: %[[VAL_41:.*]] = arith.mulf %[[VAL_19]], %[[VAL_39]] : f32
1858 // CHECK: %[[VAL_42:.*]] = arith.addf %[[VAL_40]], %[[VAL_41]] : f32
1859 // CHECK: %[[VAL_43:.*]] = arith.mulf %[[VAL_19]], %[[VAL_38]] : f32
1860 // CHECK: %[[VAL_44:.*]] = arith.mulf %[[VAL_18]], %[[VAL_39]] : f32
1861 // CHECK: %[[VAL_45:.*]] = arith.subf %[[VAL_43]], %[[VAL_44]] : f32
1862 // CHECK: %[[VAL_46:.*]] = arith.addf %[[VAL_20]], %[[VAL_42]] : f32
1863 // CHECK: %[[VAL_47:.*]] = arith.addf %[[VAL_21]], %[[VAL_45]] : f32
1864 // CHECK: linalg.yield %[[VAL_46]], %[[VAL_47]] : f32, f32
1865 // CHECK: } -> (tensor<8x8x8xf32>, tensor<8x8x8xf32>)
1866 // CHECK: return %[[VAL_48:.*]]#0, %[[VAL_48]]#1 : tensor<8x8x8xf32>, tensor<8x8x8xf32>
1868 func.func @test_static_fft2d(%arg0: tensor<8x8x8xf32>, %arg1: tensor<8x8x8xf32>) -> (tensor<8x8x8xf32>, tensor<8x8x8xf32>) {
1869 %output_real, %output_imag = "tosa.fft2d"(%arg0, %arg1) {inverse=false} : (tensor<8x8x8xf32>, tensor<8x8x8xf32>) -> (tensor<8x8x8xf32>, tensor<8x8x8xf32>)
1870 return %output_real, %output_imag : tensor<8x8x8xf32>, tensor<8x8x8xf32>
1874 // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
1875 // CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>
1876 // CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
1878 // CHECK-LABEL: func.func @test_dynamic_fft2d(
1879 // CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?x?xf32>,
1880 // CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
1881 // CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
1882 // CHECK: %[[VAL_3:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?x?xf32>
1883 // CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
1884 // CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?x?xf32>
1885 // CHECK: %[[VAL_6:.*]] = arith.constant 2 : index
1886 // CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?x?xf32>
1887 // CHECK: %[[VAL_8:.*]] = tensor.empty(%[[VAL_3]], %[[VAL_5]], %[[VAL_7]]) : tensor<?x?x?xf32>
1888 // CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
1889 // CHECK: %[[VAL_10:.*]] = linalg.fill ins(%[[VAL_9]] : f32) outs(%[[VAL_8]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
1890 // CHECK: %[[VAL_11:.*]] = tensor.empty(%[[VAL_3]], %[[VAL_5]], %[[VAL_7]]) : tensor<?x?x?xf32>
1891 // CHECK: %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32
1892 // CHECK: %[[VAL_13:.*]] = linalg.fill ins(%[[VAL_12]] : f32) outs(%[[VAL_11]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
1893 // CHECK: %[[VAL_14:.*]] = arith.constant 1 : index
1894 // CHECK: %[[VAL_15:.*]] = tensor.dim %[[VAL_0]], %[[VAL_14]] : tensor<?x?x?xf32>
1895 // CHECK: %[[VAL_16:.*]] = arith.constant 2 : index
1896 // CHECK: %[[VAL_17:.*]] = tensor.dim %[[VAL_0]], %[[VAL_16]] : tensor<?x?x?xf32>
1897 // CHECK: %[[VAL_18:.*]] = arith.constant 6.28318548 : f32
1898 // CHECK: %[[VAL_19:.*]] = arith.index_castui %[[VAL_15]] : index to i32
1899 // CHECK: %[[VAL_20:.*]] = arith.uitofp %[[VAL_19]] : i32 to f32
1900 // CHECK: %[[VAL_21:.*]] = arith.index_castui %[[VAL_17]] : index to i32
1901 // CHECK: %[[VAL_22:.*]] = arith.uitofp %[[VAL_21]] : i32 to f32
1902 // CHECK: %[[VAL_23:.*]]:2 = linalg.generic {indexing_maps = [#[[$ATTR_0]], #[[$ATTR_0]], #[[$ATTR_1]], #[[$ATTR_1]]], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} ins(%[[VAL_0]], %[[VAL_1]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%[[VAL_10]], %[[VAL_13]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
1903 // CHECK: ^bb0(%[[VAL_24:.*]]: f32, %[[VAL_25:.*]]: f32, %[[VAL_26:.*]]: f32, %[[VAL_27:.*]]: f32):
1904 // CHECK: %[[VAL_28:.*]] = linalg.index 1 : index
1905 // CHECK: %[[VAL_29:.*]] = linalg.index 2 : index
1906 // CHECK: %[[VAL_30:.*]] = linalg.index 3 : index
1907 // CHECK: %[[VAL_31:.*]] = linalg.index 4 : index
1908 // CHECK: %[[VAL_32:.*]] = index.mul %[[VAL_30]], %[[VAL_28]]
1909 // CHECK: %[[VAL_33:.*]] = index.mul %[[VAL_31]], %[[VAL_29]]
1910 // CHECK: %[[VAL_34:.*]] = index.remu %[[VAL_32]], %[[VAL_15]]
1911 // CHECK: %[[VAL_35:.*]] = index.remu %[[VAL_33]], %[[VAL_17]]
1912 // CHECK: %[[VAL_36:.*]] = arith.index_castui %[[VAL_34]] : index to i32
1913 // CHECK: %[[VAL_37:.*]] = arith.uitofp %[[VAL_36]] : i32 to f32
1914 // CHECK: %[[VAL_38:.*]] = arith.index_castui %[[VAL_35]] : index to i32
1915 // CHECK: %[[VAL_39:.*]] = arith.uitofp %[[VAL_38]] : i32 to f32
1916 // CHECK: %[[VAL_40:.*]] = arith.divf %[[VAL_37]], %[[VAL_20]] : f32
1917 // CHECK: %[[VAL_41:.*]] = arith.divf %[[VAL_39]], %[[VAL_22]] : f32
1918 // CHECK: %[[VAL_42:.*]] = arith.addf %[[VAL_40]], %[[VAL_41]] : f32
1919 // CHECK: %[[VAL_43:.*]] = arith.mulf %[[VAL_18]], %[[VAL_42]] : f32
1920 // CHECK: %[[VAL_44:.*]] = arith.constant -1.000000e+00 : f32
1921 // CHECK: %[[VAL_45:.*]] = arith.mulf %[[VAL_43]], %[[VAL_44]] : f32
1922 // CHECK: %[[VAL_46:.*]] = math.cos %[[VAL_45]] : f32
1923 // CHECK: %[[VAL_47:.*]] = math.sin %[[VAL_45]] : f32
1924 // CHECK: %[[VAL_48:.*]] = arith.mulf %[[VAL_24]], %[[VAL_46]] : f32
1925 // CHECK: %[[VAL_49:.*]] = arith.mulf %[[VAL_25]], %[[VAL_47]] : f32
1926 // CHECK: %[[VAL_50:.*]] = arith.addf %[[VAL_48]], %[[VAL_49]] : f32
1927 // CHECK: %[[VAL_51:.*]] = arith.mulf %[[VAL_25]], %[[VAL_46]] : f32
1928 // CHECK: %[[VAL_52:.*]] = arith.mulf %[[VAL_24]], %[[VAL_47]] : f32
1929 // CHECK: %[[VAL_53:.*]] = arith.subf %[[VAL_51]], %[[VAL_52]] : f32
1930 // CHECK: %[[VAL_54:.*]] = arith.addf %[[VAL_26]], %[[VAL_50]] : f32
1931 // CHECK: %[[VAL_55:.*]] = arith.addf %[[VAL_27]], %[[VAL_53]] : f32
1932 // CHECK: linalg.yield %[[VAL_54]], %[[VAL_55]] : f32, f32
1933 // CHECK: } -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
1934 // CHECK: return %[[VAL_56:.*]]#0, %[[VAL_56]]#1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
1936 func.func @test_dynamic_fft2d(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
1937 %output_real, %output_imag = "tosa.fft2d"(%arg0, %arg1) {inverse = true} : (tensor<?x?x?xf32>, tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>, tensor<?x?x?xf32>)
1938 return %output_real, %output_imag : tensor<?x?x?xf32>, tensor<?x?x?xf32>
1944 // CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (0)>
1945 // CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (d0)>
1947 // CHECK-LABEL: func.func @test_cast_fp32_i64(
1948 // CHECK-SAME: %[[ARG0:.*]]: tensor<1xf32>) -> tensor<1xi64> {
1949 // CHECK: %[[EMPTY_TENSOR:.*]] = tensor.empty() : tensor<1xi64>
1950 // CHECK: %[[RESULT:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<1xf32>) outs(%[[EMPTY_TENSOR]] : tensor<1xi64>) {
1951 // CHECK: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: i64):
1952 // CHECK: %[[ROUND_EVEN:.*]] = math.roundeven %[[IN]] : f32
1953 // CHECK: %[[FP_INT_MIN:.*]] = arith.constant -9.22337203E+18 : f32
1954 // CHECK: %[[FP_INT_MAX_PLUS_ONE:.*]] = arith.constant 9.22337203E+18 : f32
1955 // CHECK: %[[INT_MAX:.*]] = arith.constant 9223372036854775807 : i64
1956 // CHECK: %[[MAX:.*]] = arith.maximumf %[[ROUND_EVEN]], %[[FP_INT_MIN]] : f32
1957 // CHECK: %[[FPTOSI:.*]] = arith.fptosi %[[MAX]] : f32 to i64
1958 // CHECK: %[[CMPF:.*]] = arith.cmpf uge, %[[ROUND_EVEN]], %[[FP_INT_MAX_PLUS_ONE]] : f32
1959 // CHECK: %[[SELECT:.*]] = arith.select %[[CMPF]], %[[INT_MAX]], %[[FPTOSI]] : i64
1960 // CHECK: linalg.yield %[[SELECT]] : i64
1961 // CHECK: } -> tensor<1xi64>
1962 // CHECK: return %[[RESULT]] : tensor<1xi64>
1963 func.func @test_cast_fp32_i64(%arg0: tensor<1xf32>) -> (tensor<1xi64>) {
1964 %0 = tosa.cast %arg0 : (tensor<1xf32>) -> tensor<1xi64>
1965 return %0: tensor<1xi64>