// RUN: mlir-opt %s \
// RUN:   --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN:   --sparsification --sparse-tensor-conversion \
// RUN:   --convert-vector-to-scf --convert-scf-to-std \
// RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
// RUN:   --std-bufferize --finalizing-bufferize --lower-affine \
// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm \
// RUN:   --convert-std-to-llvm --reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner \
// RUN:   -e entry -entry-point-result=void \
// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
//
// Do the same run, but now with SIMDization as well. This should not change the outcome.
//
// RUN: mlir-opt %s \
// RUN:   --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN:   --sparsification="vectorization-strategy=2 vl=8" --sparse-tensor-conversion \
// RUN:   --convert-vector-to-scf --convert-scf-to-std \
// RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
// RUN:   --std-bufferize --finalizing-bufferize --lower-affine \
// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm \
// RUN:   --convert-std-to-llvm --reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner \
// RUN:   -e entry -entry-point-result=void \
// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

#SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
#DV = #sparse_tensor.encoding<{ dimLevelType = [ "dense"      ] }>
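
// #SV annotates the vector as sparse (only the nonzero entries are stored),
// while #DV annotates it as dense (all 32 entries are stored) but still routes
// the kernel through the sparse compiler.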

#trait_reduction = {
  indexing_maps = [
    affine_map<(i) -> (i)>, // a
    affine_map<(i) -> ()>   // x (scalar out)
  ],
  iterator_types = ["reduction"],
  doc = "x += OPER_i a(i)"
}
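
// The same trait drives every kernel below: a single "reduction" loop over i
// folds a(i) into the scalar output x, with the combining operator supplied by
// the block body of each linalg.generic.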

// An example of vector reductions.
module {

  func @sum_reduction_i32(%arga: tensor<32xi32, #SV>,
                          %argx: tensor<i32>) -> tensor<i32> {
    %0 = linalg.generic #trait_reduction
      ins(%arga: tensor<32xi32, #SV>)
      outs(%argx: tensor<i32>) {
        ^bb(%a: i32, %x: i32):
          %0 = arith.addi %x, %a : i32
          linalg.yield %0 : i32
    } -> tensor<i32>
    return %0 : tensor<i32>
  }

  func @sum_reduction_f32(%arga: tensor<32xf32, #SV>,
                          %argx: tensor<f32>) -> tensor<f32> {
    %0 = linalg.generic #trait_reduction
      ins(%arga: tensor<32xf32, #SV>)
      outs(%argx: tensor<f32>) {
        ^bb(%a: f32, %x: f32):
          %0 = arith.addf %x, %a : f32
          linalg.yield %0 : f32
    } -> tensor<f32>
    return %0 : tensor<f32>
  }

  func @prod_reduction_i32(%arga: tensor<32xi32, #DV>,
                           %argx: tensor<i32>) -> tensor<i32> {
    %0 = linalg.generic #trait_reduction
      ins(%arga: tensor<32xi32, #DV>)
      outs(%argx: tensor<i32>) {
        ^bb(%a: i32, %x: i32):
          %0 = arith.muli %x, %a : i32
          linalg.yield %0 : i32
    } -> tensor<i32>
    return %0 : tensor<i32>
  }

  func @prod_reduction_f32(%arga: tensor<32xf32, #DV>,
                           %argx: tensor<f32>) -> tensor<f32> {
    %0 = linalg.generic #trait_reduction
      ins(%arga: tensor<32xf32, #DV>)
      outs(%argx: tensor<f32>) {
        ^bb(%a: f32, %x: f32):
          %0 = arith.mulf %x, %a : f32
          linalg.yield %0 : f32
    } -> tensor<f32>
    return %0 : tensor<f32>
  }

  func @and_reduction_i32(%arga: tensor<32xi32, #DV>,
                          %argx: tensor<i32>) -> tensor<i32> {
    %0 = linalg.generic #trait_reduction
      ins(%arga: tensor<32xi32, #DV>)
      outs(%argx: tensor<i32>) {
        ^bb(%a: i32, %x: i32):
          %0 = arith.andi %x, %a : i32
          linalg.yield %0 : i32
    } -> tensor<i32>
    return %0 : tensor<i32>
  }

  func @or_reduction_i32(%arga: tensor<32xi32, #SV>,
                         %argx: tensor<i32>) -> tensor<i32> {
    %0 = linalg.generic #trait_reduction
      ins(%arga: tensor<32xi32, #SV>)
      outs(%argx: tensor<i32>) {
        ^bb(%a: i32, %x: i32):
          %0 = arith.ori %x, %a : i32
          linalg.yield %0 : i32
    } -> tensor<i32>
    return %0 : tensor<i32>
  }

  func @xor_reduction_i32(%arga: tensor<32xi32, #SV>,
                          %argx: tensor<i32>) -> tensor<i32> {
    %0 = linalg.generic #trait_reduction
      ins(%arga: tensor<32xi32, #SV>)
      outs(%argx: tensor<i32>) {
        ^bb(%a: i32, %x: i32):
          %0 = arith.xori %x, %a : i32
          linalg.yield %0 : i32
    } -> tensor<i32>
    return %0 : tensor<i32>
  }

  func @dump_i32(%arg0 : memref<i32>) {
    %v = memref.load %arg0[] : memref<i32>
    vector.print %v : i32
    return
  }

  func @dump_f32(%arg0 : memref<f32>) {
    %v = memref.load %arg0[] : memref<f32>
    vector.print %v : f32
    return
  }

  // Driver method that calls each kernel and dumps the result.
  func @entry() {
    %ri = arith.constant dense< 7 > : tensor<i32>
    %rf = arith.constant dense< 2.0 > : tensor<f32>

    %c_0_i32 = arith.constant dense<[
      0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 4, 0, 0, 0,
      0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0
    ]> : tensor<32xi32>

    %c_0_f32 = arith.constant dense<[
      0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0,
      0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0,
      0.0, 0.0, 0.0, 0.0, 2.5, 0.0, 0.0, 0.0,
      2.0, 0.0, 0.0, 0.0, 0.0, 4.0, 0.0, 9.0
    ]> : tensor<32xf32>

    %c_1_i32 = arith.constant dense<[
      1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 3
    ]> : tensor<32xi32>

    %c_1_f32 = arith.constant dense<[
      1.0, 1.0, 1.0, 3.5, 1.0, 1.0, 1.0, 1.0,
      1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0,
      1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0,
      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0
    ]> : tensor<32xf32>

    // Convert constants to annotated tensors.
    %sparse_input_i32 = sparse_tensor.convert %c_0_i32
      : tensor<32xi32> to tensor<32xi32, #SV>
    %sparse_input_f32 = sparse_tensor.convert %c_0_f32
      : tensor<32xf32> to tensor<32xf32, #SV>
    %dense_input_i32 = sparse_tensor.convert %c_1_i32
      : tensor<32xi32> to tensor<32xi32, #DV>
    %dense_input_f32 = sparse_tensor.convert %c_1_f32
      : tensor<32xf32> to tensor<32xf32, #DV>
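
    // Note: the convert ops materialize the annotated tensors at runtime;
    // for #SV this builds compressed storage holding only the nonzero entries.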

    %0 = call @sum_reduction_i32(%sparse_input_i32, %ri)
       : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
    %1 = call @sum_reduction_f32(%sparse_input_f32, %rf)
       : (tensor<32xf32, #SV>, tensor<f32>) -> tensor<f32>
    %2 = call @prod_reduction_i32(%dense_input_i32, %ri)
       : (tensor<32xi32, #DV>, tensor<i32>) -> tensor<i32>
    %3 = call @prod_reduction_f32(%dense_input_f32, %rf)
       : (tensor<32xf32, #DV>, tensor<f32>) -> tensor<f32>
    %4 = call @and_reduction_i32(%dense_input_i32, %ri)
       : (tensor<32xi32, #DV>, tensor<i32>) -> tensor<i32>
    %5 = call @or_reduction_i32(%sparse_input_i32, %ri)
       : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
    %6 = call @xor_reduction_i32(%sparse_input_i32, %ri)
       : (tensor<32xi32, #SV>, tensor<i32>) -> tensor<i32>
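
    //
    // Verify results. The expected values follow from the constants above
    // (each reduction also folds in the initial value 7 resp. 2.0); e.g. the
    // i32 sum is 2+1+4+3+9 plus 7, which gives 26.
    //
    // CHECK: 26
    // CHECK: 27.5
    // CHECK: 3087
    // CHECK: 168
    // CHECK: 1
    // CHECK: 15
    // CHECK: 10
    //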
    %m0 = bufferization.to_memref %0 : memref<i32>
    call @dump_i32(%m0) : (memref<i32>) -> ()
    %m1 = bufferization.to_memref %1 : memref<f32>
    call @dump_f32(%m1) : (memref<f32>) -> ()
    %m2 = bufferization.to_memref %2 : memref<i32>
    call @dump_i32(%m2) : (memref<i32>) -> ()
    %m3 = bufferization.to_memref %3 : memref<f32>
    call @dump_f32(%m3) : (memref<f32>) -> ()
    %m4 = bufferization.to_memref %4 : memref<i32>
    call @dump_i32(%m4) : (memref<i32>) -> ()
    %m5 = bufferization.to_memref %5 : memref<i32>
    call @dump_i32(%m5) : (memref<i32>) -> ()
    %m6 = bufferization.to_memref %6 : memref<i32>
    call @dump_i32(%m6) : (memref<i32>) -> ()

    // Release the resources.
    sparse_tensor.release %sparse_input_i32 : tensor<32xi32, #SV>
    sparse_tensor.release %sparse_input_f32 : tensor<32xf32, #SV>
    sparse_tensor.release %dense_input_i32 : tensor<32xi32, #DV>
    sparse_tensor.release %dense_input_f32 : tensor<32xf32, #DV>
    memref.dealloc %m0 : memref<i32>
    memref.dealloc %m1 : memref<f32>
    memref.dealloc %m2 : memref<i32>
    memref.dealloc %m3 : memref<f32>
    memref.dealloc %m4 : memref<i32>
    memref.dealloc %m5 : memref<i32>
    memref.dealloc %m6 : memref<i32>

    return
  }
}