2 * Copyright (c) 2019, Alliance for Open Media. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
16 #include "config/aom_dsp_rtcd.h"
18 #include "test/acm_random.h"
19 #include "test/register_state_check.h"
20 #include "test/util.h"
24 using libaom_test::ACMRandom
;
// Function-pointer type for the Hadamard functions exercised by this file.
// The parameter list begins with the source pointer and its stride; the
// remaining parameter(s) and the end of the alias are not visible in this
// excerpt.
26 using HadamardFunc
= void (*)(const int16_t *a
, ptrdiff_t a_stride
,
28 // Low precision version of Hadamard Transform
// Function-pointer type for the low-precision functions. As with HadamardFunc,
// only the leading source-pointer and stride parameters are visible in this
// excerpt; the rest of the signature is not shown.
29 using HadamardLPFunc
= void (*)(const int16_t *a
, ptrdiff_t a_stride
,
// One pass of the 4-point reference transform.
//
// The visible stage reads four inputs spaced four elements apart
// (a[0], a[4], a[8], a[12]) and stores, for each adjacent pair, the sum and the
// difference shifted right by one bit into b.
//
// NOTE(review): the declaration of b, the end of this loop and the statements
// that write `out` are not visible in this excerpt.
32 template <typename OutputType
>
33 void Hadamard4x4(const OutputType
*a
, OutputType
*out
) {
35 for (int i
= 0; i
< 4; i
+= 2) {
36 b
[i
+ 0] = (a
[i
* 4] + a
[(i
+ 1) * 4]) >> 1;
37 b
[i
+ 1] = (a
[i
* 4] - a
[(i
+ 1) * 4]) >> 1;
// Reference 4x4 Hadamard transform built from two Hadamard4x4 passes.
//
//   a        - first sample of the 4x4 source block.
//   a_stride - distance, in elements, between consecutive source rows.
//   b        - destination; the i-th call of the second pass is handed
//              b + i * 4.
//
// The source is first widened into a dense 4-wide scratch array, so that the
// stride-4 reads made by Hadamard4x4 starting at `input + i` walk column i.
template <typename OutputType>
void ReferenceHadamard4x4(const int16_t *a, int a_stride, OutputType *b) {
  // Both scratch arrays must be OutputType: the Hadamard4x4 calls below deduce
  // a single template argument from the (input, output) pointer pair.
  OutputType input[16];
  OutputType buf[16];

  for (int row = 0; row < 4; ++row) {
    const int16_t *const src_row = a + row * a_stride;
    for (int col = 0; col < 4; ++col) {
      input[row * 4 + col] = static_cast<OutputType>(src_row[col]);
    }
  }

  // First pass: input -> buf. Second pass: buf -> b.
  for (int i = 0; i < 4; ++i) Hadamard4x4(input + i, buf + i * 4);
  for (int i = 0; i < 4; ++i) Hadamard4x4(buf + i, b + i * 4);
}
// One pass of the 8-point reference transform.
//
// Stage 1 reads eight inputs spaced eight elements apart (a[0], a[8], ...,
// a[56]) and stores the sum and difference of each adjacent pair into b.
// Stage 2 combines b in groups of four into c: elements two apart are added
// for c[i + 0] / c[i + 1] and subtracted for c[i + 2] / c[i + 3].
//
// NOTE(review): the declarations of b and c, the loop terminators and the
// statements that produce `out` are not visible in this excerpt.
59 template <typename OutputType
>
60 void HadamardLoop(const OutputType
*a
, OutputType
*out
) {
62 for (int i
= 0; i
< 8; i
+= 2) {
63 b
[i
+ 0] = a
[i
* 8] + a
[(i
+ 1) * 8];
64 b
[i
+ 1] = a
[i
* 8] - a
[(i
+ 1) * 8];
67 for (int i
= 0; i
< 8; i
+= 4) {
68 c
[i
+ 0] = b
[i
+ 0] + b
[i
+ 2];
69 c
[i
+ 1] = b
[i
+ 1] + b
[i
+ 3];
70 c
[i
+ 2] = b
[i
+ 0] - b
[i
+ 2];
71 c
[i
+ 3] = b
[i
+ 1] - b
[i
+ 3];
// Reference 8x8 Hadamard transform built from two HadamardLoop passes.
//
//   a        - first sample of the 8x8 source block.
//   a_stride - distance, in elements, between consecutive source rows.
//   b        - destination; the i-th call of the second pass is handed
//              b + i * 8.
//
// The source is first widened into a dense 8-wide scratch array, so that the
// stride-8 reads made by HadamardLoop starting at `input + i` walk column i.
template <typename OutputType>
void ReferenceHadamard8x8(const int16_t *a, int a_stride, OutputType *b) {
  // Both scratch arrays must be OutputType: the HadamardLoop calls below deduce
  // a single template argument from the (input, output) pointer pair.
  OutputType input[64];
  OutputType buf[64];

  for (int row = 0; row < 8; ++row) {
    const int16_t *const src_row = a + row * a_stride;
    for (int col = 0; col < 8; ++col) {
      input[row * 8 + col] = static_cast<OutputType>(src_row[col]);
    }
  }

  // First pass: input -> buf. Second pass: buf -> b.
  for (int i = 0; i < 8; ++i) HadamardLoop(input + i, buf + i * 8);
  for (int i = 0; i < 8; ++i) HadamardLoop(buf + i, b + i * 8);
}
// Reference 16x16 Hadamard transform assembled from four 8x8 transforms.
//
// The four 8x8 quadrants of the source (column offsets 0 and 8, row offsets 0
// and 8 * a_stride) are transformed into b at offsets 0, 64, 128 and 192.
// A 64-iteration loop then loads b[0], b[64], b[128] and b[192] and forms the
// pairwise sums and differences, each shifted right by one bit.
//
// NOTE(review): the statements that store the combined values are not visible
// in this excerpt; since every iteration reads the same four indices,
// presumably b is also advanced there -- confirm against the full file.
// NOTE(review): the block comment opened on the "The source is a 16x16 block"
// line is not terminated within the visible lines.
96 template <typename OutputType
>
97 void ReferenceHadamard16x16(const int16_t *a
, int a_stride
, OutputType
*b
) {
98 /* The source is a 16x16 block. The destination is rearranged to 8x32.
100 ReferenceHadamard8x8(a
+ 0 + 0 * a_stride
, a_stride
, b
+ 0);
101 ReferenceHadamard8x8(a
+ 8 + 0 * a_stride
, a_stride
, b
+ 64);
102 ReferenceHadamard8x8(a
+ 0 + 8 * a_stride
, a_stride
, b
+ 128);
103 ReferenceHadamard8x8(a
+ 8 + 8 * a_stride
, a_stride
, b
+ 192);
105 /* Overlay the 8x8 blocks and combine. */
106 for (int i
= 0; i
< 64; ++i
) {
107 /* 8x8 steps the range up to 15 bits. */
108 const OutputType a0
= b
[0];
109 const OutputType a1
= b
[64];
110 const OutputType a2
= b
[128];
111 const OutputType a3
= b
[192];
113 /* Prevent the result from escaping int16_t. */
114 const OutputType b0
= (a0
+ a1
) >> 1;
115 const OutputType b1
= (a0
- a1
) >> 1;
116 const OutputType b2
= (a2
+ a3
) >> 1;
117 const OutputType b3
= (a2
- a3
) >> 1;
119 /* Store a 16 bit value. */
// Reference 32x32 Hadamard transform assembled from four 16x16 transforms.
//
// The four 16x16 quadrants of the source (column offsets 0 and 16, row offsets
// 0 and 16 * a_stride) are transformed into b at offsets 0, 256, 512 and 768.
// A 256-iteration loop then loads b[0], b[256], b[512] and b[768] and forms
// the pairwise sums and differences, each shifted right by two bits.
//
// NOTE(review): the statements that store the combined values are not visible
// in this excerpt; since every iteration reads the same four indices,
// presumably b is also advanced there -- confirm against the full file.
129 template <typename OutputType
>
130 void ReferenceHadamard32x32(const int16_t *a
, int a_stride
, OutputType
*b
) {
131 ReferenceHadamard16x16(a
+ 0 + 0 * a_stride
, a_stride
, b
+ 0);
132 ReferenceHadamard16x16(a
+ 16 + 0 * a_stride
, a_stride
, b
+ 256);
133 ReferenceHadamard16x16(a
+ 0 + 16 * a_stride
, a_stride
, b
+ 512);
134 ReferenceHadamard16x16(a
+ 16 + 16 * a_stride
, a_stride
, b
+ 768);
136 for (int i
= 0; i
< 256; ++i
) {
137 const OutputType a0
= b
[0];
138 const OutputType a1
= b
[256];
139 const OutputType a2
= b
[512];
140 const OutputType a3
= b
[768];
142 const OutputType b0
= (a0
+ a1
) >> 2;
143 const OutputType b1
= (a0
- a1
) >> 2;
144 const OutputType b2
= (a2
+ a3
) >> 2;
145 const OutputType b3
= (a2
- a3
) >> 2;
156 template <typename OutputType
>
157 void ReferenceHadamard(const int16_t *a
, int a_stride
, OutputType
*b
, int bwh
) {
159 ReferenceHadamard32x32(a
, a_stride
, b
);
160 } else if (bwh
== 16) {
161 ReferenceHadamard16x16(a
, a_stride
, b
);
162 } else if (bwh
== 8) {
163 ReferenceHadamard8x8(a
, a_stride
, b
);
164 } else if (bwh
== 4) {
165 ReferenceHadamard4x4(a
, a_stride
, b
);
167 GTEST_FAIL() << "Invalid Hadamard transform size " << bwh
<< std::endl
;
// Test parameter that pairs a function with the block size it handles, so one
// parameterized fixture can be instantiated for every transform size.
template <typename HadamardFuncType>
struct FuncWithSize {
  // f - function to store in `func`; s - value to store in `block_size`.
  FuncWithSize(HadamardFuncType f, int s) : func(f), block_size(s) {}

  HadamardFuncType func;  // Function supplied at construction.
  int block_size;         // Size supplied at construction (4, 8, 16 or 32 in
                          // the instantiations in this file).
};
178 using HadamardFuncWithSize
= FuncWithSize
<HadamardFunc
>;
179 using HadamardLPFuncWithSize
= FuncWithSize
<HadamardLPFunc
>;
181 template <typename HadamardFuncType
>
182 std::ostream
&operator<<(std::ostream
&os
,
183 const FuncWithSize
<HadamardFuncType
> &hfs
) {
184 return os
<< "block size: " << hfs
.block_size
;
// Parameterized fixture shared by the Hadamard tests in this file.
//
//   OutputType       - element type of the output buffers.
//   HadamardFuncType - type of the function under test (stored in h_func_).
//
// NOTE(review): several lines of this class are not visible in this excerpt
// (access specifiers, closing braces, the header of the stride test, the tail
// of the printf call and the declarations of bwh_, block_size_ and rnd_).
187 template <typename OutputType
, typename HadamardFuncType
>
188 class HadamardTestBase
189 : public ::testing::TestWithParam
<FuncWithSize
<HadamardFuncType
>> {
// Stores the function and block size from the parameter and derives the
// number of samples per block (bwh_ * bwh_).
191 explicit HadamardTestBase(const FuncWithSize
<HadamardFuncType
> &func_param
) {
192 h_func_
= func_param
.func
;
193 bwh_
= func_param
.block_size
;
194 block_size_
= bwh_
* bwh_
;
// Re-seeds rnd_ with ACMRandom::DeterministicSeed().
197 virtual void SetUp() { rnd_
.Reset(ACMRandom::DeterministicSeed()); }
// Supplies one input sample; implemented by the derived fixtures.
199 virtual int16_t Rand() = 0;
// Fills block_size_ inputs from Rand(), runs both ReferenceHadamard and
// h_func_ with a stride of bwh_, sorts the first block_size_ outputs of each
// and compares the full buffers with memcmp. Both buffers are zeroed first.
201 void CompareReferenceRandom() {
202 const int kMaxBlockSize
= 32 * 32;
203 DECLARE_ALIGNED(16, int16_t, a
[kMaxBlockSize
]);
204 DECLARE_ALIGNED(16, OutputType
, b
[kMaxBlockSize
]);
205 memset(a
, 0, sizeof(a
));
206 memset(b
, 0, sizeof(b
));
208 OutputType b_ref
[kMaxBlockSize
];
209 memset(b_ref
, 0, sizeof(b_ref
));
211 for (int i
= 0; i
< block_size_
; ++i
) a
[i
] = Rand();
213 ReferenceHadamard(a
, bwh_
, b_ref
, bwh_
);
214 API_REGISTER_STATE_CHECK(h_func_(a
, bwh_
, b
));
216 // The order of the output is not important. Sort before checking.
217 std::sort(b
, b
+ block_size_
);
218 std::sort(b_ref
, b_ref
+ block_size_
);
219 EXPECT_EQ(memcmp(b
, b_ref
, sizeof(b
)), 0);
// NOTE(review): the lines below form the body of a second test whose header is
// not visible in this excerpt (presumably the VaryStride() method that the
// TEST_P bodies call -- confirm). It fills block_size_ * 8 inputs from Rand()
// and repeats the reference-versus-h_func_ comparison for strides
// 8, 16, ..., 56, zeroing both output buffers before each run.
223 const int kMaxBlockSize
= 32 * 32;
224 DECLARE_ALIGNED(16, int16_t, a
[kMaxBlockSize
* 8]);
225 DECLARE_ALIGNED(16, OutputType
, b
[kMaxBlockSize
]);
226 memset(a
, 0, sizeof(a
));
227 for (int i
= 0; i
< block_size_
* 8; ++i
) a
[i
] = Rand();
229 OutputType b_ref
[kMaxBlockSize
];
230 for (int i
= 8; i
< 64; i
+= 8) {
231 memset(b
, 0, sizeof(b
));
232 memset(b_ref
, 0, sizeof(b_ref
));
234 ReferenceHadamard(a
, i
, b_ref
, bwh_
);
235 API_REGISTER_STATE_CHECK(h_func_(a
, i
, b
));
237 // The order of the output is not important. Sort before checking.
238 std::sort(b
, b
+ block_size_
);
239 std::sort(b_ref
, b_ref
+ block_size_
);
240 EXPECT_EQ(0, memcmp(b
, b_ref
, sizeof(b
)));
// Calls h_func_ `times` times on a fixed input (every byte of `input` is set
// to 1 by memset) and prints the elapsed time reported by aom_usec_timer.
244 void SpeedTest(int times
) {
245 const int kMaxBlockSize
= 32 * 32;
246 DECLARE_ALIGNED(16, int16_t, input
[kMaxBlockSize
]);
247 DECLARE_ALIGNED(16, OutputType
, output
[kMaxBlockSize
]);
248 memset(input
, 1, sizeof(input
));
249 memset(output
, 0, sizeof(output
));
251 aom_usec_timer timer
;
252 aom_usec_timer_start(&timer
);
253 for (int i
= 0; i
< times
; ++i
) {
254 h_func_(input
, bwh_
, output
);
256 aom_usec_timer_mark(&timer
);
258 const int elapsed_time
= static_cast<int>(aom_usec_timer_elapsed(&timer
));
259 printf("Hadamard%dx%d[%12d runs]: %d us\n", bwh_
, bwh_
, times
,
// Function under test, copied from the test parameter by the constructor.
268 HadamardFuncType h_func_
;
271 class HadamardLowbdTest
: public HadamardTestBase
<tran_low_t
, HadamardFunc
> {
273 HadamardLowbdTest() : HadamardTestBase(GetParam()) {}
274 virtual int16_t Rand() { return rnd_
.Rand9Signed(); }
277 TEST_P(HadamardLowbdTest
, CompareReferenceRandom
) { CompareReferenceRandom(); }
279 TEST_P(HadamardLowbdTest
, VaryStride
) { VaryStride(); }
281 TEST_P(HadamardLowbdTest
, DISABLED_SpeedTest
) { SpeedTest(1000000); }
283 INSTANTIATE_TEST_SUITE_P(
284 C
, HadamardLowbdTest
,
285 ::testing::Values(HadamardFuncWithSize(&aom_hadamard_4x4_c
, 4),
286 HadamardFuncWithSize(&aom_hadamard_8x8_c
, 8),
287 HadamardFuncWithSize(&aom_hadamard_16x16_c
, 16),
288 HadamardFuncWithSize(&aom_hadamard_32x32_c
, 32)));
291 INSTANTIATE_TEST_SUITE_P(
292 SSE2
, HadamardLowbdTest
,
293 ::testing::Values(HadamardFuncWithSize(&aom_hadamard_8x8_sse2
, 8),
294 HadamardFuncWithSize(&aom_hadamard_16x16_sse2
, 16),
295 HadamardFuncWithSize(&aom_hadamard_32x32_sse2
, 32)));
299 INSTANTIATE_TEST_SUITE_P(
300 AVX2
, HadamardLowbdTest
,
301 ::testing::Values(HadamardFuncWithSize(&aom_hadamard_16x16_avx2
, 16),
302 HadamardFuncWithSize(&aom_hadamard_32x32_avx2
, 32)));
306 INSTANTIATE_TEST_SUITE_P(
307 NEON
, HadamardLowbdTest
,
308 ::testing::Values(HadamardFuncWithSize(&aom_hadamard_8x8_neon
, 8),
309 HadamardFuncWithSize(&aom_hadamard_16x16_neon
, 16)));
312 // Tests for low precision
313 class HadamardLowbdLPTest
: public HadamardTestBase
<int16_t, HadamardLPFunc
> {
315 HadamardLowbdLPTest() : HadamardTestBase(GetParam()) {}
316 virtual int16_t Rand() { return rnd_
.Rand9Signed(); }
319 TEST_P(HadamardLowbdLPTest
, CompareReferenceRandom
) {
320 CompareReferenceRandom();
323 TEST_P(HadamardLowbdLPTest
, VaryStride
) { VaryStride(); }
325 TEST_P(HadamardLowbdLPTest
, DISABLED_SpeedTest
) { SpeedTest(1000000); }
327 INSTANTIATE_TEST_SUITE_P(
328 C
, HadamardLowbdLPTest
,
329 ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_c
, 8),
330 HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_c
, 16)));
333 INSTANTIATE_TEST_SUITE_P(
334 SSE2
, HadamardLowbdLPTest
,
335 ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_sse2
, 8),
336 HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_sse2
, 16)));
340 INSTANTIATE_TEST_SUITE_P(
341 AVX2
, HadamardLowbdLPTest
,
342 ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_avx2
, 16)));
346 INSTANTIATE_TEST_SUITE_P(
347 NEON
, HadamardLowbdLPTest
,
348 ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_neon
, 8),
349 HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_neon
, 16)));