hadamard: Add 4x4 test.
[aom.git] / test / hadamard_test.cc
blobdfabaeb28b0ebe55c94fad7efc81c9083d565101
1 /*
2 * Copyright (c) 2019, Alliance for Open Media. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
11 #include <algorithm>
12 #include <ostream>
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
16 #include "config/aom_dsp_rtcd.h"
18 #include "test/acm_random.h"
19 #include "test/register_state_check.h"
20 #include "test/util.h"
22 namespace {
24 using libaom_test::ACMRandom;
26 using HadamardFunc = void (*)(const int16_t *a, ptrdiff_t a_stride,
27 tran_low_t *b);
28 // Low precision version of Hadamard Transform
29 using HadamardLPFunc = void (*)(const int16_t *a, ptrdiff_t a_stride,
30 int16_t *b);
// One 4-point pass of the reference 4x4 Hadamard transform.
//
// Reads four inputs spaced 4 elements apart (a[0], a[4], a[8], a[12]), i.e.
// one column of a row-major 4x4 block, and writes four contiguous outputs
// out[0..3]. The first butterfly stage shifts its sums/differences right by
// one bit before the second stage combines them.
template <typename OutputType>
void Hadamard4x4(const OutputType *a, OutputType *out) {
  // Only four intermediates are produced, so four slots are enough.
  OutputType b[4];
  for (int i = 0; i < 4; i += 2) {
    b[i + 0] = (a[i * 4] + a[(i + 1) * 4]) >> 1;
    b[i + 1] = (a[i * 4] - a[(i + 1) * 4]) >> 1;
  }

  out[0] = b[0] + b[2];
  out[1] = b[1] + b[3];
  out[2] = b[0] - b[2];
  out[3] = b[1] - b[3];
}
// Reference 4x4 Hadamard transform.
//
// a:        top-left sample of the 4x4 input block.
// a_stride: distance, in elements, between vertically adjacent samples.
// b:        receives the 16 output coefficients.
template <typename OutputType>
void ReferenceHadamard4x4(const int16_t *a, int a_stride, OutputType *b) {
  OutputType input[16];
  OutputType buf[16];
  // Gather the strided block into a dense 4x4 array of the output type.
  for (int i = 0; i < 4; ++i) {
    for (int j = 0; j < 4; ++j) {
      input[i * 4 + j] = static_cast<OutputType>(a[i * a_stride + j]);
    }
  }
  // First pass: transform column i of the input into row i of buf.
  for (int i = 0; i < 4; ++i) Hadamard4x4(input + i, buf + i * 4);
  // Second pass: transform column i of buf into row i of the output.
  for (int i = 0; i < 4; ++i) Hadamard4x4(buf + i, b + i * 4);
}
// One 8-point pass of the reference 8x8 Hadamard transform.
//
// Reads eight inputs spaced 8 elements apart (a[0], a[8], ..., a[56]), i.e.
// one column of a row-major 8x8 block, and writes eight contiguous outputs
// out[0..7]. No scaling is applied. The outputs are stored in a permuted
// order; the tests sort coefficients before comparing, so only the set of
// values matters.
template <typename OutputType>
void HadamardLoop(const OutputType *a, OutputType *out) {
  // Stage 1: butterflies on adjacent input pairs.
  OutputType b[8];
  for (int i = 0; i < 8; i += 2) {
    b[i + 0] = a[i * 8] + a[(i + 1) * 8];
    b[i + 1] = a[i * 8] - a[(i + 1) * 8];
  }

  // Stage 2: butterflies within each half of b.
  OutputType c[8];
  for (int i = 0; i < 8; i += 4) {
    c[i + 0] = b[i + 0] + b[i + 2];
    c[i + 1] = b[i + 1] + b[i + 3];
    c[i + 2] = b[i + 0] - b[i + 2];
    c[i + 3] = b[i + 1] - b[i + 3];
  }

  // Stage 3: combine the two halves of c.
  out[0] = c[0] + c[4];
  out[1] = c[2] - c[6];
  out[2] = c[0] - c[4];
  out[3] = c[2] + c[6];
  out[4] = c[3] + c[7];
  out[5] = c[3] - c[7];
  out[6] = c[1] - c[5];
  out[7] = c[1] + c[5];
}
// Reference 8x8 Hadamard transform.
//
// a:        top-left sample of the 8x8 input block.
// a_stride: distance, in elements, between vertically adjacent samples.
// b:        receives the 64 output coefficients.
template <typename OutputType>
void ReferenceHadamard8x8(const int16_t *a, int a_stride, OutputType *b) {
  OutputType input[64];
  OutputType buf[64];
  // Gather the strided block into a dense 8x8 array of the output type.
  for (int i = 0; i < 8; ++i) {
    for (int j = 0; j < 8; ++j) {
      input[i * 8 + j] = static_cast<OutputType>(a[i * a_stride + j]);
    }
  }
  // First pass: transform column i of the input into row i of buf.
  for (int i = 0; i < 8; ++i) HadamardLoop(input + i, buf + i * 8);
  // Second pass: transform column i of buf into row i of the output.
  for (int i = 0; i < 8; ++i) HadamardLoop(buf + i, b + i * 8);
}
// Reference 16x16 Hadamard transform, built from four 8x8 transforms.
//
// a:        top-left sample of the 16x16 input block.
// a_stride: distance, in elements, between vertically adjacent samples.
// b:        receives the 256 output coefficients.
template <typename OutputType>
void ReferenceHadamard16x16(const int16_t *a, int a_stride, OutputType *b) {
  /* The source is a 16x16 block. The destination is rearranged to 8x32.
   * Input is 9 bit. */
  ReferenceHadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
  ReferenceHadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
  ReferenceHadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
  ReferenceHadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);

  /* Overlay the 8x8 blocks and combine. */
  for (int i = 0; i < 64; ++i) {
    /* 8x8 steps the range up to 15 bits. */
    const OutputType a0 = b[i];
    const OutputType a1 = b[i + 64];
    const OutputType a2 = b[i + 128];
    const OutputType a3 = b[i + 192];

    /* Prevent the result from escaping int16_t. */
    const OutputType b0 = (a0 + a1) >> 1;
    const OutputType b1 = (a0 - a1) >> 1;
    const OutputType b2 = (a2 + a3) >> 1;
    const OutputType b3 = (a2 - a3) >> 1;

    /* Store a 16 bit value. */
    b[i] = b0 + b2;
    b[i + 64] = b1 + b3;
    b[i + 128] = b0 - b2;
    b[i + 192] = b1 - b3;
  }
}
// Reference 32x32 Hadamard transform, built from four 16x16 transforms.
//
// a:        top-left sample of the 32x32 input block.
// a_stride: distance, in elements, between vertically adjacent samples.
// b:        receives the 1024 output coefficients.
template <typename OutputType>
void ReferenceHadamard32x32(const int16_t *a, int a_stride, OutputType *b) {
  // Transform each 16x16 quadrant into its own 256-coefficient slice of b.
  ReferenceHadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0);
  ReferenceHadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256);
  ReferenceHadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512);
  ReferenceHadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768);

  // Overlay the four slices and combine them with one more butterfly stage.
  for (int i = 0; i < 256; ++i) {
    const OutputType a0 = b[i];
    const OutputType a1 = b[i + 256];
    const OutputType a2 = b[i + 512];
    const OutputType a3 = b[i + 768];

    // The intermediate sums/differences are scaled down by two bits.
    const OutputType b0 = (a0 + a1) >> 2;
    const OutputType b1 = (a0 - a1) >> 2;
    const OutputType b2 = (a2 + a3) >> 2;
    const OutputType b3 = (a2 - a3) >> 2;

    b[i] = b0 + b2;
    b[i + 256] = b1 + b3;
    b[i + 512] = b0 - b2;
    b[i + 768] = b1 - b3;
  }
}
156 template <typename OutputType>
157 void ReferenceHadamard(const int16_t *a, int a_stride, OutputType *b, int bwh) {
158 if (bwh == 32) {
159 ReferenceHadamard32x32(a, a_stride, b);
160 } else if (bwh == 16) {
161 ReferenceHadamard16x16(a, a_stride, b);
162 } else if (bwh == 8) {
163 ReferenceHadamard8x8(a, a_stride, b);
164 } else if (bwh == 4) {
165 ReferenceHadamard4x4(a, a_stride, b);
166 } else {
167 GTEST_FAIL() << "Invalid Hadamard transform size " << bwh << std::endl;
// Test parameter: a transform function pointer paired with the side length
// of the square block it operates on.
template <typename HadamardFuncType>
struct FuncWithSize {
  FuncWithSize(HadamardFuncType f, int s) : func(f), block_size(s) {}
  HadamardFuncType func;  // Function under test.
  int block_size;         // Block width/height (e.g. 8 for an 8x8 transform).
};
178 using HadamardFuncWithSize = FuncWithSize<HadamardFunc>;
179 using HadamardLPFuncWithSize = FuncWithSize<HadamardLPFunc>;
181 template <typename HadamardFuncType>
182 std::ostream &operator<<(std::ostream &os,
183 const FuncWithSize<HadamardFuncType> &hfs) {
184 return os << "block size: " << hfs.block_size;
187 template <typename OutputType, typename HadamardFuncType>
188 class HadamardTestBase
189 : public ::testing::TestWithParam<FuncWithSize<HadamardFuncType>> {
190 public:
191 explicit HadamardTestBase(const FuncWithSize<HadamardFuncType> &func_param) {
192 h_func_ = func_param.func;
193 bwh_ = func_param.block_size;
194 block_size_ = bwh_ * bwh_;
197 virtual void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
199 virtual int16_t Rand() = 0;
201 void CompareReferenceRandom() {
202 const int kMaxBlockSize = 32 * 32;
203 DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]);
204 DECLARE_ALIGNED(16, OutputType, b[kMaxBlockSize]);
205 memset(a, 0, sizeof(a));
206 memset(b, 0, sizeof(b));
208 OutputType b_ref[kMaxBlockSize];
209 memset(b_ref, 0, sizeof(b_ref));
211 for (int i = 0; i < block_size_; ++i) a[i] = Rand();
213 ReferenceHadamard(a, bwh_, b_ref, bwh_);
214 API_REGISTER_STATE_CHECK(h_func_(a, bwh_, b));
216 // The order of the output is not important. Sort before checking.
217 std::sort(b, b + block_size_);
218 std::sort(b_ref, b_ref + block_size_);
219 EXPECT_EQ(memcmp(b, b_ref, sizeof(b)), 0);
222 void VaryStride() {
223 const int kMaxBlockSize = 32 * 32;
224 DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]);
225 DECLARE_ALIGNED(16, OutputType, b[kMaxBlockSize]);
226 memset(a, 0, sizeof(a));
227 for (int i = 0; i < block_size_ * 8; ++i) a[i] = Rand();
229 OutputType b_ref[kMaxBlockSize];
230 for (int i = 8; i < 64; i += 8) {
231 memset(b, 0, sizeof(b));
232 memset(b_ref, 0, sizeof(b_ref));
234 ReferenceHadamard(a, i, b_ref, bwh_);
235 API_REGISTER_STATE_CHECK(h_func_(a, i, b));
237 // The order of the output is not important. Sort before checking.
238 std::sort(b, b + block_size_);
239 std::sort(b_ref, b_ref + block_size_);
240 EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
244 void SpeedTest(int times) {
245 const int kMaxBlockSize = 32 * 32;
246 DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]);
247 DECLARE_ALIGNED(16, OutputType, output[kMaxBlockSize]);
248 memset(input, 1, sizeof(input));
249 memset(output, 0, sizeof(output));
251 aom_usec_timer timer;
252 aom_usec_timer_start(&timer);
253 for (int i = 0; i < times; ++i) {
254 h_func_(input, bwh_, output);
256 aom_usec_timer_mark(&timer);
258 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
259 printf("Hadamard%dx%d[%12d runs]: %d us\n", bwh_, bwh_, times,
260 elapsed_time);
263 ACMRandom rnd_;
265 private:
266 int bwh_;
267 int block_size_;
268 HadamardFuncType h_func_;
271 class HadamardLowbdTest : public HadamardTestBase<tran_low_t, HadamardFunc> {
272 public:
273 HadamardLowbdTest() : HadamardTestBase(GetParam()) {}
274 virtual int16_t Rand() { return rnd_.Rand9Signed(); }
277 TEST_P(HadamardLowbdTest, CompareReferenceRandom) { CompareReferenceRandom(); }
279 TEST_P(HadamardLowbdTest, VaryStride) { VaryStride(); }
281 TEST_P(HadamardLowbdTest, DISABLED_SpeedTest) { SpeedTest(1000000); }
283 INSTANTIATE_TEST_SUITE_P(
284 C, HadamardLowbdTest,
285 ::testing::Values(HadamardFuncWithSize(&aom_hadamard_4x4_c, 4),
286 HadamardFuncWithSize(&aom_hadamard_8x8_c, 8),
287 HadamardFuncWithSize(&aom_hadamard_16x16_c, 16),
288 HadamardFuncWithSize(&aom_hadamard_32x32_c, 32)));
290 #if HAVE_SSE2
291 INSTANTIATE_TEST_SUITE_P(
292 SSE2, HadamardLowbdTest,
293 ::testing::Values(HadamardFuncWithSize(&aom_hadamard_8x8_sse2, 8),
294 HadamardFuncWithSize(&aom_hadamard_16x16_sse2, 16),
295 HadamardFuncWithSize(&aom_hadamard_32x32_sse2, 32)));
296 #endif // HAVE_SSE2
298 #if HAVE_AVX2
299 INSTANTIATE_TEST_SUITE_P(
300 AVX2, HadamardLowbdTest,
301 ::testing::Values(HadamardFuncWithSize(&aom_hadamard_16x16_avx2, 16),
302 HadamardFuncWithSize(&aom_hadamard_32x32_avx2, 32)));
303 #endif // HAVE_AVX2
305 #if HAVE_NEON
306 INSTANTIATE_TEST_SUITE_P(
307 NEON, HadamardLowbdTest,
308 ::testing::Values(HadamardFuncWithSize(&aom_hadamard_8x8_neon, 8),
309 HadamardFuncWithSize(&aom_hadamard_16x16_neon, 16)));
310 #endif // HAVE_NEON
312 // Tests for low precision
313 class HadamardLowbdLPTest : public HadamardTestBase<int16_t, HadamardLPFunc> {
314 public:
315 HadamardLowbdLPTest() : HadamardTestBase(GetParam()) {}
316 virtual int16_t Rand() { return rnd_.Rand9Signed(); }
319 TEST_P(HadamardLowbdLPTest, CompareReferenceRandom) {
320 CompareReferenceRandom();
323 TEST_P(HadamardLowbdLPTest, VaryStride) { VaryStride(); }
325 TEST_P(HadamardLowbdLPTest, DISABLED_SpeedTest) { SpeedTest(1000000); }
327 INSTANTIATE_TEST_SUITE_P(
328 C, HadamardLowbdLPTest,
329 ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_c, 8),
330 HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_c, 16)));
332 #if HAVE_SSE2
333 INSTANTIATE_TEST_SUITE_P(
334 SSE2, HadamardLowbdLPTest,
335 ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_sse2, 8),
336 HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_sse2, 16)));
337 #endif // HAVE_SSE2
339 #if HAVE_AVX2
340 INSTANTIATE_TEST_SUITE_P(
341 AVX2, HadamardLowbdLPTest,
342 ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_avx2, 16)));
343 #endif // HAVE_AVX2
345 #if HAVE_NEON
346 INSTANTIATE_TEST_SUITE_P(
347 NEON, HadamardLowbdLPTest,
348 ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_neon, 8),
349 HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_neon, 16)));
350 #endif // HAVE_NEON
352 } // namespace