2 * VVC inter prediction DSP
4 * Copyright (C) 2022 Nuo Mi
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libavcodec/h26x/h2656_inter_template.c"
24 #include "libavutil/imgutils.h"
/* Used below to size the intermediate buffer tmp_array. */
26 #define TMP_STRIDE EDGE_EMU_BUFFER_STRIDE
/*
 * Common worker behind the put_*_scaled entry points.
 *
 * What the visible lines do:
 *  - Pass 1 (loop over width): output column i maps to the source position
 *    tx = _x + dx * i. A filter row is taken from hf at
 *    av_zero_extend(tx >> shift1, shift2) * taps, and src_height + extra
 *    filtered samples, shifted down by BIT_DEPTH - 8, are stored in tmp.
 *  - Pass 2 (loop over height): the same selection is made from _y / dy / vf,
 *    tmp is re-based to tmp_array + extra_before, and each filtered value
 *    (>> 6) is rounded with offset, shifted by shift and clipped into dst.
 *
 * is_chroma selects the tap count, the number of extra rows and how the
 * position bits are split between shift1 and shift2.
 *
 * NOTE(review): this listing skips embedded line numbers 57-61, 71 and 73-83
 * and shows no braces. The pointer advances (src_stride, dst_stride,
 * TMP_STRIDE) and every use of is_uni / dst16 are therefore not visible here;
 * confirm them against the complete file.
 */
27 static void av_always_inline
FUNC(put_scaled
)(uint8_t *_dst
, const ptrdiff_t _dst_stride
,
28 const uint8_t *const _src
, ptrdiff_t _src_stride
, const int src_height
,
29 const int _x
, const int _y
, const int dx
, const int dy
,
30 const int height
, const int8_t *hf
, const int8_t *vf
, const int width
, const int is_uni
, const int is_chroma
)
32 int16_t tmp_array
[TMP_STRIDE
* MAX_PB_SIZE
];
33 int16_t *tmp
= tmp_array
;
/* The same destination memory viewed as pixels and as int16_t. */
34 pixel
*dst
= (pixel
*)_dst
;
35 int16_t *dst16
= (int16_t*)_dst
;
/* Byte strides converted to units of pixel. */
36 const ptrdiff_t dst_stride
= _dst_stride
/ sizeof(pixel
);
37 const ptrdiff_t src_stride
= _src_stride
/ sizeof(pixel
);
/* offset is half of 1 << shift, so (val + offset) >> shift rounds to nearest. */
38 const int shift
= FFMAX(2, 14 - BIT_DEPTH
);
39 const int offset
= 1 << (shift
- 1);
40 const int taps
= is_chroma
? VVC_INTER_CHROMA_TAPS
: VVC_INTER_LUMA_TAPS
;
41 const int extra
= is_chroma
? CHROMA_EXTRA
: LUMA_EXTRA
;
42 const int extra_before
= is_chroma
? CHROMA_EXTRA_BEFORE
: LUMA_EXTRA_BEFORE
;
/* shift1 + shift2 is 10 for both luma (6 + 4) and chroma (5 + 5). */
43 const int shift1
= 6 - is_chroma
;
44 const int shift2
= 4 + is_chroma
;
45 const int x0
= SCALED_INT(_x
);
46 const int y0
= SCALED_INT(_y
);
/* Pass 1: horizontal filtering, one output column per iteration. */
48 for (int i
= 0; i
< width
; i
++) {
49 const int tx
= _x
+ dx
* i
;
/*
 * x and filter are not referenced by name in the visible lines; presumably
 * CHROMA_FILTER / LUMA_FILTER pick them up by name -- TODO confirm.
 */
50 const int x
= SCALED_INT(tx
) - x0
;
51 const int mx
= av_zero_extend(tx
>> shift1
, shift2
);
52 const int8_t *filter
= hf
+ mx
* taps
;
/* Start extra_before rows above _src. */
53 const pixel
*src
= (pixel
*)_src
- extra_before
* src_stride
;
55 for (int j
= 0; j
< src_height
+ extra
; j
++) {
56 tmp
[j
] = (is_chroma
? CHROMA_FILTER(src
, 1) : LUMA_FILTER(src
, 1)) >> (BIT_DEPTH
- 8);
/* Pass 2: vertical filtering, one output row per iteration. */
62 for (int i
= 0; i
< height
; i
++) {
63 const int ty
= _y
+ dy
* i
;
64 const int x
= SCALED_INT(ty
) - y0
;
65 const int mx
= av_zero_extend(ty
>> shift1
, shift2
);
66 const int8_t *filter
= vf
+ mx
* taps
;
68 tmp
= tmp_array
+ extra_before
;
69 for (int j
= 0; j
< width
; j
++) {
70 const int val
= (is_chroma
? CHROMA_FILTER(tmp
, 1) : LUMA_FILTER(tmp
, 1)) >> 6;
72 dst
[j
] = av_clip_pixel((val
+ offset
) >> shift
);
/*
 * Luma entry point with an int16_t destination: forwards to put_scaled with
 * is_uni = 0 and is_chroma = 0. _dst is passed as a byte pointer together
 * with a stride of MAX_PB_SIZE * sizeof(pixel).
 */
84 static void FUNC(put_luma_scaled
)(int16_t *_dst
,
85 const uint8_t *_src
, ptrdiff_t _src_stride
, const int src_height
,
86 const int x
, const int y
, const int dx
, const int dy
,
87 const int height
, const int8_t *hf
, const int8_t *vf
, const int width
)
89 FUNC(put_scaled
)((uint8_t *)_dst
, MAX_PB_SIZE
* sizeof(pixel
), _src
, _src_stride
, src_height
, x
, y
, dx
, dy
, height
, hf
, vf
, width
, 0, 0);
/*
 * Chroma entry point with an int16_t destination: forwards to put_scaled with
 * is_uni = 0 and is_chroma = 1. _dst is passed as a byte pointer together
 * with a stride of MAX_PB_SIZE * sizeof(pixel).
 */
92 static void FUNC(put_chroma_scaled
)(int16_t *_dst
,
93 const uint8_t *_src
, ptrdiff_t _src_stride
, const int src_height
,
94 const int x
, const int y
, const int dx
, const int dy
,
95 const int height
, const int8_t *hf
, const int8_t *vf
, const int width
)
97 FUNC(put_scaled
)((uint8_t *)_dst
, MAX_PB_SIZE
* sizeof(pixel
), _src
, _src_stride
, src_height
, x
, y
, dx
, dy
, height
, hf
, vf
, width
, 0, 1);
/*
 * Luma uni-prediction entry point: forwards to put_scaled with is_uni = 1 and
 * is_chroma = 0, passing the caller's destination pointer and stride through
 * unchanged.
 */
100 static void FUNC(put_uni_luma_scaled
)(uint8_t *_dst
, const ptrdiff_t _dst_stride
,
101 const uint8_t *_src
, ptrdiff_t _src_stride
, const int src_height
,
102 const int x
, const int y
, const int dx
, const int dy
,
103 const int height
, const int8_t *hf
, const int8_t *vf
, const int width
)
105 FUNC(put_scaled
)(_dst
, _dst_stride
, _src
, _src_stride
, src_height
, x
, y
, dx
, dy
, height
, hf
, vf
, width
, 1, 0);
/*
 * Chroma uni-prediction entry point: forwards to put_scaled with is_uni = 1
 * and is_chroma = 1, passing the caller's destination pointer and stride
 * through unchanged.
 */
108 static void FUNC(put_uni_chroma_scaled
)(uint8_t *_dst
, const ptrdiff_t _dst_stride
,
109 const uint8_t *_src
, ptrdiff_t _src_stride
, const int src_height
,
110 const int x
, const int y
, const int dx
, const int dy
,
111 const int height
, const int8_t *hf
, const int8_t *vf
, const int width
)
113 FUNC(put_scaled
)(_dst
, _dst_stride
, _src
, _src_stride
, src_height
, x
, y
, dx
, dy
, height
, hf
, vf
, width
, 1, 1);
116 static void av_always_inline
FUNC(put_uni_w_scaled
)(uint8_t *_dst
, const ptrdiff_t _dst_stride
,
117 const uint8_t *const _src
, ptrdiff_t _src_stride
, const int src_height
,
118 const int _x
, const int _y
, const int dx
, const int dy
, const int denom
, const int wx
, const int _ox
,
119 const int height
, const int8_t *hf
, const int8_t *vf
, const int width
, const int is_chroma
)
121 int16_t tmp_array
[TMP_STRIDE
* MAX_PB_SIZE
];
122 int16_t *tmp
= tmp_array
;
123 pixel
*dst
= (pixel
*)_dst
;
124 const ptrdiff_t dst_stride
= _dst_stride
/ sizeof(pixel
);
125 const ptrdiff_t src_stride
= _src_stride
/ sizeof(pixel
);
126 const int shift
= FFMAX(2, 14 - BIT_DEPTH
);
127 const int offset
= 1 << (shift
- 1);
128 const int ox
= _ox
* (1 << (BIT_DEPTH
- 8));
129 const int taps
= is_chroma
? VVC_INTER_CHROMA_TAPS
: VVC_INTER_LUMA_TAPS
;
130 const int extra
= is_chroma
? CHROMA_EXTRA
: LUMA_EXTRA
;
131 const int extra_before
= is_chroma
? CHROMA_EXTRA_BEFORE
: LUMA_EXTRA_BEFORE
;
132 const int shift1
= 6 - is_chroma
;
133 const int shift2
= 4 + is_chroma
;
134 const int x0
= SCALED_INT(_x
);
135 const int y0
= SCALED_INT(_y
);
137 for (int i
= 0; i
< width
; i
++) {
138 const int tx
= _x
+ dx
* i
;
139 const int x
= SCALED_INT(tx
) - x0
;
140 const int mx
= av_zero_extend(tx
>> shift1
, shift2
);
141 const int8_t *filter
= hf
+ mx
* taps
;
142 const pixel
*src
= (pixel
*)_src
- extra_before
* src_stride
;
144 for (int j
= 0; j
< src_height
+ extra
; j
++) {
145 tmp
[j
] = (is_chroma
? CHROMA_FILTER(src
, 1) : LUMA_FILTER(src
, 1)) >> (BIT_DEPTH
- 8);
151 for (int i
= 0; i
< height
; i
++) {
152 const int ty
= _y
+ dy
* i
;
153 const int x
= SCALED_INT(ty
) - y0
;
154 const int mx
= av_zero_extend(ty
>> shift1
, shift2
);
155 const int8_t *filter
= vf
+ mx
* taps
;
157 tmp
= tmp_array
+ extra_before
;
158 for (int j
= 0; j
< width
; j
++) {
159 const int val
= (is_chroma
? CHROMA_FILTER(tmp
, 1) : LUMA_FILTER(tmp
, 1)) >> 6;
160 dst
[j
] = av_clip_pixel(((wx
* val
+ offset
) >> shift
) + ox
);
/*
 * Luma weighted uni-prediction entry point: forwards every argument to
 * put_uni_w_scaled with is_chroma = 0.
 */
167 static void FUNC(put_uni_luma_w_scaled
)(uint8_t *_dst
, const ptrdiff_t _dst_stride
,
168 const uint8_t *_src
, ptrdiff_t _src_stride
, const int src_height
,
169 const int x
, const int y
, const int dx
, const int dy
, const int denom
, const int wx
, const int ox
,
170 const int height
, const int8_t *hf
, const int8_t *vf
, const int width
)
172 FUNC(put_uni_w_scaled
)(_dst
, _dst_stride
, _src
, _src_stride
, src_height
, x
, y
, dx
, dy
, denom
, wx
, ox
, height
, hf
, vf
, width
, 0);
/*
 * Chroma weighted uni-prediction entry point: forwards every argument to
 * put_uni_w_scaled with is_chroma = 1.
 */
175 static void FUNC(put_uni_chroma_w_scaled
)(uint8_t *_dst
, const ptrdiff_t _dst_stride
,
176 const uint8_t *_src
, ptrdiff_t _src_stride
, const int src_height
,
177 const int x
, const int y
, const int dx
, const int dy
, const int denom
, const int wx
, const int ox
,
178 const int height
, const int8_t *hf
, const int8_t *vf
, const int width
)
180 FUNC(put_uni_w_scaled
)(_dst
, _dst_stride
, _src
, _src_stride
, src_height
, x
, y
, dx
, dy
, denom
, wx
, ox
, height
, hf
, vf
, width
, 1);
/*
 * Averages two int16_t prediction buffers into pixels:
 *     dst[x] = clip((src0[x] + src1[x] + offset) >> shift)
 * with shift = FFMAX(3, 15 - BIT_DEPTH) and offset = half of 1 << shift.
 *
 * NOTE(review): the per-row advances of src0, src1 and dst (dst_stride is
 * declared for this) are not visible in this listing -- confirm.
 */
185 static void FUNC(avg
)(uint8_t *_dst
, const ptrdiff_t _dst_stride
,
186 const int16_t *src0
, const int16_t *src1
, const int width
, const int height
)
188 pixel
*dst
= (pixel
*)_dst
;
189 const ptrdiff_t dst_stride
= _dst_stride
/ sizeof(pixel
);
190 const int shift
= FFMAX(3, 15 - BIT_DEPTH
);
191 const int offset
= 1 << (shift
- 1);
193 for (int y
= 0; y
< height
; y
++) {
194 for (int x
= 0; x
< width
; x
++)
195 dst
[x
] = av_clip_pixel((src0
[x
] + src1
[x
] + offset
) >> shift
);
/*
 * Weighted average of two int16_t prediction buffers:
 *     dst[x] = clip((src0[x] * w0 + src1[x] * w1 + offset) >> shift)
 * where shift = denom + FFMAX(3, 15 - BIT_DEPTH). offset folds the rounding
 * term and the two offsets o0 + o1 (scaled from 8-bit units to BIT_DEPTH)
 * into one constant: ((o0 + o1) * 2^(BIT_DEPTH - 8) + 1) * 2^(shift - 1).
 *
 * NOTE(review): the per-row advances of src0, src1 and dst are not visible
 * in this listing -- confirm.
 */
202 static void FUNC(w_avg
)(uint8_t *_dst
, const ptrdiff_t _dst_stride
,
203 const int16_t *src0
, const int16_t *src1
, const int width
, const int height
,
204 const int denom
, const int w0
, const int w1
, const int o0
, const int o1
)
206 pixel
*dst
= (pixel
*)_dst
;
207 const ptrdiff_t dst_stride
= _dst_stride
/ sizeof(pixel
);
208 const int shift
= denom
+ FFMAX(3, 15 - BIT_DEPTH
);
209 const int offset
= ((o0
+ o1
) * (1 << (BIT_DEPTH
- 8)) + 1) * (1 << (shift
- 1));
211 for (int y
= 0; y
< height
; y
++) {
212 for (int x
= 0; x
< width
; x
++)
213 dst
[x
] = av_clip_pixel((src0
[x
] * w0
+ src1
[x
] * w1
+ offset
) >> shift
);
/*
 * Blends in place: the value already in dst is weighted by intra_weight and
 * the sample from _inter by 4 - intra_weight:
 *     dst[x] = (dst[x] * intra_weight + inter[x] * inter_weight + 2) >> 2
 * The two weights sum to 4, so + 2 and >> 2 give a rounded average.
 *
 * NOTE(review): the strides are held in size_t here, while the other
 * functions in this file use ptrdiff_t. The dst row advance is not visible
 * in this listing; only inter += inter_stride is shown.
 */
220 static void FUNC(put_ciip
)(uint8_t *_dst
, const ptrdiff_t _dst_stride
,
221 const int width
, const int height
,
222 const uint8_t *_inter
, const ptrdiff_t _inter_stride
, const int intra_weight
)
224 pixel
*dst
= (pixel
*)_dst
;
225 pixel
*inter
= (pixel
*)_inter
;
226 const size_t dst_stride
= _dst_stride
/ sizeof(pixel
);
227 const size_t inter_stride
= _inter_stride
/ sizeof(pixel
);
228 const int inter_weight
= 4 - intra_weight
;
230 for (int y
= 0; y
< height
; y
++) {
231 for (int x
= 0; x
< width
; x
++)
232 dst
[x
] = (dst
[x
] * intra_weight
+ inter
[x
] * inter_weight
+ 2) >> 2;
234 inter
+= inter_stride
;
/*
 * Blends two int16_t prediction buffers with a per-sample weight w read from
 * weights[x * step_x]; src0 gets w and src1 gets 8 - w:
 *     dst[x] = clip((src0[x] * w + src1[x] * (8 - w) + offset) >> shift)
 * with shift = FFMAX(5, 17 - BIT_DEPTH).
 *
 * dst_stride arrives in bytes and is converted to pixel units in place.
 *
 * NOTE(review): step_y and the per-row advances of dst, src0, src1 and
 * weights are not visible in this listing -- confirm.
 */
238 static void FUNC(put_gpm
)(uint8_t *_dst
, ptrdiff_t dst_stride
,
239 const int width
, const int height
,
240 const int16_t *src0
, const int16_t *src1
,
241 const uint8_t *weights
, const int step_x
, const int step_y
)
243 const int shift
= FFMAX(5, 17 - BIT_DEPTH
);
244 const int offset
= 1 << (shift
- 1);
245 pixel
*dst
= (pixel
*)_dst
;
247 dst_stride
/= sizeof(pixel
);
248 for (int y
= 0; y
< height
; y
++) {
249 for (int x
= 0; x
< width
; x
++) {
250 const uint8_t w
= weights
[x
* step_x
];
251 dst
[x
] = av_clip_pixel((src0
[x
] * w
+ src1
[x
] * (8 - w
) + offset
) >> shift
);
260 //8.5.6.3.3 Luma integer sample fetching process, add one extra pad line
/*
 * Copies border samples around a width x height block from the pixel source
 * into an int16_t buffer, scaling each by << (14 - BIT_DEPTH).
 *
 * The source start is chosen from the fractional position: (x_frac >> 3) - 1
 * columns and (y_frac >> 3) - 1 rows relative to _src. The destination
 * start is one row (MAX_PB_SIZE elements) and one column before _dst.
 *
 * Visible stores:
 *  - a full run of bdof_width = width + 2 * BDOF_BORDER_EXT samples,
 *  - per row, only dst[0] and dst[1 + width],
 *  - a second full run of bdof_width samples.
 * NOTE(review): these presumably form the top row, the left/right columns
 * and the bottom row of the border, but the src/dst advances between them
 * are not visible in this listing -- confirm.
 */
261 static void FUNC(bdof_fetch_samples
)(int16_t *_dst
, const uint8_t *_src
, const ptrdiff_t _src_stride
,
262 const int x_frac
, const int y_frac
, const int width
, const int height
)
264 const int x_off
= (x_frac
>> 3) - 1;
265 const int y_off
= (y_frac
>> 3) - 1;
266 const ptrdiff_t src_stride
= _src_stride
/ sizeof(pixel
);
267 const pixel
*src
= (pixel
*)_src
+ (x_off
) + y_off
* src_stride
;
268 int16_t *dst
= _dst
- 1 - MAX_PB_SIZE
;
269 const int shift
= 14 - BIT_DEPTH
;
270 const int bdof_width
= width
+ 2 * BDOF_BORDER_EXT
;
/* First full-width run. */
273 for (int i
= 0; i
< bdof_width
; i
++)
274 dst
[i
] = src
[i
] << shift
;
/* Per row: only the first sample and the one just past the block. */
279 for (int i
= 0; i
< height
; i
++) {
280 dst
[0] = src
[0] << shift
;
281 dst
[1 + width
] = src
[1 + width
] << shift
;
/* Second full-width run. */
285 for (int i
= 0; i
< bdof_width
; i
++)
286 dst
[i
] = src
[i
] << shift
;
289 //8.5.6.3.3 Luma integer sample fetching process
/*
 * Fixed-size variant: calls bdof_fetch_samples with width and height both
 * set to AFFINE_MIN_BLOCK_SIZE.
 */
290 static void FUNC(fetch_samples
)(int16_t *_dst
, const uint8_t *_src
, const ptrdiff_t _src_stride
, const int x_frac
, const int y_frac
)
292 FUNC(bdof_fetch_samples
)(_dst
, _src
, _src_stride
, x_frac
, y_frac
, AFFINE_MIN_BLOCK_SIZE
, AFFINE_MIN_BLOCK_SIZE
);
/*
 * Computes horizontal and vertical gradients of an int16_t block as
 * differences of the two neighbours, each shifted down first:
 *     gradient_h[x] = (p[1]          >> shift) - (p[-1]          >> shift)
 *     gradient_v[x] = (p[src_stride] >> shift) - (p[-src_stride] >> shift)
 *
 * Because p[-1], p[1] and p[+/-src_stride] are read, the source must have
 * valid samples one column and one row outside the width x height block.
 * Both gradient buffers advance by gradient_stride per row.
 *
 * NOTE(review): the declaration of shift, the advance of p inside the inner
 * loop and the per-row advance of src are not visible in this listing
 * (embedded lines 298, 306-307 and 310 are skipped) -- confirm.
 */
295 static void FUNC(prof_grad_filter
)(int16_t *gradient_h
, int16_t *gradient_v
, const ptrdiff_t gradient_stride
,
296 const int16_t *_src
, const ptrdiff_t src_stride
, const int width
, const int height
)
299 const int16_t *src
= _src
;
301 for (int y
= 0; y
< height
; y
++) {
302 const int16_t *p
= src
;
303 for (int x
= 0; x
< width
; x
++) {
304 gradient_h
[x
] = (p
[1] >> shift
) - (p
[-1] >> shift
);
305 gradient_v
[x
] = (p
[src_stride
] >> shift
) - (p
[-src_stride
] >> shift
);
308 gradient_h
+= gradient_stride
;
309 gradient_v
+= gradient_stride
;
/*
 * Refines an AFFINE_MIN_BLOCK_SIZE-square int16_t block using its gradients
 * and per-sample motion-vector differences:
 *     di  = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o]
 *     val = src[x] + clip(di, -limit, limit - 1)
 * with limit = 1 << FFMAX(13, BIT_DEPTH + 1).
 *
 * Gradients come from prof_grad_filter with a source stride of MAX_PB_SIZE
 * and a gradient stride of AFFINE_MIN_BLOCK_SIZE. diff_mv_x / diff_mv_y are
 * indexed with the same packed index o = y * AFFINE_MIN_BLOCK_SIZE + x.
 *
 * NOTE(review): the store of val into dst and the per-row advances of src
 * and dst are not visible in this listing -- confirm.
 */
314 static void FUNC(apply_prof
)(int16_t *dst
, const int16_t *src
, const int16_t *diff_mv_x
, const int16_t *diff_mv_y
)
316 const int limit
= (1 << FFMAX(13, BIT_DEPTH
+ 1)); ///< dILimit
318 int16_t gradient_h
[AFFINE_MIN_BLOCK_SIZE
* AFFINE_MIN_BLOCK_SIZE
];
319 int16_t gradient_v
[AFFINE_MIN_BLOCK_SIZE
* AFFINE_MIN_BLOCK_SIZE
];
320 FUNC(prof_grad_filter
)(gradient_h
, gradient_v
, AFFINE_MIN_BLOCK_SIZE
, src
, MAX_PB_SIZE
, AFFINE_MIN_BLOCK_SIZE
, AFFINE_MIN_BLOCK_SIZE
);
322 for (int y
= 0; y
< AFFINE_MIN_BLOCK_SIZE
; y
++) {
323 for (int x
= 0; x
< AFFINE_MIN_BLOCK_SIZE
; x
++) {
324 const int o
= y
* AFFINE_MIN_BLOCK_SIZE
+ x
;
325 const int di
= gradient_h
[o
] * diff_mv_x
[o
] + gradient_v
[o
] * diff_mv_y
[o
];
326 const int val
= src
[x
] + av_clip(di
, -limit
, limit
- 1);
/*
 * Same refinement as apply_prof, but the result is written as pixels:
 *     dst[x] = clip((val + offset) >> shift),  shift = 14 - BIT_DEPTH
 *
 * NOTE(review): offset is defined twice below (1 << (shift - 1) and 0).
 * These are presumably alternative preprocessor branches whose
 * #if / #else / #endif lines are absent from this listing (embedded lines
 * 341, 343 and 345 are skipped) -- confirm the condition. The per-row
 * advances of src and dst are not visible either.
 */
335 static void FUNC(apply_prof_uni
)(uint8_t *_dst
, const ptrdiff_t _dst_stride
, const int16_t *src
, const int16_t *diff_mv_x
, const int16_t *diff_mv_y
)
337 const int limit
= (1 << FFMAX(13, BIT_DEPTH
+ 1)); ///< dILimit
338 pixel
*dst
= (pixel
*)_dst
;
339 const ptrdiff_t dst_stride
= _dst_stride
/ sizeof(pixel
);
340 const int shift
= 14 - BIT_DEPTH
;
342 const int offset
= 1 << (shift
- 1);
344 const int offset
= 0;
346 int16_t gradient_h
[AFFINE_MIN_BLOCK_SIZE
* AFFINE_MIN_BLOCK_SIZE
];
347 int16_t gradient_v
[AFFINE_MIN_BLOCK_SIZE
* AFFINE_MIN_BLOCK_SIZE
];
349 FUNC(prof_grad_filter
)(gradient_h
, gradient_v
, AFFINE_MIN_BLOCK_SIZE
, src
, MAX_PB_SIZE
, AFFINE_MIN_BLOCK_SIZE
, AFFINE_MIN_BLOCK_SIZE
);
351 for (int y
= 0; y
< AFFINE_MIN_BLOCK_SIZE
; y
++) {
352 for (int x
= 0; x
< AFFINE_MIN_BLOCK_SIZE
; x
++) {
353 const int o
= y
* AFFINE_MIN_BLOCK_SIZE
+ x
;
354 const int di
= gradient_h
[o
] * diff_mv_x
[o
] + gradient_v
[o
] * diff_mv_y
[o
];
355 const int val
= src
[x
] + av_clip(di
, -limit
, limit
- 1);
356 dst
[x
] = av_clip_pixel((val
+ offset
) >> shift
);
/*
 * Same refinement as apply_prof, followed by explicit weighting:
 *     dst[x] = clip(((val * wx + offset) >> shift) + ox)
 * with shift = denom + FFMAX(2, 14 - BIT_DEPTH), offset = half of
 * 1 << shift, and ox = _ox scaled from 8-bit units to BIT_DEPTH.
 *
 * NOTE(review): the per-row advances of src and dst are not visible in this
 * listing -- confirm.
 */
364 static void FUNC(apply_prof_uni_w
)(uint8_t *_dst
, const ptrdiff_t _dst_stride
,
365 const int16_t *src
, const int16_t *diff_mv_x
, const int16_t *diff_mv_y
,
366 const int denom
, const int wx
, const int _ox
)
368 const int limit
= (1 << FFMAX(13, BIT_DEPTH
+ 1)); ///< dILimit
369 pixel
*dst
= (pixel
*)_dst
;
370 const ptrdiff_t dst_stride
= _dst_stride
/ sizeof(pixel
);
371 const int shift
= denom
+ FFMAX(2, 14 - BIT_DEPTH
);
372 const int offset
= 1 << (shift
- 1);
373 const int ox
= _ox
* (1 << (BIT_DEPTH
- 8));
374 int16_t gradient_h
[AFFINE_MIN_BLOCK_SIZE
* AFFINE_MIN_BLOCK_SIZE
];
375 int16_t gradient_v
[AFFINE_MIN_BLOCK_SIZE
* AFFINE_MIN_BLOCK_SIZE
];
377 FUNC(prof_grad_filter
)(gradient_h
, gradient_v
, AFFINE_MIN_BLOCK_SIZE
, src
, MAX_PB_SIZE
, AFFINE_MIN_BLOCK_SIZE
, AFFINE_MIN_BLOCK_SIZE
);
379 for (int y
= 0; y
< AFFINE_MIN_BLOCK_SIZE
; y
++) {
380 for (int x
= 0; x
< AFFINE_MIN_BLOCK_SIZE
; x
++) {
381 const int o
= y
* AFFINE_MIN_BLOCK_SIZE
+ x
;
382 const int di
= gradient_h
[o
] * diff_mv_x
[o
] + gradient_v
[o
] * diff_mv_y
[o
];
383 const int val
= src
[x
] + av_clip(di
, -limit
, limit
- 1);
384 dst
[x
] = av_clip_pixel(((val
* wx
+ offset
) >> shift
) + ox
);
/*
 * Derives the motion refinement (*vx, *vy) for one BDOF min block.
 *
 * Sums are taken over a window that extends one sample beyond the min block
 * on every side (x and y run from -1 to BDOF_MIN_BLOCK_SIZE inclusive).
 * When a pad_* flag is set, the out-of-block row or column on that side is
 * replaced by the nearest in-block one (dy / dx are pulled back by one).
 *
 * Per sample:
 *     diff  = (src0 >> shift2) - (src1 >> shift2)
 *     temph = (gradient_h[0] + gradient_h[1]) >> shift3   (tempv likewise)
 * accumulated into sgx2, sgy2, sgxgy, sgxdi and sgydi.
 *
 * Result: vx is (sgxdi * 4) >> log2(sgx2), clipped to +/-(thres - 1), or 0
 * when sgx2 is 0. vy is built the same way from sgydi and sgy2 after
 * subtracting (vx * sgxgy) >> 1.
 *
 * NOTE(review): the tail of the parameter list (the vx / vy output pointers)
 * is not visible in this listing; embedded lines 394-395 are skipped.
 */
391 static void FUNC(derive_bdof_vx_vy
)(const int16_t *_src0
, const int16_t *_src1
,
392 const int pad_left
, const int pad_top
, const int pad_right
, const int pad_bottom
,
393 const int16_t **gradient_h
, const int16_t **gradient_v
,
396 const int shift2
= 4;
397 const int shift3
= 1;
398 const int thres
= 1 << 4;
399 int sgx2
= 0, sgy2
= 0, sgxgy
= 0, sgxdi
= 0, sgydi
= 0;
401 for (int y
= -1; y
< BDOF_MIN_BLOCK_SIZE
+ 1; y
++) {
402 const int dy
= y
+ (pad_top
&& y
< 0) - (pad_bottom
&& y
== BDOF_MIN_BLOCK_SIZE
); // we pad for the first and last row
403 const int16_t *src0
= _src0
+ dy
* MAX_PB_SIZE
;
404 const int16_t *src1
= _src1
+ dy
* MAX_PB_SIZE
;
406 for (int x
= -1; x
< BDOF_MIN_BLOCK_SIZE
+ 1; x
++) {
407 const int dx
= x
+ (pad_left
&& x
< 0) - (pad_right
&& x
== BDOF_MIN_BLOCK_SIZE
); // we pad for the first and last col
408 const int diff
= (src0
[dx
] >> shift2
) - (src1
[dx
] >> shift2
);
/* Gradient buffers use a row pitch of BDOF_BLOCK_SIZE. */
409 const int idx
= BDOF_BLOCK_SIZE
* dy
+ dx
;
410 const int temph
= (gradient_h
[0][idx
] + gradient_h
[1][idx
]) >> shift3
;
411 const int tempv
= (gradient_v
[0][idx
] + gradient_v
[1][idx
]) >> shift3
;
413 sgx2
+= FFABS(temph
);
414 sgy2
+= FFABS(tempv
);
415 sgxgy
+= VVC_SIGN(tempv
) * temph
;
416 sgxdi
+= -VVC_SIGN(temph
) * diff
;
417 sgydi
+= -VVC_SIGN(tempv
) * diff
;
420 *vx
= sgx2
> 0 ? av_clip((sgxdi
* (1 << 2)) >> av_log2(sgx2
) , -thres
+ 1, thres
- 1) : 0;
421 *vy
= sgy2
> 0 ? av_clip(((sgydi
* (1 << 2)) - ((*vx
* sgxgy
) >> 1)) >> av_log2(sgy2
), -thres
+ 1, thres
- 1) : 0;
/*
 * Writes one BDOF min block: the two predictions are summed, corrected by
 *     bdof_offset = vx * (gh[0] - gh[1]) + vy * (gv[0] - gv[1])
 * then rounded, shifted by 15 - BIT_DEPTH and clipped to the pixel range.
 * Gradients are indexed with a row pitch of BDOF_BLOCK_SIZE.
 *
 * NOTE(review): the per-row advances of dst, src0 and src1 are not visible
 * in this listing -- confirm.
 */
424 static void FUNC(apply_bdof_min_block
)(pixel
* dst
, const ptrdiff_t dst_stride
, const int16_t *src0
, const int16_t *src1
,
425 const int16_t **gh
, const int16_t **gv
, const int vx
, const int vy
)
427 const int shift4
= 15 - BIT_DEPTH
;
428 const int offset4
= 1 << (shift4
- 1);
430 for (int y
= 0; y
< BDOF_MIN_BLOCK_SIZE
; y
++) {
431 for (int x
= 0; x
< BDOF_MIN_BLOCK_SIZE
; x
++) {
432 const int idx
= y
* BDOF_BLOCK_SIZE
+ x
;
433 const int bdof_offset
= vx
* (gh
[0][idx
] - gh
[1][idx
]) + vy
* (gv
[0][idx
] - gv
[1][idx
]);
434 dst
[x
] = av_clip_pixel((src0
[x
] + offset4
+ src1
[x
] + bdof_offset
) >> shift4
);
/*
 * Applies BDOF to a block_w x block_h block.
 *
 * Gradients of both int16_t predictions are computed once for the whole
 * block with prof_grad_filter (source stride MAX_PB_SIZE, gradient stride
 * BDOF_BLOCK_SIZE). The block is then processed in steps of
 * BDOF_MIN_BLOCK_SIZE: derive_bdof_vx_vy computes the refinement for each
 * min block and apply_bdof_min_block writes the pixels.
 *
 * The four padding flags passed to derive_bdof_vx_vy are set when the min
 * block touches the left (!x), top (!y), right or bottom edge of the block.
 *
 * NOTE(review): the declarations of vx, vy and d are not visible in this
 * listing (embedded lines 460 and 466 are skipped) -- confirm.
 */
442 static void FUNC(apply_bdof
)(uint8_t *_dst
, const ptrdiff_t _dst_stride
, const int16_t *_src0
, const int16_t *_src1
,
443 const int block_w
, const int block_h
)
/* Index [0] holds the gradients of _src0, [1] those of _src1. */
445 int16_t gradient_h
[2][BDOF_BLOCK_SIZE
* BDOF_BLOCK_SIZE
];
446 int16_t gradient_v
[2][BDOF_BLOCK_SIZE
* BDOF_BLOCK_SIZE
];
448 const ptrdiff_t dst_stride
= _dst_stride
/ sizeof(pixel
);
449 pixel
* dst
= (pixel
*)_dst
;
451 FUNC(prof_grad_filter
)(gradient_h
[0], gradient_v
[0], BDOF_BLOCK_SIZE
,
452 _src0
, MAX_PB_SIZE
, block_w
, block_h
);
453 FUNC(prof_grad_filter
)(gradient_h
[1], gradient_v
[1], BDOF_BLOCK_SIZE
,
454 _src1
, MAX_PB_SIZE
, block_w
, block_h
);
456 for (int y
= 0; y
< block_h
; y
+= BDOF_MIN_BLOCK_SIZE
) {
457 for (int x
= 0; x
< block_w
; x
+= BDOF_MIN_BLOCK_SIZE
) {
458 const int16_t* src0
= _src0
+ y
* MAX_PB_SIZE
+ x
;
459 const int16_t* src1
= _src1
+ y
* MAX_PB_SIZE
+ x
;
/* Gradient pointers re-based to the top-left sample of this min block. */
461 const int idx
= BDOF_BLOCK_SIZE
* y
+ x
;
462 const int16_t* gh
[] = { gradient_h
[0] + idx
, gradient_h
[1] + idx
};
463 const int16_t* gv
[] = { gradient_v
[0] + idx
, gradient_v
[1] + idx
};
464 FUNC(derive_bdof_vx_vy
)(src0
, src1
, !x
, !y
, x
+ BDOF_MIN_BLOCK_SIZE
== block_w
, y
+ BDOF_MIN_BLOCK_SIZE
== block_h
, gh
, gv
, &vx
, &vy
);
465 FUNC(apply_bdof_min_block
)(d
, dst_stride
, src0
, src1
, gh
, gv
, vx
, vy
);
/* Move dst down by one row of min blocks. */
467 dst
+= BDOF_MIN_BLOCK_SIZE
* dst_stride
;
/*
 * Two-tap filter on src[x] and src[x + stride]. The macro captures the
 * locals `filter` and `x` from the expansion site by name.
 */
471 #define DMVR_FILTER(src, stride) \
472 (filter[0] * src[x] + \
473 filter[1] * src[x + stride])
/* Two-tap filter on two explicit sample values with an explicit filter. */
475 #define DMVR_FILTER2(filter, src0, src1) \
476 (filter[0] * src0 + filter[1] * src1)
478 //8.5.3.2.2 Luma sample bilinear interpolation process
/*
 * Variant without filtering: converts pixel samples to the int16_t
 * intermediate by applying DMVR_SHIFT to each sample. mx and my are not used
 * in the visible lines.
 *
 * Two definitions of DMVR_SHIFT are visible: a rounding right shift by
 * BIT_DEPTH - 10, and a left shift by 10 - BIT_DEPTH. A plain
 * av_image_copy_plane with a destination stride of
 * sizeof(int16_t) * MAX_PB_SIZE is also visible.
 *
 * NOTE(review): the #if / #else lines that choose between these alternatives
 * are absent from this listing; only the trailing "#endif" comments
 * ("BIT_DEPTH > 10", "BIT_DEPTH != 10") remain. The per-row advances of src
 * and dst are not visible either -- confirm.
 */
479 static void FUNC(dmvr
)(int16_t *dst
, const uint8_t *_src
, const ptrdiff_t _src_stride
,
480 const int height
, const intptr_t mx
, const intptr_t my
, const int width
)
483 const pixel
*src
= (const pixel
*)_src
;
484 const ptrdiff_t src_stride
= _src_stride
/ sizeof(pixel
);
486 const int shift4
= BIT_DEPTH
- 10;
487 const int offset4
= 1 << (shift4
- 1);
488 #define DMVR_SHIFT(s) (((s) + offset4) >> shift4)
490 #define DMVR_SHIFT(s) ((s) << (10 - BIT_DEPTH))
491 #endif // BIT_DEPTH > 10
493 for (int y
= 0; y
< height
; y
++) {
494 for (int x
= 0; x
< width
; x
++)
495 dst
[x
] = DMVR_SHIFT(src
[x
]);
501 av_image_copy_plane((uint8_t*)dst
, sizeof(int16_t) * MAX_PB_SIZE
, _src
, _src_stride
,
502 width
* sizeof(pixel
), height
);
503 #endif // BIT_DEPTH != 10
506 //8.5.3.2.2 Luma sample bilinear interpolation process
/*
 * Horizontal-only variant: the two-tap filter
 * ff_vvc_inter_luma_dmvr_filters[mx] is applied to src[x] and src[x + 1],
 * then rounded and shifted by BIT_DEPTH - 6. my is not used in the visible
 * lines.
 *
 * NOTE(review): the per-row advances of src and dst are not visible in this
 * listing -- confirm.
 */
507 static void FUNC(dmvr_h
)(int16_t *dst
, const uint8_t *_src
, const ptrdiff_t _src_stride
,
508 const int height
, const intptr_t mx
, const intptr_t my
, const int width
)
510 const pixel
*src
= (const pixel
*)_src
;
511 const ptrdiff_t src_stride
= _src_stride
/ sizeof(pixel
);
512 const int8_t *filter
= ff_vvc_inter_luma_dmvr_filters
[mx
];
513 const int shift1
= BIT_DEPTH
- 6;
514 const int offset1
= 1 << (shift1
- 1);
516 for (int y
= 0; y
< height
; y
++) {
517 for (int x
= 0; x
< width
; x
++)
518 dst
[x
] = (DMVR_FILTER(src
, 1) + offset1
) >> shift1
;
524 //8.5.3.2.2 Luma sample bilinear interpolation process
/*
 * Vertical-only variant: the two-tap filter
 * ff_vvc_inter_luma_dmvr_filters[my] is applied to src[x] and
 * src[x + src_stride], then rounded and shifted by BIT_DEPTH - 6. mx is not
 * used in the visible lines.
 *
 * NOTE(review): the per-row advances of src and dst are not visible in this
 * listing -- confirm.
 */
525 static void FUNC(dmvr_v
)(int16_t *dst
, const uint8_t *_src
, const ptrdiff_t _src_stride
,
526 const int height
, const intptr_t mx
, const intptr_t my
, const int width
)
528 const pixel
*src
= (pixel
*)_src
;
529 const ptrdiff_t src_stride
= _src_stride
/ sizeof(pixel
);
530 const int8_t *filter
= ff_vvc_inter_luma_dmvr_filters
[my
];
531 const int shift1
= BIT_DEPTH
- 6;
532 const int offset1
= 1 << (shift1
- 1);
534 for (int y
= 0; y
< height
; y
++) {
535 for (int x
= 0; x
< width
; x
++)
536 dst
[x
] = (DMVR_FILTER(src
, src_stride
) + offset1
) >> shift1
;
543 //8.5.3.2.2 Luma sample bilinear interpolation process
/*
 * Two-dimensional variant using two line buffers, tmp0 and tmp1 (the two
 * halves of tmp_array).
 *
 * The source pointer first moves up by BILINEAR_EXTRA_BEFORE rows and the
 * first row is filtered horizontally (filter_x, shift1) into tmp0. For each
 * later row up to height + BILINEAR_EXTRA, the row is filtered horizontally
 * into tmp1, dst is produced by filtering tmp0 / tmp1 vertically (filter_y,
 * shift2 = 4), and the two buffers are swapped so the row just computed
 * becomes the upper row of the next iteration.
 *
 * NOTE(review): the per-row advances of src and dst are not visible in this
 * listing -- confirm.
 */
544 static void FUNC(dmvr_hv
)(int16_t *dst
, const uint8_t *_src
, const ptrdiff_t _src_stride
,
545 const int height
, const intptr_t mx
, const intptr_t my
, const int width
)
547 int16_t tmp_array
[MAX_PB_SIZE
* 2];
548 int16_t *tmp0
= tmp_array
;
549 int16_t *tmp1
= tmp_array
+ MAX_PB_SIZE
;
550 const pixel
*src
= (const pixel
*)_src
;
551 const ptrdiff_t src_stride
= _src_stride
/ sizeof(pixel
);
552 const int8_t *filter_x
= ff_vvc_inter_luma_dmvr_filters
[mx
];
553 const int8_t *filter_y
= ff_vvc_inter_luma_dmvr_filters
[my
];
554 const int shift1
= BIT_DEPTH
- 6;
555 const int offset1
= 1 << (shift1
- 1);
556 const int shift2
= 4;
557 const int offset2
= 1 << (shift2
- 1);
559 src
-= BILINEAR_EXTRA_BEFORE
* src_stride
;
/* Prime tmp0 with the first horizontally filtered row. */
560 for (int x
= 0; x
< width
; x
++)
561 tmp0
[x
] = (DMVR_FILTER2(filter_x
, src
[x
], src
[x
+ 1]) + offset1
) >> shift1
;
564 for (int y
= 1; y
< height
+ BILINEAR_EXTRA
; y
++) {
565 for (int x
= 0; x
< width
; x
++) {
566 tmp1
[x
] = (DMVR_FILTER2(filter_x
, src
[x
], src
[x
+ 1]) + offset1
) >> shift1
;
567 dst
[x
] = (DMVR_FILTER2(filter_y
, tmp0
[x
], tmp1
[x
]) + offset2
) >> shift2
;
571 FFSWAP(int16_t *, tmp0
, tmp1
);
/*
 * Table-filling helpers for the init function below.
 *
 * PEL_FUNC stores FUNC(a) into inter->dst[C][w][idx1][idx2] for w = 0..6.
 * DIR_FUNCS registers the four put_<d> variants (_pixels, _h, _v, _hv) and
 * the four matching put_<d>_w variants for one component.
 * FUNCS registers the four plain put variants and then the DIR_FUNCS set
 * for "uni".
 *
 * NOTE(review): some macro lines are absent from this listing (embedded
 * lines 576, 579-580, 590 and 597 are skipped), so the bodies shown may be
 * incomplete -- confirm.
 */
575 #define PEL_FUNC(dst, C, idx1, idx2, a) \
577 for (int w = 0; w < 7; w++) \
578 inter->dst[C][w][idx1][idx2] = FUNC(a); \
581 #define DIR_FUNCS(d, C, c) \
582 PEL_FUNC(put_##d, C, 0, 0, put_##d##_pixels); \
583 PEL_FUNC(put_##d, C, 0, 1, put_##d##_##c##_h); \
584 PEL_FUNC(put_##d, C, 1, 0, put_##d##_##c##_v); \
585 PEL_FUNC(put_##d, C, 1, 1, put_##d##_##c##_hv); \
586 PEL_FUNC(put_##d##_w, C, 0, 0, put_##d##_w_pixels); \
587 PEL_FUNC(put_##d##_w, C, 0, 1, put_##d##_##c##_w_h); \
588 PEL_FUNC(put_##d##_w, C, 1, 0, put_##d##_##c##_w_v); \
589 PEL_FUNC(put_##d##_w, C, 1, 1, put_##d##_##c##_w_hv);
591 #define FUNCS(C, c) \
592 PEL_FUNC(put, C, 0, 0, put_pixels); \
593 PEL_FUNC(put, C, 0, 1, put_##c##_h); \
594 PEL_FUNC(put, C, 1, 0, put_##c##_v); \
595 PEL_FUNC(put, C, 1, 1, put_##c##_hv); \
596 DIR_FUNCS(uni, C, c); \
598 static void FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter)
/*
 * Fills the function-pointer tables of *inter with the implementations
 * defined in this file for the current bit depth.
 *
 * Visible assignments:
 *  - FUNCS(CHROMA, chroma) for the regular put / put_uni tables,
 *  - every slot of the three scaled tables (the loop bound is the element
 *    count of put_scaled[LUMA]) with the single luma / chroma function,
 *  - avg, w_avg, the 2x2 dmvr table, put_ciip, put_gpm, the sample fetchers,
 *    the PROF and BDOF functions,
 *  - sad, which is set to vvc_sad directly, without the FUNC() wrapper.
 *
 * NOTE(review): only the CHROMA call to FUNCS is visible; a matching LUMA
 * call is presumably on the skipped embedded line 600 -- confirm.
 */
601 FUNCS(CHROMA
, chroma
);
603 for (int i
= 0; i
< FF_ARRAY_ELEMS(inter
->put_scaled
[LUMA
]); i
++) {
604 inter
->put_scaled
[LUMA
][i
] = FUNC(put_luma_scaled
);
605 inter
->put_scaled
[CHROMA
][i
] = FUNC(put_chroma_scaled
);
606 inter
->put_uni_scaled
[LUMA
][i
] = FUNC(put_uni_luma_scaled
);
607 inter
->put_uni_scaled
[CHROMA
][i
] = FUNC(put_uni_chroma_scaled
);
608 inter
->put_uni_w_scaled
[LUMA
][i
] = FUNC(put_uni_luma_w_scaled
);
609 inter
->put_uni_w_scaled
[CHROMA
][i
] = FUNC(put_uni_chroma_w_scaled
);
612 inter
->avg
= FUNC(avg
);
613 inter
->w_avg
= FUNC(w_avg
);
/* dmvr is indexed [vertical filtering][horizontal filtering]. */
615 inter
->dmvr
[0][0] = FUNC(dmvr
);
616 inter
->dmvr
[0][1] = FUNC(dmvr_h
);
617 inter
->dmvr
[1][0] = FUNC(dmvr_v
);
618 inter
->dmvr
[1][1] = FUNC(dmvr_hv
);
620 inter
->put_ciip
= FUNC(put_ciip
);
621 inter
->put_gpm
= FUNC(put_gpm
);
623 inter
->fetch_samples
= FUNC(fetch_samples
);
624 inter
->bdof_fetch_samples
= FUNC(bdof_fetch_samples
);
625 inter
->apply_prof
= FUNC(apply_prof
);
626 inter
->apply_prof_uni
= FUNC(apply_prof_uni
);
627 inter
->apply_prof_uni_w
= FUNC(apply_prof_uni_w
);
628 inter
->apply_bdof
= FUNC(apply_bdof
);
629 inter
->sad
= vvc_sad
;