4 * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * Copyright (c) 2006 Michael Benjamin <michael.benjamin@analog.com>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "libavcodec/avcodec.h"
26 #include "libavcodec/dsputil.h"
27 #include "dsputil_bfin.h"
32 extern void ff_bfin_idct (DCTELEM
*block
) attribute_l1_text
;
33 extern void ff_bfin_fdct (DCTELEM
*block
) attribute_l1_text
;
34 extern void ff_bfin_vp3_idct (DCTELEM
*block
);
35 extern void ff_bfin_vp3_idct_put (uint8_t *dest
, int line_size
, DCTELEM
*block
);
36 extern void ff_bfin_vp3_idct_add (uint8_t *dest
, int line_size
, DCTELEM
*block
);
37 extern void ff_bfin_add_pixels_clamped (DCTELEM
*block
, uint8_t *dest
, int line_size
) attribute_l1_text
;
38 extern void ff_bfin_put_pixels_clamped (DCTELEM
*block
, uint8_t *dest
, int line_size
) attribute_l1_text
;
39 extern void ff_bfin_diff_pixels (DCTELEM
*block
, uint8_t *s1
, uint8_t *s2
, int stride
) attribute_l1_text
;
40 extern void ff_bfin_get_pixels (DCTELEM
*restrict block
, const uint8_t *pixels
, int line_size
) attribute_l1_text
;
41 extern int ff_bfin_pix_norm1 (uint8_t * pix
, int line_size
) attribute_l1_text
;
42 extern int ff_bfin_z_sad8x8 (uint8_t *blk1
, uint8_t *blk2
, int dsz
, int line_size
, int h
) attribute_l1_text
;
43 extern int ff_bfin_z_sad16x16 (uint8_t *blk1
, uint8_t *blk2
, int dsz
, int line_size
, int h
) attribute_l1_text
;
45 extern void ff_bfin_z_put_pixels16_xy2 (uint8_t *block
, const uint8_t *s0
, int dest_size
, int line_size
, int h
) attribute_l1_text
;
46 extern void ff_bfin_z_put_pixels8_xy2 (uint8_t *block
, const uint8_t *s0
, int dest_size
, int line_size
, int h
) attribute_l1_text
;
47 extern void ff_bfin_put_pixels16_xy2_nornd (uint8_t *block
, const uint8_t *s0
, int line_size
, int h
) attribute_l1_text
;
48 extern void ff_bfin_put_pixels8_xy2_nornd (uint8_t *block
, const uint8_t *s0
, int line_size
, int h
) attribute_l1_text
;
51 extern int ff_bfin_pix_sum (uint8_t *p
, int stride
) attribute_l1_text
;
53 extern void ff_bfin_put_pixels8uc (uint8_t *block
, const uint8_t *s0
, const uint8_t *s1
, int dest_size
, int line_size
, int h
) attribute_l1_text
;
54 extern void ff_bfin_put_pixels16uc (uint8_t *block
, const uint8_t *s0
, const uint8_t *s1
, int dest_size
, int line_size
, int h
) attribute_l1_text
;
55 extern void ff_bfin_put_pixels8uc_nornd (uint8_t *block
, const uint8_t *s0
, const uint8_t *s1
, int line_size
, int h
) attribute_l1_text
;
56 extern void ff_bfin_put_pixels16uc_nornd (uint8_t *block
, const uint8_t *s0
, const uint8_t *s1
, int line_size
, int h
) attribute_l1_text
;
58 extern int ff_bfin_sse4 (void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
) attribute_l1_text
;
59 extern int ff_bfin_sse8 (void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
) attribute_l1_text
;
60 extern int ff_bfin_sse16 (void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
) attribute_l1_text
;
63 static void bfin_idct_add (uint8_t *dest
, int line_size
, DCTELEM
*block
)
66 ff_bfin_add_pixels_clamped (block
, dest
, line_size
);
69 static void bfin_idct_put (uint8_t *dest
, int line_size
, DCTELEM
*block
)
72 ff_bfin_put_pixels_clamped (block
, dest
, line_size
);
/*
 * clear_blocks hook for the Blackfin DSPContext: per the inline note below,
 * the asm statement is intended to behave like a plain memset over the
 * memory at `blocks` (presumably zero-filling it -- TODO confirm).
 *
 * NOTE(review): only part of the asm statement is visible in this view.
 * The instructions that load P0, I0 and R0, and the body of the hardware
 * loop, are not shown; confirm the statement is complete before relying
 * on it.
 */
76 static void bfin_clear_blocks (DCTELEM
*blocks
)
78 // This is just a simple memset.
/* Hardware loop whose first and last instruction share one label; the
 * iteration count is taken from P0 (LC0=P0). */
83 "LSETUP(clear_blocks_blkfn_lab,clear_blocks_blkfn_lab)LC0=P0;"
84 "clear_blocks_blkfn_lab:"
/* No outputs; `blocks` is the only input ("a" constraint); P0, I0 and R0
 * are declared as clobbered. */
86 ::"a" (blocks
):"P0","I0","R0");
/**
 * Full-pel 8-wide put: forwards to ff_bfin_put_pixels8uc() with the same
 * pointer as both sources and line_size as both dest_size and line_size.
 */
static void bfin_put_pixels8(uint8_t *block, const uint8_t *pixels,
                             int line_size, int h)
{
    const uint8_t *src = pixels;

    ff_bfin_put_pixels8uc(block, src, src, line_size, line_size, h);
}
/**
 * Horizontal half-pel 8-wide put: the two sources handed to
 * ff_bfin_put_pixels8uc() are pixels and pixels + 1.
 */
static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels,
                                int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels8uc(block, pixels, right, line_size, line_size, h);
}
/**
 * Vertical half-pel 8-wide put: the two sources handed to
 * ff_bfin_put_pixels8uc() are pixels and the row one line_size below it.
 */
static void bfin_put_pixels8_y2(uint8_t *block, const uint8_t *pixels,
                                int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels8uc(block, pixels, below, line_size, line_size, h);
}
/**
 * Diagonal half-pel 8-wide put: forwards to ff_bfin_z_put_pixels8_xy2(),
 * supplying line_size for both its dest_size and line_size arguments.
 */
static void bfin_put_pixels8_xy2(uint8_t *block, const uint8_t *s0,
                                 int line_size, int h)
{
    const int dest_size = line_size;

    ff_bfin_z_put_pixels8_xy2(block, s0, dest_size, line_size, h);
}
/**
 * Full-pel 16-wide put: forwards to ff_bfin_put_pixels16uc() with the same
 * pointer as both sources and line_size as both dest_size and line_size.
 */
static void bfin_put_pixels16(uint8_t *block, const uint8_t *pixels,
                              int line_size, int h)
{
    const uint8_t *src = pixels;

    ff_bfin_put_pixels16uc(block, src, src, line_size, line_size, h);
}
/**
 * Horizontal half-pel 16-wide put: the two sources handed to
 * ff_bfin_put_pixels16uc() are pixels and pixels + 1.
 */
static void bfin_put_pixels16_x2(uint8_t *block, const uint8_t *pixels,
                                 int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels16uc(block, pixels, right, line_size, line_size, h);
}
/**
 * Vertical half-pel 16-wide put: the two sources handed to
 * ff_bfin_put_pixels16uc() are pixels and the row one line_size below it.
 */
static void bfin_put_pixels16_y2(uint8_t *block, const uint8_t *pixels,
                                 int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels16uc(block, pixels, below, line_size, line_size, h);
}
/**
 * Diagonal half-pel 16-wide put: forwards to ff_bfin_z_put_pixels16_xy2(),
 * supplying line_size for both its dest_size and line_size arguments.
 */
static void bfin_put_pixels16_xy2(uint8_t *block, const uint8_t *s0,
                                  int line_size, int h)
{
    const int dest_size = line_size;

    ff_bfin_z_put_pixels16_xy2(block, s0, dest_size, line_size, h);
}
/**
 * Full-pel 8-wide put for the no-rounding table: forwards to
 * ff_bfin_put_pixels8uc_nornd() with pixels as both sources.
 * Unlike its siblings this function has external linkage.
 */
void bfin_put_pixels8_nornd(uint8_t *block, const uint8_t *pixels,
                            int line_size, int h)
{
    const uint8_t *src = pixels;

    ff_bfin_put_pixels8uc_nornd(block, src, src, line_size, h);
}
/**
 * Horizontal half-pel 8-wide put for the no-rounding table: the sources
 * handed to ff_bfin_put_pixels8uc_nornd() are pixels and pixels + 1.
 */
static void bfin_put_pixels8_x2_nornd(uint8_t *block, const uint8_t *pixels,
                                      int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels8uc_nornd(block, pixels, right, line_size, h);
}
/**
 * Vertical half-pel 8-wide put for the no-rounding table: the sources
 * handed to ff_bfin_put_pixels8uc_nornd() are pixels and the row one
 * line_size below it.
 */
static void bfin_put_pixels8_y2_nornd(uint8_t *block, const uint8_t *pixels,
                                      int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels8uc_nornd(block, pixels, below, line_size, h);
}
/**
 * Full-pel 16-wide put for the no-rounding table: forwards to
 * ff_bfin_put_pixels16uc_nornd() with pixels as both sources.
 * Unlike its siblings this function has external linkage.
 */
void bfin_put_pixels16_nornd(uint8_t *block, const uint8_t *pixels,
                             int line_size, int h)
{
    const uint8_t *src = pixels;

    ff_bfin_put_pixels16uc_nornd(block, src, src, line_size, h);
}
/**
 * Horizontal half-pel 16-wide put for the no-rounding table: the sources
 * handed to ff_bfin_put_pixels16uc_nornd() are pixels and pixels + 1.
 */
static void bfin_put_pixels16_x2_nornd(uint8_t *block, const uint8_t *pixels,
                                       int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels16uc_nornd(block, pixels, right, line_size, h);
}
/**
 * Vertical half-pel 16-wide put for the no-rounding table: the sources
 * handed to ff_bfin_put_pixels16uc_nornd() are pixels and the row one
 * line_size below it.
 */
static void bfin_put_pixels16_y2_nornd(uint8_t *block, const uint8_t *pixels,
                                       int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels16uc_nornd(block, pixels, below, line_size, h);
}
/**
 * 16-wide SAD between blk1 and blk2; line_size is passed for both the dsz
 * and line_size arguments of ff_bfin_z_sad16x16().  The context pointer c
 * is not used.
 */
static int bfin_pix_abs16(void *c, uint8_t *blk1, uint8_t *blk2,
                          int line_size, int h)
{
    const int dsz = line_size;

    return ff_bfin_z_sad16x16(blk1, blk2, dsz, line_size, h);
}
/**
 * Intra vertical SAD: compares blk1 against the same block shifted down by
 * one row, stepping both operands by twice the stride.  c and dummy are
 * not used.
 *
 * NOTE(review): with the stride doubled and h passed unchanged, this looks
 * like it spans 2*h source rows -- confirm against ff_bfin_z_sad16x16().
 */
static int bfin_vsad_intra16(void *c, uint8_t *blk1, uint8_t *dummy,
                             int stride, int h)
{
    uint8_t *next_row       = blk1 + stride;
    const int double_stride = stride << 1;

    return ff_bfin_z_sad16x16(blk1, next_row, double_stride, double_stride, h);
}
/**
 * Vertical SAD: sum of the row-against-next-row SAD of blk1 and the same
 * measure for blk2, each taken with twice the stride.  c is not used.
 *
 * NOTE(review): with the stride doubled and h passed unchanged, each call
 * looks like it spans 2*h source rows -- confirm against
 * ff_bfin_z_sad16x16().
 */
static int bfin_vsad(void *c, uint8_t *blk1, uint8_t *blk2,
                     int stride, int h)
{
    const int double_stride = stride << 1;
    const int sad1 = ff_bfin_z_sad16x16(blk1, blk1 + stride,
                                        double_stride, double_stride, h);
    const int sad2 = ff_bfin_z_sad16x16(blk2, blk2 + stride,
                                        double_stride, double_stride, h);

    return sad1 + sad2;
}
176 static uint8_t vtmp_blk
[256] attribute_l1_data_b
;
178 static int bfin_pix_abs16_x2 (void *c
, uint8_t *blk1
, uint8_t *blk2
, int line_size
, int h
)
180 ff_bfin_put_pixels16uc (vtmp_blk
, blk2
, blk2
+1, 16, line_size
, h
);
181 return ff_bfin_z_sad16x16 (blk1
, vtmp_blk
, line_size
, 16, h
);
184 static int bfin_pix_abs16_y2 (void *c
, uint8_t *blk1
, uint8_t *blk2
, int line_size
, int h
)
186 ff_bfin_put_pixels16uc (vtmp_blk
, blk2
, blk2
+line_size
, 16, line_size
, h
);
187 return ff_bfin_z_sad16x16 (blk1
, vtmp_blk
, line_size
, 16, h
);
190 static int bfin_pix_abs16_xy2 (void *c
, uint8_t *blk1
, uint8_t *blk2
, int line_size
, int h
)
192 ff_bfin_z_put_pixels16_xy2 (vtmp_blk
, blk2
, 16, line_size
, h
);
193 return ff_bfin_z_sad16x16 (blk1
, vtmp_blk
, line_size
, 16, h
);
/**
 * 8-wide SAD between blk1 and blk2; line_size is passed for both the dsz
 * and line_size arguments of ff_bfin_z_sad8x8().  The context pointer c is
 * not used.
 */
static int bfin_pix_abs8(void *c, uint8_t *blk1, uint8_t *blk2,
                         int line_size, int h)
{
    const int dsz = line_size;

    return ff_bfin_z_sad8x8(blk1, blk2, dsz, line_size, h);
}
201 static int bfin_pix_abs8_x2 (void *c
, uint8_t *blk1
, uint8_t *blk2
, int line_size
, int h
)
203 ff_bfin_put_pixels8uc (vtmp_blk
, blk2
, blk2
+1, 8, line_size
, h
);
204 return ff_bfin_z_sad8x8 (blk1
, vtmp_blk
, line_size
, 8, h
);
207 static int bfin_pix_abs8_y2 (void *c
, uint8_t *blk1
, uint8_t *blk2
, int line_size
, int h
)
209 ff_bfin_put_pixels8uc (vtmp_blk
, blk2
, blk2
+line_size
, 8, line_size
, h
);
210 return ff_bfin_z_sad8x8 (blk1
, vtmp_blk
, line_size
, 8, h
);
213 static int bfin_pix_abs8_xy2 (void *c
, uint8_t *blk1
, uint8_t *blk2
, int line_size
, int h
)
215 ff_bfin_z_put_pixels8_xy2 (vtmp_blk
, blk2
, 8, line_size
, h
);
216 return ff_bfin_z_sad8x8 (blk1
, vtmp_blk
, line_size
, 8, h
);
/*
 * Optimization timing notes:
 *   started on 2/11: 100 frames of 352x240@25, compiled with no
 *   optimization and -g debugging
 *     6.360s ~ 1.58x off with -O2
 *     5.740s ~ 1.43x off with idcts
 *
 *     2.64s  2/20, same sman.mp4, decode only
 */
231 void dsputil_init_bfin( DSPContext
* c
, AVCodecContext
*avctx
)
233 c
->get_pixels
= ff_bfin_get_pixels
;
234 c
->diff_pixels
= ff_bfin_diff_pixels
;
235 c
->put_pixels_clamped
= ff_bfin_put_pixels_clamped
;
236 c
->add_pixels_clamped
= ff_bfin_add_pixels_clamped
;
238 c
->clear_blocks
= bfin_clear_blocks
;
239 c
->pix_sum
= ff_bfin_pix_sum
;
240 c
->pix_norm1
= ff_bfin_pix_norm1
;
242 c
->sad
[0] = bfin_pix_abs16
;
243 c
->sad
[1] = bfin_pix_abs8
;
245 c
->vsad
[0] = bfin_vsad
;
246 c
->vsad
[4] = bfin_vsad_intra16
;
248 /* TODO [0] 16 [1] 8 */
249 c
->pix_abs
[0][0] = bfin_pix_abs16
;
250 c
->pix_abs
[0][1] = bfin_pix_abs16_x2
;
251 c
->pix_abs
[0][2] = bfin_pix_abs16_y2
;
252 c
->pix_abs
[0][3] = bfin_pix_abs16_xy2
;
254 c
->pix_abs
[1][0] = bfin_pix_abs8
;
255 c
->pix_abs
[1][1] = bfin_pix_abs8_x2
;
256 c
->pix_abs
[1][2] = bfin_pix_abs8_y2
;
257 c
->pix_abs
[1][3] = bfin_pix_abs8_xy2
;
260 c
->sse
[0] = ff_bfin_sse16
;
261 c
->sse
[1] = ff_bfin_sse8
;
262 c
->sse
[2] = ff_bfin_sse4
;
266 * Halfpel motion compensation with rounding (a+b+1)>>1.
267 * This is an array[4][4] of motion compensation functions for 4
268 * horizontal blocksizes (8,16) and the 4 halfpel positions
269 * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
270 * @param block destination where the result is stored
271 * @param pixels source
272 * @param line_size number of bytes in a horizontal line of block
276 c
->put_pixels_tab
[0][0] = bfin_put_pixels16
;
277 c
->put_pixels_tab
[0][1] = bfin_put_pixels16_x2
;
278 c
->put_pixels_tab
[0][2] = bfin_put_pixels16_y2
;
279 c
->put_pixels_tab
[0][3] = bfin_put_pixels16_xy2
;
281 c
->put_pixels_tab
[1][0] = bfin_put_pixels8
;
282 c
->put_pixels_tab
[1][1] = bfin_put_pixels8_x2
;
283 c
->put_pixels_tab
[1][2] = bfin_put_pixels8_y2
;
284 c
->put_pixels_tab
[1][3] = bfin_put_pixels8_xy2
;
286 c
->put_no_rnd_pixels_tab
[1][0] = bfin_put_pixels8_nornd
;
287 c
->put_no_rnd_pixels_tab
[1][1] = bfin_put_pixels8_x2_nornd
;
288 c
->put_no_rnd_pixels_tab
[1][2] = bfin_put_pixels8_y2_nornd
;
289 c
->put_no_rnd_pixels_tab
[1][3] = ff_bfin_put_pixels8_xy2_nornd
;
291 c
->put_no_rnd_pixels_tab
[0][0] = bfin_put_pixels16_nornd
;
292 c
->put_no_rnd_pixels_tab
[0][1] = bfin_put_pixels16_x2_nornd
;
293 c
->put_no_rnd_pixels_tab
[0][2] = bfin_put_pixels16_y2_nornd
;
294 c
->put_no_rnd_pixels_tab
[0][3] = ff_bfin_put_pixels16_xy2_nornd
;
296 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
297 c
->fdct
= ff_bfin_fdct
;
298 if (avctx
->idct_algo
==FF_IDCT_VP3
) {
299 c
->idct
= ff_bfin_vp3_idct
;
300 c
->idct_add
= ff_bfin_vp3_idct_add
;
301 c
->idct_put
= ff_bfin_vp3_idct_put
;
303 c
->idct
= ff_bfin_idct
;
304 c
->idct_add
= bfin_idct_add
;
305 c
->idct_put
= bfin_idct_put
;