/*
 * Sun mediaLib optimized DSP utils
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
20 #include "../dsputil.h"
21 #include "../mpegvideo.h"
23 #include <mlib_types.h>
24 #include <mlib_status.h>
26 #include <mlib_algebra.h>
27 #include <mlib_video.h>
31 static void get_pixels_mlib(DCTELEM
*restrict block
, const uint8_t *pixels
, int line_size
)
36 mlib_VectorConvert_S16_U8_Mod((mlib_s16
*)block
, (mlib_u8
*)pixels
, 8);
43 static void diff_pixels_mlib(DCTELEM
*restrict block
, const uint8_t *s1
, const uint8_t *s2
, int line_size
)
48 mlib_VectorSub_S16_U8_Mod((mlib_s16
*)block
, (mlib_u8
*)s1
, (mlib_u8
*)s2
, 8);
56 static void add_pixels_clamped_mlib(const DCTELEM
*block
, uint8_t *pixels
, int line_size
)
58 mlib_VideoAddBlock_U8_S16(pixels
, (mlib_s16
*)block
, line_size
);
/* put block, width 16 pixels, height 8 or 16 */
/**
 * Put a 16-pixel-wide block from ref into dest using the
 * mlib_VideoCopyRef_U8_U8_16xH kernel matching the block height.
 *
 * @param dest   destination block
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded to mediaLib as the stride argument
 * @param height block height; 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it both kernels would run back to back. Heights
 * other than 8/16 are ignored here - confirm the intended handling.
 */
static void put_pixels16_mlib(uint8_t *dest, const uint8_t *ref,
                              int stride, int height)
{
    switch (height) {
    case 8:
        mlib_VideoCopyRef_U8_U8_16x8(dest, (uint8_t *)ref, stride);
        break;
    case 16:
        mlib_VideoCopyRef_U8_U8_16x16(dest, (uint8_t *)ref, stride);
        break;
    default:
        break;
    }
}
/**
 * Put a 16-pixel-wide block interpolated in the X direction, using the
 * mlib_VideoInterpX_U8_U8_16xH kernel matching the block height.
 *
 * @param dest   destination block
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded for both stride arguments of the mediaLib call
 * @param height block height; 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it both kernels would run back to back. Heights
 * other than 8/16 are ignored here - confirm the intended handling.
 */
static void put_pixels16_x2_mlib(uint8_t *dest, const uint8_t *ref,
                                 int stride, int height)
{
    switch (height) {
    case 8:
        mlib_VideoInterpX_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
        break;
    case 16:
        mlib_VideoInterpX_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
        break;
    default:
        break;
    }
}
/**
 * Put a 16-pixel-wide block interpolated in the Y direction, using the
 * mlib_VideoInterpY_U8_U8_16xH kernel matching the block height.
 *
 * @param dest   destination block
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded for both stride arguments of the mediaLib call
 * @param height block height; 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it both kernels would run back to back. Heights
 * other than 8/16 are ignored here - confirm the intended handling.
 */
static void put_pixels16_y2_mlib(uint8_t *dest, const uint8_t *ref,
                                 int stride, int height)
{
    switch (height) {
    case 8:
        mlib_VideoInterpY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
        break;
    case 16:
        mlib_VideoInterpY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
        break;
    default:
        break;
    }
}
/**
 * Put a 16-pixel-wide block interpolated in both X and Y, using the
 * mlib_VideoInterpXY_U8_U8_16xH kernel matching the block height.
 *
 * @param dest   destination block
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded for both stride arguments of the mediaLib call
 * @param height block height; 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it both kernels would run back to back. Heights
 * other than 8/16 are ignored here - confirm the intended handling.
 */
static void put_pixels16_xy2_mlib(uint8_t *dest, const uint8_t *ref,
                                  int stride, int height)
{
    switch (height) {
    case 8:
        mlib_VideoInterpXY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
        break;
    case 16:
        mlib_VideoInterpXY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
        break;
    default:
        break;
    }
}
/* put block, width 8 pixels, height 4, 8 or 16 */
/**
 * Put an 8-pixel-wide block from ref into dest using the
 * mlib_VideoCopyRef_U8_U8_8xH kernel matching the block height.
 *
 * @param dest   destination block
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded to mediaLib as the stride argument
 * @param height block height; 4, 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it all three kernels would run back to back.
 * Heights other than 4/8/16 are ignored here - confirm the intended handling.
 */
static void put_pixels8_mlib(uint8_t *dest, const uint8_t *ref,
                             int stride, int height)
{
    switch (height) {
    case 4:
        mlib_VideoCopyRef_U8_U8_8x4(dest, (uint8_t *)ref, stride);
        break;
    case 8:
        mlib_VideoCopyRef_U8_U8_8x8(dest, (uint8_t *)ref, stride);
        break;
    case 16:
        mlib_VideoCopyRef_U8_U8_8x16(dest, (uint8_t *)ref, stride);
        break;
    default:
        break;
    }
}
/**
 * Put an 8-pixel-wide block interpolated in the X direction, using the
 * mlib_VideoInterpX_U8_U8_8xH kernel matching the block height.
 *
 * @param dest   destination block
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded for both stride arguments of the mediaLib call
 * @param height block height; 4, 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it all three kernels would run back to back.
 * Heights other than 4/8/16 are ignored here - confirm the intended handling.
 */
static void put_pixels8_x2_mlib(uint8_t *dest, const uint8_t *ref,
                                int stride, int height)
{
    switch (height) {
    case 4:
        mlib_VideoInterpX_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
        break;
    case 8:
        mlib_VideoInterpX_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
        break;
    case 16:
        mlib_VideoInterpX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
        break;
    default:
        break;
    }
}
/**
 * Put an 8-pixel-wide block interpolated in the Y direction, using the
 * mlib_VideoInterpY_U8_U8_8xH kernel matching the block height.
 *
 * @param dest   destination block
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded for both stride arguments of the mediaLib call
 * @param height block height; 4, 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it all three kernels would run back to back.
 * Heights other than 4/8/16 are ignored here - confirm the intended handling.
 */
static void put_pixels8_y2_mlib(uint8_t *dest, const uint8_t *ref,
                                int stride, int height)
{
    switch (height) {
    case 4:
        mlib_VideoInterpY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
        break;
    case 8:
        mlib_VideoInterpY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
        break;
    case 16:
        mlib_VideoInterpY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
        break;
    default:
        break;
    }
}
/**
 * Put an 8-pixel-wide block interpolated in both X and Y, using the
 * mlib_VideoInterpXY_U8_U8_8xH kernel matching the block height.
 *
 * @param dest   destination block
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded for both stride arguments of the mediaLib call
 * @param height block height; 4, 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it all three kernels would run back to back.
 * Heights other than 4/8/16 are ignored here - confirm the intended handling.
 */
static void put_pixels8_xy2_mlib(uint8_t *dest, const uint8_t *ref,
                                 int stride, int height)
{
    switch (height) {
    case 4:
        mlib_VideoInterpXY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
        break;
    case 8:
        mlib_VideoInterpXY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
        break;
    case 16:
        mlib_VideoInterpXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
        break;
    default:
        break;
    }
}
/* average block, width 16 pixels, height 8 or 16 */
/**
 * Average a 16-pixel-wide reference block into dest using the
 * mlib_VideoCopyRefAve_U8_U8_16xH kernel matching the block height.
 *
 * @param dest   destination block, updated in place
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded to mediaLib as the stride argument
 * @param height block height; 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it both kernels would run back to back. Heights
 * other than 8/16 are ignored here - confirm the intended handling.
 */
static void avg_pixels16_mlib(uint8_t *dest, const uint8_t *ref,
                              int stride, int height)
{
    switch (height) {
    case 8:
        mlib_VideoCopyRefAve_U8_U8_16x8(dest, (uint8_t *)ref, stride);
        break;
    case 16:
        mlib_VideoCopyRefAve_U8_U8_16x16(dest, (uint8_t *)ref, stride);
        break;
    default:
        break;
    }
}
/**
 * Average an X-interpolated 16-pixel-wide reference block into dest, using
 * the mlib_VideoInterpAveX_U8_U8_16xH kernel matching the block height.
 *
 * @param dest   destination block, updated in place
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded for both stride arguments of the mediaLib call
 * @param height block height; 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it both kernels would run back to back. Heights
 * other than 8/16 are ignored here - confirm the intended handling.
 */
static void avg_pixels16_x2_mlib(uint8_t *dest, const uint8_t *ref,
                                 int stride, int height)
{
    switch (height) {
    case 8:
        mlib_VideoInterpAveX_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
        break;
    case 16:
        mlib_VideoInterpAveX_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
        break;
    default:
        break;
    }
}
/**
 * Average a Y-interpolated 16-pixel-wide reference block into dest, using
 * the mlib_VideoInterpAveY_U8_U8_16xH kernel matching the block height.
 *
 * @param dest   destination block, updated in place
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded for both stride arguments of the mediaLib call
 * @param height block height; 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it both kernels would run back to back. Heights
 * other than 8/16 are ignored here - confirm the intended handling.
 */
static void avg_pixels16_y2_mlib(uint8_t *dest, const uint8_t *ref,
                                 int stride, int height)
{
    switch (height) {
    case 8:
        mlib_VideoInterpAveY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
        break;
    case 16:
        mlib_VideoInterpAveY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
        break;
    default:
        break;
    }
}
/**
 * Average an X/Y-interpolated 16-pixel-wide reference block into dest, using
 * the mlib_VideoInterpAveXY_U8_U8_16xH kernel matching the block height.
 *
 * @param dest   destination block, updated in place
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded for both stride arguments of the mediaLib call
 * @param height block height; 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it both kernels would run back to back. Heights
 * other than 8/16 are ignored here - confirm the intended handling.
 */
static void avg_pixels16_xy2_mlib(uint8_t *dest, const uint8_t *ref,
                                  int stride, int height)
{
    switch (height) {
    case 8:
        mlib_VideoInterpAveXY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
        break;
    case 16:
        mlib_VideoInterpAveXY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
        break;
    default:
        break;
    }
}
/* average block, width 8 pixels, height 4, 8 or 16 */
/**
 * Average an 8-pixel-wide reference block into dest using the
 * mlib_VideoCopyRefAve_U8_U8_8xH kernel matching the block height.
 *
 * @param dest   destination block, updated in place
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded to mediaLib as the stride argument
 * @param height block height; 4, 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it all three kernels would run back to back.
 * Heights other than 4/8/16 are ignored here - confirm the intended handling.
 */
static void avg_pixels8_mlib(uint8_t *dest, const uint8_t *ref,
                             int stride, int height)
{
    switch (height) {
    case 4:
        mlib_VideoCopyRefAve_U8_U8_8x4(dest, (uint8_t *)ref, stride);
        break;
    case 8:
        mlib_VideoCopyRefAve_U8_U8_8x8(dest, (uint8_t *)ref, stride);
        break;
    case 16:
        mlib_VideoCopyRefAve_U8_U8_8x16(dest, (uint8_t *)ref, stride);
        break;
    default:
        break;
    }
}
/**
 * Average an X-interpolated 8-pixel-wide reference block into dest, using
 * the mlib_VideoInterpAveX_U8_U8_8xH kernel matching the block height.
 *
 * @param dest   destination block, updated in place
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded for both stride arguments of the mediaLib call
 * @param height block height; 4, 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it all three kernels would run back to back.
 * Heights other than 4/8/16 are ignored here - confirm the intended handling.
 */
static void avg_pixels8_x2_mlib(uint8_t *dest, const uint8_t *ref,
                                int stride, int height)
{
    switch (height) {
    case 4:
        mlib_VideoInterpAveX_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
        break;
    case 8:
        mlib_VideoInterpAveX_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
        break;
    case 16:
        mlib_VideoInterpAveX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
        break;
    default:
        break;
    }
}
/**
 * Average a Y-interpolated 8-pixel-wide reference block into dest, using
 * the mlib_VideoInterpAveY_U8_U8_8xH kernel matching the block height.
 *
 * @param dest   destination block, updated in place
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded for both stride arguments of the mediaLib call
 * @param height block height; 4, 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it all three kernels would run back to back.
 * Heights other than 4/8/16 are ignored here - confirm the intended handling.
 */
static void avg_pixels8_y2_mlib(uint8_t *dest, const uint8_t *ref,
                                int stride, int height)
{
    switch (height) {
    case 4:
        mlib_VideoInterpAveY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
        break;
    case 8:
        mlib_VideoInterpAveY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
        break;
    case 16:
        mlib_VideoInterpAveY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
        break;
    default:
        break;
    }
}
/**
 * Average an X/Y-interpolated 8-pixel-wide reference block into dest, using
 * the mlib_VideoInterpAveXY_U8_U8_8xH kernel matching the block height.
 *
 * @param dest   destination block, updated in place
 * @param ref    reference block (const is cast away for the mediaLib call)
 * @param stride forwarded for both stride arguments of the mediaLib call
 * @param height block height; 4, 8 and 16 have a kernel
 *
 * NOTE(review): the dispatch on height was reconstructed from the per-size
 * kernel names; without it all three kernels would run back to back.
 * Heights other than 4/8/16 are ignored here - confirm the intended handling.
 */
static void avg_pixels8_xy2_mlib(uint8_t *dest, const uint8_t *ref,
                                 int stride, int height)
{
    switch (height) {
    case 4:
        mlib_VideoInterpAveXY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
        break;
    case 8:
        mlib_VideoInterpAveXY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
        break;
    case 16:
        mlib_VideoInterpAveXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
        break;
    default:
        break;
    }
}
/* swap the byte order of a buffer */
/**
 * Byte-swap a buffer of 32-bit words.
 *
 * Thin wrapper around mlib_VectorReverseByteOrder_U32_U32.
 *
 * @param dst destination words
 * @param src source words
 * @param w   forwarded to mediaLib as the element count
 */
static void bswap_buf_mlib(uint32_t *dst, uint32_t *src, int w)
{
    mlib_VectorReverseByteOrder_U32_U32(dst, src, w);
}
/* DCT / IDCT transformations */
382 static void ff_idct_put_mlib(uint8_t *dest
, int line_size
, DCTELEM
*data
)
385 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
387 mlib_VideoIDCT8x8_S16_S16 (data
, data
);
390 dest
[0] = cm
[data
[0]];
391 dest
[1] = cm
[data
[1]];
392 dest
[2] = cm
[data
[2]];
393 dest
[3] = cm
[data
[3]];
394 dest
[4] = cm
[data
[4]];
395 dest
[5] = cm
[data
[5]];
396 dest
[6] = cm
[data
[6]];
397 dest
[7] = cm
[data
[7]];
404 static void ff_idct_add_mlib(uint8_t *dest
, int line_size
, DCTELEM
*data
)
406 mlib_VideoIDCT8x8_S16_S16 (data
, data
);
407 mlib_VideoAddBlock_U8_S16(dest
, (mlib_s16
*)data
, line_size
);
410 static void ff_idct_mlib(DCTELEM
*data
)
412 mlib_VideoIDCT8x8_S16_S16 (data
, data
);
415 static void ff_fdct_mlib(DCTELEM
*data
)
417 mlib_VideoDCT8x8_S16_S16 (data
, data
);
420 void dsputil_init_mlib(DSPContext
* c
, AVCodecContext
*avctx
)
422 c
->get_pixels
= get_pixels_mlib
;
423 c
->diff_pixels
= diff_pixels_mlib
;
424 c
->add_pixels_clamped
= add_pixels_clamped_mlib
;
426 c
->put_pixels_tab
[0][0] = put_pixels16_mlib
;
427 c
->put_pixels_tab
[0][1] = put_pixels16_x2_mlib
;
428 c
->put_pixels_tab
[0][2] = put_pixels16_y2_mlib
;
429 c
->put_pixels_tab
[0][3] = put_pixels16_xy2_mlib
;
430 c
->put_pixels_tab
[1][0] = put_pixels8_mlib
;
431 c
->put_pixels_tab
[1][1] = put_pixels8_x2_mlib
;
432 c
->put_pixels_tab
[1][2] = put_pixels8_y2_mlib
;
433 c
->put_pixels_tab
[1][3] = put_pixels8_xy2_mlib
;
435 c
->avg_pixels_tab
[0][0] = avg_pixels16_mlib
;
436 c
->avg_pixels_tab
[0][1] = avg_pixels16_x2_mlib
;
437 c
->avg_pixels_tab
[0][2] = avg_pixels16_y2_mlib
;
438 c
->avg_pixels_tab
[0][3] = avg_pixels16_xy2_mlib
;
439 c
->avg_pixels_tab
[1][0] = avg_pixels8_mlib
;
440 c
->avg_pixels_tab
[1][1] = avg_pixels8_x2_mlib
;
441 c
->avg_pixels_tab
[1][2] = avg_pixels8_y2_mlib
;
442 c
->avg_pixels_tab
[1][3] = avg_pixels8_xy2_mlib
;
444 c
->put_no_rnd_pixels_tab
[0][0] = put_pixels16_mlib
;
445 c
->put_no_rnd_pixels_tab
[1][0] = put_pixels8_mlib
;
447 c
->bswap_buf
= bswap_buf_mlib
;
450 void MPV_common_init_mlib(MpegEncContext
*s
)
452 if(s
->avctx
->dct_algo
==FF_DCT_AUTO
|| s
->avctx
->dct_algo
==FF_DCT_MLIB
){
453 s
->dsp
.fdct
= ff_fdct_mlib
;
456 if(s
->avctx
->idct_algo
==FF_IDCT_AUTO
|| s
->avctx
->idct_algo
==FF_IDCT_MLIB
){
457 s
->dsp
.idct_put
= ff_idct_put_mlib
;
458 s
->dsp
.idct_add
= ff_idct_add_mlib
;
459 s
->dsp
.idct
= ff_idct_mlib
;
460 s
->dsp
.idct_permutation_type
= FF_NO_IDCT_PERM
;