/*
 * ARM optimized DSP utils
 * Copyright (c) 2001 Lionel Ulmer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #include "libavcodec/dsputil.h"
27 void dsputil_init_iwmmxt(DSPContext
* c
, AVCodecContext
*avctx
);
28 void ff_float_init_arm_vfp(DSPContext
* c
, AVCodecContext
*avctx
);
29 void ff_dsputil_init_neon(DSPContext
*c
, AVCodecContext
*avctx
);
31 void j_rev_dct_ARM(DCTELEM
*data
);
32 void simple_idct_ARM(DCTELEM
*data
);
34 void simple_idct_armv5te(DCTELEM
*data
);
35 void simple_idct_put_armv5te(uint8_t *dest
, int line_size
, DCTELEM
*data
);
36 void simple_idct_add_armv5te(uint8_t *dest
, int line_size
, DCTELEM
*data
);
38 void ff_simple_idct_armv6(DCTELEM
*data
);
39 void ff_simple_idct_put_armv6(uint8_t *dest
, int line_size
, DCTELEM
*data
);
40 void ff_simple_idct_add_armv6(uint8_t *dest
, int line_size
, DCTELEM
*data
);
42 void ff_simple_idct_neon(DCTELEM
*data
);
43 void ff_simple_idct_put_neon(uint8_t *dest
, int line_size
, DCTELEM
*data
);
44 void ff_simple_idct_add_neon(uint8_t *dest
, int line_size
, DCTELEM
*data
);
46 void ff_vp3_idct_neon(DCTELEM
*data
);
47 void ff_vp3_idct_put_neon(uint8_t *dest
, int line_size
, DCTELEM
*data
);
48 void ff_vp3_idct_add_neon(uint8_t *dest
, int line_size
, DCTELEM
*data
);
51 static void (*ff_put_pixels_clamped
)(const DCTELEM
*block
, uint8_t *pixels
, int line_size
);
52 static void (*ff_add_pixels_clamped
)(const DCTELEM
*block
, uint8_t *pixels
, int line_size
);
/* Pixel-copy routines that dsputil_init_arm() stores in c->put_pixels_tab
 * and c->put_no_rnd_pixels_tab.  They are only declared here. */
void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);

void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);

void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);

/* Stored in c->prefetch by dsputil_init_arm(). */
void ff_prefetch_arm(void *mem, int stride, int h);
67 CALL_2X_PIXELS(put_pixels16_x2_arm
, put_pixels8_x2_arm
, 8)
68 CALL_2X_PIXELS(put_pixels16_y2_arm
, put_pixels8_y2_arm
, 8)
69 CALL_2X_PIXELS(put_pixels16_xy2_arm
, put_pixels8_xy2_arm
, 8)
70 CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm
, put_no_rnd_pixels8_x2_arm
, 8)
71 CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm
, put_no_rnd_pixels8_y2_arm
, 8)
72 CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm
, put_no_rnd_pixels8_xy2_arm
, 8)
/* NOTE(review): the tail of this declaration was lost from the text.  The
 * third parameter is restored from the three-argument call in
 * simple_idct_ipp_add(), which passes an int line_size -- confirm against
 * the routine's definition. */
void ff_add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size);
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
79 static void j_rev_dct_ARM_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
81 j_rev_dct_ARM (block
);
82 ff_put_pixels_clamped(block
, dest
, line_size
);
84 static void j_rev_dct_ARM_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
86 j_rev_dct_ARM (block
);
87 ff_add_pixels_clamped(block
, dest
, line_size
);
89 static void simple_idct_ARM_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
91 simple_idct_ARM (block
);
92 ff_put_pixels_clamped(block
, dest
, line_size
);
94 static void simple_idct_ARM_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
96 simple_idct_ARM (block
);
97 ff_add_pixels_clamped(block
, dest
, line_size
);
101 static void simple_idct_ipp(DCTELEM
*block
)
103 ippiDCT8x8Inv_Video_16s_C1I(block
);
105 static void simple_idct_ipp_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
107 ippiDCT8x8Inv_Video_16s8u_C1R(block
, dest
, line_size
);
110 void add_pixels_clamped_iwmmxt(const DCTELEM
*block
, uint8_t *pixels
, int line_size
);
112 static void simple_idct_ipp_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
114 ippiDCT8x8Inv_Video_16s_C1I(block
);
116 add_pixels_clamped_iwmmxt(block
, dest
, line_size
);
118 ff_add_pixels_clamped_ARM(block
, dest
, line_size
);
125 return HAVE_IWMMXT
* FF_MM_IWMMXT
;
/*
 * dsputil_init_arm: fill DSPContext c with the ARM routines declared in this
 * file.
 *
 * Steps visible in the code:
 *   - copy c->put_pixels_clamped / c->add_pixels_clamped into the file-level
 *     pointers used by the *_put / *_add IDCT wrappers;
 *   - when avctx->lowres == 0, choose an IDCT from avctx->idct_algo and set
 *     c->idct_put, c->idct_add, c->idct and c->idct_permutation_type;
 *   - fill c->put_pixels_tab and c->put_no_rnd_pixels_tab, rows [0] and [1],
 *     columns [0]..[3];
 *   - set c->prefetch and call the iwmmxt / vfp / neon initializers.
 *
 * NOTE(review): this text is damaged.  Every statement starts with a stray
 * listing line number, and the listing lines that carried lone braces and
 * preprocessor directives are absent (see the gaps in the embedded numbers).
 * Restore them from the pristine source; do not rely on the statement
 * sequence as shown.
 */
128 void dsputil_init_arm(DSPContext
* c
, AVCodecContext
*avctx
)
130 int idct_algo
= avctx
->idct_algo
;
/* Save the context's current clamped put/add routines before the IDCT
 * wrappers that call through these pointers are installed. */
132 ff_put_pixels_clamped
= c
->put_pixels_clamped
;
133 ff_add_pixels_clamped
= c
->add_pixels_clamped
;
135 if (avctx
->lowres
== 0) {
136 if(idct_algo
== FF_IDCT_AUTO
){
/* NOTE(review): listing lines 137, 139, 141, 143, 145 and 147 are missing
 * between the assignments below.  As shown they simply overwrite one another,
 * so they were presumably alternatives of one conditional-compilation chain
 * -- confirm. */
138 idct_algo
= FF_IDCT_IPP
;
140 idct_algo
= FF_IDCT_SIMPLENEON
;
142 idct_algo
= FF_IDCT_SIMPLEARMV6
;
144 idct_algo
= FF_IDCT_SIMPLEARMV5TE
;
146 idct_algo
= FF_IDCT_ARM
;
/* Install the routines for the chosen algorithm.  Each branch also records
 * the coefficient permutation type it sets for that IDCT. */
150 if(idct_algo
==FF_IDCT_ARM
){
151 c
->idct_put
= j_rev_dct_ARM_put
;
152 c
->idct_add
= j_rev_dct_ARM_add
;
153 c
->idct
= j_rev_dct_ARM
;
154 c
->idct_permutation_type
= FF_LIBMPEG2_IDCT_PERM
;
155 } else if (idct_algo
==FF_IDCT_SIMPLEARM
){
156 c
->idct_put
= simple_idct_ARM_put
;
157 c
->idct_add
= simple_idct_ARM_add
;
158 c
->idct
= simple_idct_ARM
;
159 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
/* NOTE(review): listing lines 160, 166-167, 173-174 and 180-181 are missing
 * around the branches below; presumably per-feature preprocessor guards --
 * confirm. */
161 } else if (idct_algo
==FF_IDCT_SIMPLEARMV6
){
162 c
->idct_put
= ff_simple_idct_put_armv6
;
163 c
->idct_add
= ff_simple_idct_add_armv6
;
164 c
->idct
= ff_simple_idct_armv6
;
165 c
->idct_permutation_type
= FF_LIBMPEG2_IDCT_PERM
;
168 } else if (idct_algo
==FF_IDCT_SIMPLEARMV5TE
){
169 c
->idct_put
= simple_idct_put_armv5te
;
170 c
->idct_add
= simple_idct_add_armv5te
;
171 c
->idct
= simple_idct_armv5te
;
172 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
175 } else if (idct_algo
==FF_IDCT_IPP
){
176 c
->idct_put
= simple_idct_ipp_put
;
177 c
->idct_add
= simple_idct_ipp_add
;
178 c
->idct
= simple_idct_ipp
;
179 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
182 } else if (idct_algo
==FF_IDCT_SIMPLENEON
){
183 c
->idct_put
= ff_simple_idct_put_neon
;
184 c
->idct_add
= ff_simple_idct_add_neon
;
185 c
->idct
= ff_simple_idct_neon
;
186 c
->idct_permutation_type
= FF_PARTTRANS_IDCT_PERM
;
/* The VP3 IDCT is considered only when at least one of the VP3/VP5/VP6
 * decoders is configured in. */
187 } else if ((CONFIG_VP3_DECODER
|| CONFIG_VP5_DECODER
|| CONFIG_VP6_DECODER
) &&
188 idct_algo
==FF_IDCT_VP3
){
189 c
->idct_put
= ff_vp3_idct_put_neon
;
190 c
->idct_add
= ff_vp3_idct_add_neon
;
191 c
->idct
= ff_vp3_idct_neon
;
192 c
->idct_permutation_type
= FF_TRANSPOSE_IDCT_PERM
;
/* NOTE(review): listing lines 193-196 are missing here; presumably they close
 * the if/else chain and the lowres block, so the table setup below would not
 * depend on lowres -- confirm. */
/* Row [0] takes the 16-wide routines, row [1] the 8-wide ones.  Column [0] of
 * both the rounding and the no-rounding table uses the same plain copy. */
197 c
->put_pixels_tab
[0][0] = put_pixels16_arm
;
198 c
->put_pixels_tab
[0][1] = put_pixels16_x2_arm
;
199 c
->put_pixels_tab
[0][2] = put_pixels16_y2_arm
;
200 c
->put_pixels_tab
[0][3] = put_pixels16_xy2_arm
;
201 c
->put_no_rnd_pixels_tab
[0][0] = put_pixels16_arm
;
202 c
->put_no_rnd_pixels_tab
[0][1] = put_no_rnd_pixels16_x2_arm
;
203 c
->put_no_rnd_pixels_tab
[0][2] = put_no_rnd_pixels16_y2_arm
;
204 c
->put_no_rnd_pixels_tab
[0][3] = put_no_rnd_pixels16_xy2_arm
;
205 c
->put_pixels_tab
[1][0] = put_pixels8_arm
;
206 c
->put_pixels_tab
[1][1] = put_pixels8_x2_arm
;
207 c
->put_pixels_tab
[1][2] = put_pixels8_y2_arm
;
208 c
->put_pixels_tab
[1][3] = put_pixels8_xy2_arm
;
209 c
->put_no_rnd_pixels_tab
[1][0] = put_pixels8_arm
;
210 c
->put_no_rnd_pixels_tab
[1][1] = put_no_rnd_pixels8_x2_arm
;
211 c
->put_no_rnd_pixels_tab
[1][2] = put_no_rnd_pixels8_y2_arm
;
212 c
->put_no_rnd_pixels_tab
[1][3] = put_no_rnd_pixels8_xy2_arm
;
/* NOTE(review): each of the four statements below is surrounded by missing
 * listing lines (214/216, 218/220, 221/223, 224/226), presumably individual
 * feature guards -- confirm.  The function's closing brace is also absent
 * from this text. */
215 c
->prefetch
= ff_prefetch_arm
;
219 dsputil_init_iwmmxt(c
, avctx
);
222 ff_float_init_arm_vfp(c
, avctx
);
225 ff_dsputil_init_neon(c
, avctx
);