2 * ARMv4L optimized DSP utils
3 * Copyright (c) 2001 Lionel Ulmer.
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavcodec/dsputil.h"
/* Entry points implemented outside this file. Only declarations are given
 * here; dsputil_init_armv4l() below calls or installs them. */

/* Additional initialisers called from dsputil_init_armv4l(). */
27 extern void dsputil_init_iwmmxt(DSPContext
* c
, AVCodecContext
*avctx
);
28 extern void ff_float_init_arm_vfp(DSPContext
* c
, AVCodecContext
*avctx
);
/* IDCT routines that take only the coefficient block. */
30 extern void j_rev_dct_ARM(DCTELEM
*data
);
31 extern void simple_idct_ARM(DCTELEM
*data
);
/* ARMv5TE IDCT family: plain, put and add variants. */
33 extern void simple_idct_armv5te(DCTELEM
*data
);
/* NOTE(review): the put/add declarations below are cut off after
 * "line_size," in this view; their remaining parameter(s) are not visible.
 * They are assigned to c->idct_put / c->idct_add in dsputil_init_armv4l(). */
34 extern void simple_idct_put_armv5te(uint8_t *dest
, int line_size
,
36 extern void simple_idct_add_armv5te(uint8_t *dest
, int line_size
,
/* ARMv6 IDCT family: plain, put and add variants. */
39 extern void ff_simple_idct_armv6(DCTELEM
*data
);
/* NOTE(review): as above, these two declarations are truncated after
 * "line_size," in this view. */
40 extern void ff_simple_idct_put_armv6(uint8_t *dest
, int line_size
,
42 extern void ff_simple_idct_add_armv6(uint8_t *dest
, int line_size
,
/* File-local pointers to the clamped "put" and "add" pixel routines.
 * dsputil_init_armv4l() fills them from c->put_pixels_clamped and
 * c->add_pixels_clamped; the *_put / *_add IDCT wrappers in this file call
 * through them after running the IDCT.
 * Arguments: block (coefficients), pixels (destination), line_size. */
46 static void (*ff_put_pixels_clamped
)(const DCTELEM
*block
, uint8_t *pixels
, int line_size
);
47 static void (*ff_add_pixels_clamped
)(const DCTELEM
*block
, uint8_t *pixels
, int line_size
);
/* Prototypes of the pixel routines whose bodies are not in this file.
 * All share one signature: destination block, source pixels, line_size and
 * row count h. dsputil_init_armv4l() installs them in c->put_pixels_tab and
 * c->put_no_rnd_pixels_tab. */

/* 8-pixel-wide variants: plain, x2, y2 and xy2. */
49 void put_pixels8_arm(uint8_t *block
, const uint8_t *pixels
, int line_size
, int h
);
50 void put_pixels8_x2_arm(uint8_t *block
, const uint8_t *pixels
, int line_size
, int h
);
51 void put_pixels8_y2_arm(uint8_t *block
, const uint8_t *pixels
, int line_size
, int h
);
52 void put_pixels8_xy2_arm(uint8_t *block
, const uint8_t *pixels
, int line_size
, int h
);
/* 8-pixel-wide "no_rnd" variants: x2, y2 and xy2 (there is no separate plain
 * no_rnd routine; the tables reuse put_pixels8_arm for that slot). */
54 void put_no_rnd_pixels8_x2_arm(uint8_t *block
, const uint8_t *pixels
, int line_size
, int h
);
55 void put_no_rnd_pixels8_y2_arm(uint8_t *block
, const uint8_t *pixels
, int line_size
, int h
);
56 void put_no_rnd_pixels8_xy2_arm(uint8_t *block
, const uint8_t *pixels
, int line_size
, int h
);
/* 16-pixel-wide plain variant; the other 16-wide routines are generated with
 * CALL_2X_PIXELS right after these prototypes. */
58 void put_pixels16_arm(uint8_t *block
, const uint8_t *pixels
, int line_size
, int h
);
/* Generate the 16-pixel-wide x2 / y2 / xy2 routines (rounding and no_rnd).
 * Each invocation names the new 16-wide routine, the 8-wide routine it is
 * built from, and the value 8.
 * NOTE(review): CALL_2X_PIXELS is not defined in this file -- presumably it
 * comes from libavcodec/dsputil.h and calls the 8-wide routine twice, the
 * second time 8 pixels further along; confirm against the macro definition.
 * The generated names are installed in row [0] of the pixel tables by
 * dsputil_init_armv4l(). */
60 CALL_2X_PIXELS(put_pixels16_x2_arm
, put_pixels8_x2_arm
, 8)
61 CALL_2X_PIXELS(put_pixels16_y2_arm
, put_pixels8_y2_arm
, 8)
62 CALL_2X_PIXELS(put_pixels16_xy2_arm
, put_pixels8_xy2_arm
, 8)
63 CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm
, put_no_rnd_pixels8_x2_arm
, 8)
64 CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm
, put_no_rnd_pixels8_y2_arm
, 8)
65 CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm
, put_no_rnd_pixels8_xy2_arm
, 8)
/* Inline-assembly routine that adds 16-bit values read through operand %0 to
 * the bytes of 32-bit words read through operand %1 and stores the packed
 * result back through %1. It is called from simple_idct_ipp_add().
 *
 * What the visible instructions show, per loop iteration:
 *   - eight signed halfwords are loaded from %0 (offsets 0..14, two per
 *     group) and %0 is then advanced by 16 bytes;
 *   - each byte of the word in r4 is isolated with AND/LSR and added to one
 *     halfword;
 *   - if bit 8 (0x100) of a sum is set, the sum is replaced by the top byte
 *     (lsr #24) of r5 or r7;
 *   - the four results are packed into r9 with ORR/LSL and stored;
 *   - %1 is advanced by %2 and the counter in r10 is decremented.
 *
 * NOTE(review): the operand list, the loop label/branch, the initial loads
 * and the instructions that prepare r5/r7 before the "movne" replacements are
 * not visible in this view (original lines are missing). Presumably
 * %0 = block, %1 = dest, %2 = line_size -- confirm against the full source.
 *
 * The bracketed tags [A]..[F] in the comments mark instructions that were
 * moved away from their natural position, presumably for scheduling. */
67 static void add_pixels_clamped_ARM(short *block
, unsigned char *dest
, int line_size
)
76 /* block[0] and block[1]*/
78 "ldrsh r7, [%0, #2] \n\t"
79 "and r6, r4, #0xFF \n\t"
80 "and r8, r4, #0xFF00 \n\t"
82 "add r8, r7, r8, lsr #8 \n\t"
86 "movne r6, r5, lsr #24 \n\t"
88 "movne r8, r7, lsr #24 \n\t"
90 "ldrsh r5, [%0, #4] \n\t" /* moved from [A] */
91 "orr r9, r9, r8, lsl #8 \n\t"
92 /* block[2] and block[3] */
94 "ldrsh r7, [%0, #6] \n\t"
95 "and r6, r4, #0xFF0000 \n\t"
96 "and r8, r4, #0xFF000000 \n\t"
97 "add r6, r5, r6, lsr #16 \n\t"
98 "add r8, r7, r8, lsr #24 \n\t"
101 "tst r6, #0x100 \n\t"
102 "movne r6, r5, lsr #24 \n\t"
103 "tst r8, #0x100 \n\t"
104 "movne r8, r7, lsr #24 \n\t"
105 "orr r9, r9, r6, lsl #16 \n\t"
106 "ldr r4, [%1, #4] \n\t" /* moved from [B] */
107 "orr r9, r9, r8, lsl #24 \n\t"
109 "ldrsh r5, [%0, #8] \n\t" /* moved from [C] */
114 /* block[4] and block[5] */
116 "ldrsh r7, [%0, #10] \n\t"
117 "and r6, r4, #0xFF \n\t"
118 "and r8, r4, #0xFF00 \n\t"
119 "add r6, r5, r6 \n\t"
120 "add r8, r7, r8, lsr #8 \n\t"
123 "tst r6, #0x100 \n\t"
124 "movne r6, r5, lsr #24 \n\t"
125 "tst r8, #0x100 \n\t"
126 "movne r8, r7, lsr #24 \n\t"
128 "ldrsh r5, [%0, #12] \n\t" /* moved from [D] */
129 "orr r9, r9, r8, lsl #8 \n\t"
130 /* block[6] and block[7] */
132 "ldrsh r7, [%0, #14] \n\t"
133 "and r6, r4, #0xFF0000 \n\t"
134 "and r8, r4, #0xFF000000 \n\t"
135 "add r6, r5, r6, lsr #16 \n\t"
136 "add r8, r7, r8, lsr #24 \n\t"
139 "tst r6, #0x100 \n\t"
140 "movne r6, r5, lsr #24 \n\t"
141 "tst r8, #0x100 \n\t"
142 "movne r8, r7, lsr #24 \n\t"
143 "orr r9, r9, r6, lsl #16 \n\t"
144 "add %0, %0, #16 \n\t" /* moved from [E] */
145 "orr r9, r9, r8, lsl #24 \n\t"
146 "subs r10, r10, #1 \n\t" /* moved from [F] */
/* Store the second packed word of the row, then step %1 by %2. */
148 "str r9, [%1, #4] \n\t"
152 "add %1, %1, %2 \n\t"
/* Clobbers: scratch registers r4-r10, the condition flags and memory. */
157 : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
160 /* XXX: these functions should be removed ASAP, once all IDCTs are converted */
/* idct_put wrapper for j_rev_dct_ARM: runs j_rev_dct_ARM() on block, then
 * hands block, dest and line_size to the routine behind
 * ff_put_pixels_clamped. Installed as c->idct_put for FF_IDCT_ARM.
 * NOTE(review): the function's braces are not visible in this view. */
162 static void j_rev_dct_ARM_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
164 j_rev_dct_ARM (block
);
165 ff_put_pixels_clamped(block
, dest
, line_size
);
/* idct_add wrapper for j_rev_dct_ARM: runs j_rev_dct_ARM() on block, then
 * hands block, dest and line_size to the routine behind
 * ff_add_pixels_clamped. Installed as c->idct_add for FF_IDCT_ARM.
 * NOTE(review): the function's braces are not visible in this view. */
167 static void j_rev_dct_ARM_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
169 j_rev_dct_ARM (block
);
170 ff_add_pixels_clamped(block
, dest
, line_size
);
/* idct_put wrapper for simple_idct_ARM: runs simple_idct_ARM() on block,
 * then hands block, dest and line_size to the routine behind
 * ff_put_pixels_clamped. Installed as c->idct_put for FF_IDCT_SIMPLEARM.
 * NOTE(review): the function's braces are not visible in this view. */
172 static void simple_idct_ARM_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
174 simple_idct_ARM (block
);
175 ff_put_pixels_clamped(block
, dest
, line_size
);
/* idct_add wrapper for simple_idct_ARM: runs simple_idct_ARM() on block,
 * then hands block, dest and line_size to the routine behind
 * ff_add_pixels_clamped. Installed as c->idct_add for FF_IDCT_SIMPLEARM.
 * NOTE(review): the function's braces are not visible in this view. */
177 static void simple_idct_ARM_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
179 simple_idct_ARM (block
);
180 ff_add_pixels_clamped(block
, dest
, line_size
);
/* IDCT entry point for FF_IDCT_IPP: passes block straight to
 * ippiDCT8x8Inv_Video_16s_C1I(). Installed as c->idct.
 * NOTE(review): the IPP function is declared outside this file; the name
 * suggests an in-place 8x8 inverse DCT on 16-bit data -- confirm against the
 * IPP documentation. The function's braces are not visible in this view. */
184 static void simple_idct_ipp(DCTELEM
*block
)
186 ippiDCT8x8Inv_Video_16s_C1I(block
);
/* idct_put entry point for FF_IDCT_IPP: a single call to
 * ippiDCT8x8Inv_Video_16s8u_C1R() with block, dest and line_size (note the
 * argument order differs from this wrapper's own parameter order).
 * NOTE(review): the IPP function is declared outside this file; presumably it
 * transforms block and writes 8-bit results to dest -- confirm. The
 * function's braces are not visible in this view. */
188 static void simple_idct_ipp_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
190 ippiDCT8x8Inv_Video_16s8u_C1R(block
, dest
, line_size
);
/* Prototype of the iwMMXt add-with-clamp routine, implemented outside this
 * file; it is called from simple_idct_ipp_add(). */
193 void add_pixels_clamped_iwmmxt(const DCTELEM
*block
, uint8_t *pixels
, int line_size
);
/* idct_add entry point for FF_IDCT_IPP: runs ippiDCT8x8Inv_Video_16s_C1I()
 * on block, then adds the result to dest with a clamping add routine.
 * NOTE(review): both add_pixels_clamped_iwmmxt() and add_pixels_clamped_ARM()
 * appear here back to back, but the original line numbers skip between them
 * (197 -> 199 -> 201). Presumably preprocessor conditionals that select
 * exactly one of the two calls are missing from this view -- confirm against
 * the full source. The function's braces are not visible either. */
195 static void simple_idct_ipp_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
197 ippiDCT8x8Inv_Video_16s_C1I(block
);
199 add_pixels_clamped_iwmmxt(block
, dest
, line_size
);
201 add_pixels_clamped_ARM(block
, dest
, line_size
);
/* Install the ARM routines into a DSPContext.
 *
 * c     - context whose idct*, put_pixels_tab and put_no_rnd_pixels_tab
 *         members are overwritten.
 * avctx - codec context; idct_algo and lowres are read from it, and it is
 *         passed on to the iwMMXt and VFP initialisers.
 *
 * Steps visible here: remember the context's current clamped put/add
 * routines, select an IDCT (only when avctx->lowres == 0), fill the pixel
 * tables, then call dsputil_init_iwmmxt() and ff_float_init_arm_vfp().
 *
 * NOTE(review): closing braces and several preprocessor lines (the #else /
 * #endif of the AUTO selection and, presumably, guards around the optional
 * ARMv6 / ARMv5TE / IPP branches and the two trailing init calls) are not
 * visible in this view; confirm against the full source. */
206 void dsputil_init_armv4l(DSPContext
* c
, AVCodecContext
*avctx
)
208 int idct_algo
= avctx
->idct_algo
;
/* Keep the clamped put/add routines currently in the context; the *_put and
 * *_add IDCT wrappers in this file call them through these pointers. */
210 ff_put_pixels_clamped
= c
->put_pixels_clamped
;
211 ff_add_pixels_clamped
= c
->add_pixels_clamped
;
/* The ARM IDCTs are only installed when lowres is 0. */
213 if (avctx
->lowres
== 0) {
/* FF_IDCT_AUTO: pick a default at build time, preferring IPP, then ARMv6,
 * then ARMv5TE. The FF_IDCT_ARM assignment that follows is presumably the
 * #else case (the directive itself is not visible). */
214 if(idct_algo
== FF_IDCT_AUTO
){
215 #if defined(HAVE_IPP)
216 idct_algo
= FF_IDCT_IPP
;
217 #elif defined(HAVE_ARMV6)
218 idct_algo
= FF_IDCT_SIMPLEARMV6
;
219 #elif defined(HAVE_ARMV5TE)
220 idct_algo
= FF_IDCT_SIMPLEARMV5TE
;
222 idct_algo
= FF_IDCT_ARM
;
/* Each branch sets idct_put, idct_add, idct and the coefficient permutation
 * type that the chosen IDCT uses. */
226 if(idct_algo
==FF_IDCT_ARM
){
227 c
->idct_put
= j_rev_dct_ARM_put
;
228 c
->idct_add
= j_rev_dct_ARM_add
;
229 c
->idct
= j_rev_dct_ARM
;
230 c
->idct_permutation_type
= FF_LIBMPEG2_IDCT_PERM
;/* FF_NO_IDCT_PERM */
231 } else if (idct_algo
==FF_IDCT_SIMPLEARM
){
232 c
->idct_put
= simple_idct_ARM_put
;
233 c
->idct_add
= simple_idct_ARM_add
;
234 c
->idct
= simple_idct_ARM
;
235 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
237 } else if (idct_algo
==FF_IDCT_SIMPLEARMV6
){
238 c
->idct_put
= ff_simple_idct_put_armv6
;
239 c
->idct_add
= ff_simple_idct_add_armv6
;
240 c
->idct
= ff_simple_idct_armv6
;
241 c
->idct_permutation_type
= FF_LIBMPEG2_IDCT_PERM
;
244 } else if (idct_algo
==FF_IDCT_SIMPLEARMV5TE
){
245 c
->idct_put
= simple_idct_put_armv5te
;
246 c
->idct_add
= simple_idct_add_armv5te
;
247 c
->idct
= simple_idct_armv5te
;
248 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
251 } else if (idct_algo
==FF_IDCT_IPP
){
252 c
->idct_put
= simple_idct_ipp_put
;
253 c
->idct_add
= simple_idct_ipp_add
;
254 c
->idct
= simple_idct_ipp
;
255 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
/* Pixel tables: row [0] receives the 16-pixel-wide routines, row [1] the
 * 8-pixel-wide ones; columns 0..3 are plain, x2, y2 and xy2. Slot [n][0] of
 * the no_rnd table reuses the plain put_pixels routine. */
260 c
->put_pixels_tab
[0][0] = put_pixels16_arm
;
261 c
->put_pixels_tab
[0][1] = put_pixels16_x2_arm
; //OK!
262 c
->put_pixels_tab
[0][2] = put_pixels16_y2_arm
; //OK!
263 c
->put_pixels_tab
[0][3] = put_pixels16_xy2_arm
;
264 c
->put_no_rnd_pixels_tab
[0][0] = put_pixels16_arm
;
265 c
->put_no_rnd_pixels_tab
[0][1] = put_no_rnd_pixels16_x2_arm
; // OK
266 c
->put_no_rnd_pixels_tab
[0][2] = put_no_rnd_pixels16_y2_arm
; //OK
267 c
->put_no_rnd_pixels_tab
[0][3] = put_no_rnd_pixels16_xy2_arm
;
268 c
->put_pixels_tab
[1][0] = put_pixels8_arm
; //OK
269 c
->put_pixels_tab
[1][1] = put_pixels8_x2_arm
; //OK
270 c
->put_pixels_tab
[1][2] = put_pixels8_y2_arm
;
271 c
->put_pixels_tab
[1][3] = put_pixels8_xy2_arm
;
272 c
->put_no_rnd_pixels_tab
[1][0] = put_pixels8_arm
;//OK
273 c
->put_no_rnd_pixels_tab
[1][1] = put_no_rnd_pixels8_x2_arm
; //OK
274 c
->put_no_rnd_pixels_tab
[1][2] = put_no_rnd_pixels8_y2_arm
; //OK
275 c
->put_no_rnd_pixels_tab
[1][3] = put_no_rnd_pixels8_xy2_arm
;
/* Let the iwMMXt and VFP initialisers run last, after the generic ARM
 * setup above. */
278 dsputil_init_iwmmxt(c
, avctx
);
281 ff_float_init_arm_vfp(c
, avctx
);