/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #include "libavcodec/dsputil.h"
23 #include "libavcodec/mpegvideo.h"
/*
 * H.263 intra-block dequantisation, Alpha version.
 *
 * Walks the coefficient block four DCTELEMs at a time (block += 4 per
 * iteration), keeping each group in one uint64_t; the 0x7fff / 0x0001
 * constants and the "<< 16" shift suggest 16-bit subwords -- TODO confirm
 * against the DCTELEM typedef.  The DC coefficient is scaled separately
 * into block0 and written back through orig_block after the loop.
 *
 * NOTE(review): this listing appears to have lost lines.  The leading
 * numbers jump (26 -> 32 -> 33 -> 35 ...), the parameter list ends on a
 * comma, no braces delimit the body, and qscale, qmul, i and n_coeffs are
 * used without a visible declaration.  Compare with the pristine file
 * before changing any code here.
 */
26 static void dct_unquantize_h263_intra_axp(MpegEncContext
*s
, DCTELEM
*block
,
/* Remember the start of the block: the loop below advances block itself. */
32 DCTELEM
*orig_block
= block
;
33 DCTELEM block0
; /* might not be used uninitialized */
/* (qscale - 1) | 1 is always odd; WORD_VEC presumably replicates the value
   into every subword of the 64-bit vector -- TODO confirm macro. */
35 qadd
= WORD_VEC((qscale
- 1) | 1);
37 /* This mask kills spill from negative subwords to the next subword. */
38 correction
= WORD_VEC((qmul
- 1) + 1); /* multiplication / addition */
/* DC coefficient scaled by the luma DC scale ...  NOTE(review): the
   condition choosing between this and the chroma assignment below is not
   visible in this listing. */
42 block0
= block
[0] * s
->y_dc_scale
;
/* ... or by the chroma DC scale. */
44 block0
= block
[0] * s
->c_dc_scale
;
/* Fixed upper index: the loop below covers coefficients 0..63. */
48 n_coeffs
= 63; // does not always use zigzag table
/* One iteration per group of four coefficients. */
50 for(i
= 0; i
<= n_coeffs
; block
+= 4, i
+= 4) {
51 uint64_t levels
, negmask
, zeros
, add
;
/* Per-subword sign mask, first variant (maxsw4/minsw4).
   NOTE(review): the comment opened on the next line is never closed in
   this listing (its continuation seems to be missing), so lexically it
   runs on until the end of the "negative -> ffff" comment. */
58 /* I don't think the speed difference justifies runtime
60 negmask
= maxsw4(levels
, -1); /* negative -> ffff (-1) */
61 negmask
= minsw4(negmask
, 0); /* positive -> 0000 (0) */
/* Second computation of negmask using cmpbge/zap only; presumably the
   alternative for builds without maxsw4/minsw4 -- the preprocessor
   conditional selecting one of the two is not visible here. */
63 negmask
= cmpbge(WORD_VEC(0x7fff), levels
);
64 negmask
&= (negmask
>> 1) | (1 << 7);
65 negmask
= zap(-1, negmask
);
/* Byte mask from comparing 0 against levels; used below to clear the
   addend where the level is zero. */
68 zeros
= cmpbge(0, levels
);
70 /* zeros |= zeros << 1 is not needed since qadd <= 255, so
71 zapping the lower byte suffices. */
/* Subtract the correction in the position 16 bits above each negative
   subword (negmask shifted up by one subword). */
74 levels
-= correction
& (negmask
<< 16);
76 /* Negate qadd for negative levels. */
/* NOTE(review): the inter variant assigns add = qadd ^ negmask before this
   increment; no such assignment is visible here (embedded line 77 is
   absent), so add would be read uninitialised as listed -- confirm. */
78 add
+= WORD_VEC(0x0001) & negmask
;
79 /* Set qadd to 0 for levels == 0. */
80 add
= zap(add
, zeros
);
/* For intra macroblocks without h263_aic, store the separately scaled DC
   value through the saved start-of-block pointer. */
87 if (s
->mb_intra
&& !s
->h263_aic
)
88 orig_block
[0] = block0
;
/*
 * H.263 inter-block dequantisation, Alpha version.
 *
 * Same four-coefficients-per-uint64_t scheme as the intra routine, but
 * without separate DC handling, and with the loop bound taken from the
 * scan table instead of a fixed 63.
 *
 * NOTE(review): this listing appears to have lost lines.  The leading
 * numbers jump (91 -> 98 -> 100 ...), the parameter list ends on a comma,
 * no braces delimit the body, and qscale, qmul, qadd, correction, i, n and
 * n_coeffs are used without a visible declaration.  Compare with the
 * pristine file before changing any code here.
 */
91 static void dct_unquantize_h263_inter_axp(MpegEncContext
*s
, DCTELEM
*block
,
/* (qscale - 1) | 1 is always odd; WORD_VEC presumably replicates the value
   into every subword of the 64-bit vector -- TODO confirm macro. */
98 qadd
= WORD_VEC((qscale
- 1) | 1);
100 /* This mask kills spill from negative subwords to the next subword. */
101 correction
= WORD_VEC((qmul
- 1) + 1); /* multiplication / addition */
/* Last index to process: raster_end entry for block n's last index.
   Note the table is read from s->intra_scantable in this inter routine. */
103 n_coeffs
= s
->intra_scantable
.raster_end
[s
->block_last_index
[n
]];
/* One iteration per group of four coefficients. */
105 for(i
= 0; i
<= n_coeffs
; block
+= 4, i
+= 4) {
106 uint64_t levels
, negmask
, zeros
, add
;
/* Per-subword sign mask, first variant (maxsw4/minsw4).
   NOTE(review): the comment opened on the next line is never closed in
   this listing (its continuation seems to be missing), so lexically it
   runs on until the end of the "negative -> ffff" comment. */
113 /* I don't think the speed difference justifies runtime
115 negmask
= maxsw4(levels
, -1); /* negative -> ffff (-1) */
116 negmask
= minsw4(negmask
, 0); /* positive -> 0000 (0) */
/* Second computation of negmask using cmpbge/zap only; presumably the
   alternative for builds without maxsw4/minsw4 -- the preprocessor
   conditional selecting one of the two is not visible here. */
118 negmask
= cmpbge(WORD_VEC(0x7fff), levels
);
119 negmask
&= (negmask
>> 1) | (1 << 7);
120 negmask
= zap(-1, negmask
);
/* Byte mask from comparing 0 against levels; used below to clear the
   addend where the level is zero. */
123 zeros
= cmpbge(0, levels
);
125 /* zeros |= zeros << 1 is not needed since qadd <= 255, so
126 zapping the lower byte suffices. */
/* Subtract the correction in the position 16 bits above each negative
   subword (negmask shifted up by one subword). */
129 levels
-= correction
& (negmask
<< 16);
131 /* Negate qadd for negative levels. */
/* XOR with the all-ones mask, then add 1 in the same subwords:
   two's-complement negation applied only where negmask is set. */
132 add
= qadd
^ negmask
;
133 add
+= WORD_VEC(0x0001) & negmask
;
134 /* Set qadd to 0 for levels == 0. */
135 add
= zap(add
, zeros
);
/*
 * Install the Alpha H.263 dequantisation routines into the codec context:
 * both the intra and the inter function pointer of *s are overwritten.
 *
 * NOTE(review): the braces of the body are not visible in this listing.
 */
143 void MPV_common_init_axp(MpegEncContext
*s
)
/* Intra blocks use the variant with separate DC handling. */
145 s
->dct_unquantize_h263_intra
= dct_unquantize_h263_intra_axp
;
/* Inter blocks use the variant bounded by the scan table. */
146 s
->dct_unquantize_h263_inter
= dct_unquantize_h263_inter_axp
;