/*
 * Copyright (c) 2004-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
28 #include "bit_depth_template.c"
29 #include "libavutil/common.h"
32 void FUNCC(ff_h264_idct_add
)(uint8_t *_dst
, int16_t *_block
, int stride
)
35 pixel
*dst
= (pixel
*)_dst
;
36 dctcoef
*block
= (dctcoef
*)_block
;
37 stride
/= sizeof(pixel
);
42 const int z0
= block
[i
+ 4*0] + block
[i
+ 4*2];
43 const int z1
= block
[i
+ 4*0] - block
[i
+ 4*2];
44 const int z2
= (block
[i
+ 4*1]>>1) - block
[i
+ 4*3];
45 const int z3
= block
[i
+ 4*1] + (block
[i
+ 4*3]>>1);
47 block
[i
+ 4*0]= z0
+ z3
;
48 block
[i
+ 4*1]= z1
+ z2
;
49 block
[i
+ 4*2]= z1
- z2
;
50 block
[i
+ 4*3]= z0
- z3
;
54 const int z0
= block
[0 + 4*i
] + block
[2 + 4*i
];
55 const int z1
= block
[0 + 4*i
] - block
[2 + 4*i
];
56 const int z2
= (block
[1 + 4*i
]>>1) - block
[3 + 4*i
];
57 const int z3
= block
[1 + 4*i
] + (block
[3 + 4*i
]>>1);
59 dst
[i
+ 0*stride
]= av_clip_pixel(dst
[i
+ 0*stride
] + ((z0
+ z3
) >> 6));
60 dst
[i
+ 1*stride
]= av_clip_pixel(dst
[i
+ 1*stride
] + ((z1
+ z2
) >> 6));
61 dst
[i
+ 2*stride
]= av_clip_pixel(dst
[i
+ 2*stride
] + ((z1
- z2
) >> 6));
62 dst
[i
+ 3*stride
]= av_clip_pixel(dst
[i
+ 3*stride
] + ((z0
- z3
) >> 6));
66 void FUNCC(ff_h264_idct8_add
)(uint8_t *_dst
, int16_t *_block
, int stride
){
68 pixel
*dst
= (pixel
*)_dst
;
69 dctcoef
*block
= (dctcoef
*)_block
;
70 stride
/= sizeof(pixel
);
74 for( i
= 0; i
< 8; i
++ )
76 const int a0
= block
[i
+0*8] + block
[i
+4*8];
77 const int a2
= block
[i
+0*8] - block
[i
+4*8];
78 const int a4
= (block
[i
+2*8]>>1) - block
[i
+6*8];
79 const int a6
= (block
[i
+6*8]>>1) + block
[i
+2*8];
81 const int b0
= a0
+ a6
;
82 const int b2
= a2
+ a4
;
83 const int b4
= a2
- a4
;
84 const int b6
= a0
- a6
;
86 const int a1
= -block
[i
+3*8] + block
[i
+5*8] - block
[i
+7*8] - (block
[i
+7*8]>>1);
87 const int a3
= block
[i
+1*8] + block
[i
+7*8] - block
[i
+3*8] - (block
[i
+3*8]>>1);
88 const int a5
= -block
[i
+1*8] + block
[i
+7*8] + block
[i
+5*8] + (block
[i
+5*8]>>1);
89 const int a7
= block
[i
+3*8] + block
[i
+5*8] + block
[i
+1*8] + (block
[i
+1*8]>>1);
91 const int b1
= (a7
>>2) + a1
;
92 const int b3
= a3
+ (a5
>>2);
93 const int b5
= (a3
>>2) - a5
;
94 const int b7
= a7
- (a1
>>2);
96 block
[i
+0*8] = b0
+ b7
;
97 block
[i
+7*8] = b0
- b7
;
98 block
[i
+1*8] = b2
+ b5
;
99 block
[i
+6*8] = b2
- b5
;
100 block
[i
+2*8] = b4
+ b3
;
101 block
[i
+5*8] = b4
- b3
;
102 block
[i
+3*8] = b6
+ b1
;
103 block
[i
+4*8] = b6
- b1
;
105 for( i
= 0; i
< 8; i
++ )
107 const int a0
= block
[0+i
*8] + block
[4+i
*8];
108 const int a2
= block
[0+i
*8] - block
[4+i
*8];
109 const int a4
= (block
[2+i
*8]>>1) - block
[6+i
*8];
110 const int a6
= (block
[6+i
*8]>>1) + block
[2+i
*8];
112 const int b0
= a0
+ a6
;
113 const int b2
= a2
+ a4
;
114 const int b4
= a2
- a4
;
115 const int b6
= a0
- a6
;
117 const int a1
= -block
[3+i
*8] + block
[5+i
*8] - block
[7+i
*8] - (block
[7+i
*8]>>1);
118 const int a3
= block
[1+i
*8] + block
[7+i
*8] - block
[3+i
*8] - (block
[3+i
*8]>>1);
119 const int a5
= -block
[1+i
*8] + block
[7+i
*8] + block
[5+i
*8] + (block
[5+i
*8]>>1);
120 const int a7
= block
[3+i
*8] + block
[5+i
*8] + block
[1+i
*8] + (block
[1+i
*8]>>1);
122 const int b1
= (a7
>>2) + a1
;
123 const int b3
= a3
+ (a5
>>2);
124 const int b5
= (a3
>>2) - a5
;
125 const int b7
= a7
- (a1
>>2);
127 dst
[i
+ 0*stride
] = av_clip_pixel( dst
[i
+ 0*stride
] + ((b0
+ b7
) >> 6) );
128 dst
[i
+ 1*stride
] = av_clip_pixel( dst
[i
+ 1*stride
] + ((b2
+ b5
) >> 6) );
129 dst
[i
+ 2*stride
] = av_clip_pixel( dst
[i
+ 2*stride
] + ((b4
+ b3
) >> 6) );
130 dst
[i
+ 3*stride
] = av_clip_pixel( dst
[i
+ 3*stride
] + ((b6
+ b1
) >> 6) );
131 dst
[i
+ 4*stride
] = av_clip_pixel( dst
[i
+ 4*stride
] + ((b6
- b1
) >> 6) );
132 dst
[i
+ 5*stride
] = av_clip_pixel( dst
[i
+ 5*stride
] + ((b4
- b3
) >> 6) );
133 dst
[i
+ 6*stride
] = av_clip_pixel( dst
[i
+ 6*stride
] + ((b2
- b5
) >> 6) );
134 dst
[i
+ 7*stride
] = av_clip_pixel( dst
[i
+ 7*stride
] + ((b0
- b7
) >> 6) );
138 // assumes all AC coefs are 0
139 void FUNCC(ff_h264_idct_dc_add
)(uint8_t *_dst
, int16_t *block
, int stride
){
141 int dc
= (((dctcoef
*)block
)[0] + 32) >> 6;
142 pixel
*dst
= (pixel
*)_dst
;
143 stride
/= sizeof(pixel
);
144 for( j
= 0; j
< 4; j
++ )
146 for( i
= 0; i
< 4; i
++ )
147 dst
[i
] = av_clip_pixel( dst
[i
] + dc
);
152 void FUNCC(ff_h264_idct8_dc_add
)(uint8_t *_dst
, int16_t *block
, int stride
){
154 int dc
= (((dctcoef
*)block
)[0] + 32) >> 6;
155 pixel
*dst
= (pixel
*)_dst
;
156 stride
/= sizeof(pixel
);
157 for( j
= 0; j
< 8; j
++ )
159 for( i
= 0; i
< 8; i
++ )
160 dst
[i
] = av_clip_pixel( dst
[i
] + dc
);
165 void FUNCC(ff_h264_idct_add16
)(uint8_t *dst
, const int *block_offset
, int16_t *block
, int stride
, const uint8_t nnzc
[15*8]){
168 int nnz
= nnzc
[ scan8
[i
] ];
170 if(nnz
==1 && ((dctcoef
*)block
)[i
*16]) FUNCC(ff_h264_idct_dc_add
)(dst
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
171 else FUNCC(ff_h264_idct_add
)(dst
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
176 void FUNCC(ff_h264_idct_add16intra
)(uint8_t *dst
, const int *block_offset
, int16_t *block
, int stride
, const uint8_t nnzc
[15*8]){
179 if(nnzc
[ scan8
[i
] ]) FUNCC(ff_h264_idct_add
)(dst
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
180 else if(((dctcoef
*)block
)[i
*16]) FUNCC(ff_h264_idct_dc_add
)(dst
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
184 void FUNCC(ff_h264_idct8_add4
)(uint8_t *dst
, const int *block_offset
, int16_t *block
, int stride
, const uint8_t nnzc
[15*8]){
186 for(i
=0; i
<16; i
+=4){
187 int nnz
= nnzc
[ scan8
[i
] ];
189 if(nnz
==1 && ((dctcoef
*)block
)[i
*16]) FUNCC(ff_h264_idct8_dc_add
)(dst
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
190 else FUNCC(ff_h264_idct8_add
)(dst
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
195 void FUNCC(ff_h264_idct_add8
)(uint8_t **dest
, const int *block_offset
, int16_t *block
, int stride
, const uint8_t nnzc
[15*8]){
198 for(i
=j
*16; i
<j
*16+4; i
++){
200 FUNCC(ff_h264_idct_add
)(dest
[j
-1] + block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
201 else if(((dctcoef
*)block
)[i
*16])
202 FUNCC(ff_h264_idct_dc_add
)(dest
[j
-1] + block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
207 void FUNCC(ff_h264_idct_add8_422
)(uint8_t **dest
, const int *block_offset
, int16_t *block
, int stride
, const uint8_t nnzc
[15*8]){
211 for(i
=j
*16; i
<j
*16+4; i
++){
213 FUNCC(ff_h264_idct_add
)(dest
[j
-1] + block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
214 else if(((dctcoef
*)block
)[i
*16])
215 FUNCC(ff_h264_idct_dc_add
)(dest
[j
-1] + block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
220 for(i
=j
*16+4; i
<j
*16+8; i
++){
221 if(nnzc
[ scan8
[i
+4] ])
222 FUNCC(ff_h264_idct_add
)(dest
[j
-1] + block_offset
[i
+4], block
+ i
*16*sizeof(pixel
), stride
);
223 else if(((dctcoef
*)block
)[i
*16])
224 FUNCC(ff_h264_idct_dc_add
)(dest
[j
-1] + block_offset
[i
+4], block
+ i
*16*sizeof(pixel
), stride
);
/**
 * IDCT transforms the 16 dc values and dequantizes them.
 * @param qmul quantization parameter
 */
/*
 * Two-stage transform of the values in _input, scaled by qmul and written
 * to _output. Both buffers are accessed as dctcoef.
 *
 * NOTE(review): this text is damaged. Each statement carries a stray
 * leading line number and tokens are split across lines. The definitions
 * of `stride`, `temp` and `i`, both loop headers, the stores into temp[]
 * after the first stage, and the closing braces are not present here;
 * recover them from upstream before relying on this block.
 */
233 void FUNCC(ff_h264_luma_dc_dequant_idct
)(int16_t *_output
, int16_t *_input
, int qmul
){
/* Offsets added to the output index in the second stage, one per i. */
237 static const uint8_t x_offset
[4]={0, 2*stride
, 8*stride
, 10*stride
};
238 dctcoef
*input
= (dctcoef
*)_input
;
239 dctcoef
*output
= (dctcoef
*)_output
;
/* First stage: sums and differences of input[4*i+0..3]. */
242 const int z0
= input
[4*i
+0] + input
[4*i
+1];
243 const int z1
= input
[4*i
+0] - input
[4*i
+1];
244 const int z2
= input
[4*i
+2] - input
[4*i
+3];
245 const int z3
= input
[4*i
+2] + input
[4*i
+3];
/* Second stage: sums and differences of temp[i], temp[4+i], temp[8+i],
 * temp[12+i]. */
254 const int offset
= x_offset
[i
];
255 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
256 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
257 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
258 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
/* Scale by qmul with rounding (+128, >> 8) and store at
 * stride*{0,1,4,5} + offset. */
260 output
[stride
* 0+offset
]= ((((z0
+ z3
)*qmul
+ 128 ) >> 8));
261 output
[stride
* 1+offset
]= ((((z1
+ z2
)*qmul
+ 128 ) >> 8));
262 output
[stride
* 4+offset
]= ((((z1
- z2
)*qmul
+ 128 ) >> 8));
263 output
[stride
* 5+offset
]= ((((z0
- z3
)*qmul
+ 128 ) >> 8));
268 void FUNCC(ff_h264_chroma422_dc_dequant_idct
)(int16_t *_block
, int qmul
){
269 const int stride
= 16*2;
270 const int xStride
= 16;
273 static const uint8_t x_offset
[2]={0, 16};
274 dctcoef
*block
= (dctcoef
*)_block
;
277 temp
[2*i
+0] = block
[stride
*i
+ xStride
*0] + block
[stride
*i
+ xStride
*1];
278 temp
[2*i
+1] = block
[stride
*i
+ xStride
*0] - block
[stride
*i
+ xStride
*1];
282 const int offset
= x_offset
[i
];
283 const int z0
= temp
[2*0+i
] + temp
[2*2+i
];
284 const int z1
= temp
[2*0+i
] - temp
[2*2+i
];
285 const int z2
= temp
[2*1+i
] - temp
[2*3+i
];
286 const int z3
= temp
[2*1+i
] + temp
[2*3+i
];
288 block
[stride
*0+offset
]= ((z0
+ z3
)*qmul
+ 128) >> 8;
289 block
[stride
*1+offset
]= ((z1
+ z2
)*qmul
+ 128) >> 8;
290 block
[stride
*2+offset
]= ((z1
- z2
)*qmul
+ 128) >> 8;
291 block
[stride
*3+offset
]= ((z0
- z3
)*qmul
+ 128) >> 8;
/*
 * In-place transform and scaling of the four values stored at
 * _block[32*r + 16*c], r = 0..1, c = 0..1 (accessed as dctcoef). Each
 * result is multiplied by qmul and shifted right by 7.
 *
 * NOTE(review): this text is damaged. Each statement carries a stray
 * leading line number and tokens are split across lines. The declarations
 * of a, b, c, d and e, the statements between the loads and the stores
 * (which must define e and presumably update a, b and c), and the closing
 * brace are not present here; recover them from upstream before relying
 * on this block.
 */
295 void FUNCC(ff_h264_chroma_dc_dequant_idct
)(int16_t *_block
, int qmul
){
296 const int stride
= 16*2;
297 const int xStride
= 16;
299 dctcoef
*block
= (dctcoef
*)_block
;
/* Load the four input values. */
301 a
= block
[stride
*0 + xStride
*0];
302 b
= block
[stride
*0 + xStride
*1];
303 c
= block
[stride
*1 + xStride
*0];
304 d
= block
[stride
*1 + xStride
*1];
/* Store the four scaled results back to the positions that were read. */
311 block
[stride
*0 + xStride
*0]= ((a
+c
)*qmul
) >> 7;
312 block
[stride
*0 + xStride
*1]= ((e
+b
)*qmul
) >> 7;
313 block
[stride
*1 + xStride
*0]= ((a
-c
)*qmul
) >> 7;
314 block
[stride
*1 + xStride
*1]= ((e
-b
)*qmul
) >> 7;