Merge "Add save/restore xmm registers in x86 assembly code"
[libvpx.git] / y4minput.c
blob449afe858f493715e11646e062237c3c5912ccdd
1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
10 * Based on code from the OggTheora software codec source code,
11 * Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
13 #include <stdlib.h>
14 #include <string.h>
15 #include "y4minput.h"
17 static int y4m_parse_tags(y4m_input *_y4m,char *_tags){
18 int got_w;
19 int got_h;
20 int got_fps;
21 int got_interlace;
22 int got_par;
23 int got_chroma;
24 char *p;
25 char *q;
26 got_w=got_h=got_fps=got_interlace=got_par=got_chroma=0;
27 for(p=_tags;;p=q){
28 /*Skip any leading spaces.*/
29 while(*p==' ')p++;
30 /*If that's all we have, stop.*/
31 if(p[0]=='\0')break;
32 /*Find the end of this tag.*/
33 for(q=p+1;*q!='\0'&&*q!=' ';q++);
34 /*Process the tag.*/
35 switch(p[0]){
36 case 'W':{
37 if(sscanf(p+1,"%d",&_y4m->pic_w)!=1)return -1;
38 got_w=1;
39 }break;
40 case 'H':{
41 if(sscanf(p+1,"%d",&_y4m->pic_h)!=1)return -1;
42 got_h=1;
43 }break;
44 case 'F':{
45 if(sscanf(p+1,"%d:%d",&_y4m->fps_n,&_y4m->fps_d)!=2){
46 return -1;
48 got_fps=1;
49 }break;
50 case 'I':{
51 _y4m->interlace=p[1];
52 got_interlace=1;
53 }break;
54 case 'A':{
55 if(sscanf(p+1,"%d:%d",&_y4m->par_n,&_y4m->par_d)!=2){
56 return -1;
58 got_par=1;
59 }break;
60 case 'C':{
61 if(q-p>16)return -1;
62 memcpy(_y4m->chroma_type,p+1,q-p-1);
63 _y4m->chroma_type[q-p-1]='\0';
64 got_chroma=1;
65 }break;
66 /*Ignore unknown tags.*/
69 if(!got_w||!got_h||!got_fps)return -1;
70 if(!got_interlace)_y4m->interlace='?';
71 if(!got_par)_y4m->par_n=_y4m->par_d=0;
72 /*Chroma-type is not specified in older files, e.g., those generated by
73 mplayer.*/
74 if(!got_chroma)strcpy(_y4m->chroma_type,"420");
75 return 0;
/*All anti-aliasing filters in the following conversion functions are based on
   one of two window functions:
  The 6-tap Lanczos window (for down-sampling and shifts):
   sinc(\pi*t)*sinc(\pi*t/3),       |t|<3  (sinc(t)==sin(t)/t)
   0,                               |t|>=3
  The 4-tap Mitchell window (for up-sampling):
   7|t|^3-12|t|^2+16/3,             |t|<1
   -(7/3)|t|^3+12|t|^2-20|t|+32/3,  1<=|t|<2
   0,                               |t|>=2
  The number of taps is intentionally kept small to reduce computational
   overhead and limit ringing.

  The taps from these filters are scaled so that their sum is 1, and the result
   is scaled by 128 and rounded to integers to create a filter whose
   intermediate values fit inside 16 bits.
  Coefficients are rounded in such a way as to ensure their sum is still 128,
   which is usually equivalent to normal rounding.

  Conversions which require both horizontal and vertical filtering could
   have these steps pipelined, for less memory consumption and better cache
   performance, but we do them separately for simplicity.*/
/*Integer minimum, maximum and clamp.
  These are macros, so each argument may be evaluated more than once: do not
   pass expressions with side effects.
  OC_CLAMPI(_a,_b,_c) limits _b to the range [_a,_c].*/
#define OC_MINI(_a,_b)      ((_b)<(_a)?(_b):(_a))
#define OC_MAXI(_a,_b)      ((_b)>(_a)?(_b):(_a))
#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420mpeg2 chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
   the chroma plane's resolution) to the right.
  The 4:2:2 modes look exactly the same, except there are twice as many chroma
   lines, and they are vertically co-sited with the luma samples in both the
   mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
/*Horizontally resamples one chroma plane, row by row, with a 6-tap filter.
  Taps that would fall outside a row reuse that row's first or last sample.
  _dst: Receives _c_w*_c_h filtered samples.
  _src: The _c_w*_c_h source samples.
  _c_w: The width of the plane.
  _c_h: The height of the plane.*/
static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
 const unsigned char *_src,int _c_w,int _c_h){
  int last;
  int row;
  int col;
  int acc;
  last=_c_w-1;
  for(row=0;row<_c_h;row++){
    /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
       window.*/
    /*Left edge: the col-2 tap always lands on the first sample.*/
    for(col=0;col<OC_MINI(_c_w,2);col++){
      acc=4*_src[0]-17*_src[OC_MAXI(col-1,0)]+114*_src[col]
       +35*_src[OC_MINI(col+1,last)]-9*_src[OC_MINI(col+2,last)]
       +_src[OC_MINI(col+3,last)]+64;
      _dst[col]=(unsigned char)OC_CLAMPI(0,acc>>7,255);
    }
    /*Interior: all six taps lie inside the row.*/
    for(;col<_c_w-3;col++){
      acc=4*_src[col-2]-17*_src[col-1]+114*_src[col]+35*_src[col+1]
       -9*_src[col+2]+_src[col+3]+64;
      _dst[col]=(unsigned char)OC_CLAMPI(0,acc>>7,255);
    }
    /*Right edge: the col+3 tap always lands on the last sample.*/
    for(;col<_c_w;col++){
      acc=4*_src[col-2]-17*_src[col-1]+114*_src[col]
       +35*_src[OC_MINI(col+1,last)]-9*_src[OC_MINI(col+2,last)]
       +_src[last]+64;
      _dst[col]=(unsigned char)OC_CLAMPI(0,acc>>7,255);
    }
    _dst+=_c_w;
    _src+=_c_w;
  }
}
174 /*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
175 static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
176 unsigned char *_aux){
177 int c_w;
178 int c_h;
179 int c_sz;
180 int pli;
181 int y;
182 int x;
183 /*Skip past the luma data.*/
184 _dst+=_y4m->pic_w*_y4m->pic_h;
185 /*Compute the size of each chroma plane.*/
186 c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
187 c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
188 c_sz=c_w*c_h;
189 for(pli=1;pli<3;pli++){
190 y4m_42xmpeg2_42xjpeg_helper(_dst,_aux,c_w,c_h);
191 _dst+=c_sz;
192 _aux+=c_sz;
/*This format is only used for interlaced content, but is included for
   completeness.

  420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420paldv chroma samples are sited like:
  YR------Y-------YR------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YB------Y-------YB------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YR------Y-------YR------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YB------Y-------YB------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
   the chroma plane's resolution) to the right.
  Then we use another filter to move the C_r location down one quarter pixel,
   and the C_b location up one quarter pixel.*/
239 static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
240 unsigned char *_aux){
241 unsigned char *tmp;
242 int c_w;
243 int c_h;
244 int c_sz;
245 int pli;
246 int y;
247 int x;
248 /*Skip past the luma data.*/
249 _dst+=_y4m->pic_w*_y4m->pic_h;
250 /*Compute the size of each chroma plane.*/
251 c_w=(_y4m->pic_w+1)/2;
252 c_h=(_y4m->pic_h+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
253 c_sz=c_w*c_h;
254 tmp=_aux+2*c_sz;
255 for(pli=1;pli<3;pli++){
256 /*First do the horizontal re-sampling.
257 This is the same as the mpeg2 case, except that after the horizontal
258 case, we need to apply a second vertical filter.*/
259 y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h);
260 _aux+=c_sz;
261 switch(pli){
262 case 1:{
263 /*Slide C_b up a quarter-pel.
264 This is the same filter used above, but in the other order.*/
265 for(x=0;x<c_w;x++){
266 for(y=0;y<OC_MINI(c_h,3);y++){
267 _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[0]
268 -9*tmp[OC_MAXI(y-2,0)*c_w]+35*tmp[OC_MAXI(y-1,0)*c_w]
269 +114*tmp[y*c_w]-17*tmp[OC_MINI(y+1,c_h-1)*c_w]
270 +4*tmp[OC_MINI(y+2,c_h-1)*c_w]+64)>>7,255);
272 for(;y<c_h-2;y++){
273 _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[(y-3)*c_w]
274 -9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]
275 -17*tmp[(y+1)*c_w]+4*tmp[(y+2)*c_w]+64)>>7,255);
277 for(;y<c_h;y++){
278 _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[(y-3)*c_w]
279 -9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]
280 -17*tmp[OC_MINI(y+1,c_h-1)*c_w]+4*tmp[(c_h-1)*c_w]+64)>>7,255);
282 _dst++;
283 tmp++;
285 _dst+=c_sz-c_w;
286 tmp-=c_w;
287 }break;
288 case 2:{
289 /*Slide C_r down a quarter-pel.
290 This is the same as the horizontal filter.*/
291 for(x=0;x<c_w;x++){
292 for(y=0;y<OC_MINI(c_h,2);y++){
293 _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[0]
294 -17*tmp[OC_MAXI(y-1,0)*c_w]+114*tmp[y*c_w]
295 +35*tmp[OC_MINI(y+1,c_h-1)*c_w]-9*tmp[OC_MINI(y+2,c_h-1)*c_w]
296 +tmp[OC_MINI(y+3,c_h-1)*c_w]+64)>>7,255);
298 for(;y<c_h-3;y++){
299 _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[(y-2)*c_w]
300 -17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[(y+1)*c_w]
301 -9*tmp[(y+2)*c_w]+tmp[(y+3)*c_w]+64)>>7,255);
303 for(;y<c_h;y++){
304 _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[(y-2)*c_w]
305 -17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[OC_MINI(y+1,c_h-1)*c_w]
306 -9*tmp[OC_MINI(y+2,c_h-1)*c_w]+tmp[(c_h-1)*c_w]+64)>>7,255);
308 _dst++;
309 tmp++;
311 }break;
313 /*For actual interlaced material, this would have to be done separately on
314 each field, and the shift amounts would be different.
315 C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
316 C_b up 1/8 in the bottom field.
317 The corresponding filters would be:
318 Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
319 Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
/*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
  This is used as a helper by several conversion routines.
  One output row is produced for every two source rows; taps that would fall
   outside the plane reuse its first or last row.
  _dst: Receives the decimated plane, _c_w samples per row.
  _src: The source plane, _c_w samples per row and _c_h rows.
  _c_w: The width of both planes.
  _c_h: The height of the source plane.*/
static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
 const unsigned char *_src,int _c_w,int _c_h){
  int last;
  int col;
  int row;
  int acc;
  last=_c_h-1;
  /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
  for(col=0;col<_c_w;col++){
    /*Top edge: the three upper taps all land on row 0 (3-17+78==64).*/
    for(row=0;row<OC_MINI(_c_h,2);row+=2){
      acc=64*_src[0]+78*_src[OC_MINI(1,last)*_c_w]
       -17*_src[OC_MINI(2,last)*_c_w]+3*_src[OC_MINI(3,last)*_c_w]+64;
      _dst[(row>>1)*_c_w]=(unsigned char)OC_CLAMPI(0,acc>>7,255);
    }
    /*Interior: all six taps lie inside the plane.*/
    for(;row<_c_h-3;row+=2){
      acc=3*(_src[(row-2)*_c_w]+_src[(row+3)*_c_w])
       -17*(_src[(row-1)*_c_w]+_src[(row+2)*_c_w])
       +78*(_src[row*_c_w]+_src[(row+1)*_c_w])+64;
      _dst[(row>>1)*_c_w]=(unsigned char)OC_CLAMPI(0,acc>>7,255);
    }
    /*Bottom edge: the row+3 tap always lands on the last row.*/
    for(;row<_c_h;row+=2){
      acc=3*(_src[(row-2)*_c_w]+_src[last*_c_w])
       -17*(_src[(row-1)*_c_w]+_src[OC_MINI(row+2,last)*_c_w])
       +78*(_src[row*_c_w]+_src[OC_MINI(row+1,last)*_c_w])+64;
      _dst[(row>>1)*_c_w]=(unsigned char)OC_CLAMPI(0,acc>>7,255);
    }
    /*Move on to the next column.*/
    _src++;
    _dst++;
  }
}
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  422jpeg chroma samples are sited like:
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to decimate the chroma planes by two in the
   vertical direction.*/
391 static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m,unsigned char *_dst,
392 unsigned char *_aux){
393 int c_w;
394 int c_h;
395 int c_sz;
396 int dst_c_w;
397 int dst_c_h;
398 int dst_c_sz;
399 int tmp_sz;
400 int pic_sz;
401 int pli;
402 /*Skip past the luma data.*/
403 _dst+=_y4m->pic_w*_y4m->pic_h;
404 /*Compute the size of each chroma plane.*/
405 c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
406 c_h=_y4m->pic_h;
407 dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
408 dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
409 c_sz=c_w*c_h;
410 dst_c_sz=dst_c_w*dst_c_h;
411 for(pli=1;pli<3;pli++){
412 y4m_422jpeg_420jpeg_helper(_dst,_aux,c_w,c_h);
413 _aux+=c_sz;
414 _dst+=dst_c_sz;
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  422 chroma samples are sited like:
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------YBR-----Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the original site locations one quarter
   pixel (at the original chroma resolution) to the right.
  Then we use a second resampling filter to decimate the chroma planes by two
   in the vertical direction.*/
458 static void y4m_convert_422_420jpeg(y4m_input *_y4m,unsigned char *_dst,
459 unsigned char *_aux){
460 unsigned char *tmp;
461 int c_w;
462 int c_h;
463 int c_sz;
464 int dst_c_w;
465 int dst_c_h;
466 int dst_c_sz;
467 int pli;
468 int y;
469 int x;
470 /*Skip past the luma data.*/
471 _dst+=_y4m->pic_w*_y4m->pic_h;
472 /*Compute the size of each chroma plane.*/
473 c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
474 c_h=_y4m->pic_h;
475 dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
476 c_sz=c_w*c_h;
477 dst_c_sz=c_w*dst_c_h;
478 tmp=_aux+2*c_sz;
479 for(pli=1;pli<3;pli++){
480 /*In reality, the horizontal and vertical steps could be pipelined, for
481 less memory consumption and better cache performance, but we do them
482 separately for simplicity.*/
483 /*First do horizontal filtering (convert to 422jpeg)*/
484 y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h);
485 /*Now do the vertical filtering.*/
486 y4m_422jpeg_420jpeg_helper(_dst,tmp,c_w,c_h);
487 _aux+=c_sz;
488 _dst+=dst_c_sz;
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  411 chroma samples are sited like:
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a filter to resample at site locations one eighth pixel (at the source
   chroma plane's horizontal resolution) and five eighths of a pixel to the
   right.
  Then we use another filter to decimate the planes by 2 in the vertical
   direction.*/
533 static void y4m_convert_411_420jpeg(y4m_input *_y4m,unsigned char *_dst,
534 unsigned char *_aux){
535 unsigned char *tmp;
536 int c_w;
537 int c_h;
538 int c_sz;
539 int dst_c_w;
540 int dst_c_h;
541 int dst_c_sz;
542 int tmp_sz;
543 int pli;
544 int y;
545 int x;
546 /*Skip past the luma data.*/
547 _dst+=_y4m->pic_w*_y4m->pic_h;
548 /*Compute the size of each chroma plane.*/
549 c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
550 c_h=_y4m->pic_h;
551 dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
552 dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
553 c_sz=c_w*c_h;
554 dst_c_sz=dst_c_w*dst_c_h;
555 tmp_sz=dst_c_w*c_h;
556 tmp=_aux+2*c_sz;
557 for(pli=1;pli<3;pli++){
558 /*In reality, the horizontal and vertical steps could be pipelined, for
559 less memory consumption and better cache performance, but we do them
560 separately for simplicity.*/
561 /*First do horizontal filtering (convert to 422jpeg)*/
562 for(y=0;y<c_h;y++){
563 /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
564 4-tap Mitchell window.*/
565 for(x=0;x<OC_MINI(c_w,1);x++){
566 tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(111*_aux[0]
567 +18*_aux[OC_MINI(1,c_w-1)]-_aux[OC_MINI(2,c_w-1)]+64)>>7,255);
568 tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(47*_aux[0]
569 +86*_aux[OC_MINI(1,c_w-1)]-5*_aux[OC_MINI(2,c_w-1)]+64)>>7,255);
571 for(;x<c_w-2;x++){
572 tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(_aux[x-1]+110*_aux[x]
573 +18*_aux[x+1]-_aux[x+2]+64)>>7,255);
574 tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x]
575 +86*_aux[x+1]-5*_aux[x+2]+64)>>7,255);
577 for(;x<c_w;x++){
578 tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(_aux[x-1]+110*_aux[x]
579 +18*_aux[OC_MINI(x+1,c_w-1)]-_aux[c_w-1]+64)>>7,255);
580 if((x<<1|1)<dst_c_w){
581 tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x]
582 +86*_aux[OC_MINI(x+1,c_w-1)]-5*_aux[c_w-1]+64)>>7,255);
585 tmp+=dst_c_w;
586 _aux+=c_w;
588 tmp-=tmp_sz;
589 /*Now do the vertical filtering.*/
590 y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h);
591 _dst+=dst_c_sz;
595 /*Convert 444 to 420jpeg.*/
596 static void y4m_convert_444_420jpeg(y4m_input *_y4m,unsigned char *_dst,
597 unsigned char *_aux){
598 unsigned char *tmp;
599 int c_w;
600 int c_h;
601 int c_sz;
602 int dst_c_w;
603 int dst_c_h;
604 int dst_c_sz;
605 int tmp_sz;
606 int pli;
607 int y;
608 int x;
609 /*Skip past the luma data.*/
610 _dst+=_y4m->pic_w*_y4m->pic_h;
611 /*Compute the size of each chroma plane.*/
612 c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
613 c_h=_y4m->pic_h;
614 dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
615 dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
616 c_sz=c_w*c_h;
617 dst_c_sz=dst_c_w*dst_c_h;
618 tmp_sz=dst_c_w*c_h;
619 tmp=_aux+2*c_sz;
620 for(pli=1;pli<3;pli++){
621 /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
622 for(y=0;y<c_h;y++){
623 for(x=0;x<OC_MINI(c_w,2);x+=2){
624 tmp[x>>1]=OC_CLAMPI(0,(64*_aux[0]+78*_aux[OC_MINI(1,c_w-1)]
625 -17*_aux[OC_MINI(2,c_w-1)]
626 +3*_aux[OC_MINI(3,c_w-1)]+64)>>7,255);
628 for(;x<c_w-3;x+=2){
629 tmp[x>>1]=OC_CLAMPI(0,(3*(_aux[x-2]+_aux[x+3])
630 -17*(_aux[x-1]+_aux[x+2])+78*(_aux[x]+_aux[x+1])+64)>>7,255);
632 for(;x<c_w;x+=2){
633 tmp[x>>1]=OC_CLAMPI(0,(3*(_aux[x-2]+_aux[c_w-1])-
634 17*(_aux[x-1]+_aux[OC_MINI(x+2,c_w-1)])+
635 78*(_aux[x]+_aux[OC_MINI(x+1,c_w-1)])+64)>>7,255);
637 tmp+=dst_c_w;
638 _aux+=c_w;
640 tmp-=tmp_sz;
641 /*Now do the vertical filtering.*/
642 y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h);
643 _dst+=dst_c_sz;
647 /*The image is padded with empty chroma components at 4:2:0.*/
648 static void y4m_convert_mono_420jpeg(y4m_input *_y4m,unsigned char *_dst,
649 unsigned char *_aux){
650 int c_sz;
651 _dst+=_y4m->pic_w*_y4m->pic_h;
652 c_sz=((_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h)*
653 ((_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v);
654 memset(_dst,128,c_sz*2);
657 /*No conversion function needed.*/
658 static void y4m_convert_null(y4m_input *_y4m,unsigned char *_dst,
659 unsigned char *_aux){
662 int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip){
663 char buffer[80];
664 int ret;
665 int i;
666 /*Read until newline, or 80 cols, whichever happens first.*/
667 for(i=0;i<79;i++){
668 if(_nskip>0){
669 buffer[i]=*_skip++;
670 _nskip--;
672 else{
673 ret=fread(buffer+i,1,1,_fin);
674 if(ret<1)return -1;
676 if(buffer[i]=='\n')break;
678 /*We skipped too much header data.*/
679 if(_nskip>0)return -1;
680 if(i==79){
681 fprintf(stderr,"Error parsing header; not a YUV2MPEG2 file?\n");
682 return -1;
684 buffer[i]='\0';
685 if(memcmp(buffer,"YUV4MPEG",8)){
686 fprintf(stderr,"Incomplete magic for YUV4MPEG file.\n");
687 return -1;
689 if(buffer[8]!='2'){
690 fprintf(stderr,"Incorrect YUV input file version; YUV4MPEG2 required.\n");
692 ret=y4m_parse_tags(_y4m,buffer+5);
693 if(ret<0){
694 fprintf(stderr,"Error parsing YUV4MPEG2 header.\n");
695 return ret;
697 if(_y4m->interlace=='?'){
698 fprintf(stderr,"Warning: Input video interlacing format unknown; "
699 "assuming progressive scan.\n");
701 else if(_y4m->interlace!='p'){
702 fprintf(stderr,"Input video is interlaced; "
703 "Only progressive scan handled.\n");
704 return -1;
706 if(strcmp(_y4m->chroma_type,"420")==0||
707 strcmp(_y4m->chroma_type,"420jpeg")==0){
708 _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
709 _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h
710 +2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
711 /*Natively supported: no conversion required.*/
712 _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=0;
713 _y4m->convert=y4m_convert_null;
715 else if(strcmp(_y4m->chroma_type,"420mpeg2")==0){
716 _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
717 _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
718 /*Chroma filter required: read into the aux buf first.*/
719 _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=
720 2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
721 _y4m->convert=y4m_convert_42xmpeg2_42xjpeg;
723 else if(strcmp(_y4m->chroma_type,"420paldv")==0){
724 _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
725 _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
726 /*Chroma filter required: read into the aux buf first.
727 We need to make two filter passes, so we need some extra space in the
728 aux buffer.*/
729 _y4m->aux_buf_sz=3*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
730 _y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
731 _y4m->convert=y4m_convert_42xpaldv_42xjpeg;
733 else if(strcmp(_y4m->chroma_type,"422jpeg")==0){
734 _y4m->src_c_dec_h=_y4m->dst_c_dec_h=2;
735 _y4m->src_c_dec_v=1;
736 _y4m->dst_c_dec_v=2;
737 _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
738 /*Chroma filter required: read into the aux buf first.*/
739 _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*_y4m->pic_h;
740 _y4m->convert=y4m_convert_422jpeg_420jpeg;
742 else if(strcmp(_y4m->chroma_type,"422")==0){
743 _y4m->src_c_dec_h=_y4m->dst_c_dec_h=2;
744 _y4m->src_c_dec_v=1;
745 _y4m->dst_c_dec_v=2;
746 _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
747 /*Chroma filter required: read into the aux buf first.
748 We need to make two filter passes, so we need some extra space in the
749 aux buffer.*/
750 _y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*_y4m->pic_h;
751 _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
752 _y4m->convert=y4m_convert_422_420jpeg;
754 else if(strcmp(_y4m->chroma_type,"411")==0){
755 _y4m->src_c_dec_h=4;
756 _y4m->dst_c_dec_h=2;
757 _y4m->src_c_dec_v=1;
758 _y4m->dst_c_dec_v=2;
759 _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
760 /*Chroma filter required: read into the aux buf first.
761 We need to make two filter passes, so we need some extra space in the
762 aux buffer.*/
763 _y4m->aux_buf_read_sz=2*((_y4m->pic_w+3)/4)*_y4m->pic_h;
764 _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
765 _y4m->convert=y4m_convert_411_420jpeg;
767 else if(strcmp(_y4m->chroma_type,"444")==0){
768 _y4m->src_c_dec_h=1;
769 _y4m->dst_c_dec_h=2;
770 _y4m->src_c_dec_v=1;
771 _y4m->dst_c_dec_v=2;
772 _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
773 /*Chroma filter required: read into the aux buf first.
774 We need to make two filter passes, so we need some extra space in the
775 aux buffer.*/
776 _y4m->aux_buf_read_sz=2*_y4m->pic_w*_y4m->pic_h;
777 _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
778 _y4m->convert=y4m_convert_444_420jpeg;
780 else if(strcmp(_y4m->chroma_type,"444alpha")==0){
781 _y4m->src_c_dec_h=1;
782 _y4m->dst_c_dec_h=2;
783 _y4m->src_c_dec_v=1;
784 _y4m->dst_c_dec_v=2;
785 _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
786 /*Chroma filter required: read into the aux buf first.
787 We need to make two filter passes, so we need some extra space in the
788 aux buffer.
789 The extra plane also gets read into the aux buf.
790 It will be discarded.*/
791 _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=3*_y4m->pic_w*_y4m->pic_h;
792 _y4m->convert=y4m_convert_444_420jpeg;
794 else if(strcmp(_y4m->chroma_type,"mono")==0){
795 _y4m->src_c_dec_h=_y4m->src_c_dec_v=0;
796 _y4m->dst_c_dec_h=_y4m->dst_c_dec_v=2;
797 _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
798 /*No extra space required, but we need to clear the chroma planes.*/
799 _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=0;
800 _y4m->convert=y4m_convert_mono_420jpeg;
802 else{
803 fprintf(stderr,"Unknown chroma sampling type: %s\n",_y4m->chroma_type);
804 return -1;
806 /*The size of the final frame buffers is always computed from the
807 destination chroma decimation type.*/
808 _y4m->dst_buf_sz=_y4m->pic_w*_y4m->pic_h
809 +2*((_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h)*
810 ((_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v);
811 _y4m->dst_buf=(unsigned char *)malloc(_y4m->dst_buf_sz);
812 _y4m->aux_buf=(unsigned char *)malloc(_y4m->aux_buf_sz);
813 return 0;
816 void y4m_input_close(y4m_input *_y4m){
817 free(_y4m->dst_buf);
818 free(_y4m->aux_buf);
821 int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){
822 char frame[6];
823 int pic_sz;
824 int frame_c_w;
825 int frame_c_h;
826 int c_w;
827 int c_h;
828 int c_sz;
829 int ret;
830 /*Read and skip the frame header.*/
831 ret=fread(frame,1,6,_fin);
832 if(ret<6)return 0;
833 if(memcmp(frame,"FRAME",5)){
834 fprintf(stderr,"Loss of framing in Y4M input data\n");
835 return -1;
837 if(frame[5]!='\n'){
838 char c;
839 int j;
840 for(j=0;j<79&&fread(&c,1,1,_fin)&&c!='\n';j++);
841 if(j==79){
842 fprintf(stderr,"Error parsing Y4M frame header\n");
843 return -1;
846 /*Read the frame data that needs no conversion.*/
847 if(fread(_y4m->dst_buf,1,_y4m->dst_buf_read_sz,_fin)!=_y4m->dst_buf_read_sz){
848 fprintf(stderr,"Error reading Y4M frame data.\n");
849 return -1;
851 /*Read the frame data that does need conversion.*/
852 if(fread(_y4m->aux_buf,1,_y4m->aux_buf_read_sz,_fin)!=_y4m->aux_buf_read_sz){
853 fprintf(stderr,"Error reading Y4M frame data.\n");
854 return -1;
856 /*Now convert the just read frame.*/
857 (*_y4m->convert)(_y4m,_y4m->dst_buf,_y4m->aux_buf);
858 /*Fill in the frame buffer pointers.
859 We don't use vpx_img_wrap() because it forces padding for odd picture
860 sizes, which would require a separate fread call for every row.*/
861 memset(_img,0,sizeof(*_img));
862 /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/
863 _img->fmt=IMG_FMT_I420;
864 _img->w=_img->d_w=_y4m->pic_w;
865 _img->h=_img->d_h=_y4m->pic_h;
866 /*This is hard-coded to 4:2:0 for now, as that's all VP8 supports.*/
867 _img->x_chroma_shift=1;
868 _img->y_chroma_shift=1;
869 _img->bps=12;
870 /*Set up the buffer pointers.*/
871 pic_sz=_y4m->pic_w*_y4m->pic_h;
872 c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
873 c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
874 c_sz=c_w*c_h;
875 _img->stride[PLANE_Y]=_y4m->pic_w;
876 _img->stride[PLANE_U]=_img->stride[PLANE_V]=c_w;
877 _img->planes[PLANE_Y]=_y4m->dst_buf;
878 _img->planes[PLANE_U]=_y4m->dst_buf+pic_sz;
879 _img->planes[PLANE_V]=_y4m->dst_buf+pic_sz+c_sz;
880 return 1;