/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG-4 part10 prediction functions.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
28 #include "libavutil/intreadwrite.h"
32 #include "bit_depth_template.c"
34 static void FUNCC(pred4x4_vertical
)(uint8_t *_src
, const uint8_t *topright
,
37 pixel
*src
= (pixel
*)_src
;
38 int stride
= _stride
/sizeof(pixel
);
39 const pixel4 a
= AV_RN4PA(src
-stride
);
41 AV_WN4PA(src
+0*stride
, a
);
42 AV_WN4PA(src
+1*stride
, a
);
43 AV_WN4PA(src
+2*stride
, a
);
44 AV_WN4PA(src
+3*stride
, a
);
47 static void FUNCC(pred4x4_horizontal
)(uint8_t *_src
, const uint8_t *topright
,
50 pixel
*src
= (pixel
*)_src
;
51 int stride
= _stride
/sizeof(pixel
);
52 AV_WN4PA(src
+0*stride
, PIXEL_SPLAT_X4(src
[-1+0*stride
]));
53 AV_WN4PA(src
+1*stride
, PIXEL_SPLAT_X4(src
[-1+1*stride
]));
54 AV_WN4PA(src
+2*stride
, PIXEL_SPLAT_X4(src
[-1+2*stride
]));
55 AV_WN4PA(src
+3*stride
, PIXEL_SPLAT_X4(src
[-1+3*stride
]));
58 static void FUNCC(pred4x4_dc
)(uint8_t *_src
, const uint8_t *topright
,
61 pixel
*src
= (pixel
*)_src
;
62 int stride
= _stride
/sizeof(pixel
);
63 const int dc
= ( src
[-stride
] + src
[1-stride
] + src
[2-stride
] + src
[3-stride
]
64 + src
[-1+0*stride
] + src
[-1+1*stride
] + src
[-1+2*stride
] + src
[-1+3*stride
] + 4) >>3;
65 const pixel4 a
= PIXEL_SPLAT_X4(dc
);
67 AV_WN4PA(src
+0*stride
, a
);
68 AV_WN4PA(src
+1*stride
, a
);
69 AV_WN4PA(src
+2*stride
, a
);
70 AV_WN4PA(src
+3*stride
, a
);
73 static void FUNCC(pred4x4_left_dc
)(uint8_t *_src
, const uint8_t *topright
,
76 pixel
*src
= (pixel
*)_src
;
77 int stride
= _stride
/sizeof(pixel
);
78 const int dc
= ( src
[-1+0*stride
] + src
[-1+1*stride
] + src
[-1+2*stride
] + src
[-1+3*stride
] + 2) >>2;
79 const pixel4 a
= PIXEL_SPLAT_X4(dc
);
81 AV_WN4PA(src
+0*stride
, a
);
82 AV_WN4PA(src
+1*stride
, a
);
83 AV_WN4PA(src
+2*stride
, a
);
84 AV_WN4PA(src
+3*stride
, a
);
87 static void FUNCC(pred4x4_top_dc
)(uint8_t *_src
, const uint8_t *topright
,
90 pixel
*src
= (pixel
*)_src
;
91 int stride
= _stride
/sizeof(pixel
);
92 const int dc
= ( src
[-stride
] + src
[1-stride
] + src
[2-stride
] + src
[3-stride
] + 2) >>2;
93 const pixel4 a
= PIXEL_SPLAT_X4(dc
);
95 AV_WN4PA(src
+0*stride
, a
);
96 AV_WN4PA(src
+1*stride
, a
);
97 AV_WN4PA(src
+2*stride
, a
);
98 AV_WN4PA(src
+3*stride
, a
);
101 static void FUNCC(pred4x4_128_dc
)(uint8_t *_src
, const uint8_t *topright
,
104 pixel
*src
= (pixel
*)_src
;
105 int stride
= _stride
/sizeof(pixel
);
106 const pixel4 a
= PIXEL_SPLAT_X4(1<<(BIT_DEPTH
-1));
108 AV_WN4PA(src
+0*stride
, a
);
109 AV_WN4PA(src
+1*stride
, a
);
110 AV_WN4PA(src
+2*stride
, a
);
111 AV_WN4PA(src
+3*stride
, a
);
114 static void FUNCC(pred4x4_127_dc
)(uint8_t *_src
, const uint8_t *topright
,
117 pixel
*src
= (pixel
*)_src
;
118 int stride
= _stride
/sizeof(pixel
);
119 const pixel4 a
= PIXEL_SPLAT_X4((1<<(BIT_DEPTH
-1))-1);
121 AV_WN4PA(src
+0*stride
, a
);
122 AV_WN4PA(src
+1*stride
, a
);
123 AV_WN4PA(src
+2*stride
, a
);
124 AV_WN4PA(src
+3*stride
, a
);
127 static void FUNCC(pred4x4_129_dc
)(uint8_t *_src
, const uint8_t *topright
,
130 pixel
*src
= (pixel
*)_src
;
131 int stride
= _stride
/sizeof(pixel
);
132 const pixel4 a
= PIXEL_SPLAT_X4((1<<(BIT_DEPTH
-1))+1);
134 AV_WN4PA(src
+0*stride
, a
);
135 AV_WN4PA(src
+1*stride
, a
);
136 AV_WN4PA(src
+2*stride
, a
);
137 AV_WN4PA(src
+3*stride
, a
);
/* Neighbour loaders for the directional 4x4 predictors.  Each declares
 * const unsigned locals (t4..t7, l0..l7, t0..t3) in the caller's scope. */
#define LOAD_TOP_RIGHT_EDGE\
    const unsigned av_unused t4 = topright[0];\
    const unsigned av_unused t5 = topright[1];\
    const unsigned av_unused t6 = topright[2];\
    const unsigned av_unused t7 = topright[3];\

#define LOAD_DOWN_LEFT_EDGE\
    const unsigned av_unused l4 = src[-1+4*stride];\
    const unsigned av_unused l5 = src[-1+5*stride];\
    const unsigned av_unused l6 = src[-1+6*stride];\
    const unsigned av_unused l7 = src[-1+7*stride];\

#define LOAD_LEFT_EDGE\
    const unsigned av_unused l0 = src[-1+0*stride];\
    const unsigned av_unused l1 = src[-1+1*stride];\
    const unsigned av_unused l2 = src[-1+2*stride];\
    const unsigned av_unused l3 = src[-1+3*stride];\

#define LOAD_TOP_EDGE\
    const unsigned av_unused t0 = src[ 0-1*stride];\
    const unsigned av_unused t1 = src[ 1-1*stride];\
    const unsigned av_unused t2 = src[ 2-1*stride];\
    const unsigned av_unused t3 = src[ 3-1*stride];\

165 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
168 pixel
*src
= (pixel
*)_src
;
169 int stride
= _stride
/sizeof(pixel
);
170 const int lt
= src
[-1-1*stride
];
174 src
[0+3*stride
]=(l3
+ 2*l2
+ l1
+ 2)>>2;
176 src
[1+3*stride
]=(l2
+ 2*l1
+ l0
+ 2)>>2;
179 src
[2+3*stride
]=(l1
+ 2*l0
+ lt
+ 2)>>2;
183 src
[3+3*stride
]=(l0
+ 2*lt
+ t0
+ 2)>>2;
186 src
[3+2*stride
]=(lt
+ 2*t0
+ t1
+ 2)>>2;
188 src
[3+1*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
189 src
[3+0*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
192 static void FUNCC(pred4x4_down_left
)(uint8_t *_src
, const uint8_t *_topright
,
195 pixel
*src
= (pixel
*)_src
;
196 const pixel
*topright
= (const pixel
*)_topright
;
197 int stride
= _stride
/sizeof(pixel
);
202 src
[0+0*stride
]=(t0
+ t2
+ 2*t1
+ 2)>>2;
204 src
[0+1*stride
]=(t1
+ t3
+ 2*t2
+ 2)>>2;
207 src
[0+2*stride
]=(t2
+ t4
+ 2*t3
+ 2)>>2;
211 src
[0+3*stride
]=(t3
+ t5
+ 2*t4
+ 2)>>2;
214 src
[1+3*stride
]=(t4
+ t6
+ 2*t5
+ 2)>>2;
216 src
[2+3*stride
]=(t5
+ t7
+ 2*t6
+ 2)>>2;
217 src
[3+3*stride
]=(t6
+ 3*t7
+ 2)>>2;
220 static void FUNCC(pred4x4_vertical_right
)(uint8_t *_src
,
221 const uint8_t *topright
,
224 pixel
*src
= (pixel
*)_src
;
225 int stride
= _stride
/sizeof(pixel
);
226 const int lt
= src
[-1-1*stride
];
231 src
[1+2*stride
]=(lt
+ t0
+ 1)>>1;
233 src
[2+2*stride
]=(t0
+ t1
+ 1)>>1;
235 src
[3+2*stride
]=(t1
+ t2
+ 1)>>1;
236 src
[3+0*stride
]=(t2
+ t3
+ 1)>>1;
238 src
[1+3*stride
]=(l0
+ 2*lt
+ t0
+ 2)>>2;
240 src
[2+3*stride
]=(lt
+ 2*t0
+ t1
+ 2)>>2;
242 src
[3+3*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
243 src
[3+1*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
244 src
[0+2*stride
]=(lt
+ 2*l0
+ l1
+ 2)>>2;
245 src
[0+3*stride
]=(l0
+ 2*l1
+ l2
+ 2)>>2;
248 static void FUNCC(pred4x4_vertical_left
)(uint8_t *_src
,
249 const uint8_t *_topright
,
252 pixel
*src
= (pixel
*)_src
;
253 const pixel
*topright
= (const pixel
*)_topright
;
254 int stride
= _stride
/sizeof(pixel
);
258 src
[0+0*stride
]=(t0
+ t1
+ 1)>>1;
260 src
[0+2*stride
]=(t1
+ t2
+ 1)>>1;
262 src
[1+2*stride
]=(t2
+ t3
+ 1)>>1;
264 src
[2+2*stride
]=(t3
+ t4
+ 1)>>1;
265 src
[3+2*stride
]=(t4
+ t5
+ 1)>>1;
266 src
[0+1*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
268 src
[0+3*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
270 src
[1+3*stride
]=(t2
+ 2*t3
+ t4
+ 2)>>2;
272 src
[2+3*stride
]=(t3
+ 2*t4
+ t5
+ 2)>>2;
273 src
[3+3*stride
]=(t4
+ 2*t5
+ t6
+ 2)>>2;
276 static void FUNCC(pred4x4_horizontal_up
)(uint8_t *_src
, const uint8_t *topright
,
279 pixel
*src
= (pixel
*)_src
;
280 int stride
= _stride
/sizeof(pixel
);
283 src
[0+0*stride
]=(l0
+ l1
+ 1)>>1;
284 src
[1+0*stride
]=(l0
+ 2*l1
+ l2
+ 2)>>2;
286 src
[0+1*stride
]=(l1
+ l2
+ 1)>>1;
288 src
[1+1*stride
]=(l1
+ 2*l2
+ l3
+ 2)>>2;
290 src
[0+2*stride
]=(l2
+ l3
+ 1)>>1;
292 src
[1+2*stride
]=(l2
+ 2*l3
+ l3
+ 2)>>2;
301 static void FUNCC(pred4x4_horizontal_down
)(uint8_t *_src
,
302 const uint8_t *topright
,
305 pixel
*src
= (pixel
*)_src
;
306 int stride
= _stride
/sizeof(pixel
);
307 const int lt
= src
[-1-1*stride
];
312 src
[2+1*stride
]=(lt
+ l0
+ 1)>>1;
314 src
[3+1*stride
]=(l0
+ 2*lt
+ t0
+ 2)>>2;
315 src
[2+0*stride
]=(lt
+ 2*t0
+ t1
+ 2)>>2;
316 src
[3+0*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
318 src
[2+2*stride
]=(l0
+ l1
+ 1)>>1;
320 src
[3+2*stride
]=(lt
+ 2*l0
+ l1
+ 2)>>2;
322 src
[2+3*stride
]=(l1
+ l2
+ 1)>>1;
324 src
[3+3*stride
]=(l0
+ 2*l1
+ l2
+ 2)>>2;
325 src
[0+3*stride
]=(l2
+ l3
+ 1)>>1;
326 src
[1+3*stride
]=(l1
+ 2*l2
+ l3
+ 2)>>2;
329 static void FUNCC(pred16x16_vertical
)(uint8_t *_src
, ptrdiff_t _stride
)
332 pixel
*src
= (pixel
*)_src
;
333 int stride
= _stride
/sizeof(pixel
);
334 const pixel4 a
= AV_RN4PA(((pixel4
*)(src
-stride
))+0);
335 const pixel4 b
= AV_RN4PA(((pixel4
*)(src
-stride
))+1);
336 const pixel4 c
= AV_RN4PA(((pixel4
*)(src
-stride
))+2);
337 const pixel4 d
= AV_RN4PA(((pixel4
*)(src
-stride
))+3);
340 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, a
);
341 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, b
);
342 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+2, c
);
343 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+3, d
);
347 static void FUNCC(pred16x16_horizontal
)(uint8_t *_src
, ptrdiff_t stride
)
350 pixel
*src
= (pixel
*)_src
;
351 stride
/= sizeof(pixel
);
354 const pixel4 a
= PIXEL_SPLAT_X4(src
[-1+i
*stride
]);
356 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, a
);
357 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, a
);
358 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+2, a
);
359 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+3, a
);
363 #define PREDICT_16x16_DC(v)\
364 for(i=0; i<16; i++){\
365 AV_WN4PA(src+ 0, v);\
366 AV_WN4PA(src+ 4, v);\
367 AV_WN4PA(src+ 8, v);\
368 AV_WN4PA(src+12, v);\
372 static void FUNCC(pred16x16_dc
)(uint8_t *_src
, ptrdiff_t stride
)
375 pixel
*src
= (pixel
*)_src
;
377 stride
/= sizeof(pixel
);
380 dc
+= src
[-1+i
*stride
];
387 dcsplat
= PIXEL_SPLAT_X4((dc
+16)>>5);
388 PREDICT_16x16_DC(dcsplat
);
391 static void FUNCC(pred16x16_left_dc
)(uint8_t *_src
, ptrdiff_t stride
)
394 pixel
*src
= (pixel
*)_src
;
396 stride
/= sizeof(pixel
);
399 dc
+= src
[-1+i
*stride
];
402 dcsplat
= PIXEL_SPLAT_X4((dc
+8)>>4);
403 PREDICT_16x16_DC(dcsplat
);
406 static void FUNCC(pred16x16_top_dc
)(uint8_t *_src
, ptrdiff_t stride
)
409 pixel
*src
= (pixel
*)_src
;
411 stride
/= sizeof(pixel
);
417 dcsplat
= PIXEL_SPLAT_X4((dc
+8)>>4);
418 PREDICT_16x16_DC(dcsplat
);
421 #define PRED16x16_X(n, v) \
422 static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
425 pixel *src = (pixel*)_src;\
426 stride /= sizeof(pixel);\
427 PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
430 PRED16x16_X(127, (1<<(BIT_DEPTH
-1))-1)
431 PRED16x16_X(128, (1<<(BIT_DEPTH
-1))+0)
432 PRED16x16_X(129, (1<<(BIT_DEPTH
-1))+1)
434 static inline void FUNCC(pred16x16_plane_compat
)(uint8_t *_src
,
442 pixel
*src
= (pixel
*)_src
;
443 int stride
= _stride
/sizeof(pixel
);
444 const pixel
* const src0
= src
+7-stride
;
445 const pixel
* src1
= src
+8*stride
-1;
446 const pixel
* src2
= src1
-2*stride
; // == src+6*stride-1;
447 int H
= src0
[1] - src0
[-1];
448 int V
= src1
[0] - src2
[ 0];
449 for(k
=2; k
<=8; ++k
) {
450 src1
+= stride
; src2
-= stride
;
451 H
+= k
*(src0
[k
] - src0
[-k
]);
452 V
+= k
*(src1
[0] - src2
[ 0]);
455 H
= ( 5*(H
/4) ) / 16;
456 V
= ( 5*(V
/4) ) / 16;
458 /* required for 100% accuracy */
461 H
= ( H
+ (H
>>2) ) >> 4;
462 V
= ( V
+ (V
>>2) ) >> 4;
468 a
= 16*(src1
[0] + src2
[16] + 1) - 7*(V
+H
);
469 for(j
=16; j
>0; --j
) {
472 for(i
=-16; i
<0; i
+=4) {
473 src
[16+i
] = CLIP((b
) >> 5);
474 src
[17+i
] = CLIP((b
+ H
) >> 5);
475 src
[18+i
] = CLIP((b
+2*H
) >> 5);
476 src
[19+i
] = CLIP((b
+3*H
) >> 5);
483 static void FUNCC(pred16x16_plane
)(uint8_t *src
, ptrdiff_t stride
)
485 FUNCC(pred16x16_plane_compat
)(src
, stride
, 0, 0);
488 static void FUNCC(pred8x8_vertical
)(uint8_t *_src
, ptrdiff_t _stride
)
491 pixel
*src
= (pixel
*)_src
;
492 int stride
= _stride
/sizeof(pixel
);
493 const pixel4 a
= AV_RN4PA(((pixel4
*)(src
-stride
))+0);
494 const pixel4 b
= AV_RN4PA(((pixel4
*)(src
-stride
))+1);
497 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, a
);
498 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, b
);
502 static void FUNCC(pred8x16_vertical
)(uint8_t *_src
, ptrdiff_t _stride
)
505 pixel
*src
= (pixel
*)_src
;
506 int stride
= _stride
>>(sizeof(pixel
)-1);
507 const pixel4 a
= AV_RN4PA(((pixel4
*)(src
-stride
))+0);
508 const pixel4 b
= AV_RN4PA(((pixel4
*)(src
-stride
))+1);
511 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, a
);
512 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, b
);
516 static void FUNCC(pred8x8_horizontal
)(uint8_t *_src
, ptrdiff_t stride
)
519 pixel
*src
= (pixel
*)_src
;
520 stride
/= sizeof(pixel
);
523 const pixel4 a
= PIXEL_SPLAT_X4(src
[-1+i
*stride
]);
524 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, a
);
525 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, a
);
529 static void FUNCC(pred8x16_horizontal
)(uint8_t *_src
, ptrdiff_t stride
)
532 pixel
*src
= (pixel
*)_src
;
533 stride
>>= sizeof(pixel
)-1;
535 const pixel4 a
= PIXEL_SPLAT_X4(src
[-1+i
*stride
]);
536 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, a
);
537 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, a
);
541 #define PRED8x8_X(n, v)\
542 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
545 const pixel4 a = PIXEL_SPLAT_X4(v);\
546 pixel *src = (pixel*)_src;\
547 stride /= sizeof(pixel);\
549 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
550 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
554 PRED8x8_X(127, (1<<(BIT_DEPTH
-1))-1)
555 PRED8x8_X(128, (1<<(BIT_DEPTH
-1))+0)
556 PRED8x8_X(129, (1<<(BIT_DEPTH
-1))+1)
558 static void FUNCC(pred8x16_128_dc
)(uint8_t *_src
, ptrdiff_t stride
)
560 FUNCC(pred8x8_128_dc
)(_src
, stride
);
561 FUNCC(pred8x8_128_dc
)(_src
+8*stride
, stride
);
564 static void FUNCC(pred8x8_left_dc
)(uint8_t *_src
, ptrdiff_t stride
)
568 pixel4 dc0splat
, dc2splat
;
569 pixel
*src
= (pixel
*)_src
;
570 stride
/= sizeof(pixel
);
574 dc0
+= src
[-1+i
*stride
];
575 dc2
+= src
[-1+(i
+4)*stride
];
577 dc0splat
= PIXEL_SPLAT_X4((dc0
+ 2)>>2);
578 dc2splat
= PIXEL_SPLAT_X4((dc2
+ 2)>>2);
581 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc0splat
);
582 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc0splat
);
585 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc2splat
);
586 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc2splat
);
590 static void FUNCC(pred8x16_left_dc
)(uint8_t *_src
, ptrdiff_t stride
)
592 FUNCC(pred8x8_left_dc
)(_src
, stride
);
593 FUNCC(pred8x8_left_dc
)(_src
+8*stride
, stride
);
596 static void FUNCC(pred8x8_top_dc
)(uint8_t *_src
, ptrdiff_t stride
)
600 pixel4 dc0splat
, dc1splat
;
601 pixel
*src
= (pixel
*)_src
;
602 stride
/= sizeof(pixel
);
607 dc1
+= src
[4+i
-stride
];
609 dc0splat
= PIXEL_SPLAT_X4((dc0
+ 2)>>2);
610 dc1splat
= PIXEL_SPLAT_X4((dc1
+ 2)>>2);
613 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc0splat
);
614 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc1splat
);
617 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc0splat
);
618 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc1splat
);
622 static void FUNCC(pred8x16_top_dc
)(uint8_t *_src
, ptrdiff_t stride
)
626 pixel4 dc0splat
, dc1splat
;
627 pixel
*src
= (pixel
*)_src
;
628 stride
>>= sizeof(pixel
)-1;
633 dc1
+= src
[4+i
-stride
];
635 dc0splat
= PIXEL_SPLAT_X4((dc0
+ 2)>>2);
636 dc1splat
= PIXEL_SPLAT_X4((dc1
+ 2)>>2);
639 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc0splat
);
640 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc1splat
);
644 static void FUNCC(pred8x8_dc
)(uint8_t *_src
, ptrdiff_t stride
)
648 pixel4 dc0splat
, dc1splat
, dc2splat
, dc3splat
;
649 pixel
*src
= (pixel
*)_src
;
650 stride
/= sizeof(pixel
);
654 dc0
+= src
[-1+i
*stride
] + src
[i
-stride
];
655 dc1
+= src
[4+i
-stride
];
656 dc2
+= src
[-1+(i
+4)*stride
];
658 dc0splat
= PIXEL_SPLAT_X4((dc0
+ 4)>>3);
659 dc1splat
= PIXEL_SPLAT_X4((dc1
+ 2)>>2);
660 dc2splat
= PIXEL_SPLAT_X4((dc2
+ 2)>>2);
661 dc3splat
= PIXEL_SPLAT_X4((dc1
+ dc2
+ 4)>>3);
664 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc0splat
);
665 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc1splat
);
668 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc2splat
);
669 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc3splat
);
673 static void FUNCC(pred8x16_dc
)(uint8_t *_src
, ptrdiff_t stride
)
676 int dc0
, dc1
, dc2
, dc3
, dc4
;
677 pixel4 dc0splat
, dc1splat
, dc2splat
, dc3splat
, dc4splat
, dc5splat
, dc6splat
, dc7splat
;
678 pixel
*src
= (pixel
*)_src
;
679 stride
>>= sizeof(pixel
)-1;
681 dc0
=dc1
=dc2
=dc3
=dc4
=0;
683 dc0
+= src
[-1+i
*stride
] + src
[i
-stride
];
684 dc1
+= src
[4+i
-stride
];
685 dc2
+= src
[-1+(i
+4)*stride
];
686 dc3
+= src
[-1+(i
+8)*stride
];
687 dc4
+= src
[-1+(i
+12)*stride
];
689 dc0splat
= PIXEL_SPLAT_X4((dc0
+ 4)>>3);
690 dc1splat
= PIXEL_SPLAT_X4((dc1
+ 2)>>2);
691 dc2splat
= PIXEL_SPLAT_X4((dc2
+ 2)>>2);
692 dc3splat
= PIXEL_SPLAT_X4((dc1
+ dc2
+ 4)>>3);
693 dc4splat
= PIXEL_SPLAT_X4((dc3
+ 2)>>2);
694 dc5splat
= PIXEL_SPLAT_X4((dc1
+ dc3
+ 4)>>3);
695 dc6splat
= PIXEL_SPLAT_X4((dc4
+ 2)>>2);
696 dc7splat
= PIXEL_SPLAT_X4((dc1
+ dc4
+ 4)>>3);
699 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc0splat
);
700 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc1splat
);
703 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc2splat
);
704 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc3splat
);
707 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc4splat
);
708 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc5splat
);
710 for(i
=12; i
<16; i
++){
711 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+0, dc6splat
);
712 AV_WN4PA(((pixel4
*)(src
+i
*stride
))+1, dc7splat
);
716 static void FUNC(pred8x8_mad_cow_dc_l0t
)(uint8_t *src
, ptrdiff_t stride
)
718 FUNCC(pred8x8_top_dc
)(src
, stride
);
719 FUNCC(pred4x4_dc
)(src
, NULL
, stride
);
722 static void FUNC(pred8x16_mad_cow_dc_l0t
)(uint8_t *src
, ptrdiff_t stride
)
724 FUNCC(pred8x16_top_dc
)(src
, stride
);
725 FUNCC(pred4x4_dc
)(src
, NULL
, stride
);
728 static void FUNC(pred8x8_mad_cow_dc_0lt
)(uint8_t *src
, ptrdiff_t stride
)
730 FUNCC(pred8x8_dc
)(src
, stride
);
731 FUNCC(pred4x4_top_dc
)(src
, NULL
, stride
);
734 static void FUNC(pred8x16_mad_cow_dc_0lt
)(uint8_t *src
, ptrdiff_t stride
)
736 FUNCC(pred8x16_dc
)(src
, stride
);
737 FUNCC(pred4x4_top_dc
)(src
, NULL
, stride
);
740 static void FUNC(pred8x8_mad_cow_dc_l00
)(uint8_t *src
, ptrdiff_t stride
)
742 FUNCC(pred8x8_left_dc
)(src
, stride
);
743 FUNCC(pred4x4_128_dc
)(src
+ 4*stride
, NULL
, stride
);
744 FUNCC(pred4x4_128_dc
)(src
+ 4*stride
+ 4*sizeof(pixel
), NULL
, stride
);
747 static void FUNC(pred8x16_mad_cow_dc_l00
)(uint8_t *src
, ptrdiff_t stride
)
749 FUNCC(pred8x16_left_dc
)(src
, stride
);
750 FUNCC(pred4x4_128_dc
)(src
+ 4*stride
, NULL
, stride
);
751 FUNCC(pred4x4_128_dc
)(src
+ 4*stride
+ 4*sizeof(pixel
), NULL
, stride
);
754 static void FUNC(pred8x8_mad_cow_dc_0l0
)(uint8_t *src
, ptrdiff_t stride
)
756 FUNCC(pred8x8_left_dc
)(src
, stride
);
757 FUNCC(pred4x4_128_dc
)(src
, NULL
, stride
);
758 FUNCC(pred4x4_128_dc
)(src
+ 4*sizeof(pixel
), NULL
, stride
);
761 static void FUNC(pred8x16_mad_cow_dc_0l0
)(uint8_t *src
, ptrdiff_t stride
)
763 FUNCC(pred8x16_left_dc
)(src
, stride
);
764 FUNCC(pred4x4_128_dc
)(src
, NULL
, stride
);
765 FUNCC(pred4x4_128_dc
)(src
+ 4*sizeof(pixel
), NULL
, stride
);
768 static void FUNCC(pred8x8_plane
)(uint8_t *_src
, ptrdiff_t _stride
)
773 pixel
*src
= (pixel
*)_src
;
774 int stride
= _stride
/sizeof(pixel
);
775 const pixel
* const src0
= src
+3-stride
;
776 const pixel
* src1
= src
+4*stride
-1;
777 const pixel
* src2
= src1
-2*stride
; // == src+2*stride-1;
778 int H
= src0
[1] - src0
[-1];
779 int V
= src1
[0] - src2
[ 0];
780 for(k
=2; k
<=4; ++k
) {
781 src1
+= stride
; src2
-= stride
;
782 H
+= k
*(src0
[k
] - src0
[-k
]);
783 V
+= k
*(src1
[0] - src2
[ 0]);
785 H
= ( 17*H
+16 ) >> 5;
786 V
= ( 17*V
+16 ) >> 5;
788 a
= 16*(src1
[0] + src2
[8]+1) - 3*(V
+H
);
792 src
[0] = CLIP((b
) >> 5);
793 src
[1] = CLIP((b
+ H
) >> 5);
794 src
[2] = CLIP((b
+2*H
) >> 5);
795 src
[3] = CLIP((b
+3*H
) >> 5);
796 src
[4] = CLIP((b
+4*H
) >> 5);
797 src
[5] = CLIP((b
+5*H
) >> 5);
798 src
[6] = CLIP((b
+6*H
) >> 5);
799 src
[7] = CLIP((b
+7*H
) >> 5);
804 static void FUNCC(pred8x16_plane
)(uint8_t *_src
, ptrdiff_t _stride
)
809 pixel
*src
= (pixel
*)_src
;
810 int stride
= _stride
>>(sizeof(pixel
)-1);
811 const pixel
* const src0
= src
+3-stride
;
812 const pixel
* src1
= src
+8*stride
-1;
813 const pixel
* src2
= src1
-2*stride
; // == src+6*stride-1;
814 int H
= src0
[1] - src0
[-1];
815 int V
= src1
[0] - src2
[ 0];
817 for (k
= 2; k
<= 4; ++k
) {
818 src1
+= stride
; src2
-= stride
;
819 H
+= k
*(src0
[k
] - src0
[-k
]);
820 V
+= k
*(src1
[0] - src2
[ 0]);
822 for (; k
<= 8; ++k
) {
823 src1
+= stride
; src2
-= stride
;
824 V
+= k
*(src1
[0] - src2
[0]);
830 a
= 16*(src1
[0] + src2
[8] + 1) - 7*V
- 3*H
;
831 for(j
=16; j
>0; --j
) {
834 src
[0] = CLIP((b
) >> 5);
835 src
[1] = CLIP((b
+ H
) >> 5);
836 src
[2] = CLIP((b
+2*H
) >> 5);
837 src
[3] = CLIP((b
+3*H
) >> 5);
838 src
[4] = CLIP((b
+4*H
) >> 5);
839 src
[5] = CLIP((b
+5*H
) >> 5);
840 src
[6] = CLIP((b
+6*H
) >> 5);
841 src
[7] = CLIP((b
+7*H
) >> 5);
/* Helpers for the 8x8 luma (pred8x8l) predictors: filtered edge loaders.
 * The left/top edges are 3-tap lowpass filtered per H.264 8.3.2.2.1;
 * has_topleft / has_topright select the fallback sample when a corner or
 * the top-right extension is unavailable. */
#define SRC(x,y) src[(x)+(y)*stride]

#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if (has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        AV_WN4PA(((pixel4*)src)+0, v); \
        AV_WN4PA(((pixel4*)src)+1, v); \
        src += stride; \
    }
884 static void FUNCC(pred8x8l_128_dc
)(uint8_t *_src
, int has_topleft
,
885 int has_topright
, ptrdiff_t _stride
)
887 pixel
*src
= (pixel
*)_src
;
888 int stride
= _stride
/sizeof(pixel
);
890 PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH
-1)));
892 static void FUNCC(pred8x8l_left_dc
)(uint8_t *_src
, int has_topleft
,
893 int has_topright
, ptrdiff_t _stride
)
895 pixel
*src
= (pixel
*)_src
;
896 int stride
= _stride
/sizeof(pixel
);
898 PREDICT_8x8_LOAD_LEFT
;
899 const pixel4 dc
= PIXEL_SPLAT_X4((l0
+l1
+l2
+l3
+l4
+l5
+l6
+l7
+4) >> 3);
902 static void FUNCC(pred8x8l_top_dc
)(uint8_t *_src
, int has_topleft
,
903 int has_topright
, ptrdiff_t _stride
)
905 pixel
*src
= (pixel
*)_src
;
906 int stride
= _stride
/sizeof(pixel
);
908 PREDICT_8x8_LOAD_TOP
;
909 const pixel4 dc
= PIXEL_SPLAT_X4((t0
+t1
+t2
+t3
+t4
+t5
+t6
+t7
+4) >> 3);
912 static void FUNCC(pred8x8l_dc
)(uint8_t *_src
, int has_topleft
,
913 int has_topright
, ptrdiff_t _stride
)
915 pixel
*src
= (pixel
*)_src
;
916 int stride
= _stride
/sizeof(pixel
);
918 PREDICT_8x8_LOAD_LEFT
;
919 PREDICT_8x8_LOAD_TOP
;
920 const pixel4 dc
= PIXEL_SPLAT_X4((l0
+l1
+l2
+l3
+l4
+l5
+l6
+l7
921 +t0
+t1
+t2
+t3
+t4
+t5
+t6
+t7
+8) >> 4);
924 static void FUNCC(pred8x8l_horizontal
)(uint8_t *_src
, int has_topleft
,
925 int has_topright
, ptrdiff_t _stride
)
927 pixel
*src
= (pixel
*)_src
;
928 int stride
= _stride
/sizeof(pixel
);
931 PREDICT_8x8_LOAD_LEFT
;
932 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
933 AV_WN4PA(src+y*stride, a); \
934 AV_WN4PA(src+y*stride+4, a);
935 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
938 static void FUNCC(pred8x8l_vertical
)(uint8_t *_src
, int has_topleft
,
939 int has_topright
, ptrdiff_t _stride
)
942 pixel
*src
= (pixel
*)_src
;
943 int stride
= _stride
/sizeof(pixel
);
946 PREDICT_8x8_LOAD_TOP
;
955 a
= AV_RN4PA(((pixel4
*)src
)+0);
956 b
= AV_RN4PA(((pixel4
*)src
)+1);
957 for( y
= 1; y
< 8; y
++ ) {
958 AV_WN4PA(((pixel4
*)(src
+y
*stride
))+0, a
);
959 AV_WN4PA(((pixel4
*)(src
+y
*stride
))+1, b
);
962 static void FUNCC(pred8x8l_down_left
)(uint8_t *_src
, int has_topleft
,
963 int has_topright
, ptrdiff_t _stride
)
965 pixel
*src
= (pixel
*)_src
;
966 int stride
= _stride
/sizeof(pixel
);
967 PREDICT_8x8_LOAD_TOP
;
968 PREDICT_8x8_LOAD_TOPRIGHT
;
969 SRC(0,0)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
970 SRC(0,1)=SRC(1,0)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
971 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
972 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
973 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
974 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
975 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6
+ 2*t7
+ t8
+ 2) >> 2;
976 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7
+ 2*t8
+ t9
+ 2) >> 2;
977 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8
+ 2*t9
+ t10
+ 2) >> 2;
978 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9
+ 2*t10
+ t11
+ 2) >> 2;
979 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10
+ 2*t11
+ t12
+ 2) >> 2;
980 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11
+ 2*t12
+ t13
+ 2) >> 2;
981 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12
+ 2*t13
+ t14
+ 2) >> 2;
982 SRC(6,7)=SRC(7,6)= (t13
+ 2*t14
+ t15
+ 2) >> 2;
983 SRC(7,7)= (t14
+ 3*t15
+ 2) >> 2;
985 static void FUNCC(pred8x8l_down_right
)(uint8_t *_src
, int has_topleft
,
986 int has_topright
, ptrdiff_t _stride
)
988 pixel
*src
= (pixel
*)_src
;
989 int stride
= _stride
/sizeof(pixel
);
990 PREDICT_8x8_LOAD_TOP
;
991 PREDICT_8x8_LOAD_LEFT
;
992 PREDICT_8x8_LOAD_TOPLEFT
;
993 SRC(0,7)= (l7
+ 2*l6
+ l5
+ 2) >> 2;
994 SRC(0,6)=SRC(1,7)= (l6
+ 2*l5
+ l4
+ 2) >> 2;
995 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5
+ 2*l4
+ l3
+ 2) >> 2;
996 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4
+ 2*l3
+ l2
+ 2) >> 2;
997 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3
+ 2*l2
+ l1
+ 2) >> 2;
998 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2
+ 2*l1
+ l0
+ 2) >> 2;
999 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1
+ 2*l0
+ lt
+ 2) >> 2;
1000 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0
+ 2*lt
+ t0
+ 2) >> 2;
1001 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt
+ 2*t0
+ t1
+ 2) >> 2;
1002 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
1003 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
1004 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
1005 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
1006 SRC(6,0)=SRC(7,1)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
1007 SRC(7,0)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
1009 static void FUNCC(pred8x8l_vertical_right
)(uint8_t *_src
, int has_topleft
,
1010 int has_topright
, ptrdiff_t _stride
)
1012 pixel
*src
= (pixel
*)_src
;
1013 int stride
= _stride
/sizeof(pixel
);
1014 PREDICT_8x8_LOAD_TOP
;
1015 PREDICT_8x8_LOAD_LEFT
;
1016 PREDICT_8x8_LOAD_TOPLEFT
;
1017 SRC(0,6)= (l5
+ 2*l4
+ l3
+ 2) >> 2;
1018 SRC(0,7)= (l6
+ 2*l5
+ l4
+ 2) >> 2;
1019 SRC(0,4)=SRC(1,6)= (l3
+ 2*l2
+ l1
+ 2) >> 2;
1020 SRC(0,5)=SRC(1,7)= (l4
+ 2*l3
+ l2
+ 2) >> 2;
1021 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1
+ 2*l0
+ lt
+ 2) >> 2;
1022 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2
+ 2*l1
+ l0
+ 2) >> 2;
1023 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0
+ 2*lt
+ t0
+ 2) >> 2;
1024 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt
+ t0
+ 1) >> 1;
1025 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt
+ 2*t0
+ t1
+ 2) >> 2;
1026 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0
+ t1
+ 1) >> 1;
1027 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
1028 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1
+ t2
+ 1) >> 1;
1029 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
1030 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2
+ t3
+ 1) >> 1;
1031 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
1032 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3
+ t4
+ 1) >> 1;
1033 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
1034 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4
+ t5
+ 1) >> 1;
1035 SRC(6,1)=SRC(7,3)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
1036 SRC(6,0)=SRC(7,2)= (t5
+ t6
+ 1) >> 1;
1037 SRC(7,1)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
1038 SRC(7,0)= (t6
+ t7
+ 1) >> 1;
1040 static void FUNCC(pred8x8l_horizontal_down
)(uint8_t *_src
, int has_topleft
,
1041 int has_topright
, ptrdiff_t _stride
)
1043 pixel
*src
= (pixel
*)_src
;
1044 int stride
= _stride
/sizeof(pixel
);
1045 PREDICT_8x8_LOAD_TOP
;
1046 PREDICT_8x8_LOAD_LEFT
;
1047 PREDICT_8x8_LOAD_TOPLEFT
;
1048 SRC(0,7)= (l6
+ l7
+ 1) >> 1;
1049 SRC(1,7)= (l5
+ 2*l6
+ l7
+ 2) >> 2;
1050 SRC(0,6)=SRC(2,7)= (l5
+ l6
+ 1) >> 1;
1051 SRC(1,6)=SRC(3,7)= (l4
+ 2*l5
+ l6
+ 2) >> 2;
1052 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4
+ l5
+ 1) >> 1;
1053 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3
+ 2*l4
+ l5
+ 2) >> 2;
1054 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3
+ l4
+ 1) >> 1;
1055 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2
+ 2*l3
+ l4
+ 2) >> 2;
1056 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2
+ l3
+ 1) >> 1;
1057 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1
+ 2*l2
+ l3
+ 2) >> 2;
1058 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1
+ l2
+ 1) >> 1;
1059 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0
+ 2*l1
+ l2
+ 2) >> 2;
1060 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0
+ l1
+ 1) >> 1;
1061 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt
+ 2*l0
+ l1
+ 2) >> 2;
1062 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt
+ l0
+ 1) >> 1;
1063 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0
+ 2*lt
+ t0
+ 2) >> 2;
1064 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1
+ 2*t0
+ lt
+ 2) >> 2;
1065 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2
+ 2*t1
+ t0
+ 2) >> 2;
1066 SRC(4,0)=SRC(6,1)= (t3
+ 2*t2
+ t1
+ 2) >> 2;
1067 SRC(5,0)=SRC(7,1)= (t4
+ 2*t3
+ t2
+ 2) >> 2;
1068 SRC(6,0)= (t5
+ 2*t4
+ t3
+ 2) >> 2;
1069 SRC(7,0)= (t6
+ 2*t5
+ t4
+ 2) >> 2;
1071 static void FUNCC(pred8x8l_vertical_left
)(uint8_t *_src
, int has_topleft
,
1072 int has_topright
, ptrdiff_t _stride
)
1074 pixel
*src
= (pixel
*)_src
;
1075 int stride
= _stride
/sizeof(pixel
);
1076 PREDICT_8x8_LOAD_TOP
;
1077 PREDICT_8x8_LOAD_TOPRIGHT
;
1078 SRC(0,0)= (t0
+ t1
+ 1) >> 1;
1079 SRC(0,1)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
1080 SRC(0,2)=SRC(1,0)= (t1
+ t2
+ 1) >> 1;
1081 SRC(0,3)=SRC(1,1)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
1082 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2
+ t3
+ 1) >> 1;
1083 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
1084 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3
+ t4
+ 1) >> 1;
1085 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
1086 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4
+ t5
+ 1) >> 1;
1087 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
1088 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5
+ t6
+ 1) >> 1;
1089 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
1090 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6
+ t7
+ 1) >> 1;
1091 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6
+ 2*t7
+ t8
+ 2) >> 2;
1092 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7
+ t8
+ 1) >> 1;
1093 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7
+ 2*t8
+ t9
+ 2) >> 2;
1094 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8
+ t9
+ 1) >> 1;
1095 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8
+ 2*t9
+ t10
+ 2) >> 2;
1096 SRC(6,6)=SRC(7,4)= (t9
+ t10
+ 1) >> 1;
1097 SRC(6,7)=SRC(7,5)= (t9
+ 2*t10
+ t11
+ 2) >> 2;
1098 SRC(7,6)= (t10
+ t11
+ 1) >> 1;
1099 SRC(7,7)= (t10
+ 2*t11
+ t12
+ 2) >> 2;
1101 static void FUNCC(pred8x8l_horizontal_up
)(uint8_t *_src
, int has_topleft
,
1102 int has_topright
, ptrdiff_t _stride
)
1104 pixel
*src
= (pixel
*)_src
;
1105 int stride
= _stride
/sizeof(pixel
);
1106 PREDICT_8x8_LOAD_LEFT
;
1107 SRC(0,0)= (l0
+ l1
+ 1) >> 1;
1108 SRC(1,0)= (l0
+ 2*l1
+ l2
+ 2) >> 2;
1109 SRC(0,1)=SRC(2,0)= (l1
+ l2
+ 1) >> 1;
1110 SRC(1,1)=SRC(3,0)= (l1
+ 2*l2
+ l3
+ 2) >> 2;
1111 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2
+ l3
+ 1) >> 1;
1112 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2
+ 2*l3
+ l4
+ 2) >> 2;
1113 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3
+ l4
+ 1) >> 1;
1114 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3
+ 2*l4
+ l5
+ 2) >> 2;
1115 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4
+ l5
+ 1) >> 1;
1116 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4
+ 2*l5
+ l6
+ 2) >> 2;
1117 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5
+ l6
+ 1) >> 1;
1118 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5
+ 2*l6
+ l7
+ 2) >> 2;
1119 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6
+ l7
+ 1) >> 1;
1120 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6
+ 3*l7
+ 2) >> 2;
1121 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
1122 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
1123 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
1124 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7
;
1127 static void FUNCC(pred8x8l_vertical_filter_add
)(uint8_t *_src
, int16_t *_block
, int has_topleft
,
1128 int has_topright
, ptrdiff_t _stride
)
1131 pixel
*src
= (pixel
*)_src
;
1132 const dctcoef
*block
= (const dctcoef
*)_block
;
1134 int stride
= _stride
/sizeof(pixel
);
1135 PREDICT_8x8_LOAD_TOP
;
1146 for (i
= 0; i
< 8; i
++) {
1148 src
[0 * stride
] = v
+= block
[0];
1149 src
[1 * stride
] = v
+= block
[8];
1150 src
[2 * stride
] = v
+= block
[16];
1151 src
[3 * stride
] = v
+= block
[24];
1152 src
[4 * stride
] = v
+= block
[32];
1153 src
[5 * stride
] = v
+= block
[40];
1154 src
[6 * stride
] = v
+= block
[48];
1155 src
[7 * stride
] = v
+ block
[56];
1160 memset(_block
, 0, sizeof(dctcoef
) * 64);
1163 static void FUNCC(pred8x8l_horizontal_filter_add
)(uint8_t *_src
, int16_t *_block
, int has_topleft
,
1164 int has_topright
, ptrdiff_t _stride
)
1167 pixel
*src
= (pixel
*)_src
;
1168 const dctcoef
*block
= (const dctcoef
*)_block
;
1170 int stride
= _stride
/sizeof(pixel
);
1171 PREDICT_8x8_LOAD_LEFT
;
1182 for (i
= 0; i
< 8; i
++) {
1184 src
[0] = v
+= block
[0];
1185 src
[1] = v
+= block
[1];
1186 src
[2] = v
+= block
[2];
1187 src
[3] = v
+= block
[3];
1188 src
[4] = v
+= block
[4];
1189 src
[5] = v
+= block
[5];
1190 src
[6] = v
+= block
[6];
1191 src
[7] = v
+ block
[7];
1196 memset(_block
, 0, sizeof(dctcoef
) * 64);
1199 #undef PREDICT_8x8_LOAD_LEFT
1200 #undef PREDICT_8x8_LOAD_TOP
1201 #undef PREDICT_8x8_LOAD_TOPLEFT
1202 #undef PREDICT_8x8_LOAD_TOPRIGHT
1203 #undef PREDICT_8x8_DC
1209 static void FUNCC(pred4x4_vertical_add
)(uint8_t *_pix
, int16_t *_block
,
1213 pixel
*pix
= (pixel
*)_pix
;
1214 const dctcoef
*block
= (const dctcoef
*)_block
;
1215 stride
/= sizeof(pixel
);
1219 pix
[1*stride
]= v
+= block
[0];
1220 pix
[2*stride
]= v
+= block
[4];
1221 pix
[3*stride
]= v
+= block
[8];
1222 pix
[4*stride
]= v
+ block
[12];
1227 memset(_block
, 0, sizeof(dctcoef
) * 16);
1230 static void FUNCC(pred4x4_horizontal_add
)(uint8_t *_pix
, int16_t *_block
,
1234 pixel
*pix
= (pixel
*)_pix
;
1235 const dctcoef
*block
= (const dctcoef
*)_block
;
1236 stride
/= sizeof(pixel
);
1239 pix
[0]= v
+= block
[0];
1240 pix
[1]= v
+= block
[1];
1241 pix
[2]= v
+= block
[2];
1242 pix
[3]= v
+ block
[3];
1247 memset(_block
, 0, sizeof(dctcoef
) * 16);
1250 static void FUNCC(pred8x8l_vertical_add
)(uint8_t *_pix
, int16_t *_block
,
1254 pixel
*pix
= (pixel
*)_pix
;
1255 const dctcoef
*block
= (const dctcoef
*)_block
;
1256 stride
/= sizeof(pixel
);
1260 pix
[1*stride
]= v
+= block
[0];
1261 pix
[2*stride
]= v
+= block
[8];
1262 pix
[3*stride
]= v
+= block
[16];
1263 pix
[4*stride
]= v
+= block
[24];
1264 pix
[5*stride
]= v
+= block
[32];
1265 pix
[6*stride
]= v
+= block
[40];
1266 pix
[7*stride
]= v
+= block
[48];
1267 pix
[8*stride
]= v
+ block
[56];
1272 memset(_block
, 0, sizeof(dctcoef
) * 64);
1275 static void FUNCC(pred8x8l_horizontal_add
)(uint8_t *_pix
, int16_t *_block
,
1279 pixel
*pix
= (pixel
*)_pix
;
1280 const dctcoef
*block
= (const dctcoef
*)_block
;
1281 stride
/= sizeof(pixel
);
1284 pix
[0]= v
+= block
[0];
1285 pix
[1]= v
+= block
[1];
1286 pix
[2]= v
+= block
[2];
1287 pix
[3]= v
+= block
[3];
1288 pix
[4]= v
+= block
[4];
1289 pix
[5]= v
+= block
[5];
1290 pix
[6]= v
+= block
[6];
1291 pix
[7]= v
+ block
[7];
1296 memset(_block
, 0, sizeof(dctcoef
) * 64);
1299 static void FUNCC(pred16x16_vertical_add
)(uint8_t *pix
, const int *block_offset
,
1305 FUNCC(pred4x4_vertical_add
)(pix
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
1308 static void FUNCC(pred16x16_horizontal_add
)(uint8_t *pix
,
1309 const int *block_offset
,
1315 FUNCC(pred4x4_horizontal_add
)(pix
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
1318 static void FUNCC(pred8x8_vertical_add
)(uint8_t *pix
, const int *block_offset
,
1319 int16_t *block
, ptrdiff_t stride
)
1323 FUNCC(pred4x4_vertical_add
)(pix
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
1326 static void FUNCC(pred8x16_vertical_add
)(uint8_t *pix
, const int *block_offset
,
1327 int16_t *block
, ptrdiff_t stride
)
1331 FUNCC(pred4x4_vertical_add
)(pix
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
1333 FUNCC(pred4x4_vertical_add
)(pix
+ block_offset
[i
+4], block
+ i
*16*sizeof(pixel
), stride
);
1336 static void FUNCC(pred8x8_horizontal_add
)(uint8_t *pix
, const int *block_offset
,
1342 FUNCC(pred4x4_horizontal_add
)(pix
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
1345 static void FUNCC(pred8x16_horizontal_add
)(uint8_t *pix
,
1346 const int *block_offset
,
1347 int16_t *block
, ptrdiff_t stride
)
1351 FUNCC(pred4x4_horizontal_add
)(pix
+ block_offset
[i
], block
+ i
*16*sizeof(pixel
), stride
);
1353 FUNCC(pred4x4_horizontal_add
)(pix
+ block_offset
[i
+4], block
+ i
*16*sizeof(pixel
), stride
);