1 #include "../libmpeg3.h"
2 #include "mpeg3video.h"
/* Clamp x to the 8-bit range 0..255.  x is evaluated more than once, so
 * only pass expressions without side effects. */
#define CLIP(x) (!((x) >= 0) ? 0 : ((x) >= 255 ? 255 : (x)))
/*
 * Packed constants for the MMX colour conversion code.  The inline
 * assembly names these symbols directly as memory operands of MMX
 * instructions (e.g. "psubw mpeg3_MMX_80w, %%mm0"), so every table must be
 * exactly 8 bytes: four 16-bit lanes.
 *
 * The two-element tables are declared as unsigned int rather than
 * unsigned long: where long is 64 bits an unsigned long pair would occupy
 * 16 bytes and the lanes would no longer line up.
 */
static long long mpeg3_MMX_0 = 0L;                                /* all-zero quadword */
static unsigned int mpeg3_MMX_10w[] = {0x00100010, 0x00100010};   /* 0x0010 in each word */
static unsigned int mpeg3_MMX_80w[] = {0x00800080, 0x00800080};   /* 0x0080 in each word: chroma bias */
static unsigned int mpeg3_MMX_00FFw[] = {0x00ff00ff, 0x00ff00ff}; /* 0x00FF in each word: keeps even bytes */

/*
 * Conversion coefficients, one copy per 16-bit lane.  They are fixed-point
 * values scaled by 64; the consuming code shifts the sums right by 6.
 * 0xffe8 and 0xffcd are the two's-complement words for -24 and -51.
 */
static unsigned short mpeg3_MMX_Ublucoeff[] = {0x81, 0x81, 0x81, 0x81};         /* Cb -> blue,  129/64 */
static unsigned short mpeg3_MMX_Vredcoeff[] = {0x66, 0x66, 0x66, 0x66};         /* Cr -> red,   102/64 */
static unsigned short mpeg3_MMX_Ugrncoeff[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8}; /* Cb -> green, -24/64 */
static unsigned short mpeg3_MMX_Vgrncoeff[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd}; /* Cr -> green, -51/64 */
static unsigned short mpeg3_MMX_Ycoeff[] = {0x4a, 0x4a, 0x4a, 0x4a};            /* luma scale,   74/64 */

/* Field masks for 16-bit pixels: top five bits and the middle six bits. */
static unsigned short mpeg3_MMX_redmask[] = {0xf800, 0xf800, 0xf800, 0xf800};   /* bits 15..11 */
static unsigned short mpeg3_MMX_grnmask[] = {0x7e0, 0x7e0, 0x7e0, 0x7e0};       /* bits 10..5 */

/* Luma remapping table for the 601 colour models; filled in by
 * mpeg3video_init_output(). */
static unsigned char mpeg3_601_to_rgb[256];
28 /* r = (int)(*y + 1.371 * (*cr - 128)); */
29 /* g = (int)(*y - 0.698 * (*cr - 128) - 0.336 * (*cb - 128)); */
30 /* b = (int)(*y + 1.732 * (*cb - 128)); */
33 inline void mpeg3video_rgb16_mmx(unsigned char *lum
,
46 row1
= (unsigned short *)out
;
50 y
= lum
+ cols
* rows
;
56 "movd (%1), %%mm0\n" /* 4 Cb 0 0 0 0 u3 u2 u1 u0 */
58 "movd (%0), %%mm1\n" /* 4 Cr 0 0 0 0 v3 v2 v1 v0 */
59 "punpcklbw %%mm7, %%mm0\n" /* 4 W cb 0 u3 0 u2 0 u1 0 u0 */
60 "punpcklbw %%mm7, %%mm1\n" /* 4 W cr 0 v3 0 v2 0 v1 0 v0 */
62 "psubw mpeg3_MMX_80w, %%mm0\n"
63 "psubw mpeg3_MMX_80w, %%mm1\n"
64 "movq %%mm0, %%mm2\n" /* Cb 0 u3 0 u2 0 u1 0 u0 */
65 "movq %%mm1, %%mm3\n" /* Cr */
66 "pmullw mpeg3_MMX_Ugrncoeff, %%mm2\n" /* Cb2green 0 R3 0 R2 0 R1 0 R0 */
67 "movq (%2), %%mm6\n" /* L1 l7 L6 L5 L4 L3 L2 L1 L0 */
68 "pmullw mpeg3_MMX_Ublucoeff, %%mm0\n" /* Cb2blue */
69 "pand mpeg3_MMX_00FFw, %%mm6\n" /* L1 00 L6 00 L4 00 L2 00 L0 */
70 "pmullw mpeg3_MMX_Vgrncoeff, %%mm3\n" /* Cr2green */
71 "movq (%2), %%mm7\n" /* L2 */
72 "pmullw mpeg3_MMX_Vredcoeff, %%mm1\n" /* Cr2red */
73 "psrlw $8, %%mm7\n" /* L2 00 L7 00 L5 00 L3 00 L1 */
74 "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum1 */
75 "paddw %%mm3, %%mm2\n" /* Cb2green + Cr2green == green */
76 "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum2 */
78 "movq %%mm6, %%mm4\n" /* lum1 */
79 "paddw %%mm0, %%mm6\n" /* lum1 +blue 00 B6 00 B4 00 B2 00 B0 */
80 "movq %%mm4, %%mm5\n" /* lum1 */
81 "paddw %%mm1, %%mm4\n" /* lum1 +red 00 R6 00 R4 00 R2 00 R0 */
82 "paddw %%mm2, %%mm5\n" /* lum1 +green 00 G6 00 G4 00 G2 00 G0 */
83 "psraw $6, %%mm4\n" /* R1 0 .. 64 */
84 "movq %%mm7, %%mm3\n" /* lum2 00 L7 00 L5 00 L3 00 L1 */
85 "psraw $6, %%mm5\n" /* G1 - .. + */
86 "paddw %%mm0, %%mm7\n" /* Lum2 +blue 00 B7 00 B5 00 B3 00 B1 */
87 "psraw $6, %%mm6\n" /* B1 0 .. 64 */
88 "packuswb %%mm4, %%mm4\n" /* R1 R1 */
89 "packuswb %%mm5, %%mm5\n" /* G1 G1 */
90 "packuswb %%mm6, %%mm6\n" /* B1 B1 */
91 "punpcklbw %%mm4, %%mm4\n"
92 "punpcklbw %%mm5, %%mm5\n"
94 "pand mpeg3_MMX_redmask, %%mm4\n"
95 "psllw $3, %%mm5\n" /* GREEN 1 */
96 "punpcklbw %%mm6, %%mm6\n"
97 "pand mpeg3_MMX_grnmask, %%mm5\n"
98 "pand mpeg3_MMX_redmask, %%mm6\n"
99 "por %%mm5, %%mm4\n" /* */
100 "psrlw $11, %%mm6\n" /* BLUE 1 */
101 "movq %%mm3, %%mm5\n" /* lum2 */
102 "paddw %%mm1, %%mm3\n" /* lum2 +red 00 R7 00 R5 00 R3 00 R1 */
103 "paddw %%mm2, %%mm5\n" /* lum2 +green 00 G7 00 G5 00 G3 00 G1 */
104 "psraw $6, %%mm3\n" /* R2 */
105 "por %%mm6, %%mm4\n" /* MM4 */
106 "psraw $6, %%mm5\n" /* G2 */
107 "movq (%2, %3), %%mm6\n" /* L3 */
109 "packuswb %%mm3, %%mm3\n"
110 "packuswb %%mm5, %%mm5\n"
111 "packuswb %%mm7, %%mm7\n"
112 "pand mpeg3_MMX_00FFw, %%mm6\n" /* L3 */
113 "punpcklbw %%mm3, %%mm3\n"
114 "punpcklbw %%mm5, %%mm5\n"
115 "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum3 */
116 "punpcklbw %%mm7, %%mm7\n"
117 "psllw $3, %%mm5\n" /* GREEN 2 */
118 "pand mpeg3_MMX_redmask, %%mm7\n"
119 "pand mpeg3_MMX_redmask, %%mm3\n"
120 "psrlw $11, %%mm7\n" /* BLUE 2 */
121 "pand mpeg3_MMX_grnmask, %%mm5\n"
123 "movq (%2,%3), %%mm7\n" /* L4 */
124 "por %%mm5, %%mm3\n" /* */
125 "psrlw $8, %%mm7\n" /* L4 */
126 "movq %%mm4, %%mm5\n"
127 "punpcklwd %%mm3, %%mm4\n"
128 "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum4 */
129 "punpckhwd %%mm3, %%mm5\n"
132 "movq %%mm5, 8(%4)\n"
134 "movq %%mm6, %%mm4\n" /* Lum3 */
135 "paddw %%mm0, %%mm6\n" /* Lum3 +blue */
137 "movq %%mm4, %%mm5\n" /* Lum3 */
138 "paddw %%mm1, %%mm4\n" /* Lum3 +red */
139 "paddw %%mm2, %%mm5\n" /* Lum3 +green */
141 "movq %%mm7, %%mm3\n" /* Lum4 */
143 "paddw %%mm0, %%mm7\n" /* Lum4 +blue */
144 "psraw $6, %%mm6\n" /* Lum3 +blue */
145 "movq %%mm3, %%mm0\n" /* Lum4 */
146 "packuswb %%mm4, %%mm4\n"
147 "paddw %%mm1, %%mm3\n" /* Lum4 +red */
148 "packuswb %%mm5, %%mm5\n"
149 "paddw %%mm2, %%mm0\n" /* Lum4 +green */
150 "packuswb %%mm6, %%mm6\n"
151 "punpcklbw %%mm4, %%mm4\n"
152 "punpcklbw %%mm5, %%mm5\n"
153 "punpcklbw %%mm6, %%mm6\n"
154 "psllw $3, %%mm5\n" /* GREEN 3 */
155 "pand mpeg3_MMX_redmask, %%mm4\n"
156 "psraw $6, %%mm3\n" /* psr 6 */
158 "pand mpeg3_MMX_redmask, %%mm6\n" /* BLUE */
159 "pand mpeg3_MMX_grnmask, %%mm5\n"
160 "psrlw $11, %%mm6\n" /* BLUE 3 */
164 "packuswb %%mm3, %%mm3\n"
165 "packuswb %%mm0, %%mm0\n"
166 "packuswb %%mm7, %%mm7\n"
167 "punpcklbw %%mm3, %%mm3\n"
168 "punpcklbw %%mm0, %%mm0\n"
169 "punpcklbw %%mm7, %%mm7\n"
170 "pand mpeg3_MMX_redmask, %%mm3\n"
171 "pand mpeg3_MMX_redmask, %%mm7\n" /* BLUE */
172 "psllw $3, %%mm0\n" /* GREEN 4 */
174 "pand mpeg3_MMX_grnmask, %%mm0\n"
179 "movq %%mm4, %%mm5\n"
181 "punpcklwd %%mm3, %%mm4\n"
182 "punpckhwd %%mm3, %%mm5\n"
184 "movq %%mm4, (%4,%5,2)\n"
185 "movq %%mm5, 8(%4,%5,2)\n"
193 "addl %3, %2\n" /* lum += cols */
194 "addl %7, %4\n" /* row1 += mod */
/*
 * 64-bit operands for the single-pixel BGRA converters.  Each quadword is
 * four 16-bit lanes, written here most significant lane first.  The
 * coefficients are scaled by 64; the converters shift the sum right by 6.
 */
/* Bias 0x0080 in lanes 2 and 1. */
static unsigned long long mpeg3_MMX_U_80 = 0x0000008000800000ULL;
/* Bias 0x0080 in lanes 1 and 0. */
static unsigned long long mpeg3_MMX_V_80 = 0x0000000000800080ULL;
/* Lane 2: 0x0058 (88); lane 1: 0xffd3 (-45 as a signed word). */
static long long mpeg3_MMX_U_COEF = 0x00000058ffd30000LL;
/* Lane 1: 0xffea (-22 as a signed word); lane 0: 0x006f (111). */
static long long mpeg3_MMX_V_COEF = 0x00000000ffea006fLL;
/* Luma multiplier 0x0048 (72) in lanes 2, 1 and 0, used by the 601 variants. */
static long long mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL;
/* Value 16 in lane 0; the 601 variants subtract it from luma before scaling. */
static long long mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL;
217 inline void mpeg3_bgra32_mmx(unsigned long y
,
220 unsigned long *output
)
223 /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */
225 "movd (%0), %%mm0;\n" /* Load y 0x00000000000000yy */
226 "movd (%1), %%mm1;\n" /* Load u 0x00000000000000cr */
227 "movq %%mm0, %%mm3;\n" /* Copy y to temp */
228 "psllq $16, %%mm1;\n" /* Shift u 0x0000000000cr0000 */
229 "movd (%2), %%mm2;\n" /* Load v 0x00000000000000cb */
230 "psllq $16, %%mm3;\n" /* Shift y */
231 "movq %%mm1, %%mm4;\n" /* Copy u to temp */
232 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x0000000000yy00yy */
233 "psllq $16, %%mm4;\n" /* Shift u */
234 "movq %%mm2, %%mm5;\n" /* Copy v to temp */
235 "psllq $16, %%mm3;\n" /* Shift y */
236 "por %%mm4, %%mm1;\n" /* Overlay new u byte 0x000000cr00cr0000 */
237 "psllq $16, %%mm5;\n" /* Shift v */
238 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x000000yy00yy00yy */
239 "por %%mm5, %%mm2;\n" /* Overlay new v byte 0x0000000000cb00cb */
241 /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */
242 "psubw mpeg3_MMX_U_80, %%mm1;\n" /* Subtract 128 from u 0x000000uu00uu0000 */
243 "pmullw mpeg3_MMX_U_COEF, %%mm1;\n" /* Multiply u coeffs 0x0000uuuuuuuu0000 */
244 "psllw $6, %%mm0;\n" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */
245 "psubw mpeg3_MMX_V_80, %%mm2;\n" /* Subtract 128 from v 0x0000000000cb00cb */
246 "pmullw mpeg3_MMX_V_COEF, %%mm2;\n" /* Multiply v coeffs 0x0000crcrcrcrcrcr */
248 /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */
249 "paddsw %%mm1, %%mm0;\n" /* Add u to result */
250 "paddsw %%mm2, %%mm0;\n" /* Add v to result 0x0000rrrrggggbbbb */
251 "psraw $6, %%mm0;\n" /* Demote precision */
252 "packuswb %%mm0, %%mm0;\n" /* Pack into ARGB 0x0000000000rrggbb */
253 "movd %%mm0, (%3);\n" /* Store output */
255 : "r" (&y
), "r" (&u
), "r" (&v
), "r" (output
));
258 inline void mpeg3_601_bgra32_mmx(unsigned long y
,
261 unsigned long *output
)
264 /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */
266 "movd (%0), %%mm0;\n" /* Load y 0x00000000000000yy */
267 "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;\n" /* Subtract 16 from y */
268 "movd (%1), %%mm1;\n" /* Load u 0x00000000000000cr */
269 "movq %%mm0, %%mm3;\n" /* Copy y to temp */
270 "psllq $16, %%mm1;\n" /* Shift u 0x0000000000cr0000 */
271 "movd (%2), %%mm2;\n" /* Load v 0x00000000000000cb */
272 "psllq $16, %%mm3;\n" /* Shift y */
273 "movq %%mm1, %%mm4;\n" /* Copy u to temp */
274 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x0000000000yy00yy */
275 "psllq $16, %%mm4;\n" /* Shift u */
276 "movq %%mm2, %%mm5;\n" /* Copy v to temp */
277 "psllq $16, %%mm3;\n" /* Shift y */
278 "por %%mm4, %%mm1;\n" /* Overlay new u byte 0x000000cr00cr0000 */
279 "psllq $16, %%mm5;\n" /* Shift v */
280 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x000000yy00yy00yy */
281 "por %%mm5, %%mm2;\n" /* Overlay new v byte 0x0000000000cb00cb */
283 /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */
284 "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;\n" /* Scale and shift y coeffs */
285 "psubw mpeg3_MMX_U_80, %%mm1;\n" /* Subtract 128 from u 0x000000uu00uu0000 */
286 "pmullw mpeg3_MMX_U_COEF, %%mm1;\n" /* Multiply u coeffs 0x0000uuuuuuuu0000 */
287 "psubw mpeg3_MMX_V_80, %%mm2;\n" /* Subtract 128 from v 0x0000000000cb00cb */
288 "pmullw mpeg3_MMX_V_COEF, %%mm2;\n" /* Multiply v coeffs 0x0000crcrcrcrcrcr */
290 /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */
291 "paddsw %%mm1, %%mm0;\n" /* Add u to result */
292 "paddsw %%mm2, %%mm0;\n" /* Add v to result 0x0000rrrrggggbbbb */
293 "psraw $6, %%mm0;\n" /* Demote precision */
294 "packuswb %%mm0, %%mm0;\n" /* Pack into ARGB 0x0000000000rrggbb */
295 "movd %%mm0, (%3);\n" /* Store output */
297 : "r" (&y
), "r" (&u
), "r" (&v
), "r" (output
));
/*
 * 64-bit operands for the single-pixel RGBA converters.  Each quadword is
 * four 16-bit lanes, written here most significant lane first.  The
 * coefficients are scaled by 64; the converters shift the sum right by 6.
 */
/* Bias 0x0080 in lanes 1 and 0. */
static unsigned long long mpeg3_MMX_U_80_RGB = 0x0000000000800080ULL;
/* Bias 0x0080 in lanes 2 and 1. */
static unsigned long long mpeg3_MMX_V_80_RGB = 0x0000008000800000ULL;
/* Lane 1: 0xffd3 (-45 as a signed word); lane 0: 0x0058 (88). */
static long long mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058LL;
/* Lane 2: 0x006f (111); lane 1: 0xffea (-22 as a signed word). */
static long long mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000LL;
305 inline void mpeg3_rgba32_mmx(unsigned long y
,
308 unsigned long *output
)
311 /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */
313 "movd (%0), %%mm0;\n" /* Load y 0x00000000000000yy */
314 "movd (%1), %%mm1;\n" /* Load v 0x00000000000000vv */
315 "movq %%mm0, %%mm3;\n" /* Copy y to temp */
316 "psllq $16, %%mm1;\n" /* Shift v 0x0000000000vv0000 */
317 "movd (%2), %%mm2;\n" /* Load u 0x00000000000000uu */
318 "psllq $16, %%mm3;\n" /* Shift y */
319 "movq %%mm1, %%mm4;\n" /* Copy v to temp */
320 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x0000000000yy00yy */
321 "psllq $16, %%mm4;\n" /* Shift v */
322 "movq %%mm2, %%mm5;\n" /* Copy u to temp */
323 "psllq $16, %%mm3;\n" /* Shift y */
324 "por %%mm4, %%mm1;\n" /* Overlay new v byte 0x000000vv00vv0000 */
325 "psllq $16, %%mm5;\n" /* Shift u */
326 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x000000yy00yy00yy */
327 "por %%mm5, %%mm2;\n" /* Overlay new u byte 0x0000000000uu00uu */
329 /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */
330 "psubw mpeg3_MMX_V_80_RGB, %%mm1;\n" /* Subtract 128 from v 0x000000vv00vv0000 */
331 "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;\n" /* Multiply v coeffs 0x0000vvvvvvvv0000 */
332 "psllw $6, %%mm0;\n" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */
333 "psubw mpeg3_MMX_U_80_RGB, %%mm2;\n" /* Subtract 128 from u 0x0000000000uu00uu */
334 "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;\n" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */
336 /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */
337 "paddsw %%mm1, %%mm0;\n" /* Add v to result */
338 "paddsw %%mm2, %%mm0;\n" /* Add u to result 0x0000bbbbggggrrrr */
339 "psraw $6, %%mm0;\n" /* Demote precision */
340 "packuswb %%mm0, %%mm0;\n" /* Pack into RGBA 0x0000000000bbggrr */
341 "movd %%mm0, (%3);\n" /* Store output */
343 : "r" (&y
), "r" (&v
), "r" (&u
), "r" (output
));
346 inline void mpeg3_601_rgba32_mmx(unsigned long y
,
349 unsigned long *output
)
352 /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */
354 "movd (%0), %%mm0;\n" /* Load y 0x00000000000000yy */
355 "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;\n" /* Subtract 16 from y */
356 "movd (%1), %%mm1;\n" /* Load v 0x00000000000000vv */
357 "movq %%mm0, %%mm3;\n" /* Copy y to temp */
358 "psllq $16, %%mm1;\n" /* Shift v 0x0000000000vv0000 */
359 "movd (%2), %%mm2;\n" /* Load u 0x00000000000000uu */
360 "psllq $16, %%mm3;\n" /* Shift y */
361 "movq %%mm1, %%mm4;\n" /* Copy v to temp */
362 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x0000000000yy00yy */
363 "psllq $16, %%mm4;\n" /* Shift v */
364 "movq %%mm2, %%mm5;\n" /* Copy u to temp */
365 "psllq $16, %%mm3;\n" /* Shift y */
366 "por %%mm4, %%mm1;\n" /* Overlay new v byte 0x000000vv00vv0000 */
367 "psllq $16, %%mm5;\n" /* Shift u */
368 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x000000yy00yy00yy */
369 "por %%mm5, %%mm2;\n" /* Overlay new u byte 0x0000000000uu00uu */
371 /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */
372 "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;\n" /* Scale y coeffs */
373 "psubw mpeg3_MMX_V_80_RGB, %%mm1;\n" /* Subtract 128 from v 0x000000vv00vv0000 */
374 "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;\n" /* Multiply v coeffs 0x0000vvvvvvvv0000 */
375 "psubw mpeg3_MMX_U_80_RGB, %%mm2;\n" /* Subtract 128 from u 0x0000000000uu00uu */
376 "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;\n" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */
378 /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */
379 "paddsw %%mm1, %%mm0;\n" /* Add v to result */
380 "paddsw %%mm2, %%mm0;\n" /* Add u to result 0x0000bbbbggggrrrr */
381 "psraw $6, %%mm0;\n" /* Demote precision */
382 "packuswb %%mm0, %%mm0;\n" /* Pack into RGBA 0x0000000000bbggrr */
383 "movd %%mm0, (%3);\n" /* Store output */
385 : "r" (&y
), "r" (&v
), "r" (&u
), "r" (output
));
390 #define DITHER_ROW_HEAD \
391 for(h = 0; h < video->out_h; h++) \
393 y_in = &src[0][(video->y_table[h] + video->in_y) * \
394 video->coded_picture_width] + \
396 if(video->chroma_format == CHROMA420) \
398 cb_in = &src[1][((video->y_table[h] + video->in_y) >> 1) * \
399 video->chrom_width] + \
400 (video->in_x >> 1); \
401 cr_in = &src[2][((video->y_table[h] + video->in_y) >> 1) * \
402 video->chrom_width] + \
403 (video->in_x >> 1); \
407 cb_in = &src[1][(video->y_table[h] + video->in_y) * \
408 video->chrom_width] + \
409 (video->in_x >> 1); \
410 cr_in = &src[2][(video->y_table[h] + video->in_y) * \
411 video->chrom_width] + \
412 (video->in_x >> 1); \
414 data = output_rows[h];
416 #define DITHER_ROW_TAIL \
419 #define DITHER_SCALE_HEAD \
420 for(w = 0; w < video->out_w; w++) \
422 uv_subscript = video->x_table[w] / 2; \
423 y_l = y_in[video->x_table[w]]; \
425 r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \
426 g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \
427 b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16;
429 #define DITHER_SCALE_601_HEAD \
430 for(w = 0; w < video->out_w; w++) \
432 uv_subscript = video->x_table[w] / 2; \
433 y_l = mpeg3_601_to_rgb[y_in[video->x_table[w]]]; \
435 r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \
436 g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \
437 b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16;
439 #define DITHER_SCALE_TAIL \
442 #define DITHER_MMX_SCALE_HEAD \
443 for(w = 0; w < video->out_w; w++) \
445 uv_subscript = video->x_table[w] / 2;
447 #define DITHER_MMX_SCALE_TAIL \
451 #define DITHER_MMX_HEAD \
452 for(w = 0; w < video->out_w; w += 2) \
455 #define DITHER_MMX_TAIL \
461 #define DITHER_HEAD \
462 for(w = 0; w < video->horizontal_size; w++) \
466 r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \
467 g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \
468 b_l = (y_l + video->cb_to_b[*cb_in]) >> 16;
470 #define DITHER_601_HEAD \
471 for(w = 0; w < video->horizontal_size; w++) \
473 y_l = mpeg3_601_to_rgb[*y_in++]; \
475 r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \
476 g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \
477 b_l = (y_l + video->cb_to_b[*cb_in]) >> 16;
479 #define DITHER_TAIL \
488 #define STORE_PIXEL_BGR888 \
489 *data++ = CLIP(b_l); \
490 *data++ = CLIP(g_l); \
493 #define STORE_PIXEL_BGRA8888 \
494 *data++ = CLIP(b_l); \
495 *data++ = CLIP(g_l); \
496 *data++ = CLIP(r_l); \
499 #define STORE_PIXEL_RGB565 \
500 *((unsigned short*)data)++ = \
501 ((CLIP(r_l) & 0xf8) << 8) | \
502 ((CLIP(g_l) & 0xfc) << 3) | \
503 ((CLIP(b_l) & 0xf8) >> 3);
505 #define STORE_PIXEL_RGB888 \
506 *data++ = CLIP(r_l); \
507 *data++ = CLIP(g_l); \
510 #define STORE_PIXEL_RGBA8888 \
511 *data++ = CLIP(r_l); \
512 *data++ = CLIP(g_l); \
513 *data++ = CLIP(b_l); \
516 #define STORE_PIXEL_RGBA16161616 \
517 *data_s++ = CLIP(r_l); \
518 *data_s++ = CLIP(g_l); \
519 *data_s++ = CLIP(b_l); \
524 /* Only good for YUV 4:2:0 */
525 int mpeg3video_ditherframe(mpeg3video_t
*video
,
527 unsigned char **output_rows
)
530 register unsigned char *y_in
, *cb_in
, *cr_in
;
531 long y_l
, r_l
, b_l
, g_l
;
532 register unsigned char *data
;
533 register int uv_subscript
, step
, w
= -1;
538 /* =================================== MMX ===================================== */
539 if(/* video->have_mmx */ 1 &&
540 video
->out_w
== video
->horizontal_size
&&
541 video
->out_h
== video
->vertical_size
&&
542 video
->in_w
== video
->out_w
&&
543 video
->in_h
== video
->out_h
&&
546 (video
->color_model
== MPEG3_RGB565
||
547 video
->color_model
== MPEG3_601_RGB565
) &&
548 video
->chroma_format
== CHROMA420
)
550 /* Unscaled 16 bit from NIST */
551 mpeg3video_rgb16_mmx(src
[0],
557 (output_rows
[1] - output_rows
[0]) / 2 - video
->out_w
);
560 if(/* video->have_mmx */ 1 &&
561 (video
->color_model
== MPEG3_BGRA8888
||
562 video
->color_model
== MPEG3_BGR888
||
563 /* video->color_model == MPEG3_RGB888 || */
564 video
->color_model
== MPEG3_RGBA8888
||
565 video
->color_model
== MPEG3_601_BGR888
||
566 video
->color_model
== MPEG3_601_BGRA8888
||
567 video
->color_model
== MPEG3_601_RGB888
||
568 video
->color_model
== MPEG3_601_RGBA8888
))
571 if(video
->color_model
== MPEG3_BGRA8888
||
572 video
->color_model
== MPEG3_RGBA8888
||
573 video
->color_model
== MPEG3_601_BGRA8888
||
574 video
->color_model
== MPEG3_601_RGBA8888
) step
= 4;
576 if(video
->color_model
== MPEG3_BGR888
||
577 video
->color_model
== MPEG3_RGB888
||
578 video
->color_model
== MPEG3_601_BGR888
||
579 video
->color_model
== MPEG3_601_RGB888
) step
= 3;
582 /* Transfer row with scaling */
583 if(video
->out_w
!= video
->horizontal_size
)
585 switch(video
->color_model
)
589 DITHER_MMX_SCALE_HEAD
590 mpeg3_bgra32_mmx(y_in
[video
->x_table
[w
]],
593 (unsigned long*)data
);
594 DITHER_MMX_SCALE_TAIL
597 case MPEG3_601_BGRA8888
:
598 case MPEG3_601_BGR888
:
599 DITHER_MMX_SCALE_HEAD
600 mpeg3_601_bgra32_mmx(y_in
[video
->x_table
[w
]],
603 (unsigned long*)data
);
604 DITHER_MMX_SCALE_TAIL
609 DITHER_MMX_SCALE_HEAD
610 mpeg3_rgba32_mmx(y_in
[video
->x_table
[w
]],
613 (unsigned long*)data
);
614 DITHER_MMX_SCALE_TAIL
617 case MPEG3_601_RGBA8888
:
618 case MPEG3_601_RGB888
:
619 DITHER_MMX_SCALE_HEAD
620 mpeg3_601_rgba32_mmx(y_in
[video
->x_table
[w
]],
623 (unsigned long*)data
);
624 DITHER_MMX_SCALE_TAIL
629 /* Transfer row unscaled */
631 switch(video
->color_model
)
633 /* MMX byte swap 24 and 32 bit */
637 mpeg3_bgra32_mmx(*y_in
++,
640 (unsigned long*)data
);
642 mpeg3_bgra32_mmx(*y_in
++,
645 (unsigned long*)data
);
649 /* MMX 601 byte swap 24 and 32 bit */
650 case MPEG3_601_BGRA8888
:
651 case MPEG3_601_BGR888
:
653 mpeg3_601_bgra32_mmx(*y_in
++,
656 (unsigned long*)data
);
658 mpeg3_601_bgra32_mmx(*y_in
++,
661 (unsigned long*)data
);
665 /* MMX 24 and 32 bit no byte swap */
669 mpeg3_rgba32_mmx(*y_in
++,
672 (unsigned long*)data
);
674 mpeg3_rgba32_mmx(*y_in
++,
677 (unsigned long*)data
);
681 /* MMX 601 24 and 32 bit no byte swap */
682 case MPEG3_601_RGBA8888
:
683 case MPEG3_601_RGB888
:
685 mpeg3_601_rgba32_mmx(*y_in
++,
688 (unsigned long*)data
);
690 mpeg3_601_rgba32_mmx(*y_in
++,
693 (unsigned long*)data
);
702 /* ================================== NO MMX ==================================== */
705 /* Transfer row with scaling */
706 if(video
->out_w
!= video
->horizontal_size
)
708 switch(video
->color_model
)
735 case MPEG3_601_BGR888
:
736 DITHER_SCALE_601_HEAD
740 case MPEG3_601_BGRA8888
:
741 DITHER_SCALE_601_HEAD
745 case MPEG3_601_RGB565
:
746 DITHER_SCALE_601_HEAD
750 case MPEG3_601_RGB888
:
751 DITHER_SCALE_601_HEAD
755 case MPEG3_601_RGBA8888
:
756 DITHER_SCALE_601_HEAD
760 case MPEG3_RGBA16161616
:
762 register unsigned short *data_s
= (unsigned short*)data
;
764 STORE_PIXEL_RGBA16161616
772 /* Transfer row unscaled */
773 switch(video
->color_model
)
800 case MPEG3_601_BGR888
:
805 case MPEG3_601_BGRA8888
:
810 case MPEG3_601_RGB565
:
815 case MPEG3_601_RGB888
:
820 case MPEG3_601_RGBA8888
:
825 case MPEG3_RGBA16161616
:
827 register unsigned short *data_s
= (unsigned short*)data
;
829 STORE_PIXEL_RGBA16161616
836 } /* End of non-MMX */
840 __asm__
__volatile__ ("emms");
845 int mpeg3video_ditherframe444(mpeg3video_t
*video
, unsigned char *src
[])
/* Forwards video and src to mpeg3video_ditherframe(), using
 * video->output_rows as the destination row pointers, and returns that
 * call's result. */
850 int mpeg3video_dithertop(mpeg3video_t
*video
, unsigned char *src
[])
852 return mpeg3video_ditherframe(video
, src
, video
->output_rows
);
855 int mpeg3video_dithertop444(mpeg3video_t
*video
, unsigned char *src
[])
860 int mpeg3video_ditherbot(mpeg3video_t
*video
, unsigned char *src
[])
865 int mpeg3video_ditherbot444(mpeg3video_t
*video
, unsigned char *src
[])
/* Copies len bytes from input to output.  The only live statement visible
 * here is the memcpy() call at the end; the lines after the "8 byte
 * alignment" note look like a disabled hand-written 64-bit copy loop.
 * NOTE(review): confirm in the full file that those lines sit inside a
 * comment. */
870 void memcpy_fast(unsigned char *output
, unsigned char *input
, long len
)
873 /* 8 byte alignment */
875 * if(!((long)input & 0x7))
878 * for(i = 0; i < len2; )
880 * ((int64_t*)output)[i] = ((int64_t*)input)[i];
882 * ((int64_t*)output)[i] = ((int64_t*)input)[i];
886 * for(i *= 16; i < len; i++)
888 * output[i] = input[i];
893 memcpy(output
, input
, len
);
896 int mpeg3video_init_output()
899 for(i
= 0; i
< 256; i
++)
901 value
= (int)(1.1644 * i
- 255 * 0.0627 + 0.5);
902 if(value
< 0) value
= 0;
904 if(value
> 255) value
= 255;
905 mpeg3_601_to_rgb
[i
] = value
;
910 int mpeg3video_present_frame(mpeg3video_t
*video
)
913 unsigned char **src
= video
->output_src
;
915 /* Copy YUV buffers */
919 long offset0
, offset1
;
920 int chroma_denominator
;
922 if(video
->chroma_format
== CHROMA420
)
923 chroma_denominator
= 2;
925 chroma_denominator
= 1;
928 if(!video
->y_output
) return 0;
932 if(video
->in_x
== 0 &&
933 video
->in_w
>= video
->coded_picture_width
&&
934 video
->row_span
== video
->coded_picture_width
)
936 size0
= video
->coded_picture_width
* video
->in_h
;
937 size1
= video
->chrom_width
* (int)((float)video
->in_h
/ chroma_denominator
+ 0.5);
938 offset0
= video
->coded_picture_width
* video
->in_y
;
939 offset1
= video
->chrom_width
* (int)((float)video
->in_y
/ chroma_denominator
+ 0.5);
941 printf("mpeg3video_present_frame 1\n");
943 * if(video->in_y > 0)
945 * offset[1] += video->chrom_width / 2;
946 * size[1] += video->chrom_width / 2;
950 memcpy(video
->y_output
, src
[0] + offset0
, size0
);
951 memcpy(video
->u_output
, src
[1] + offset1
, size1
);
952 memcpy(video
->v_output
, src
[2] + offset1
, size1
);
955 /* One block per row */
957 //printf("mpeg3video_present_frame 2 %d %d %d\n", video->in_w, video->coded_picture_width, video->chrom_width);
958 int row_span
= video
->in_w
;
963 row_span
= video
->row_span
;
965 row_span0
= row_span
;
966 row_span1
= (row_span
>> 1);
968 size1
= (video
->in_w
>> 1);
969 offset0
= video
->coded_picture_width
* video
->in_y
;
970 offset1
= video
->chrom_width
* video
->in_y
/ chroma_denominator
;
972 for(i
= 0; i
< video
->in_h
; i
++)
974 memcpy(video
->y_output
+ i
* row_span0
,
975 src
[0] + offset0
+ video
->in_x
,
978 offset0
+= video
->coded_picture_width
;
980 if(chroma_denominator
== 1 || !(i
% 2))
982 memcpy(video
->u_output
+ i
/ chroma_denominator
* row_span1
,
983 src
[1] + offset1
+ (video
->in_x
>> 1),
985 memcpy(video
->v_output
+ i
/ chroma_denominator
* row_span1
,
986 src
[2] + offset1
+ (video
->in_x
>> 1),
988 if(video
->horizontal_size
< video
->in_w
)
990 memset(video
->u_output
+
991 i
/ chroma_denominator
* row_span1
+
992 (video
->horizontal_size
>> 1),
995 (video
->horizontal_size
>> 1));
996 memset(video
->v_output
+
997 i
/ chroma_denominator
* row_span1
+
998 (video
->horizontal_size
>> 1),
1000 (video
->in_w
>> 1) -
1001 (video
->horizontal_size
>> 1));
1006 if(chroma_denominator
== 1 || (i
% 2))
1007 offset1
+= video
->chrom_width
;
1014 /* Want RGB buffer */
1015 /* Copy the frame to the output with YUV to RGB conversion */
1018 if(video
->chroma_format
!= CHROMA444
)
1020 mpeg3video_ditherframe(video
, src
, video
->output_rows
);
1023 mpeg3video_ditherframe444(video
, src
);
1027 if((video
->pict_struct
== FRAME_PICTURE
&& video
->topfirst
) ||
1028 video
->pict_struct
== BOTTOM_FIELD
)
1030 /* top field first */
1031 if(video
->chroma_format
!= CHROMA444
)
1033 mpeg3video_dithertop(video
, src
);
1034 mpeg3video_ditherbot(video
, src
);
1038 mpeg3video_dithertop444(video
, src
);
1039 mpeg3video_ditherbot444(video
, src
);
1044 /* bottom field first */
1045 if(video
->chroma_format
!= CHROMA444
)
1047 mpeg3video_ditherbot(video
, src
);
1048 mpeg3video_dithertop(video
, src
);
1052 mpeg3video_ditherbot444(video
, src
);
1053 mpeg3video_dithertop444(video
, src
);
1060 int mpeg3video_display_second_field(mpeg3video_t
*video
)