r123: Merged HEAD and TEST. New stuff shall be committed to HEAD from now on.
[cinelerra_cv/mob.git] / libmpeg3 / video / output.c
blob1a3efbd625ae94c017a2cad97acb0ea30567b3ac
1 #include "../libmpeg3.h"
2 #include "mpeg3video.h"
3 #include <string.h>
/* Clamp an integer expression to the 8-bit range 0..255.  The argument is
 * evaluated more than once, so it must not have side effects. */
#define CLIP(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
/* Constant operands for the MMX colour conversion code.  The assembly refers
 * to most of these by symbol name as 64-bit memory operands holding four
 * 16-bit words.  Products formed with the coefficient words are shifted
 * right by 6 (psraw $6) in mpeg3video_rgb16_mmx, i.e. they are scaled by 64.
 * NOTE(review): the two-element unsigned long arrays only form one
 * contiguous 64-bit quadword when unsigned long is 32 bits wide. */
7 static long long mpeg3_MMX_0 = 0L; /* not referenced in the code shown here */
8 static unsigned long mpeg3_MMX_10w[] = {0x00100010, 0x00100010}; /* 0x0010 per 16-bit word; not referenced in the code shown here */
9 static unsigned long mpeg3_MMX_80w[] = {0x00800080, 0x00800080}; /* 0x0080 per 16-bit word: chroma bias subtracted from Cb/Cr */
11 static unsigned long mpeg3_MMX_00FFw[] = {0x00ff00ff, 0x00ff00ff}; /* 0x00FF per 16-bit word: keeps the even-numbered luma bytes */
13 static unsigned short mpeg3_MMX_Ublucoeff[] = {0x81, 0x81, 0x81, 0x81}; /* 0x0081 = 129: Cb contribution to blue */
14 static unsigned short mpeg3_MMX_Vredcoeff[] = {0x66, 0x66, 0x66, 0x66}; /* 0x0066 = 102: Cr contribution to red */
16 static unsigned short mpeg3_MMX_Ugrncoeff[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8}; /* 0xFFE8 = -24 as signed 16 bit: Cb contribution to green */
17 static unsigned short mpeg3_MMX_Vgrncoeff[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd}; /* 0xFFCD = -51 as signed 16 bit: Cr contribution to green */
19 static unsigned short mpeg3_MMX_Ycoeff[] = {0x4a, 0x4a, 0x4a, 0x4a}; /* 0x004A = 74: luma scale */
21 static unsigned short mpeg3_MMX_redmask[] = {0xf800, 0xf800, 0xf800, 0xf800}; /* 0xF800: top 5 bits of each 16-bit pixel (red in RGB565) */
23 static unsigned short mpeg3_MMX_grnmask[] = {0x7e0, 0x7e0, 0x7e0, 0x7e0}; /* 0x07E0: middle 6 bits of each 16-bit pixel (green in RGB565) */
/* Luma remapping table filled in by mpeg3video_init_output(). */
25 static unsigned char mpeg3_601_to_rgb[256];
27 /* Algorithm */
28 /* r = (int)(*y + 1.371 * (*cr - 128)); */
29 /* g = (int)(*y - 0.698 * (*cr - 128) - 0.336 * (*cb - 128)); */
30 /* b = (int)(*y + 1.732 * (*cb - 128)); */
32 #ifdef HAVE_MMX
/*
 * Convert a planar Y/Cr/Cb picture to 16-bit pixels with hand-scheduled MMX.
 *
 *   lum   luma plane, cols bytes per row, rows rows (the end pointer y is
 *         computed as lum + cols * rows)
 *   cr    chroma plane read through operand %0, 4 bytes per inner iteration
 *   cb    chroma plane read through operand %1, 4 bytes per inner iteration
 *   out   destination, addressed as unsigned short pixels
 *   rows  number of luma rows
 *   cols  number of luma columns
 *   mod   extra 16-bit pixels between the end of one output row and the
 *         start of the next
 *
 * One pass of the inner loop loads 8 luma bytes from the current row (%2)
 * and 8 from the row cols bytes further on (%2,%3), shares the same 4 Cb and
 * 4 Cr samples between both rows, and stores 16 bytes to the current output
 * row (%4) and 16 bytes to the row col1 pixels further on (%4,%5,2).  The
 * pointers then advance by 8 luma bytes, 4 chroma bytes and 16 output bytes
 * and the column counter x (%6) by 8 until x reaches cols.  The outer loop
 * then skips the luma row that was already consumed, adds mod bytes to the
 * output pointer, resets x and repeats while the luma pointer is below y.
 *
 * Before the asm runs, mod is rewritten to (cols + 2 * mod) * 2, the byte
 * distance from the end of one converted output row to the start of the
 * row after next.
 *
 * NOTE(review): every operand is listed as an input (": :"), yet the code
 * modifies %0, %1, %2, %4 and the memory operand %6; no clobber list is
 * visible in this excerpt.
 * NOTE(review): pointer operands are advanced with addl/leal/cmpl, i.e.
 * 32-bit operand size, so the routine looks i386-only - confirm before
 * building for x86-64.
 */
33 inline void mpeg3video_rgb16_mmx(unsigned char *lum,
34 unsigned char *cr,
35 unsigned char *cb,
36 unsigned char *out,
37 int rows,
38 int cols,
39 int mod)
41 unsigned short *row1;
42 int x;
43 unsigned char *y;
44 int col1;
46 row1 = (unsigned short *)out;
47 col1 = cols + mod;
48 mod += cols + mod;
49 mod *= 2;
50 y = lum + cols * rows;
51 x = 0;
53 __asm__ __volatile__(
54 ".align 8\n"
55 "1:\n"
56 "movd (%1), %%mm0\n" /* 4 Cb 0 0 0 0 u3 u2 u1 u0 */
57 "pxor %%mm7, %%mm7\n"
58 "movd (%0), %%mm1\n" /* 4 Cr 0 0 0 0 v3 v2 v1 v0 */
59 "punpcklbw %%mm7, %%mm0\n" /* 4 W cb 0 u3 0 u2 0 u1 0 u0 */
60 "punpcklbw %%mm7, %%mm1\n" /* 4 W cr 0 v3 0 v2 0 v1 0 v0 */
62 "psubw mpeg3_MMX_80w, %%mm0\n"
63 "psubw mpeg3_MMX_80w, %%mm1\n"
64 "movq %%mm0, %%mm2\n" /* Cb 0 u3 0 u2 0 u1 0 u0 */
65 "movq %%mm1, %%mm3\n" /* Cr */
66 "pmullw mpeg3_MMX_Ugrncoeff, %%mm2\n" /* Cb2green 0 R3 0 R2 0 R1 0 R0 */
67 "movq (%2), %%mm6\n" /* L1 l7 L6 L5 L4 L3 L2 L1 L0 */
68 "pmullw mpeg3_MMX_Ublucoeff, %%mm0\n" /* Cb2blue */
69 "pand mpeg3_MMX_00FFw, %%mm6\n" /* L1 00 L6 00 L4 00 L2 00 L0 */
70 "pmullw mpeg3_MMX_Vgrncoeff, %%mm3\n" /* Cr2green */
71 "movq (%2), %%mm7\n" /* L2 */
72 "pmullw mpeg3_MMX_Vredcoeff, %%mm1\n" /* Cr2red */
73 "psrlw $8, %%mm7\n" /* L2 00 L7 00 L5 00 L3 00 L1 */
74 "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum1 */
75 "paddw %%mm3, %%mm2\n" /* Cb2green + Cr2green == green */
76 "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum2 */
78 "movq %%mm6, %%mm4\n" /* lum1 */
79 "paddw %%mm0, %%mm6\n" /* lum1 +blue 00 B6 00 B4 00 B2 00 B0 */
80 "movq %%mm4, %%mm5\n" /* lum1 */
81 "paddw %%mm1, %%mm4\n" /* lum1 +red 00 R6 00 R4 00 R2 00 R0 */
82 "paddw %%mm2, %%mm5\n" /* lum1 +green 00 G6 00 G4 00 G2 00 G0 */
83 "psraw $6, %%mm4\n" /* R1 0 .. 64 */
84 "movq %%mm7, %%mm3\n" /* lum2 00 L7 00 L5 00 L3 00 L1 */
85 "psraw $6, %%mm5\n" /* G1 - .. + */
86 "paddw %%mm0, %%mm7\n" /* Lum2 +blue 00 B7 00 B5 00 B3 00 B1 */
87 "psraw $6, %%mm6\n" /* B1 0 .. 64 */
88 "packuswb %%mm4, %%mm4\n" /* R1 R1 */
89 "packuswb %%mm5, %%mm5\n" /* G1 G1 */
90 "packuswb %%mm6, %%mm6\n" /* B1 B1 */
91 "punpcklbw %%mm4, %%mm4\n"
92 "punpcklbw %%mm5, %%mm5\n"
94 "pand mpeg3_MMX_redmask, %%mm4\n"
95 "psllw $3, %%mm5\n" /* GREEN 1 */
96 "punpcklbw %%mm6, %%mm6\n"
97 "pand mpeg3_MMX_grnmask, %%mm5\n"
98 "pand mpeg3_MMX_redmask, %%mm6\n"
99 "por %%mm5, %%mm4\n" /* */
100 "psrlw $11, %%mm6\n" /* BLUE 1 */
101 "movq %%mm3, %%mm5\n" /* lum2 */
102 "paddw %%mm1, %%mm3\n" /* lum2 +red 00 R7 00 R5 00 R3 00 R1 */
103 "paddw %%mm2, %%mm5\n" /* lum2 +green 00 G7 00 G5 00 G3 00 G1 */
104 "psraw $6, %%mm3\n" /* R2 */
105 "por %%mm6, %%mm4\n" /* MM4 */
106 "psraw $6, %%mm5\n" /* G2 */
107 "movq (%2, %3), %%mm6\n" /* L3 */
108 "psraw $6, %%mm7\n"
109 "packuswb %%mm3, %%mm3\n"
110 "packuswb %%mm5, %%mm5\n"
111 "packuswb %%mm7, %%mm7\n"
112 "pand mpeg3_MMX_00FFw, %%mm6\n" /* L3 */
113 "punpcklbw %%mm3, %%mm3\n"
114 "punpcklbw %%mm5, %%mm5\n"
115 "pmullw mpeg3_MMX_Ycoeff, %%mm6\n" /* lum3 */
116 "punpcklbw %%mm7, %%mm7\n"
117 "psllw $3, %%mm5\n" /* GREEN 2 */
118 "pand mpeg3_MMX_redmask, %%mm7\n"
119 "pand mpeg3_MMX_redmask, %%mm3\n"
120 "psrlw $11, %%mm7\n" /* BLUE 2 */
121 "pand mpeg3_MMX_grnmask, %%mm5\n"
122 "por %%mm7, %%mm3\n"
123 "movq (%2,%3), %%mm7\n" /* L4 */
124 "por %%mm5, %%mm3\n" /* */
125 "psrlw $8, %%mm7\n" /* L4 */
126 "movq %%mm4, %%mm5\n"
127 "punpcklwd %%mm3, %%mm4\n"
128 "pmullw mpeg3_MMX_Ycoeff, %%mm7\n" /* lum4 */
129 "punpckhwd %%mm3, %%mm5\n"
131 "movq %%mm4, (%4)\n"
132 "movq %%mm5, 8(%4)\n"
134 "movq %%mm6, %%mm4\n" /* Lum3 */
135 "paddw %%mm0, %%mm6\n" /* Lum3 +blue */
137 "movq %%mm4, %%mm5\n" /* Lum3 */
138 "paddw %%mm1, %%mm4\n" /* Lum3 +red */
139 "paddw %%mm2, %%mm5\n" /* Lum3 +green */
140 "psraw $6, %%mm4\n"
141 "movq %%mm7, %%mm3\n" /* Lum4 */
142 "psraw $6, %%mm5\n"
143 "paddw %%mm0, %%mm7\n" /* Lum4 +blue */
144 "psraw $6, %%mm6\n" /* Lum3 +blue */
145 "movq %%mm3, %%mm0\n" /* Lum4 */
146 "packuswb %%mm4, %%mm4\n"
147 "paddw %%mm1, %%mm3\n" /* Lum4 +red */
148 "packuswb %%mm5, %%mm5\n"
149 "paddw %%mm2, %%mm0\n" /* Lum4 +green */
150 "packuswb %%mm6, %%mm6\n"
151 "punpcklbw %%mm4, %%mm4\n"
152 "punpcklbw %%mm5, %%mm5\n"
153 "punpcklbw %%mm6, %%mm6\n"
154 "psllw $3, %%mm5\n" /* GREEN 3 */
155 "pand mpeg3_MMX_redmask, %%mm4\n"
156 "psraw $6, %%mm3\n" /* psr 6 */
157 "psraw $6, %%mm0\n"
158 "pand mpeg3_MMX_redmask, %%mm6\n" /* BLUE */
159 "pand mpeg3_MMX_grnmask, %%mm5\n"
160 "psrlw $11, %%mm6\n" /* BLUE 3 */
161 "por %%mm5, %%mm4\n"
162 "psraw $6, %%mm7\n"
163 "por %%mm6, %%mm4\n"
164 "packuswb %%mm3, %%mm3\n"
165 "packuswb %%mm0, %%mm0\n"
166 "packuswb %%mm7, %%mm7\n"
167 "punpcklbw %%mm3, %%mm3\n"
168 "punpcklbw %%mm0, %%mm0\n"
169 "punpcklbw %%mm7, %%mm7\n"
170 "pand mpeg3_MMX_redmask, %%mm3\n"
171 "pand mpeg3_MMX_redmask, %%mm7\n" /* BLUE */
172 "psllw $3, %%mm0\n" /* GREEN 4 */
173 "psrlw $11, %%mm7\n"
174 "pand mpeg3_MMX_grnmask, %%mm0\n"
175 "por %%mm7, %%mm3\n"
176 "addl $8, %6\n"
177 "por %%mm0, %%mm3\n"
179 "movq %%mm4, %%mm5\n"
181 "punpcklwd %%mm3, %%mm4\n"
182 "punpckhwd %%mm3, %%mm5\n"
184 "movq %%mm4, (%4,%5,2)\n"
185 "movq %%mm5, 8(%4,%5,2)\n"
187 "addl $8, %2\n"
188 "addl $4, %0\n"
189 "addl $4, %1\n"
190 "cmpl %3, %6\n"
191 "leal 16(%4), %4\n"
192 "jl 1b\n"
193 "addl %3, %2\n" /* lum += cols */
194 "addl %7, %4\n" /* row1 += mod */
195 "movl $0, %6\n"
196 "cmpl %8, %2\n"
197 "jl 1b\n"
198 : : "r" (cr),
199 "r" (cb),
200 "r" (lum),
201 "r" (cols),
202 "r" (row1) ,
203 "r" (col1),
204 "m" (x),
205 "m" (mod),
206 "m" (y)
/* Per-lane constants for the single-pixel BGRA routines.  Each value is four
 * 16-bit words; word 0 is the least significant. */
210 static unsigned long long mpeg3_MMX_U_80 = 0x0000008000800000; /* 128 in words 1 and 2 */
211 static unsigned long long mpeg3_MMX_V_80 = 0x0000000000800080; /* 128 in words 0 and 1 */
212 static long long mpeg3_MMX_U_COEF = 0x00000058ffd30000; /* word 1 = 0xffd3 (-45), word 2 = 0x0058 (88) */
213 static long long mpeg3_MMX_V_COEF = 0x00000000ffea006f; /* word 0 = 0x006f (111), word 1 = 0xffea (-22) */
214 static long long mpeg3_MMX_601_Y_COEF = 0x0000004800480048; /* 0x48 (72) in words 0..2 */
215 static long long mpeg3_MMX_601_Y_DIFF = 0x0000000000000010; /* 16 in word 0 */
/*
 * Convert one y/u/v sample triple to a 4-byte pixel with MMX and store it
 * at output.
 *
 * The three samples are passed by value and their addresses are handed to
 * the asm, which loads 32 bits from each with movd.  y is replicated into
 * words 0..2 and shifted left by 6; u (biased by mpeg3_MMX_U_80) is
 * multiplied by mpeg3_MMX_U_COEF in words 1 and 2; v (biased by
 * mpeg3_MMX_V_80) by mpeg3_MMX_V_COEF in words 0 and 1.  The saturating sum
 * is shifted right by 6, packed to unsigned bytes and the low 32 bits are
 * written with movd, so 4 bytes are always stored.
 *
 * The callers in mpeg3video_ditherframe pass the Cr sample as u and the Cb
 * sample as v.
 *
 * NOTE(review): the asm lists no outputs or clobbers although it writes
 * through output and uses mm0..mm5.
 */
217 inline void mpeg3_bgra32_mmx(unsigned long y,
218 unsigned long u,
219 unsigned long v,
220 unsigned long *output)
222 asm(
223 /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */
224 /* for bgr24. */
225 "movd (%0), %%mm0;\n" /* Load y 0x00000000000000yy */
226 "movd (%1), %%mm1;\n" /* Load u 0x00000000000000cr */
227 "movq %%mm0, %%mm3;\n" /* Copy y to temp */
228 "psllq $16, %%mm1;\n" /* Shift u 0x0000000000cr0000 */
229 "movd (%2), %%mm2;\n" /* Load v 0x00000000000000cb */
230 "psllq $16, %%mm3;\n" /* Shift y */
231 "movq %%mm1, %%mm4;\n" /* Copy u to temp */
232 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x0000000000yy00yy */
233 "psllq $16, %%mm4;\n" /* Shift u */
234 "movq %%mm2, %%mm5;\n" /* Copy v to temp */
235 "psllq $16, %%mm3;\n" /* Shift y */
236 "por %%mm4, %%mm1;\n" /* Overlay new u byte 0x000000cr00cr0000 */
237 "psllq $16, %%mm5;\n" /* Shift v */
238 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x000000yy00yy00yy */
239 "por %%mm5, %%mm2;\n" /* Overlay new v byte 0x0000000000cb00cb */
241 /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */
242 "psubw mpeg3_MMX_U_80, %%mm1;\n" /* Subtract 128 from u 0x000000uu00uu0000 */
243 "pmullw mpeg3_MMX_U_COEF, %%mm1;\n" /* Multiply u coeffs 0x0000uuuuuuuu0000 */
244 "psllw $6, %%mm0;\n" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */
245 "psubw mpeg3_MMX_V_80, %%mm2;\n" /* Subtract 128 from v 0x0000000000cb00cb */
246 "pmullw mpeg3_MMX_V_COEF, %%mm2;\n" /* Multiply v coeffs 0x0000crcrcrcrcrcr */
248 /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */
249 "paddsw %%mm1, %%mm0;\n" /* Add u to result */
250 "paddsw %%mm2, %%mm0;\n" /* Add v to result 0x0000rrrrggggbbbb */
251 "psraw $6, %%mm0;\n" /* Demote precision */
252 "packuswb %%mm0, %%mm0;\n" /* Pack into ARGB 0x0000000000rrggbb */
253 "movd %%mm0, (%3);\n" /* Store output */
255 : "r" (&y), "r" (&u), "r" (&v), "r" (output));
/*
 * Same conversion as mpeg3_bgra32_mmx, except that mpeg3_MMX_601_Y_DIFF (16)
 * is first subtracted from y with signed saturation and the replicated y
 * words are multiplied by mpeg3_MMX_601_Y_COEF (72) instead of being
 * shifted left by 6.  Four bytes are always stored at output.
 *
 * NOTE(review): the asm lists no outputs or clobbers although it writes
 * through output and uses mm0..mm5.
 */
258 inline void mpeg3_601_bgra32_mmx(unsigned long y,
259 unsigned long u,
260 unsigned long v,
261 unsigned long *output)
263 asm(
264 /* Output will be 0x00rrggbb with the 00 trailing so this can also be used */
265 /* for bgr24. */
266 "movd (%0), %%mm0;\n" /* Load y 0x00000000000000yy */
267 "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;\n" /* Subtract 16 from y */
268 "movd (%1), %%mm1;\n" /* Load u 0x00000000000000cr */
269 "movq %%mm0, %%mm3;\n" /* Copy y to temp */
270 "psllq $16, %%mm1;\n" /* Shift u 0x0000000000cr0000 */
271 "movd (%2), %%mm2;\n" /* Load v 0x00000000000000cb */
272 "psllq $16, %%mm3;\n" /* Shift y */
273 "movq %%mm1, %%mm4;\n" /* Copy u to temp */
274 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x0000000000yy00yy */
275 "psllq $16, %%mm4;\n" /* Shift u */
276 "movq %%mm2, %%mm5;\n" /* Copy v to temp */
277 "psllq $16, %%mm3;\n" /* Shift y */
278 "por %%mm4, %%mm1;\n" /* Overlay new u byte 0x000000cr00cr0000 */
279 "psllq $16, %%mm5;\n" /* Shift v */
280 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x000000yy00yy00yy */
281 "por %%mm5, %%mm2;\n" /* Overlay new v byte 0x0000000000cb00cb */
283 /* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */
284 "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;\n" /* Scale and shift y coeffs */
285 "psubw mpeg3_MMX_U_80, %%mm1;\n" /* Subtract 128 from u 0x000000uu00uu0000 */
286 "pmullw mpeg3_MMX_U_COEF, %%mm1;\n" /* Multiply u coeffs 0x0000uuuuuuuu0000 */
287 "psubw mpeg3_MMX_V_80, %%mm2;\n" /* Subtract 128 from v 0x0000000000cb00cb */
288 "pmullw mpeg3_MMX_V_COEF, %%mm2;\n" /* Multiply v coeffs 0x0000crcrcrcrcrcr */
290 /* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */
291 "paddsw %%mm1, %%mm0;\n" /* Add u to result */
292 "paddsw %%mm2, %%mm0;\n" /* Add v to result 0x0000rrrrggggbbbb */
293 "psraw $6, %%mm0;\n" /* Demote precision */
294 "packuswb %%mm0, %%mm0;\n" /* Pack into ARGB 0x0000000000rrggbb */
295 "movd %%mm0, (%3);\n" /* Store output */
297 : "r" (&y), "r" (&u), "r" (&v), "r" (output));
/* Per-lane constants for the single-pixel RGBA routines: the same bias and
 * coefficient values as the BGR set, with the lanes used by u and v swapped.
 * Word 0 is the least significant 16-bit word. */
300 static unsigned long long mpeg3_MMX_U_80_RGB = 0x0000000000800080; /* 128 in words 0 and 1 */
301 static unsigned long long mpeg3_MMX_V_80_RGB = 0x0000008000800000; /* 128 in words 1 and 2 */
302 static long long mpeg3_MMX_U_COEF_RGB = 0x00000000ffd30058; /* word 0 = 0x0058 (88), word 1 = 0xffd3 (-45) */
303 static long long mpeg3_MMX_V_COEF_RGB = 0x0000006fffea0000; /* word 1 = 0xffea (-22), word 2 = 0x006f (111) */
/*
 * Convert one y/u/v sample triple to a 4-byte pixel with MMX and store it
 * at output, with the colour lanes in the opposite order to
 * mpeg3_bgra32_mmx.
 *
 * Note the operand order: %1 is &v and %2 is &u.  v (biased by
 * mpeg3_MMX_V_80_RGB) is multiplied by mpeg3_MMX_V_COEF_RGB in words 1 and
 * 2; u (biased by mpeg3_MMX_U_80_RGB) by mpeg3_MMX_U_COEF_RGB in words 0
 * and 1.  y is replicated into words 0..2 and shifted left by 6.  The
 * saturating sum is shifted right by 6, packed to unsigned bytes, and 4
 * bytes are always stored.
 *
 * NOTE(review): the asm lists no outputs or clobbers although it writes
 * through output and uses mm0..mm5.
 */
305 inline void mpeg3_rgba32_mmx(unsigned long y,
306 unsigned long u,
307 unsigned long v,
308 unsigned long *output)
310 asm(
311 /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */
312 /* for rgb24. */
313 "movd (%0), %%mm0;\n" /* Load y 0x00000000000000yy */
314 "movd (%1), %%mm1;\n" /* Load v 0x00000000000000vv */
315 "movq %%mm0, %%mm3;\n" /* Copy y to temp */
316 "psllq $16, %%mm1;\n" /* Shift v 0x0000000000vv0000 */
317 "movd (%2), %%mm2;\n" /* Load u 0x00000000000000uu */
318 "psllq $16, %%mm3;\n" /* Shift y */
319 "movq %%mm1, %%mm4;\n" /* Copy v to temp */
320 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x0000000000yy00yy */
321 "psllq $16, %%mm4;\n" /* Shift v */
322 "movq %%mm2, %%mm5;\n" /* Copy u to temp */
323 "psllq $16, %%mm3;\n" /* Shift y */
324 "por %%mm4, %%mm1;\n" /* Overlay new v byte 0x000000vv00vv0000 */
325 "psllq $16, %%mm5;\n" /* Shift u */
326 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x000000yy00yy00yy */
327 "por %%mm5, %%mm2;\n" /* Overlay new u byte 0x0000000000uu00uu */
329 /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */
330 "psubw mpeg3_MMX_V_80_RGB, %%mm1;\n" /* Subtract 128 from v 0x000000vv00vv0000 */
331 "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;\n" /* Multiply v coeffs 0x0000vvvvvvvv0000 */
332 "psllw $6, %%mm0;\n" /* Shift y coeffs 0x0000yyy0yyy0yyy0 */
333 "psubw mpeg3_MMX_U_80_RGB, %%mm2;\n" /* Subtract 128 from u 0x0000000000uu00uu */
334 "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;\n" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */
336 /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */
337 "paddsw %%mm1, %%mm0;\n" /* Add v to result */
338 "paddsw %%mm2, %%mm0;\n" /* Add u to result 0x0000bbbbggggrrrr */
339 "psraw $6, %%mm0;\n" /* Demote precision */
340 "packuswb %%mm0, %%mm0;\n" /* Pack into RGBA 0x0000000000bbggrr */
341 "movd %%mm0, (%3);\n" /* Store output */
343 : "r" (&y), "r" (&v), "r" (&u), "r" (output));
/*
 * Same conversion as mpeg3_rgba32_mmx, except that mpeg3_MMX_601_Y_DIFF (16)
 * is first subtracted from y with signed saturation and the replicated y
 * words are multiplied by mpeg3_MMX_601_Y_COEF (72) instead of being
 * shifted left by 6.  Operand %1 is &v and %2 is &u; 4 bytes are always
 * stored at output.
 *
 * NOTE(review): the asm lists no outputs or clobbers although it writes
 * through output and uses mm0..mm5.
 */
346 inline void mpeg3_601_rgba32_mmx(unsigned long y,
347 unsigned long u,
348 unsigned long v,
349 unsigned long *output)
351 asm(
352 /* Output will be 0x00bbggrr with the 00 trailing so this can also be used */
353 /* for rgb24. */
354 "movd (%0), %%mm0;\n" /* Load y 0x00000000000000yy */
355 "psubsw mpeg3_MMX_601_Y_DIFF, %%mm0;\n" /* Subtract 16 from y */
356 "movd (%1), %%mm1;\n" /* Load v 0x00000000000000vv */
357 "movq %%mm0, %%mm3;\n" /* Copy y to temp */
358 "psllq $16, %%mm1;\n" /* Shift v 0x0000000000vv0000 */
359 "movd (%2), %%mm2;\n" /* Load u 0x00000000000000uu */
360 "psllq $16, %%mm3;\n" /* Shift y */
361 "movq %%mm1, %%mm4;\n" /* Copy v to temp */
362 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x0000000000yy00yy */
363 "psllq $16, %%mm4;\n" /* Shift v */
364 "movq %%mm2, %%mm5;\n" /* Copy u to temp */
365 "psllq $16, %%mm3;\n" /* Shift y */
366 "por %%mm4, %%mm1;\n" /* Overlay new v byte 0x000000vv00vv0000 */
367 "psllq $16, %%mm5;\n" /* Shift u */
368 "por %%mm3, %%mm0;\n" /* Overlay new y byte 0x000000yy00yy00yy */
369 "por %%mm5, %%mm2;\n" /* Overlay new u byte 0x0000000000uu00uu */
371 /* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */
372 "pmullw mpeg3_MMX_601_Y_COEF, %%mm0;\n" /* Scale y coeffs */
373 "psubw mpeg3_MMX_V_80_RGB, %%mm1;\n" /* Subtract 128 from v 0x000000vv00vv0000 */
374 "pmullw mpeg3_MMX_V_COEF_RGB, %%mm1;\n" /* Multiply v coeffs 0x0000vvvvvvvv0000 */
375 "psubw mpeg3_MMX_U_80_RGB, %%mm2;\n" /* Subtract 128 from u 0x0000000000uu00uu */
376 "pmullw mpeg3_MMX_U_COEF_RGB, %%mm2;\n" /* Multiply u coeffs 0x0000uuuuuuuuuuuu */
378 /* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */
379 "paddsw %%mm1, %%mm0;\n" /* Add v to result */
380 "paddsw %%mm2, %%mm0;\n" /* Add u to result 0x0000bbbbggggrrrr */
381 "psraw $6, %%mm0;\n" /* Demote precision */
382 "packuswb %%mm0, %%mm0;\n" /* Pack into RGBA 0x0000000000bbggrr */
383 "movd %%mm0, (%3);\n" /* Store output */
385 : "r" (&y), "r" (&v), "r" (&u), "r" (output));
388 #endif
/*
 * Loop scaffolding for mpeg3video_ditherframe().
 *
 * Every *_HEAD macro opens a loop body and the matching *_TAIL macro closes
 * it, so they must always be used in pairs.  They expand against locals of
 * the enclosing function: video, src, output_rows, h, w, y_in, cb_in, cr_in,
 * data, step, uv_subscript and y_l / r_l / g_l / b_l.
 */

/* Per output row: point y_in / cb_in / cr_in at the source row selected by
 * y_table[h] (chroma rows are halved for 4:2:0) and data at the output row. */
#define DITHER_ROW_HEAD \
	for(h = 0; h < video->out_h; h++) \
	{ \
		y_in = &src[0][(video->y_table[h] + video->in_y) * \
			video->coded_picture_width] + \
			video->in_x; \
		if(video->chroma_format == CHROMA420) \
		{ \
			cb_in = &src[1][((video->y_table[h] + video->in_y) >> 1) * \
				video->chrom_width] + \
				(video->in_x >> 1); \
			cr_in = &src[2][((video->y_table[h] + video->in_y) >> 1) * \
				video->chrom_width] + \
				(video->in_x >> 1); \
		} \
		else \
		{ \
			cb_in = &src[1][(video->y_table[h] + video->in_y) * \
				video->chrom_width] + \
				(video->in_x >> 1); \
			cr_in = &src[2][(video->y_table[h] + video->in_y) * \
				video->chrom_width] + \
				(video->in_x >> 1); \
		} \
		data = output_rows[h];

#define DITHER_ROW_TAIL \
	}

/* Per output pixel with horizontal scaling: look the source column up in
 * x_table and convert through the 16.16 fixed-point tables. */
#define DITHER_SCALE_HEAD \
	for(w = 0; w < video->out_w; w++) \
	{ \
		uv_subscript = video->x_table[w] / 2; \
		y_l = y_in[video->x_table[w]]; \
		y_l <<= 16; \
		r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \
		g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \
		b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16;

/* As DITHER_SCALE_HEAD, with luma first remapped through mpeg3_601_to_rgb. */
#define DITHER_SCALE_601_HEAD \
	for(w = 0; w < video->out_w; w++) \
	{ \
		uv_subscript = video->x_table[w] / 2; \
		y_l = mpeg3_601_to_rgb[y_in[video->x_table[w]]]; \
		y_l <<= 16; \
		r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \
		g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \
		b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16;

#define DITHER_SCALE_TAIL \
	}

/* Per output pixel with horizontal scaling, MMX variant: only the chroma
 * subscript is computed here; the pixel itself is produced by the caller. */
#define DITHER_MMX_SCALE_HEAD \
	for(w = 0; w < video->out_w; w++) \
	{ \
		uv_subscript = video->x_table[w] / 2;

#define DITHER_MMX_SCALE_TAIL \
		data += step; \
	}

/* Unscaled MMX variant: two pixels per iteration share one chroma sample. */
#define DITHER_MMX_HEAD \
	for(w = 0; w < video->out_w; w += 2) \
	{

#define DITHER_MMX_TAIL \
		data += step; \
		cr_in++; \
		cb_in++; \
	}

/* Per source pixel, unscaled: convert through the 16.16 fixed-point tables. */
#define DITHER_HEAD \
	for(w = 0; w < video->horizontal_size; w++) \
	{ \
		y_l = *y_in++; \
		y_l <<= 16; \
		r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \
		g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \
		b_l = (y_l + video->cb_to_b[*cb_in]) >> 16;

/* As DITHER_HEAD, with luma first remapped through mpeg3_601_to_rgb. */
#define DITHER_601_HEAD \
	for(w = 0; w < video->horizontal_size; w++) \
	{ \
		y_l = mpeg3_601_to_rgb[*y_in++]; \
		y_l <<= 16; \
		r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \
		g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \
		b_l = (y_l + video->cb_to_b[*cb_in]) >> 16;

/* Chroma advances once for every second luma sample. */
#define DITHER_TAIL \
		if(w & 1) \
		{ \
			cr_in++; \
			cb_in++; \
		} \
	}
/*
 * Pixel store helpers for mpeg3video_ditherframe().  Each one clamps the
 * r_l / g_l / b_l locals with CLIP() and writes one pixel through the data
 * (or data_s) pointer, advancing it past the pixel.
 */
#define STORE_PIXEL_BGR888 \
	*data++ = CLIP(b_l); \
	*data++ = CLIP(g_l); \
	*data++ = CLIP(r_l);

#define STORE_PIXEL_BGRA8888 \
	*data++ = CLIP(b_l); \
	*data++ = CLIP(g_l); \
	*data++ = CLIP(r_l); \
	*data++ = 0;

/* The result of a cast is not an lvalue, so the 16-bit store and the pointer
 * advance are written as two separate statements. */
#define STORE_PIXEL_RGB565 \
	*(unsigned short*)data = \
		((CLIP(r_l) & 0xf8) << 8) | \
		((CLIP(g_l) & 0xfc) << 3) | \
		((CLIP(b_l) & 0xf8) >> 3); \
	data += sizeof(unsigned short);

#define STORE_PIXEL_RGB888 \
	*data++ = CLIP(r_l); \
	*data++ = CLIP(g_l); \
	*data++ = CLIP(b_l);

#define STORE_PIXEL_RGBA8888 \
	*data++ = CLIP(r_l); \
	*data++ = CLIP(g_l); \
	*data++ = CLIP(b_l); \
	*data++ = 0;

/* Writes the clamped 8-bit values into 16-bit slots through data_s. */
#define STORE_PIXEL_RGBA16161616 \
	*data_s++ = CLIP(r_l); \
	*data_s++ = CLIP(g_l); \
	*data_s++ = CLIP(b_l); \
	*data_s++ = 0;
524 /* Only good for YUV 4:2:0 */
525 int mpeg3video_ditherframe(mpeg3video_t *video,
526 unsigned char **src,
527 unsigned char **output_rows)
529 int h = 0;
530 register unsigned char *y_in, *cb_in, *cr_in;
531 long y_l, r_l, b_l, g_l;
532 register unsigned char *data;
533 register int uv_subscript, step, w = -1;
537 #ifdef HAVE_MMX
538 /* =================================== MMX ===================================== */
539 if(/* video->have_mmx */ 1 &&
540 video->out_w == video->horizontal_size &&
541 video->out_h == video->vertical_size &&
542 video->in_w == video->out_w &&
543 video->in_h == video->out_h &&
544 video->in_x == 0 &&
545 video->in_y == 0 &&
546 (video->color_model == MPEG3_RGB565 ||
547 video->color_model == MPEG3_601_RGB565) &&
548 video->chroma_format == CHROMA420)
550 /* Unscaled 16 bit from NIST */
551 mpeg3video_rgb16_mmx(src[0],
552 src[2],
553 src[1],
554 output_rows[0],
555 video->out_h,
556 video->out_w,
557 (output_rows[1] - output_rows[0]) / 2 - video->out_w);
559 else
560 if(/* video->have_mmx */ 1 &&
561 (video->color_model == MPEG3_BGRA8888 ||
562 video->color_model == MPEG3_BGR888 ||
563 /* video->color_model == MPEG3_RGB888 || */
564 video->color_model == MPEG3_RGBA8888 ||
565 video->color_model == MPEG3_601_BGR888 ||
566 video->color_model == MPEG3_601_BGRA8888 ||
567 video->color_model == MPEG3_601_RGB888 ||
568 video->color_model == MPEG3_601_RGBA8888))
570 /* Original MMX */
571 if(video->color_model == MPEG3_BGRA8888 ||
572 video->color_model == MPEG3_RGBA8888 ||
573 video->color_model == MPEG3_601_BGRA8888 ||
574 video->color_model == MPEG3_601_RGBA8888) step = 4;
575 else
576 if(video->color_model == MPEG3_BGR888 ||
577 video->color_model == MPEG3_RGB888 ||
578 video->color_model == MPEG3_601_BGR888 ||
579 video->color_model == MPEG3_601_RGB888) step = 3;
581 DITHER_ROW_HEAD
582 /* Transfer row with scaling */
583 if(video->out_w != video->horizontal_size)
585 switch(video->color_model)
587 case MPEG3_BGRA8888:
588 case MPEG3_BGR888:
589 DITHER_MMX_SCALE_HEAD
590 mpeg3_bgra32_mmx(y_in[video->x_table[w]],
591 cr_in[uv_subscript],
592 cb_in[uv_subscript],
593 (unsigned long*)data);
594 DITHER_MMX_SCALE_TAIL
595 break;
597 case MPEG3_601_BGRA8888:
598 case MPEG3_601_BGR888:
599 DITHER_MMX_SCALE_HEAD
600 mpeg3_601_bgra32_mmx(y_in[video->x_table[w]],
601 cr_in[uv_subscript],
602 cb_in[uv_subscript],
603 (unsigned long*)data);
604 DITHER_MMX_SCALE_TAIL
605 break;
607 case MPEG3_RGBA8888:
608 case MPEG3_RGB888:
609 DITHER_MMX_SCALE_HEAD
610 mpeg3_rgba32_mmx(y_in[video->x_table[w]],
611 cr_in[uv_subscript],
612 cb_in[uv_subscript],
613 (unsigned long*)data);
614 DITHER_MMX_SCALE_TAIL
615 break;
617 case MPEG3_601_RGBA8888:
618 case MPEG3_601_RGB888:
619 DITHER_MMX_SCALE_HEAD
620 mpeg3_601_rgba32_mmx(y_in[video->x_table[w]],
621 cr_in[uv_subscript],
622 cb_in[uv_subscript],
623 (unsigned long*)data);
624 DITHER_MMX_SCALE_TAIL
625 break;
628 else
629 /* Transfer row unscaled */
631 switch(video->color_model)
633 /* MMX byte swap 24 and 32 bit */
634 case MPEG3_BGRA8888:
635 case MPEG3_BGR888:
636 DITHER_MMX_HEAD
637 mpeg3_bgra32_mmx(*y_in++,
638 *cr_in,
639 *cb_in,
640 (unsigned long*)data);
641 data += step;
642 mpeg3_bgra32_mmx(*y_in++,
643 *cr_in,
644 *cb_in,
645 (unsigned long*)data);
646 DITHER_MMX_TAIL
647 break;
649 /* MMX 601 byte swap 24 and 32 bit */
650 case MPEG3_601_BGRA8888:
651 case MPEG3_601_BGR888:
652 DITHER_MMX_HEAD
653 mpeg3_601_bgra32_mmx(*y_in++,
654 *cr_in,
655 *cb_in,
656 (unsigned long*)data);
657 data += step;
658 mpeg3_601_bgra32_mmx(*y_in++,
659 *cr_in,
660 *cb_in,
661 (unsigned long*)data);
662 DITHER_MMX_TAIL
663 break;
665 /* MMX 24 and 32 bit no byte swap */
666 case MPEG3_RGBA8888:
667 case MPEG3_RGB888:
668 DITHER_MMX_HEAD
669 mpeg3_rgba32_mmx(*y_in++,
670 *cr_in,
671 *cb_in,
672 (unsigned long*)data);
673 data += step;
674 mpeg3_rgba32_mmx(*y_in++,
675 *cr_in,
676 *cb_in,
677 (unsigned long*)data);
678 DITHER_MMX_TAIL
679 break;
681 /* MMX 601 24 and 32 bit no byte swap */
682 case MPEG3_601_RGBA8888:
683 case MPEG3_601_RGB888:
684 DITHER_MMX_HEAD
685 mpeg3_601_rgba32_mmx(*y_in++,
686 *cr_in,
687 *cb_in,
688 (unsigned long*)data);
689 data += step;
690 mpeg3_601_rgba32_mmx(*y_in++,
691 *cr_in,
692 *cb_in,
693 (unsigned long*)data);
694 DITHER_MMX_TAIL
695 break;
698 DITHER_ROW_TAIL
700 else
701 #endif
702 /* ================================== NO MMX ==================================== */
704 DITHER_ROW_HEAD
705 /* Transfer row with scaling */
706 if(video->out_w != video->horizontal_size)
708 switch(video->color_model)
710 case MPEG3_BGR888:
711 DITHER_SCALE_HEAD
712 STORE_PIXEL_BGR888
713 DITHER_SCALE_TAIL
714 break;
715 case MPEG3_BGRA8888:
716 DITHER_SCALE_HEAD
717 STORE_PIXEL_BGRA8888
718 DITHER_SCALE_TAIL
719 break;
720 case MPEG3_RGB565:
721 DITHER_SCALE_HEAD
722 STORE_PIXEL_RGB565
723 DITHER_SCALE_TAIL
724 break;
725 case MPEG3_RGB888:
726 DITHER_SCALE_HEAD
727 STORE_PIXEL_RGB888
728 DITHER_SCALE_TAIL
729 break;
730 case MPEG3_RGBA8888:
731 DITHER_SCALE_HEAD
732 STORE_PIXEL_RGBA8888
733 DITHER_SCALE_TAIL
734 break;
735 case MPEG3_601_BGR888:
736 DITHER_SCALE_601_HEAD
737 STORE_PIXEL_BGR888
738 DITHER_SCALE_TAIL
739 break;
740 case MPEG3_601_BGRA8888:
741 DITHER_SCALE_601_HEAD
742 STORE_PIXEL_BGRA8888
743 DITHER_SCALE_TAIL
744 break;
745 case MPEG3_601_RGB565:
746 DITHER_SCALE_601_HEAD
747 STORE_PIXEL_RGB565
748 DITHER_SCALE_TAIL
749 break;
750 case MPEG3_601_RGB888:
751 DITHER_SCALE_601_HEAD
752 STORE_PIXEL_RGB888
753 DITHER_SCALE_TAIL
754 break;
755 case MPEG3_601_RGBA8888:
756 DITHER_SCALE_601_HEAD
757 STORE_PIXEL_RGBA8888
758 DITHER_SCALE_TAIL
759 break;
760 case MPEG3_RGBA16161616:
762 register unsigned short *data_s = (unsigned short*)data;
763 DITHER_SCALE_HEAD
764 STORE_PIXEL_RGBA16161616
765 DITHER_SCALE_TAIL
767 break;
770 else
772 /* Transfer row unscaled */
773 switch(video->color_model)
775 case MPEG3_BGR888:
776 DITHER_HEAD
777 STORE_PIXEL_BGR888
778 DITHER_TAIL
779 break;
780 case MPEG3_BGRA8888:
781 DITHER_HEAD
782 STORE_PIXEL_BGRA8888
783 DITHER_TAIL
784 break;
785 case MPEG3_RGB565:
786 DITHER_HEAD
787 STORE_PIXEL_RGB565
788 DITHER_TAIL
789 break;
790 case MPEG3_RGB888:
791 DITHER_HEAD
792 STORE_PIXEL_RGB888
793 DITHER_TAIL
794 break;
795 case MPEG3_RGBA8888:
796 DITHER_HEAD
797 STORE_PIXEL_RGBA8888
798 DITHER_TAIL
799 break;
800 case MPEG3_601_BGR888:
801 DITHER_601_HEAD
802 STORE_PIXEL_BGR888
803 DITHER_TAIL
804 break;
805 case MPEG3_601_BGRA8888:
806 DITHER_601_HEAD
807 STORE_PIXEL_BGRA8888
808 DITHER_TAIL
809 break;
810 case MPEG3_601_RGB565:
811 DITHER_601_HEAD
812 STORE_PIXEL_RGB565
813 DITHER_TAIL
814 break;
815 case MPEG3_601_RGB888:
816 DITHER_601_HEAD
817 STORE_PIXEL_RGB888
818 DITHER_TAIL
819 break;
820 case MPEG3_601_RGBA8888:
821 DITHER_601_HEAD
822 STORE_PIXEL_RGBA8888
823 DITHER_TAIL
824 break;
825 case MPEG3_RGBA16161616:
827 register unsigned short *data_s = (unsigned short*)data;
828 DITHER_HEAD
829 STORE_PIXEL_RGBA16161616
830 DITHER_TAIL
832 break;
835 DITHER_ROW_TAIL
836 } /* End of non-MMX */
838 #ifdef HAVE_MMX
839 if(video->have_mmx)
840 __asm__ __volatile__ ("emms");
841 #endif
842 return 0;
845 int mpeg3video_ditherframe444(mpeg3video_t *video, unsigned char *src[])
847 return 0;
850 int mpeg3video_dithertop(mpeg3video_t *video, unsigned char *src[])
852 return mpeg3video_ditherframe(video, src, video->output_rows);
855 int mpeg3video_dithertop444(mpeg3video_t *video, unsigned char *src[])
857 return 0;
860 int mpeg3video_ditherbot(mpeg3video_t *video, unsigned char *src[])
862 return 0;
865 int mpeg3video_ditherbot444(mpeg3video_t *video, unsigned char *src[])
867 return 0;
/*
 * Copy len bytes from input to output.  The regions must not overlap.
 *
 * This is a thin wrapper over memcpy().  A zero or negative len copies
 * nothing, rather than letting a negative value be converted to a huge
 * size_t.
 */
void memcpy_fast(unsigned char *output, unsigned char *input, long len)
{
	if(len <= 0) return;

	memcpy(output, input, (size_t)len);
}
896 int mpeg3video_init_output()
898 int i, value;
899 for(i = 0; i < 256; i++)
901 value = (int)(1.1644 * i - 255 * 0.0627 + 0.5);
902 if(value < 0) value = 0;
903 else
904 if(value > 255) value = 255;
905 mpeg3_601_to_rgb[i] = value;
907 return 0;
910 int mpeg3video_present_frame(mpeg3video_t *video)
912 int i, j, k, l;
913 unsigned char **src = video->output_src;
915 /* Copy YUV buffers */
916 if(video->want_yvu)
918 long size0, size1;
919 long offset0, offset1;
920 int chroma_denominator;
922 if(video->chroma_format == CHROMA420)
923 chroma_denominator = 2;
924 else
925 chroma_denominator = 1;
927 /* Drop a frame */
928 if(!video->y_output) return 0;
930 /* Copy a frame */
931 /* Three blocks */
932 if(video->in_x == 0 &&
933 video->in_w >= video->coded_picture_width &&
934 video->row_span == video->coded_picture_width)
936 size0 = video->coded_picture_width * video->in_h;
937 size1 = video->chrom_width * (int)((float)video->in_h / chroma_denominator + 0.5);
938 offset0 = video->coded_picture_width * video->in_y;
939 offset1 = video->chrom_width * (int)((float)video->in_y / chroma_denominator + 0.5);
941 printf("mpeg3video_present_frame 1\n");
943 * if(video->in_y > 0)
945 * offset[1] += video->chrom_width / 2;
946 * size[1] += video->chrom_width / 2;
950 memcpy(video->y_output, src[0] + offset0, size0);
951 memcpy(video->u_output, src[1] + offset1, size1);
952 memcpy(video->v_output, src[2] + offset1, size1);
954 else
955 /* One block per row */
957 //printf("mpeg3video_present_frame 2 %d %d %d\n", video->in_w, video->coded_picture_width, video->chrom_width);
958 int row_span = video->in_w;
959 int row_span0;
960 int row_span1;
962 if(video->row_span)
963 row_span = video->row_span;
965 row_span0 = row_span;
966 row_span1 = (row_span >> 1);
967 size0 = video->in_w;
968 size1 = (video->in_w >> 1);
969 offset0 = video->coded_picture_width * video->in_y;
970 offset1 = video->chrom_width * video->in_y / chroma_denominator;
972 for(i = 0; i < video->in_h; i++)
974 memcpy(video->y_output + i * row_span0,
975 src[0] + offset0 + video->in_x,
976 size0);
978 offset0 += video->coded_picture_width;
980 if(chroma_denominator == 1 || !(i % 2))
982 memcpy(video->u_output + i / chroma_denominator * row_span1,
983 src[1] + offset1 + (video->in_x >> 1),
984 size1);
985 memcpy(video->v_output + i / chroma_denominator * row_span1,
986 src[2] + offset1 + (video->in_x >> 1),
987 size1);
988 if(video->horizontal_size < video->in_w)
990 memset(video->u_output +
991 i / chroma_denominator * row_span1 +
992 (video->horizontal_size >> 1),
993 0x80,
994 (video->in_w >> 1) -
995 (video->horizontal_size >> 1));
996 memset(video->v_output +
997 i / chroma_denominator * row_span1 +
998 (video->horizontal_size >> 1),
999 0x80,
1000 (video->in_w >> 1) -
1001 (video->horizontal_size >> 1));
1006 if(chroma_denominator == 1 || (i % 2))
1007 offset1 += video->chrom_width;
1011 return 0;
1014 /* Want RGB buffer */
1015 /* Copy the frame to the output with YUV to RGB conversion */
1016 if(video->prog_seq)
1018 if(video->chroma_format != CHROMA444)
1020 mpeg3video_ditherframe(video, src, video->output_rows);
1022 else
1023 mpeg3video_ditherframe444(video, src);
1025 else
1027 if((video->pict_struct == FRAME_PICTURE && video->topfirst) ||
1028 video->pict_struct == BOTTOM_FIELD)
1030 /* top field first */
1031 if(video->chroma_format != CHROMA444)
1033 mpeg3video_dithertop(video, src);
1034 mpeg3video_ditherbot(video, src);
1036 else
1038 mpeg3video_dithertop444(video, src);
1039 mpeg3video_ditherbot444(video, src);
1042 else
1044 /* bottom field first */
1045 if(video->chroma_format != CHROMA444)
1047 mpeg3video_ditherbot(video, src);
1048 mpeg3video_dithertop(video, src);
1050 else
1052 mpeg3video_ditherbot444(video, src);
1053 mpeg3video_dithertop444(video, src);
1057 return 0;
1060 int mpeg3video_display_second_field(mpeg3video_t *video)
1062 /* Not used */
1063 return 0;