2 * generic alpha renderers for all YUV modes and RGB depths
3 * Optimized by Nick and Michael.
5 * This file is part of MPlayer.
7 * MPlayer is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * MPlayer is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
/* Prefetch/average macro selection for the per-CPU template build
 * (3DNow! variant, MMX2 variant, and a no-op fallback).
 * NOTE(review): the #if / #elif / #else guards that separate these three
 * variants are not visible in this chunk -- as shown the macros would
 * simply redefine each other; confirm against the full file. */
28 #define PREFETCH "prefetch"
29 #define PREFETCHW "prefetchw"
30 #define PAVGB "pavgusb"
32 #define PREFETCH "prefetchnta"
33 #define PREFETCHW "prefetcht0"
36 #define PREFETCH " # nop"
37 #define PREFETCHW " # nop"
41 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
47 static inline void RENAME(vo_draw_alpha_yv12
/* Blend an OSD bitmap onto one 8-bit luma plane: for each pixel with a
 * non-zero alpha, dst = ((dst * srca) >> 8) + src (see the C fallback
 * below).  src = OSD pixel values, srca = per-pixel alpha, w/h = OSD
 * rectangle size, srcstride/dststride = row pitches of the two buffers.
 * NOTE(review): this chunk is missing interior lines (the x/y loop
 * headers, the __asm__ volatile( openers and closing braces), so the
 * fragments below are not complete statements -- verify against the
 * full file before editing. */
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
/* MMX constant setup: mm5 starts as all-ones (pcmpeqb reg,reg), then
 * becomes the odd-byte word mask FF00..., mm4 the even-byte word mask
 * 00FF...; mm7 keeps an all-ones copy used below. */
51 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
52 "movq %%mm5, %%mm4\n\t"
53 "movq %%mm5, %%mm7\n\t"
54 "psllw $8, %%mm5\n\t" //FF00FF00FF00
55 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
/* Operand/clobber tail of the (truncated) setup asm statement. */
65 ::"m"(*dstbase
),"m"(*srca
),"m"(*src
):"memory");
/* MMX inner-loop body (8 pixels): split dst lumas into even/odd words
 * (mm0/mm1), do the same with the alpha bytes (mm2/mm3; paddb with the
 * all-ones mm7 subtracts 1 from each alpha byte, mod 256), multiply
 * word-wise, then merge the two halves back with the mm5 mask. */
74 "movq %0, %%mm0\n\t" // dstbase
75 "movq %%mm0, %%mm1\n\t"
76 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
77 "psrlw $8, %%mm1\n\t" //0Y0Y0Y0Y
78 "movq %1, %%mm2\n\t" //srca HGFEDCBA
79 "paddb %%mm7, %%mm2\n\t"
80 "movq %%mm2, %%mm3\n\t"
81 "pand %%mm4, %%mm2\n\t" //0G0E0C0A
82 "psrlw $8, %%mm3\n\t" //0H0F0D0B
83 "pmullw %%mm2, %%mm0\n\t"
84 "pmullw %%mm3, %%mm1\n\t"
86 "pand %%mm5, %%mm1\n\t"
87 "por %%mm1, %%mm0\n\t"
/* Memory operands of the loop-body asm: 8 bytes starting at index x. */
91 :: "m" (dstbase
[x
]), "m" (srca
[x
]), "m" (src
[x
])
/* Plain-C per-pixel fallback: skip pixels whose alpha is zero. */
96 if(srca
[x
]) dstbase
[x
]=((dstbase
[x
]*srca
[x
])>>8)+src
[x
];
/* Leave MMX state (EMMS expands to emms or femms, selected elsewhere). */
104 __asm__
volatile(EMMS:::"memory");
109 static inline void RENAME(vo_draw_alpha_yuy2
/* Blend an OSD bitmap onto a packed YUY2 (Y U Y V) surface: luma bytes
 * (even offsets) get dst = ((dst*srca)>>8) + src, chroma bytes (odd
 * offsets) are pulled toward the neutral value 128 (see the C fallback
 * below).
 * NOTE(review): this chunk is missing interior lines (loop headers,
 * __asm__ volatile( openers, closing braces, and -- in the visible C
 * fallback -- the usual if(srca[x]) guard); verify against the full
 * file before editing. */
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
/* MMX constant setup: mm7 = 0 (unpack helper), mm6 = all-ones
 * (used below as a per-byte -1 via paddb), mm5 = FF00... chroma mask,
 * mm4 = 00FF... luma mask. */
113 "pxor %%mm7, %%mm7\n\t"
114 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
115 "movq %%mm5, %%mm6\n\t"
116 "movq %%mm5, %%mm4\n\t"
117 "psllw $8, %%mm5\n\t" //FF00FF00FF00
118 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
/* Operand tail of the (truncated) setup asm statement. */
128 ::"m"(*dstbase
),"m"(*srca
),"m"(*src
));
/* Sets flags from eax -- presumably 4 alpha bytes were loaded into eax
 * on a missing line and this tests them for an all-zero skip; confirm
 * against the full file. */
132 "orl %%eax, %%eax\n\t"
/* MMX inner-loop body (4 pixels): mask out the lumas into mm0, keep
 * the chroma bytes in mm1 (pand mm5), scale lumas by (alpha-1) and
 * re-merge, then add the unpacked OSD pixels. */
137 "movq %0, %%mm0\n\t" // dstbase
138 "movq %%mm0, %%mm1\n\t"
139 "pand %%mm4, %%mm0\n\t" //0Y0Y0Y0Y
140 "movd %%eax, %%mm2\n\t" //srca 0000DCBA
141 "paddb %%mm6, %%mm2\n\t"
142 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
143 "pmullw %%mm2, %%mm0\n\t"
144 "psrlw $8, %%mm0\n\t"
145 "pand %%mm5, %%mm1\n\t" //U0V0U0V0
146 "movd %2, %%mm2\n\t" //src 0000DCBA
147 "punpcklbw %%mm7, %%mm2\n\t" //srca 0D0C0B0A
148 "por %%mm1, %%mm0\n\t"
149 "paddb %%mm2, %%mm0\n\t"
/* Memory operands: dst advances 2 bytes per pixel (x*2), src/srca 1. */
152 :: "m" (dstbase
[x
*2]), "m" (srca
[x
]), "m" (src
[x
])
/* Plain-C fallback, luma byte: standard alpha blend. */
158 dstbase
[2*x
]=((dstbase
[2*x
]*srca
[x
])>>8)+src
[x
];
/* Plain-C fallback, chroma byte: scale the offset from neutral 128 by
 * the alpha, pulling chroma toward grey under the OSD. */
159 dstbase
[2*x
+1]=((((signed)dstbase
[2*x
+1]-128)*srca
[x
])>>8)+128;
/* Leave MMX state (EMMS expands to emms or femms, selected elsewhere). */
168 __asm__
volatile(EMMS:::"memory");
173 static inline void RENAME(vo_draw_alpha_rgb24
/* Blend an OSD bitmap onto a packed 24-bit RGB surface: each of the
 * three bytes of a pixel gets dst = ((dst*srca)>>8) + src (see the C
 * fallback below).  A `dst` byte pointer walks the row; its advance
 * (dst += 3 per pixel) is on lines missing from this chunk.
 * NOTE(review): this chunk is missing interior lines (loop headers,
 * __asm__ volatile( openers, closing braces, and the addl lines that
 * add src into the scalar path); verify against the full file. */
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
/* MMX constant setup: mm7 = 0 (unpack helper), mm6 = all-ones (per-byte
 * -1 via paddb below). */
177 "pxor %%mm7, %%mm7\n\t"
178 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
/* Row pointer into the destination surface. */
182 register unsigned char *dst
= dstbase
;
184 #if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX)
/* Operand/clobber tail of a (truncated) prefetch asm statement. */
190 ::"m"(*dst
),"m"(*srca
),"m"(*src
):"memory");
/* Skip the blend when both pixels of the pair are fully transparent. */
192 if(srca
[x
] || srca
[x
+1])
/* MMX body: blend two adjacent RGB24 pixels (6 bytes) at once.  The
 * alpha pair is spread to AAABBBB0 byte lanes to line up with the
 * packed 3-byte pixels; mm5 keeps the original dst qword, and the
 * mask24hl/mask24lh operands (external constants) are presumably used
 * on missing lines to splice the 6 blended bytes back -- confirm. */
197 "movq %0, %%mm0\n\t" // dstbase
198 "movq %%mm0, %%mm1\n\t"
199 "movq %%mm0, %%mm5\n\t"
200 "punpcklbw %%mm7, %%mm0\n\t"
201 "punpckhbw %%mm7, %%mm1\n\t"
202 "movd %1, %%mm2\n\t" // srca ABCD0000
203 "paddb %%mm6, %%mm2\n\t"
204 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
205 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
206 "psrlq $8, %%mm2\n\t" // srca AAABBBB0
207 "movq %%mm2, %%mm3\n\t"
208 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0B
209 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B00
210 "pmullw %%mm2, %%mm0\n\t"
211 "pmullw %%mm3, %%mm1\n\t"
212 "psrlw $8, %%mm0\n\t"
213 "psrlw $8, %%mm1\n\t"
214 "packuswb %%mm1, %%mm0\n\t"
215 "movd %2, %%mm2 \n\t" // src ABCD0000
216 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
217 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
218 "psrlq $8, %%mm2\n\t" // src AAABBBB0
219 "paddb %%mm2, %%mm0\n\t"
222 "por %%mm0, %%mm5\n\t"
/* Memory operands: current dst bytes, the two alphas/pixels at x, and
 * the 24-bit splice masks. */
224 :: "m" (dst
[0]), "m" (srca
[x
]), "m" (src
[x
]), "m"(mask24hl
), "m"(mask24lh
));
/* Scalar x86 path (no MMX): multiply each dst byte by the alpha and
 * keep the high byte of the 16-bit product (movb %%ch / %%ah); the
 * addl lines that add in (src[x]<<8) sit on missing lines. */
231 "movzbl (%0), %%ecx\n\t"
232 "movzbl 1(%0), %%eax\n\t"
234 "imull %1, %%ecx\n\t"
235 "imull %1, %%eax\n\t"
240 "movb %%ch, (%0)\n\t"
241 "movb %%ah, 1(%0)\n\t"
243 "movzbl 2(%0), %%eax\n\t"
244 "imull %1, %%eax\n\t"
246 "movb %%ah, 2(%0)\n\t"
/* Scalar-path inputs: alpha, and src pre-shifted into the high byte. */
249 "r" ((unsigned)srca
[x
]),
250 "r" (((unsigned)src
[x
])<<8)
256 #endif /* !HAVE_MMX */
257 #else /*non x86 arch or x86_64 with MMX disabled */
/* Portable C fallback: blend the three colour bytes independently. */
260 dst
[0]=((dst
[0]*srca
[x
])>>8)+src
[x
];
261 dst
[1]=((dst
[1]*srca
[x
])>>8)+src
[x
];
262 dst
[2]=((dst
[2]*srca
[x
])>>8)+src
[x
];
266 #endif /* arch_x86 */
/* Leave MMX state (EMMS expands to emms or femms, selected elsewhere). */
272 __asm__
volatile(EMMS:::"memory");
277 static inline void RENAME(vo_draw_alpha_rgb32
/* Blend an OSD bitmap onto a packed 32-bit RGB surface: the first three
 * bytes of each 4-byte pixel get dst = ((dst*srca)>>8) + src; the
 * fourth byte is untouched in the visible fallback lines.  Two MMX
 * variants exist (selected by HAVE_AMD3DNOW) plus a scalar x86 path
 * and a portable C path.
 * NOTE(review): this chunk is missing interior lines (loop headers,
 * __asm__ volatile( openers, closing braces, several #if guards);
 * verify against the full file before editing. */
)(int w
,int h
, unsigned char* src
, unsigned char *srca
, int srcstride
, unsigned char* dstbase
,int dststride
){
/* Variant A setup (3DNow! build): mm7 = 0, mm6 = all-ones. */
285 "pxor %%mm7, %%mm7\n\t"
286 "pcmpeqb %%mm6, %%mm6\n\t" // F..F
288 #else /* HAVE_AMD3DNOW */
/* Variant B setup: mm7 = 0, mm5 = FF00... mask, mm4 = 00FF... mask. */
290 "pxor %%mm7, %%mm7\n\t"
291 "pcmpeqb %%mm5, %%mm5\n\t" // F..F
292 "movq %%mm5, %%mm4\n\t"
293 "psllw $8, %%mm5\n\t" //FF00FF00FF00
294 "psrlw $8, %%mm4\n\t" //00FF00FF00FF
296 #endif /* HAVE_AMD3DNOW */
297 #endif /* HAVE_MMX */
300 #if ARCH_X86 && (!ARCH_X86_64 || HAVE_MMX)
/* Operand/clobber tail of a (truncated) prefetch asm statement. */
307 ::"m"(*dstbase
),"m"(*srca
),"m"(*src
):"memory");
/* Skip the blend when both pixels of the pair are fully transparent. */
309 if(srca
[x
] || srca
[x
+1])
/* Variant A body: blend two 32-bit pixels.  dst bytes are unpacked to
 * words in mm0/mm1, the two alphas are replicated across each pixel's
 * four lanes (paddb with all-ones mm6 = alpha-1 per byte), multiplied,
 * repacked, then the replicated src bytes are added. */
314 "movq %0, %%mm0\n\t" // dstbase
315 "movq %%mm0, %%mm1\n\t"
316 "punpcklbw %%mm7, %%mm0\n\t"
317 "punpckhbw %%mm7, %%mm1\n\t"
318 "movd %1, %%mm2\n\t" // srca ABCD0000
319 "paddb %%mm6, %%mm2\n\t"
320 "punpcklbw %%mm2, %%mm2\n\t" // srca AABBCCDD
321 "punpcklbw %%mm2, %%mm2\n\t" // srca AAAABBBB
322 "movq %%mm2, %%mm3\n\t"
323 "punpcklbw %%mm7, %%mm2\n\t" // srca 0A0A0A0A
324 "punpckhbw %%mm7, %%mm3\n\t" // srca 0B0B0B0B
325 "pmullw %%mm2, %%mm0\n\t"
326 "pmullw %%mm3, %%mm1\n\t"
327 "psrlw $8, %%mm0\n\t"
328 "psrlw $8, %%mm1\n\t"
329 "packuswb %%mm1, %%mm0\n\t"
330 "movd %2, %%mm2 \n\t" // src ABCD0000
331 "punpcklbw %%mm2, %%mm2\n\t" // src AABBCCDD
332 "punpcklbw %%mm2, %%mm2\n\t" // src AAAABBBB
333 "paddb %%mm2, %%mm0\n\t"
/* Memory operands: dst advances 4 bytes per pixel (4*x). */
335 :: "m" (dstbase
[4*x
]), "m" (srca
[x
]), "m" (src
[x
]));
337 #else //this is faster for intels crap
/* Operand/clobber tail of a (truncated) prefetch asm statement. */
342 ::"m"(*dstbase
),"m"(*srca
),"m"(*src
):"memory");
/* Sets flags from eax -- presumably 4 alpha bytes were loaded into eax
 * on a missing line and this tests them for an all-zero skip; confirm
 * against the full file. */
346 "orl %%eax, %%eax\n\t"
/* Variant B body, first qword (2 pixels): split dst into masked even
 * bytes (mm0) and odd bytes (mm1), scale both by the duplicated alpha
 * (paddb %3, i.e. the bFF constant = alpha-1 per byte), merge via the
 * mm5 mask, then add the duplicated src bytes. */
351 "movq %0, %%mm0\n\t" // dstbase
352 "movq %%mm0, %%mm1\n\t"
353 "pand %%mm4, %%mm0\n\t" //0R0B0R0B
354 "psrlw $8, %%mm1\n\t" //0?0G0?0G
355 "movd %%eax, %%mm2\n\t" //srca 0000DCBA
356 "paddb %3, %%mm2\n\t"
357 "punpcklbw %%mm2, %%mm2\n\t" //srca DDCCBBAA
358 "movq %%mm2, %%mm3\n\t"
359 "punpcklbw %%mm7, %%mm2\n\t" //srca 0B0B0A0A
360 "pmullw %%mm2, %%mm0\n\t"
361 "pmullw %%mm2, %%mm1\n\t"
362 "psrlw $8, %%mm0\n\t"
363 "pand %%mm5, %%mm1\n\t"
364 "por %%mm1, %%mm0\n\t"
365 "movd %2, %%mm2 \n\t" //src 0000DCBA
366 "punpcklbw %%mm2, %%mm2\n\t" //src DDCCBBAA
367 "movq %%mm2, %%mm6\n\t"
368 "punpcklbw %%mm2, %%mm2\n\t" //src BBBBAAAA
369 "paddb %%mm2, %%mm0\n\t"
/* Variant B body, second qword (pixels 3 and 4 of the group): same
 * blend using the high alpha lanes (mm3) and high src lanes (mm6);
 * result stored back at offset 8 of operand %0. */
372 "movq 8%0, %%mm0\n\t" // dstbase
373 "movq %%mm0, %%mm1\n\t"
374 "pand %%mm4, %%mm0\n\t" //0R0B0R0B
375 "psrlw $8, %%mm1\n\t" //0?0G0?0G
376 "punpckhbw %%mm7, %%mm3\n\t" //srca 0D0D0C0C
377 "pmullw %%mm3, %%mm0\n\t"
378 "pmullw %%mm3, %%mm1\n\t"
379 "psrlw $8, %%mm0\n\t"
380 "pand %%mm5, %%mm1\n\t"
381 "por %%mm1, %%mm0\n\t"
382 "punpckhbw %%mm6, %%mm6\n\t" //src DDDDCCCC
383 "paddb %%mm6, %%mm0\n\t"
384 "movq %%mm0, 8%0\n\t"
/* Memory operands: 4 pixels at 4*x, plus the bFF byte constant. */
386 :: "m" (dstbase
[4*x
]), "m" (srca
[x
]), "m" (src
[x
]), "m" (bFF
)
/* Scalar x86 path: multiply the three colour bytes by the alpha and
 * keep the high byte of each 16-bit product; the addl lines that add
 * in (src[x]<<8) sit on missing lines. */
394 "movzbl (%0), %%ecx\n\t"
395 "movzbl 1(%0), %%eax\n\t"
396 "movzbl 2(%0), %%edx\n\t"
398 "imull %1, %%ecx\n\t"
399 "imull %1, %%eax\n\t"
400 "imull %1, %%edx\n\t"
406 "movb %%ch, (%0)\n\t"
407 "movb %%ah, 1(%0)\n\t"
408 "movb %%dh, 2(%0)\n\t"
/* Scalar-path inputs: pixel address, alpha, and pre-shifted src. */
411 :"r" (&dstbase
[4*x
]),
412 "r" ((unsigned)srca
[x
]),
413 "r" (((unsigned)src
[x
])<<8)
414 :"%eax", "%ecx", "%edx"
418 #endif /* HAVE_MMX */
419 #else /*non x86 arch or x86_64 with MMX disabled */
/* Portable C fallback: blend bytes 0..2 of each 4-byte pixel; byte 3
 * is left unchanged in the lines visible here. */
422 dstbase
[4*x
+0]=((dstbase
[4*x
+0]*srca
[x
])>>8)+src
[x
];
423 dstbase
[4*x
+1]=((dstbase
[4*x
+1]*srca
[x
])>>8)+src
[x
];
424 dstbase
[4*x
+2]=((dstbase
[4*x
+2]*srca
[x
])>>8)+src
[x
];
427 #endif /* arch_x86 */
/* Leave MMX state (EMMS expands to emms or femms, selected elsewhere). */
433 __asm__
volatile(EMMS:::"memory");