2 * software RGB to RGB converter
3 * pluralize by software PAL8 to RGB converter
4 * software YUV to YUV converter
5 * software YUV to RGB converter
6 * Written by Nick Kurshev.
7 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
8 * lot of big-endian byte order fixes by Alex Beregszaszi
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/* CPU-dispatch macro selection for this template instantiation: each
 * COMPILE_TEMPLATE_* flag picks the instruction strings spliced into the
 * inline-asm below (prefetch flavor, packed-average, non-temporal store,
 * store fence).
 * NOTE(review): this chunk is a partial extraction — the matching
 * #else/#endif lines of these conditionals (and the EMMS/femms
 * definitions) are not visible here, and each line carries a stray
 * leading number from the extraction; all code bytes kept identical. */
36 #if COMPILE_TEMPLATE_SSE2
42 #if COMPILE_TEMPLATE_AMD3DNOW
/* 3DNow!: plain "prefetch" and the 3DNow! packed byte average. */
43 #define PREFETCH "prefetch"
44 #define PAVGB "pavgusb"
45 #elif COMPILE_TEMPLATE_MMX2
/* MMX2/SSE: non-temporal prefetch hint. */
46 #define PREFETCH "prefetchnta"
/* Plain-MMX fallback: no prefetch instruction — emit an asm comment nop. */
49 #define PREFETCH " # nop"
52 #if COMPILE_TEMPLATE_AMD3DNOW
53 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
59 #if COMPILE_TEMPLATE_MMX2
/* MMX2: non-temporal quadword store, fenced by sfence after the loops. */
60 #define MOVNTQ "movntq"
61 #define SFENCE "sfence"
/* Fallback: SFENCE becomes an asm comment nop. */
64 #define SFENCE " # nop"
67 static inline void RENAME(rgb24tobgr32
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
70 const uint8_t *s
= src
;
72 #if COMPILE_TEMPLATE_MMX
73 const uint8_t *mm_end
;
76 #if COMPILE_TEMPLATE_MMX
77 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
79 __asm__
volatile("movq %0, %%mm7"::"m"(mask32a
):"memory");
84 "punpckldq 3%1, %%mm0 \n\t"
85 "movd 6%1, %%mm1 \n\t"
86 "punpckldq 9%1, %%mm1 \n\t"
87 "movd 12%1, %%mm2 \n\t"
88 "punpckldq 15%1, %%mm2 \n\t"
89 "movd 18%1, %%mm3 \n\t"
90 "punpckldq 21%1, %%mm3 \n\t"
91 "por %%mm7, %%mm0 \n\t"
92 "por %%mm7, %%mm1 \n\t"
93 "por %%mm7, %%mm2 \n\t"
94 "por %%mm7, %%mm3 \n\t"
95 MOVNTQ
" %%mm0, %0 \n\t"
96 MOVNTQ
" %%mm1, 8%0 \n\t"
97 MOVNTQ
" %%mm2, 16%0 \n\t"
105 __asm__
volatile(SFENCE:::"memory");
106 __asm__
volatile(EMMS:::"memory");
110 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
125 #define STORE_BGR24_MMX \
126 "psrlq $8, %%mm2 \n\t" \
127 "psrlq $8, %%mm3 \n\t" \
128 "psrlq $8, %%mm6 \n\t" \
129 "psrlq $8, %%mm7 \n\t" \
130 "pand "MANGLE(mask24l)", %%mm0\n\t" \
131 "pand "MANGLE(mask24l)", %%mm1\n\t" \
132 "pand "MANGLE(mask24l)", %%mm4\n\t" \
133 "pand "MANGLE(mask24l)", %%mm5\n\t" \
134 "pand "MANGLE(mask24h)", %%mm2\n\t" \
135 "pand "MANGLE(mask24h)", %%mm3\n\t" \
136 "pand "MANGLE(mask24h)", %%mm6\n\t" \
137 "pand "MANGLE(mask24h)", %%mm7\n\t" \
138 "por %%mm2, %%mm0 \n\t" \
139 "por %%mm3, %%mm1 \n\t" \
140 "por %%mm6, %%mm4 \n\t" \
141 "por %%mm7, %%mm5 \n\t" \
143 "movq %%mm1, %%mm2 \n\t" \
144 "movq %%mm4, %%mm3 \n\t" \
145 "psllq $48, %%mm2 \n\t" \
146 "psllq $32, %%mm3 \n\t" \
147 "pand "MANGLE(mask24hh)", %%mm2\n\t" \
148 "pand "MANGLE(mask24hhh)", %%mm3\n\t" \
149 "por %%mm2, %%mm0 \n\t" \
150 "psrlq $16, %%mm1 \n\t" \
151 "psrlq $32, %%mm4 \n\t" \
152 "psllq $16, %%mm5 \n\t" \
153 "por %%mm3, %%mm1 \n\t" \
154 "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
155 "por %%mm5, %%mm4 \n\t" \
157 MOVNTQ" %%mm0, %0 \n\t" \
158 MOVNTQ" %%mm1, 8%0 \n\t" \
162 static inline void RENAME(rgb32tobgr24
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
165 const uint8_t *s
= src
;
167 #if COMPILE_TEMPLATE_MMX
168 const uint8_t *mm_end
;
171 #if COMPILE_TEMPLATE_MMX
172 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
177 "movq %1, %%mm0 \n\t"
178 "movq 8%1, %%mm1 \n\t"
179 "movq 16%1, %%mm4 \n\t"
180 "movq 24%1, %%mm5 \n\t"
181 "movq %%mm0, %%mm2 \n\t"
182 "movq %%mm1, %%mm3 \n\t"
183 "movq %%mm4, %%mm6 \n\t"
184 "movq %%mm5, %%mm7 \n\t"
192 __asm__
volatile(SFENCE:::"memory");
193 __asm__
volatile(EMMS:::"memory");
197 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
213 original by Strepto/Astral
214 ported to gcc & bugfixed: A'rpi
215 MMX2, 3DNOW optimization by Nick Kurshev
216 32-bit C version, and and&add trick by Michael Niedermayer
218 static inline void RENAME(rgb15to16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
220 register const uint8_t* s
=src
;
221 register uint8_t* d
=dst
;
222 register const uint8_t *end
;
223 const uint8_t *mm_end
;
225 #if COMPILE_TEMPLATE_MMX
226 __asm__
volatile(PREFETCH
" %0"::"m"(*s
));
227 __asm__
volatile("movq %0, %%mm4"::"m"(mask15s
));
232 "movq %1, %%mm0 \n\t"
233 "movq 8%1, %%mm2 \n\t"
234 "movq %%mm0, %%mm1 \n\t"
235 "movq %%mm2, %%mm3 \n\t"
236 "pand %%mm4, %%mm0 \n\t"
237 "pand %%mm4, %%mm2 \n\t"
238 "paddw %%mm1, %%mm0 \n\t"
239 "paddw %%mm3, %%mm2 \n\t"
240 MOVNTQ
" %%mm0, %0 \n\t"
248 __asm__
volatile(SFENCE:::"memory");
249 __asm__
volatile(EMMS:::"memory");
253 register unsigned x
= *((const uint32_t *)s
);
254 *((uint32_t *)d
) = (x
&0x7FFF7FFF) + (x
&0x7FE07FE0);
259 register unsigned short x
= *((const uint16_t *)s
);
260 *((uint16_t *)d
) = (x
&0x7FFF) + (x
&0x7FE0);
264 static inline void RENAME(rgb16to15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
266 register const uint8_t* s
=src
;
267 register uint8_t* d
=dst
;
268 register const uint8_t *end
;
269 const uint8_t *mm_end
;
271 #if COMPILE_TEMPLATE_MMX
272 __asm__
volatile(PREFETCH
" %0"::"m"(*s
));
273 __asm__
volatile("movq %0, %%mm7"::"m"(mask15rg
));
274 __asm__
volatile("movq %0, %%mm6"::"m"(mask15b
));
279 "movq %1, %%mm0 \n\t"
280 "movq 8%1, %%mm2 \n\t"
281 "movq %%mm0, %%mm1 \n\t"
282 "movq %%mm2, %%mm3 \n\t"
283 "psrlq $1, %%mm0 \n\t"
284 "psrlq $1, %%mm2 \n\t"
285 "pand %%mm7, %%mm0 \n\t"
286 "pand %%mm7, %%mm2 \n\t"
287 "pand %%mm6, %%mm1 \n\t"
288 "pand %%mm6, %%mm3 \n\t"
289 "por %%mm1, %%mm0 \n\t"
290 "por %%mm3, %%mm2 \n\t"
291 MOVNTQ
" %%mm0, %0 \n\t"
299 __asm__
volatile(SFENCE:::"memory");
300 __asm__
volatile(EMMS:::"memory");
304 register uint32_t x
= *((const uint32_t*)s
);
305 *((uint32_t *)d
) = ((x
>>1)&0x7FE07FE0) | (x
&0x001F001F);
310 register uint16_t x
= *((const uint16_t*)s
);
311 *((uint16_t *)d
) = ((x
>>1)&0x7FE0) | (x
&0x001F);
315 static inline void RENAME(rgb32to16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
317 const uint8_t *s
= src
;
319 #if COMPILE_TEMPLATE_MMX
320 const uint8_t *mm_end
;
322 uint16_t *d
= (uint16_t *)dst
;
324 #if COMPILE_TEMPLATE_MMX
326 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
328 "movq %3, %%mm5 \n\t"
329 "movq %4, %%mm6 \n\t"
330 "movq %5, %%mm7 \n\t"
334 PREFETCH
" 32(%1) \n\t"
335 "movd (%1), %%mm0 \n\t"
336 "movd 4(%1), %%mm3 \n\t"
337 "punpckldq 8(%1), %%mm0 \n\t"
338 "punpckldq 12(%1), %%mm3 \n\t"
339 "movq %%mm0, %%mm1 \n\t"
340 "movq %%mm3, %%mm4 \n\t"
341 "pand %%mm6, %%mm0 \n\t"
342 "pand %%mm6, %%mm3 \n\t"
343 "pmaddwd %%mm7, %%mm0 \n\t"
344 "pmaddwd %%mm7, %%mm3 \n\t"
345 "pand %%mm5, %%mm1 \n\t"
346 "pand %%mm5, %%mm4 \n\t"
347 "por %%mm1, %%mm0 \n\t"
348 "por %%mm4, %%mm3 \n\t"
349 "psrld $5, %%mm0 \n\t"
350 "pslld $11, %%mm3 \n\t"
351 "por %%mm3, %%mm0 \n\t"
352 MOVNTQ
" %%mm0, (%0) \n\t"
359 : "r" (mm_end
), "m" (mask3216g
), "m" (mask3216br
), "m" (mul3216
)
362 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
364 "movq %0, %%mm7 \n\t"
365 "movq %1, %%mm6 \n\t"
366 ::"m"(red_16mask
),"m"(green_16mask
));
370 "movd %1, %%mm0 \n\t"
371 "movd 4%1, %%mm3 \n\t"
372 "punpckldq 8%1, %%mm0 \n\t"
373 "punpckldq 12%1, %%mm3 \n\t"
374 "movq %%mm0, %%mm1 \n\t"
375 "movq %%mm0, %%mm2 \n\t"
376 "movq %%mm3, %%mm4 \n\t"
377 "movq %%mm3, %%mm5 \n\t"
378 "psrlq $3, %%mm0 \n\t"
379 "psrlq $3, %%mm3 \n\t"
380 "pand %2, %%mm0 \n\t"
381 "pand %2, %%mm3 \n\t"
382 "psrlq $5, %%mm1 \n\t"
383 "psrlq $5, %%mm4 \n\t"
384 "pand %%mm6, %%mm1 \n\t"
385 "pand %%mm6, %%mm4 \n\t"
386 "psrlq $8, %%mm2 \n\t"
387 "psrlq $8, %%mm5 \n\t"
388 "pand %%mm7, %%mm2 \n\t"
389 "pand %%mm7, %%mm5 \n\t"
390 "por %%mm1, %%mm0 \n\t"
391 "por %%mm4, %%mm3 \n\t"
392 "por %%mm2, %%mm0 \n\t"
393 "por %%mm5, %%mm3 \n\t"
394 "psllq $16, %%mm3 \n\t"
395 "por %%mm3, %%mm0 \n\t"
396 MOVNTQ
" %%mm0, %0 \n\t"
397 :"=m"(*d
):"m"(*s
),"m"(blue_16mask
):"memory");
402 __asm__
volatile(SFENCE:::"memory");
403 __asm__
volatile(EMMS:::"memory");
406 register int rgb
= *(const uint32_t*)s
; s
+= 4;
407 *d
++ = ((rgb
&0xFF)>>3) + ((rgb
&0xFC00)>>5) + ((rgb
&0xF80000)>>8);
411 static inline void RENAME(rgb32tobgr16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
413 const uint8_t *s
= src
;
415 #if COMPILE_TEMPLATE_MMX
416 const uint8_t *mm_end
;
418 uint16_t *d
= (uint16_t *)dst
;
420 #if COMPILE_TEMPLATE_MMX
421 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
423 "movq %0, %%mm7 \n\t"
424 "movq %1, %%mm6 \n\t"
425 ::"m"(red_16mask
),"m"(green_16mask
));
430 "movd %1, %%mm0 \n\t"
431 "movd 4%1, %%mm3 \n\t"
432 "punpckldq 8%1, %%mm0 \n\t"
433 "punpckldq 12%1, %%mm3 \n\t"
434 "movq %%mm0, %%mm1 \n\t"
435 "movq %%mm0, %%mm2 \n\t"
436 "movq %%mm3, %%mm4 \n\t"
437 "movq %%mm3, %%mm5 \n\t"
438 "psllq $8, %%mm0 \n\t"
439 "psllq $8, %%mm3 \n\t"
440 "pand %%mm7, %%mm0 \n\t"
441 "pand %%mm7, %%mm3 \n\t"
442 "psrlq $5, %%mm1 \n\t"
443 "psrlq $5, %%mm4 \n\t"
444 "pand %%mm6, %%mm1 \n\t"
445 "pand %%mm6, %%mm4 \n\t"
446 "psrlq $19, %%mm2 \n\t"
447 "psrlq $19, %%mm5 \n\t"
448 "pand %2, %%mm2 \n\t"
449 "pand %2, %%mm5 \n\t"
450 "por %%mm1, %%mm0 \n\t"
451 "por %%mm4, %%mm3 \n\t"
452 "por %%mm2, %%mm0 \n\t"
453 "por %%mm5, %%mm3 \n\t"
454 "psllq $16, %%mm3 \n\t"
455 "por %%mm3, %%mm0 \n\t"
456 MOVNTQ
" %%mm0, %0 \n\t"
457 :"=m"(*d
):"m"(*s
),"m"(blue_16mask
):"memory");
461 __asm__
volatile(SFENCE:::"memory");
462 __asm__
volatile(EMMS:::"memory");
465 register int rgb
= *(const uint32_t*)s
; s
+= 4;
466 *d
++ = ((rgb
&0xF8)<<8) + ((rgb
&0xFC00)>>5) + ((rgb
&0xF80000)>>19);
470 static inline void RENAME(rgb32to15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
472 const uint8_t *s
= src
;
474 #if COMPILE_TEMPLATE_MMX
475 const uint8_t *mm_end
;
477 uint16_t *d
= (uint16_t *)dst
;
479 #if COMPILE_TEMPLATE_MMX
481 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
483 "movq %3, %%mm5 \n\t"
484 "movq %4, %%mm6 \n\t"
485 "movq %5, %%mm7 \n\t"
489 PREFETCH
" 32(%1) \n\t"
490 "movd (%1), %%mm0 \n\t"
491 "movd 4(%1), %%mm3 \n\t"
492 "punpckldq 8(%1), %%mm0 \n\t"
493 "punpckldq 12(%1), %%mm3 \n\t"
494 "movq %%mm0, %%mm1 \n\t"
495 "movq %%mm3, %%mm4 \n\t"
496 "pand %%mm6, %%mm0 \n\t"
497 "pand %%mm6, %%mm3 \n\t"
498 "pmaddwd %%mm7, %%mm0 \n\t"
499 "pmaddwd %%mm7, %%mm3 \n\t"
500 "pand %%mm5, %%mm1 \n\t"
501 "pand %%mm5, %%mm4 \n\t"
502 "por %%mm1, %%mm0 \n\t"
503 "por %%mm4, %%mm3 \n\t"
504 "psrld $6, %%mm0 \n\t"
505 "pslld $10, %%mm3 \n\t"
506 "por %%mm3, %%mm0 \n\t"
507 MOVNTQ
" %%mm0, (%0) \n\t"
514 : "r" (mm_end
), "m" (mask3215g
), "m" (mask3216br
), "m" (mul3215
)
517 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
519 "movq %0, %%mm7 \n\t"
520 "movq %1, %%mm6 \n\t"
521 ::"m"(red_15mask
),"m"(green_15mask
));
525 "movd %1, %%mm0 \n\t"
526 "movd 4%1, %%mm3 \n\t"
527 "punpckldq 8%1, %%mm0 \n\t"
528 "punpckldq 12%1, %%mm3 \n\t"
529 "movq %%mm0, %%mm1 \n\t"
530 "movq %%mm0, %%mm2 \n\t"
531 "movq %%mm3, %%mm4 \n\t"
532 "movq %%mm3, %%mm5 \n\t"
533 "psrlq $3, %%mm0 \n\t"
534 "psrlq $3, %%mm3 \n\t"
535 "pand %2, %%mm0 \n\t"
536 "pand %2, %%mm3 \n\t"
537 "psrlq $6, %%mm1 \n\t"
538 "psrlq $6, %%mm4 \n\t"
539 "pand %%mm6, %%mm1 \n\t"
540 "pand %%mm6, %%mm4 \n\t"
541 "psrlq $9, %%mm2 \n\t"
542 "psrlq $9, %%mm5 \n\t"
543 "pand %%mm7, %%mm2 \n\t"
544 "pand %%mm7, %%mm5 \n\t"
545 "por %%mm1, %%mm0 \n\t"
546 "por %%mm4, %%mm3 \n\t"
547 "por %%mm2, %%mm0 \n\t"
548 "por %%mm5, %%mm3 \n\t"
549 "psllq $16, %%mm3 \n\t"
550 "por %%mm3, %%mm0 \n\t"
551 MOVNTQ
" %%mm0, %0 \n\t"
552 :"=m"(*d
):"m"(*s
),"m"(blue_15mask
):"memory");
557 __asm__
volatile(SFENCE:::"memory");
558 __asm__
volatile(EMMS:::"memory");
561 register int rgb
= *(const uint32_t*)s
; s
+= 4;
562 *d
++ = ((rgb
&0xFF)>>3) + ((rgb
&0xF800)>>6) + ((rgb
&0xF80000)>>9);
566 static inline void RENAME(rgb32tobgr15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
568 const uint8_t *s
= src
;
570 #if COMPILE_TEMPLATE_MMX
571 const uint8_t *mm_end
;
573 uint16_t *d
= (uint16_t *)dst
;
575 #if COMPILE_TEMPLATE_MMX
576 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
578 "movq %0, %%mm7 \n\t"
579 "movq %1, %%mm6 \n\t"
580 ::"m"(red_15mask
),"m"(green_15mask
));
585 "movd %1, %%mm0 \n\t"
586 "movd 4%1, %%mm3 \n\t"
587 "punpckldq 8%1, %%mm0 \n\t"
588 "punpckldq 12%1, %%mm3 \n\t"
589 "movq %%mm0, %%mm1 \n\t"
590 "movq %%mm0, %%mm2 \n\t"
591 "movq %%mm3, %%mm4 \n\t"
592 "movq %%mm3, %%mm5 \n\t"
593 "psllq $7, %%mm0 \n\t"
594 "psllq $7, %%mm3 \n\t"
595 "pand %%mm7, %%mm0 \n\t"
596 "pand %%mm7, %%mm3 \n\t"
597 "psrlq $6, %%mm1 \n\t"
598 "psrlq $6, %%mm4 \n\t"
599 "pand %%mm6, %%mm1 \n\t"
600 "pand %%mm6, %%mm4 \n\t"
601 "psrlq $19, %%mm2 \n\t"
602 "psrlq $19, %%mm5 \n\t"
603 "pand %2, %%mm2 \n\t"
604 "pand %2, %%mm5 \n\t"
605 "por %%mm1, %%mm0 \n\t"
606 "por %%mm4, %%mm3 \n\t"
607 "por %%mm2, %%mm0 \n\t"
608 "por %%mm5, %%mm3 \n\t"
609 "psllq $16, %%mm3 \n\t"
610 "por %%mm3, %%mm0 \n\t"
611 MOVNTQ
" %%mm0, %0 \n\t"
612 :"=m"(*d
):"m"(*s
),"m"(blue_15mask
):"memory");
616 __asm__
volatile(SFENCE:::"memory");
617 __asm__
volatile(EMMS:::"memory");
620 register int rgb
= *(const uint32_t*)s
; s
+= 4;
621 *d
++ = ((rgb
&0xF8)<<7) + ((rgb
&0xF800)>>6) + ((rgb
&0xF80000)>>19);
625 static inline void RENAME(rgb24tobgr16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
627 const uint8_t *s
= src
;
629 #if COMPILE_TEMPLATE_MMX
630 const uint8_t *mm_end
;
632 uint16_t *d
= (uint16_t *)dst
;
634 #if COMPILE_TEMPLATE_MMX
635 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
637 "movq %0, %%mm7 \n\t"
638 "movq %1, %%mm6 \n\t"
639 ::"m"(red_16mask
),"m"(green_16mask
));
644 "movd %1, %%mm0 \n\t"
645 "movd 3%1, %%mm3 \n\t"
646 "punpckldq 6%1, %%mm0 \n\t"
647 "punpckldq 9%1, %%mm3 \n\t"
648 "movq %%mm0, %%mm1 \n\t"
649 "movq %%mm0, %%mm2 \n\t"
650 "movq %%mm3, %%mm4 \n\t"
651 "movq %%mm3, %%mm5 \n\t"
652 "psrlq $3, %%mm0 \n\t"
653 "psrlq $3, %%mm3 \n\t"
654 "pand %2, %%mm0 \n\t"
655 "pand %2, %%mm3 \n\t"
656 "psrlq $5, %%mm1 \n\t"
657 "psrlq $5, %%mm4 \n\t"
658 "pand %%mm6, %%mm1 \n\t"
659 "pand %%mm6, %%mm4 \n\t"
660 "psrlq $8, %%mm2 \n\t"
661 "psrlq $8, %%mm5 \n\t"
662 "pand %%mm7, %%mm2 \n\t"
663 "pand %%mm7, %%mm5 \n\t"
664 "por %%mm1, %%mm0 \n\t"
665 "por %%mm4, %%mm3 \n\t"
666 "por %%mm2, %%mm0 \n\t"
667 "por %%mm5, %%mm3 \n\t"
668 "psllq $16, %%mm3 \n\t"
669 "por %%mm3, %%mm0 \n\t"
670 MOVNTQ
" %%mm0, %0 \n\t"
671 :"=m"(*d
):"m"(*s
),"m"(blue_16mask
):"memory");
675 __asm__
volatile(SFENCE:::"memory");
676 __asm__
volatile(EMMS:::"memory");
682 *d
++ = (b
>>3) | ((g
&0xFC)<<3) | ((r
&0xF8)<<8);
686 static inline void RENAME(rgb24to16
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
688 const uint8_t *s
= src
;
690 #if COMPILE_TEMPLATE_MMX
691 const uint8_t *mm_end
;
693 uint16_t *d
= (uint16_t *)dst
;
695 #if COMPILE_TEMPLATE_MMX
696 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
698 "movq %0, %%mm7 \n\t"
699 "movq %1, %%mm6 \n\t"
700 ::"m"(red_16mask
),"m"(green_16mask
));
705 "movd %1, %%mm0 \n\t"
706 "movd 3%1, %%mm3 \n\t"
707 "punpckldq 6%1, %%mm0 \n\t"
708 "punpckldq 9%1, %%mm3 \n\t"
709 "movq %%mm0, %%mm1 \n\t"
710 "movq %%mm0, %%mm2 \n\t"
711 "movq %%mm3, %%mm4 \n\t"
712 "movq %%mm3, %%mm5 \n\t"
713 "psllq $8, %%mm0 \n\t"
714 "psllq $8, %%mm3 \n\t"
715 "pand %%mm7, %%mm0 \n\t"
716 "pand %%mm7, %%mm3 \n\t"
717 "psrlq $5, %%mm1 \n\t"
718 "psrlq $5, %%mm4 \n\t"
719 "pand %%mm6, %%mm1 \n\t"
720 "pand %%mm6, %%mm4 \n\t"
721 "psrlq $19, %%mm2 \n\t"
722 "psrlq $19, %%mm5 \n\t"
723 "pand %2, %%mm2 \n\t"
724 "pand %2, %%mm5 \n\t"
725 "por %%mm1, %%mm0 \n\t"
726 "por %%mm4, %%mm3 \n\t"
727 "por %%mm2, %%mm0 \n\t"
728 "por %%mm5, %%mm3 \n\t"
729 "psllq $16, %%mm3 \n\t"
730 "por %%mm3, %%mm0 \n\t"
731 MOVNTQ
" %%mm0, %0 \n\t"
732 :"=m"(*d
):"m"(*s
),"m"(blue_16mask
):"memory");
736 __asm__
volatile(SFENCE:::"memory");
737 __asm__
volatile(EMMS:::"memory");
743 *d
++ = (b
>>3) | ((g
&0xFC)<<3) | ((r
&0xF8)<<8);
747 static inline void RENAME(rgb24tobgr15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
749 const uint8_t *s
= src
;
751 #if COMPILE_TEMPLATE_MMX
752 const uint8_t *mm_end
;
754 uint16_t *d
= (uint16_t *)dst
;
756 #if COMPILE_TEMPLATE_MMX
757 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
759 "movq %0, %%mm7 \n\t"
760 "movq %1, %%mm6 \n\t"
761 ::"m"(red_15mask
),"m"(green_15mask
));
766 "movd %1, %%mm0 \n\t"
767 "movd 3%1, %%mm3 \n\t"
768 "punpckldq 6%1, %%mm0 \n\t"
769 "punpckldq 9%1, %%mm3 \n\t"
770 "movq %%mm0, %%mm1 \n\t"
771 "movq %%mm0, %%mm2 \n\t"
772 "movq %%mm3, %%mm4 \n\t"
773 "movq %%mm3, %%mm5 \n\t"
774 "psrlq $3, %%mm0 \n\t"
775 "psrlq $3, %%mm3 \n\t"
776 "pand %2, %%mm0 \n\t"
777 "pand %2, %%mm3 \n\t"
778 "psrlq $6, %%mm1 \n\t"
779 "psrlq $6, %%mm4 \n\t"
780 "pand %%mm6, %%mm1 \n\t"
781 "pand %%mm6, %%mm4 \n\t"
782 "psrlq $9, %%mm2 \n\t"
783 "psrlq $9, %%mm5 \n\t"
784 "pand %%mm7, %%mm2 \n\t"
785 "pand %%mm7, %%mm5 \n\t"
786 "por %%mm1, %%mm0 \n\t"
787 "por %%mm4, %%mm3 \n\t"
788 "por %%mm2, %%mm0 \n\t"
789 "por %%mm5, %%mm3 \n\t"
790 "psllq $16, %%mm3 \n\t"
791 "por %%mm3, %%mm0 \n\t"
792 MOVNTQ
" %%mm0, %0 \n\t"
793 :"=m"(*d
):"m"(*s
),"m"(blue_15mask
):"memory");
797 __asm__
volatile(SFENCE:::"memory");
798 __asm__
volatile(EMMS:::"memory");
804 *d
++ = (b
>>3) | ((g
&0xF8)<<2) | ((r
&0xF8)<<7);
808 static inline void RENAME(rgb24to15
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
810 const uint8_t *s
= src
;
812 #if COMPILE_TEMPLATE_MMX
813 const uint8_t *mm_end
;
815 uint16_t *d
= (uint16_t *)dst
;
817 #if COMPILE_TEMPLATE_MMX
818 __asm__
volatile(PREFETCH
" %0"::"m"(*src
):"memory");
820 "movq %0, %%mm7 \n\t"
821 "movq %1, %%mm6 \n\t"
822 ::"m"(red_15mask
),"m"(green_15mask
));
827 "movd %1, %%mm0 \n\t"
828 "movd 3%1, %%mm3 \n\t"
829 "punpckldq 6%1, %%mm0 \n\t"
830 "punpckldq 9%1, %%mm3 \n\t"
831 "movq %%mm0, %%mm1 \n\t"
832 "movq %%mm0, %%mm2 \n\t"
833 "movq %%mm3, %%mm4 \n\t"
834 "movq %%mm3, %%mm5 \n\t"
835 "psllq $7, %%mm0 \n\t"
836 "psllq $7, %%mm3 \n\t"
837 "pand %%mm7, %%mm0 \n\t"
838 "pand %%mm7, %%mm3 \n\t"
839 "psrlq $6, %%mm1 \n\t"
840 "psrlq $6, %%mm4 \n\t"
841 "pand %%mm6, %%mm1 \n\t"
842 "pand %%mm6, %%mm4 \n\t"
843 "psrlq $19, %%mm2 \n\t"
844 "psrlq $19, %%mm5 \n\t"
845 "pand %2, %%mm2 \n\t"
846 "pand %2, %%mm5 \n\t"
847 "por %%mm1, %%mm0 \n\t"
848 "por %%mm4, %%mm3 \n\t"
849 "por %%mm2, %%mm0 \n\t"
850 "por %%mm5, %%mm3 \n\t"
851 "psllq $16, %%mm3 \n\t"
852 "por %%mm3, %%mm0 \n\t"
853 MOVNTQ
" %%mm0, %0 \n\t"
854 :"=m"(*d
):"m"(*s
),"m"(blue_15mask
):"memory");
858 __asm__
volatile(SFENCE:::"memory");
859 __asm__
volatile(EMMS:::"memory");
865 *d
++ = (b
>>3) | ((g
&0xF8)<<2) | ((r
&0xF8)<<7);
870 I use less accurate approximation here by simply left-shifting the input
871 value and filling the low order bits with zeroes. This method improves PNG
872 compression but this scheme cannot reproduce white exactly, since it does
873 not generate an all-ones maximum value; the net effect is to darken the
876 The better method should be "left bit replication":
886 | leftmost bits repeated to fill open bits
890 static inline void RENAME(rgb15tobgr24
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
893 #if COMPILE_TEMPLATE_MMX
894 const uint16_t *mm_end
;
897 const uint16_t *s
= (const uint16_t*)src
;
898 end
= s
+ src_size
/2;
899 #if COMPILE_TEMPLATE_MMX
900 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
905 "movq %1, %%mm0 \n\t"
906 "movq %1, %%mm1 \n\t"
907 "movq %1, %%mm2 \n\t"
908 "pand %2, %%mm0 \n\t"
909 "pand %3, %%mm1 \n\t"
910 "pand %4, %%mm2 \n\t"
911 "psllq $3, %%mm0 \n\t"
912 "psrlq $2, %%mm1 \n\t"
913 "psrlq $7, %%mm2 \n\t"
914 "movq %%mm0, %%mm3 \n\t"
915 "movq %%mm1, %%mm4 \n\t"
916 "movq %%mm2, %%mm5 \n\t"
917 "punpcklwd %5, %%mm0 \n\t"
918 "punpcklwd %5, %%mm1 \n\t"
919 "punpcklwd %5, %%mm2 \n\t"
920 "punpckhwd %5, %%mm3 \n\t"
921 "punpckhwd %5, %%mm4 \n\t"
922 "punpckhwd %5, %%mm5 \n\t"
923 "psllq $8, %%mm1 \n\t"
924 "psllq $16, %%mm2 \n\t"
925 "por %%mm1, %%mm0 \n\t"
926 "por %%mm2, %%mm0 \n\t"
927 "psllq $8, %%mm4 \n\t"
928 "psllq $16, %%mm5 \n\t"
929 "por %%mm4, %%mm3 \n\t"
930 "por %%mm5, %%mm3 \n\t"
932 "movq %%mm0, %%mm6 \n\t"
933 "movq %%mm3, %%mm7 \n\t"
935 "movq 8%1, %%mm0 \n\t"
936 "movq 8%1, %%mm1 \n\t"
937 "movq 8%1, %%mm2 \n\t"
938 "pand %2, %%mm0 \n\t"
939 "pand %3, %%mm1 \n\t"
940 "pand %4, %%mm2 \n\t"
941 "psllq $3, %%mm0 \n\t"
942 "psrlq $2, %%mm1 \n\t"
943 "psrlq $7, %%mm2 \n\t"
944 "movq %%mm0, %%mm3 \n\t"
945 "movq %%mm1, %%mm4 \n\t"
946 "movq %%mm2, %%mm5 \n\t"
947 "punpcklwd %5, %%mm0 \n\t"
948 "punpcklwd %5, %%mm1 \n\t"
949 "punpcklwd %5, %%mm2 \n\t"
950 "punpckhwd %5, %%mm3 \n\t"
951 "punpckhwd %5, %%mm4 \n\t"
952 "punpckhwd %5, %%mm5 \n\t"
953 "psllq $8, %%mm1 \n\t"
954 "psllq $16, %%mm2 \n\t"
955 "por %%mm1, %%mm0 \n\t"
956 "por %%mm2, %%mm0 \n\t"
957 "psllq $8, %%mm4 \n\t"
958 "psllq $16, %%mm5 \n\t"
959 "por %%mm4, %%mm3 \n\t"
960 "por %%mm5, %%mm3 \n\t"
963 :"m"(*s
),"m"(mask15b
),"m"(mask15g
),"m"(mask15r
), "m"(mmx_null
)
965 /* borrowed 32 to 24 */
967 "movq %%mm0, %%mm4 \n\t"
968 "movq %%mm3, %%mm5 \n\t"
969 "movq %%mm6, %%mm0 \n\t"
970 "movq %%mm7, %%mm1 \n\t"
972 "movq %%mm4, %%mm6 \n\t"
973 "movq %%mm5, %%mm7 \n\t"
974 "movq %%mm0, %%mm2 \n\t"
975 "movq %%mm1, %%mm3 \n\t"
985 __asm__
volatile(SFENCE:::"memory");
986 __asm__
volatile(EMMS:::"memory");
989 register uint16_t bgr
;
991 *d
++ = (bgr
&0x1F)<<3;
992 *d
++ = (bgr
&0x3E0)>>2;
993 *d
++ = (bgr
&0x7C00)>>7;
997 static inline void RENAME(rgb16tobgr24
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
1000 #if COMPILE_TEMPLATE_MMX
1001 const uint16_t *mm_end
;
1003 uint8_t *d
= (uint8_t *)dst
;
1004 const uint16_t *s
= (const uint16_t *)src
;
1005 end
= s
+ src_size
/2;
1006 #if COMPILE_TEMPLATE_MMX
1007 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
1009 while (s
< mm_end
) {
1011 PREFETCH
" 32%1 \n\t"
1012 "movq %1, %%mm0 \n\t"
1013 "movq %1, %%mm1 \n\t"
1014 "movq %1, %%mm2 \n\t"
1015 "pand %2, %%mm0 \n\t"
1016 "pand %3, %%mm1 \n\t"
1017 "pand %4, %%mm2 \n\t"
1018 "psllq $3, %%mm0 \n\t"
1019 "psrlq $3, %%mm1 \n\t"
1020 "psrlq $8, %%mm2 \n\t"
1021 "movq %%mm0, %%mm3 \n\t"
1022 "movq %%mm1, %%mm4 \n\t"
1023 "movq %%mm2, %%mm5 \n\t"
1024 "punpcklwd %5, %%mm0 \n\t"
1025 "punpcklwd %5, %%mm1 \n\t"
1026 "punpcklwd %5, %%mm2 \n\t"
1027 "punpckhwd %5, %%mm3 \n\t"
1028 "punpckhwd %5, %%mm4 \n\t"
1029 "punpckhwd %5, %%mm5 \n\t"
1030 "psllq $8, %%mm1 \n\t"
1031 "psllq $16, %%mm2 \n\t"
1032 "por %%mm1, %%mm0 \n\t"
1033 "por %%mm2, %%mm0 \n\t"
1034 "psllq $8, %%mm4 \n\t"
1035 "psllq $16, %%mm5 \n\t"
1036 "por %%mm4, %%mm3 \n\t"
1037 "por %%mm5, %%mm3 \n\t"
1039 "movq %%mm0, %%mm6 \n\t"
1040 "movq %%mm3, %%mm7 \n\t"
1042 "movq 8%1, %%mm0 \n\t"
1043 "movq 8%1, %%mm1 \n\t"
1044 "movq 8%1, %%mm2 \n\t"
1045 "pand %2, %%mm0 \n\t"
1046 "pand %3, %%mm1 \n\t"
1047 "pand %4, %%mm2 \n\t"
1048 "psllq $3, %%mm0 \n\t"
1049 "psrlq $3, %%mm1 \n\t"
1050 "psrlq $8, %%mm2 \n\t"
1051 "movq %%mm0, %%mm3 \n\t"
1052 "movq %%mm1, %%mm4 \n\t"
1053 "movq %%mm2, %%mm5 \n\t"
1054 "punpcklwd %5, %%mm0 \n\t"
1055 "punpcklwd %5, %%mm1 \n\t"
1056 "punpcklwd %5, %%mm2 \n\t"
1057 "punpckhwd %5, %%mm3 \n\t"
1058 "punpckhwd %5, %%mm4 \n\t"
1059 "punpckhwd %5, %%mm5 \n\t"
1060 "psllq $8, %%mm1 \n\t"
1061 "psllq $16, %%mm2 \n\t"
1062 "por %%mm1, %%mm0 \n\t"
1063 "por %%mm2, %%mm0 \n\t"
1064 "psllq $8, %%mm4 \n\t"
1065 "psllq $16, %%mm5 \n\t"
1066 "por %%mm4, %%mm3 \n\t"
1067 "por %%mm5, %%mm3 \n\t"
1069 :"m"(*s
),"m"(mask16b
),"m"(mask16g
),"m"(mask16r
),"m"(mmx_null
)
1071 /* borrowed 32 to 24 */
1073 "movq %%mm0, %%mm4 \n\t"
1074 "movq %%mm3, %%mm5 \n\t"
1075 "movq %%mm6, %%mm0 \n\t"
1076 "movq %%mm7, %%mm1 \n\t"
1078 "movq %%mm4, %%mm6 \n\t"
1079 "movq %%mm5, %%mm7 \n\t"
1080 "movq %%mm0, %%mm2 \n\t"
1081 "movq %%mm1, %%mm3 \n\t"
1091 __asm__
volatile(SFENCE:::"memory");
1092 __asm__
volatile(EMMS:::"memory");
1095 register uint16_t bgr
;
1097 *d
++ = (bgr
&0x1F)<<3;
1098 *d
++ = (bgr
&0x7E0)>>3;
1099 *d
++ = (bgr
&0xF800)>>8;
1104 * mm0 = 00 B3 00 B2 00 B1 00 B0
1105 * mm1 = 00 G3 00 G2 00 G1 00 G0
1106 * mm2 = 00 R3 00 R2 00 R1 00 R0
1107 * mm6 = FF FF FF FF FF FF FF FF
1108 * mm7 = 00 00 00 00 00 00 00 00
1110 #define PACK_RGB32 \
1111 "packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
1112 "packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
1113 "packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
1114 "punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
1115 "punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
1116 "movq %%mm0, %%mm3 \n\t" \
1117 "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
1118 "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
1119 MOVNTQ" %%mm0, %0 \n\t" \
1120 MOVNTQ" %%mm3, 8%0 \n\t" \
1122 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
1124 const uint16_t *end
;
1125 #if COMPILE_TEMPLATE_MMX
1126 const uint16_t *mm_end
;
1129 const uint16_t *s
= (const uint16_t *)src
;
1130 end
= s
+ src_size
/2;
1131 #if COMPILE_TEMPLATE_MMX
1132 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
1133 __asm__
volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
1134 __asm__
volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
1136 while (s
< mm_end
) {
1138 PREFETCH
" 32%1 \n\t"
1139 "movq %1, %%mm0 \n\t"
1140 "movq %1, %%mm1 \n\t"
1141 "movq %1, %%mm2 \n\t"
1142 "pand %2, %%mm0 \n\t"
1143 "pand %3, %%mm1 \n\t"
1144 "pand %4, %%mm2 \n\t"
1145 "psllq $3, %%mm0 \n\t"
1146 "psrlq $2, %%mm1 \n\t"
1147 "psrlq $7, %%mm2 \n\t"
1150 :"m"(*s
),"m"(mask15b
),"m"(mask15g
),"m"(mask15r
)
1155 __asm__
volatile(SFENCE:::"memory");
1156 __asm__
volatile(EMMS:::"memory");
1159 register uint16_t bgr
;
1163 *d
++ = (bgr
&0x7C00)>>7;
1164 *d
++ = (bgr
&0x3E0)>>2;
1165 *d
++ = (bgr
&0x1F)<<3;
1167 *d
++ = (bgr
&0x1F)<<3;
1168 *d
++ = (bgr
&0x3E0)>>2;
1169 *d
++ = (bgr
&0x7C00)>>7;
1175 static inline void RENAME(rgb16to32
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
1177 const uint16_t *end
;
1178 #if COMPILE_TEMPLATE_MMX
1179 const uint16_t *mm_end
;
1182 const uint16_t *s
= (const uint16_t*)src
;
1183 end
= s
+ src_size
/2;
1184 #if COMPILE_TEMPLATE_MMX
1185 __asm__
volatile(PREFETCH
" %0"::"m"(*s
):"memory");
1186 __asm__
volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
1187 __asm__
volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
1189 while (s
< mm_end
) {
1191 PREFETCH
" 32%1 \n\t"
1192 "movq %1, %%mm0 \n\t"
1193 "movq %1, %%mm1 \n\t"
1194 "movq %1, %%mm2 \n\t"
1195 "pand %2, %%mm0 \n\t"
1196 "pand %3, %%mm1 \n\t"
1197 "pand %4, %%mm2 \n\t"
1198 "psllq $3, %%mm0 \n\t"
1199 "psrlq $3, %%mm1 \n\t"
1200 "psrlq $8, %%mm2 \n\t"
1203 :"m"(*s
),"m"(mask16b
),"m"(mask16g
),"m"(mask16r
)
1208 __asm__
volatile(SFENCE:::"memory");
1209 __asm__
volatile(EMMS:::"memory");
1212 register uint16_t bgr
;
1216 *d
++ = (bgr
&0xF800)>>8;
1217 *d
++ = (bgr
&0x7E0)>>3;
1218 *d
++ = (bgr
&0x1F)<<3;
1220 *d
++ = (bgr
&0x1F)<<3;
1221 *d
++ = (bgr
&0x7E0)>>3;
1222 *d
++ = (bgr
&0xF800)>>8;
1228 static inline void RENAME(shuffle_bytes_2103
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
1230 x86_reg idx
= 15 - src_size
;
1231 const uint8_t *s
= src
-idx
;
1232 uint8_t *d
= dst
-idx
;
1233 #if COMPILE_TEMPLATE_MMX
1237 PREFETCH
" (%1, %0) \n\t"
1238 "movq %3, %%mm7 \n\t"
1239 "pxor %4, %%mm7 \n\t"
1240 "movq %%mm7, %%mm6 \n\t"
1241 "pxor %5, %%mm7 \n\t"
1244 PREFETCH
" 32(%1, %0) \n\t"
1245 "movq (%1, %0), %%mm0 \n\t"
1246 "movq 8(%1, %0), %%mm1 \n\t"
1247 # if COMPILE_TEMPLATE_MMX2
1248 "pshufw $177, %%mm0, %%mm3 \n\t"
1249 "pshufw $177, %%mm1, %%mm5 \n\t"
1250 "pand %%mm7, %%mm0 \n\t"
1251 "pand %%mm6, %%mm3 \n\t"
1252 "pand %%mm7, %%mm1 \n\t"
1253 "pand %%mm6, %%mm5 \n\t"
1254 "por %%mm3, %%mm0 \n\t"
1255 "por %%mm5, %%mm1 \n\t"
1257 "movq %%mm0, %%mm2 \n\t"
1258 "movq %%mm1, %%mm4 \n\t"
1259 "pand %%mm7, %%mm0 \n\t"
1260 "pand %%mm6, %%mm2 \n\t"
1261 "pand %%mm7, %%mm1 \n\t"
1262 "pand %%mm6, %%mm4 \n\t"
1263 "movq %%mm2, %%mm3 \n\t"
1264 "movq %%mm4, %%mm5 \n\t"
1265 "pslld $16, %%mm2 \n\t"
1266 "psrld $16, %%mm3 \n\t"
1267 "pslld $16, %%mm4 \n\t"
1268 "psrld $16, %%mm5 \n\t"
1269 "por %%mm2, %%mm0 \n\t"
1270 "por %%mm4, %%mm1 \n\t"
1271 "por %%mm3, %%mm0 \n\t"
1272 "por %%mm5, %%mm1 \n\t"
1274 MOVNTQ
" %%mm0, (%2, %0) \n\t"
1275 MOVNTQ
" %%mm1, 8(%2, %0) \n\t"
1282 : "r" (s
), "r" (d
), "m" (mask32b
), "m" (mask32r
), "m" (mmx_one
)
1285 for (; idx
<15; idx
+=4) {
1286 register int v
= *(const uint32_t *)&s
[idx
], g
= v
& 0xff00ff00;
1288 *(uint32_t *)&d
[idx
] = (v
>>16) + g
+ (v
<<16);
1292 static inline void RENAME(rgb24tobgr24
)(const uint8_t *src
, uint8_t *dst
, long src_size
)
1295 #if COMPILE_TEMPLATE_MMX
1296 x86_reg mmx_size
= 23 - src_size
;
1298 "test %%"REG_a
", %%"REG_a
" \n\t"
1300 "movq "MANGLE(mask24r
)", %%mm5 \n\t"
1301 "movq "MANGLE(mask24g
)", %%mm6 \n\t"
1302 "movq "MANGLE(mask24b
)", %%mm7 \n\t"
1305 PREFETCH
" 32(%1, %%"REG_a
") \n\t"
1306 "movq (%1, %%"REG_a
"), %%mm0 \n\t" // BGR BGR BG
1307 "movq (%1, %%"REG_a
"), %%mm1 \n\t" // BGR BGR BG
1308 "movq 2(%1, %%"REG_a
"), %%mm2 \n\t" // R BGR BGR B
1309 "psllq $16, %%mm0 \n\t" // 00 BGR BGR
1310 "pand %%mm5, %%mm0 \n\t"
1311 "pand %%mm6, %%mm1 \n\t"
1312 "pand %%mm7, %%mm2 \n\t"
1313 "por %%mm0, %%mm1 \n\t"
1314 "por %%mm2, %%mm1 \n\t"
1315 "movq 6(%1, %%"REG_a
"), %%mm0 \n\t" // BGR BGR BG
1316 MOVNTQ
" %%mm1, (%2, %%"REG_a
") \n\t" // RGB RGB RG
1317 "movq 8(%1, %%"REG_a
"), %%mm1 \n\t" // R BGR BGR B
1318 "movq 10(%1, %%"REG_a
"), %%mm2 \n\t" // GR BGR BGR
1319 "pand %%mm7, %%mm0 \n\t"
1320 "pand %%mm5, %%mm1 \n\t"
1321 "pand %%mm6, %%mm2 \n\t"
1322 "por %%mm0, %%mm1 \n\t"
1323 "por %%mm2, %%mm1 \n\t"
1324 "movq 14(%1, %%"REG_a
"), %%mm0 \n\t" // R BGR BGR B
1325 MOVNTQ
" %%mm1, 8(%2, %%"REG_a
") \n\t" // B RGB RGB R
1326 "movq 16(%1, %%"REG_a
"), %%mm1 \n\t" // GR BGR BGR
1327 "movq 18(%1, %%"REG_a
"), %%mm2 \n\t" // BGR BGR BG
1328 "pand %%mm6, %%mm0 \n\t"
1329 "pand %%mm7, %%mm1 \n\t"
1330 "pand %%mm5, %%mm2 \n\t"
1331 "por %%mm0, %%mm1 \n\t"
1332 "por %%mm2, %%mm1 \n\t"
1333 MOVNTQ
" %%mm1, 16(%2, %%"REG_a
") \n\t"
1334 "add $24, %%"REG_a
" \n\t"
1338 : "r" (src
-mmx_size
), "r"(dst
-mmx_size
)
1341 __asm__
volatile(SFENCE:::"memory");
1342 __asm__
volatile(EMMS:::"memory");
1344 if (mmx_size
==23) return; //finished, was multiple of 8
1348 src_size
= 23-mmx_size
;
1352 for (i
=0; i
<src_size
; i
+=3) {
1355 dst
[i
+ 1] = src
[i
+ 1];
1356 dst
[i
+ 2] = src
[i
+ 0];
1361 static inline void RENAME(yuvPlanartoyuy2
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1362 long width
, long height
,
1363 long lumStride
, long chromStride
, long dstStride
, long vertLumPerChroma
)
1366 const x86_reg chromWidth
= width
>>1;
1367 for (y
=0; y
<height
; y
++) {
1368 #if COMPILE_TEMPLATE_MMX
1369 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1371 "xor %%"REG_a
", %%"REG_a
" \n\t"
1374 PREFETCH
" 32(%1, %%"REG_a
", 2) \n\t"
1375 PREFETCH
" 32(%2, %%"REG_a
") \n\t"
1376 PREFETCH
" 32(%3, %%"REG_a
") \n\t"
1377 "movq (%2, %%"REG_a
"), %%mm0 \n\t" // U(0)
1378 "movq %%mm0, %%mm2 \n\t" // U(0)
1379 "movq (%3, %%"REG_a
"), %%mm1 \n\t" // V(0)
1380 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1381 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
1383 "movq (%1, %%"REG_a
",2), %%mm3 \n\t" // Y(0)
1384 "movq 8(%1, %%"REG_a
",2), %%mm5 \n\t" // Y(8)
1385 "movq %%mm3, %%mm4 \n\t" // Y(0)
1386 "movq %%mm5, %%mm6 \n\t" // Y(8)
1387 "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0)
1388 "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4)
1389 "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
1390 "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)
1392 MOVNTQ
" %%mm3, (%0, %%"REG_a
", 4) \n\t"
1393 MOVNTQ
" %%mm4, 8(%0, %%"REG_a
", 4) \n\t"
1394 MOVNTQ
" %%mm5, 16(%0, %%"REG_a
", 4) \n\t"
1395 MOVNTQ
" %%mm6, 24(%0, %%"REG_a
", 4) \n\t"
1397 "add $8, %%"REG_a
" \n\t"
1398 "cmp %4, %%"REG_a
" \n\t"
1400 ::"r"(dst
), "r"(ysrc
), "r"(usrc
), "r"(vsrc
), "g" (chromWidth
)
1405 #if ARCH_ALPHA && HAVE_MVI
1406 #define pl2yuy2(n) \
1411 __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \
1412 __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \
1413 __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u)); \
1414 __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v)); \
1415 yuv1 = (u << 8) + (v << 24); \
1422 uint64_t *qdst
= (uint64_t *) dst
;
1423 uint64_t *qdst2
= (uint64_t *) (dst
+ dstStride
);
1424 const uint32_t *yc
= (uint32_t *) ysrc
;
1425 const uint32_t *yc2
= (uint32_t *) (ysrc
+ lumStride
);
1426 const uint16_t *uc
= (uint16_t*) usrc
, *vc
= (uint16_t*) vsrc
;
1427 for (i
= 0; i
< chromWidth
; i
+= 8) {
1428 uint64_t y1
, y2
, yuv1
, yuv2
;
1431 __asm__("ldq $31,64(%0)" :: "r"(yc
));
1432 __asm__("ldq $31,64(%0)" :: "r"(yc2
));
1433 __asm__("ldq $31,64(%0)" :: "r"(uc
));
1434 __asm__("ldq $31,64(%0)" :: "r"(vc
));
1452 #elif HAVE_FAST_64BIT
1454 uint64_t *ldst
= (uint64_t *) dst
;
1455 const uint8_t *yc
= ysrc
, *uc
= usrc
, *vc
= vsrc
;
1456 for (i
= 0; i
< chromWidth
; i
+= 2) {
1458 k
= yc
[0] + (uc
[0] << 8) +
1459 (yc
[1] << 16) + (vc
[0] << 24);
1460 l
= yc
[2] + (uc
[1] << 8) +
1461 (yc
[3] << 16) + (vc
[1] << 24);
1462 *ldst
++ = k
+ (l
<< 32);
1469 int i
, *idst
= (int32_t *) dst
;
1470 const uint8_t *yc
= ysrc
, *uc
= usrc
, *vc
= vsrc
;
1471 for (i
= 0; i
< chromWidth
; i
++) {
1473 *idst
++ = (yc
[0] << 24)+ (uc
[0] << 16) +
1474 (yc
[1] << 8) + (vc
[0] << 0);
1476 *idst
++ = yc
[0] + (uc
[0] << 8) +
1477 (yc
[1] << 16) + (vc
[0] << 24);
1485 if ((y
&(vertLumPerChroma
-1)) == vertLumPerChroma
-1) {
1486 usrc
+= chromStride
;
1487 vsrc
+= chromStride
;
1492 #if COMPILE_TEMPLATE_MMX
1500 * Height should be a multiple of 2 and width should be a multiple of 16.
1501 * (If this is a problem for anyone then tell me, and I will fix it.)
1503 static inline void RENAME(yv12toyuy2
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1504 long width
, long height
,
1505 long lumStride
, long chromStride
, long dstStride
)
1507 //FIXME interpolate chroma
1508 RENAME(yuvPlanartoyuy2
)(ysrc
, usrc
, vsrc
, dst
, width
, height
, lumStride
, chromStride
, dstStride
, 2);
1511 static inline void RENAME(yuvPlanartouyvy
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1512 long width
, long height
,
1513 long lumStride
, long chromStride
, long dstStride
, long vertLumPerChroma
)
1516 const x86_reg chromWidth
= width
>>1;
1517 for (y
=0; y
<height
; y
++) {
1518 #if COMPILE_TEMPLATE_MMX
1519 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1521 "xor %%"REG_a
", %%"REG_a
" \n\t"
1524 PREFETCH
" 32(%1, %%"REG_a
", 2) \n\t"
1525 PREFETCH
" 32(%2, %%"REG_a
") \n\t"
1526 PREFETCH
" 32(%3, %%"REG_a
") \n\t"
1527 "movq (%2, %%"REG_a
"), %%mm0 \n\t" // U(0)
1528 "movq %%mm0, %%mm2 \n\t" // U(0)
1529 "movq (%3, %%"REG_a
"), %%mm1 \n\t" // V(0)
1530 "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1531 "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
1533 "movq (%1, %%"REG_a
",2), %%mm3 \n\t" // Y(0)
1534 "movq 8(%1, %%"REG_a
",2), %%mm5 \n\t" // Y(8)
1535 "movq %%mm0, %%mm4 \n\t" // Y(0)
1536 "movq %%mm2, %%mm6 \n\t" // Y(8)
1537 "punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0)
1538 "punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4)
1539 "punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8)
1540 "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12)
1542 MOVNTQ
" %%mm0, (%0, %%"REG_a
", 4) \n\t"
1543 MOVNTQ
" %%mm4, 8(%0, %%"REG_a
", 4) \n\t"
1544 MOVNTQ
" %%mm2, 16(%0, %%"REG_a
", 4) \n\t"
1545 MOVNTQ
" %%mm6, 24(%0, %%"REG_a
", 4) \n\t"
1547 "add $8, %%"REG_a
" \n\t"
1548 "cmp %4, %%"REG_a
" \n\t"
1550 ::"r"(dst
), "r"(ysrc
), "r"(usrc
), "r"(vsrc
), "g" (chromWidth
)
1554 //FIXME adapt the Alpha ASM code from yv12->yuy2
1558 uint64_t *ldst
= (uint64_t *) dst
;
1559 const uint8_t *yc
= ysrc
, *uc
= usrc
, *vc
= vsrc
;
1560 for (i
= 0; i
< chromWidth
; i
+= 2) {
1562 k
= uc
[0] + (yc
[0] << 8) +
1563 (vc
[0] << 16) + (yc
[1] << 24);
1564 l
= uc
[1] + (yc
[2] << 8) +
1565 (vc
[1] << 16) + (yc
[3] << 24);
1566 *ldst
++ = k
+ (l
<< 32);
1573 int i
, *idst
= (int32_t *) dst
;
1574 const uint8_t *yc
= ysrc
, *uc
= usrc
, *vc
= vsrc
;
1575 for (i
= 0; i
< chromWidth
; i
++) {
1577 *idst
++ = (uc
[0] << 24)+ (yc
[0] << 16) +
1578 (vc
[0] << 8) + (yc
[1] << 0);
1580 *idst
++ = uc
[0] + (yc
[0] << 8) +
1581 (vc
[0] << 16) + (yc
[1] << 24);
1589 if ((y
&(vertLumPerChroma
-1)) == vertLumPerChroma
-1) {
1590 usrc
+= chromStride
;
1591 vsrc
+= chromStride
;
1596 #if COMPILE_TEMPLATE_MMX
1604 * Height should be a multiple of 2 and width should be a multiple of 16
1605 * (If this is a problem for anyone then tell me, and I will fix it.)
1607 static inline void RENAME(yv12touyvy
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1608 long width
, long height
,
1609 long lumStride
, long chromStride
, long dstStride
)
1611 //FIXME interpolate chroma
1612 RENAME(yuvPlanartouyvy
)(ysrc
, usrc
, vsrc
, dst
, width
, height
, lumStride
, chromStride
, dstStride
, 2);
1616 * Width should be a multiple of 16.
1618 static inline void RENAME(yuv422ptouyvy
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1619 long width
, long height
,
1620 long lumStride
, long chromStride
, long dstStride
)
1622 RENAME(yuvPlanartouyvy
)(ysrc
, usrc
, vsrc
, dst
, width
, height
, lumStride
, chromStride
, dstStride
, 1);
1626 * Width should be a multiple of 16.
1628 static inline void RENAME(yuv422ptoyuy2
)(const uint8_t *ysrc
, const uint8_t *usrc
, const uint8_t *vsrc
, uint8_t *dst
,
1629 long width
, long height
,
1630 long lumStride
, long chromStride
, long dstStride
)
1632 RENAME(yuvPlanartoyuy2
)(ysrc
, usrc
, vsrc
, dst
, width
, height
, lumStride
, chromStride
, dstStride
, 1);
1636 * Height should be a multiple of 2 and width should be a multiple of 16.
1637 * (If this is a problem for anyone then tell me, and I will fix it.)
1639 static inline void RENAME(yuy2toyv12
)(const uint8_t *src
, uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
,
1640 long width
, long height
,
1641 long lumStride
, long chromStride
, long srcStride
)
1644 const x86_reg chromWidth
= width
>>1;
1645 for (y
=0; y
<height
; y
+=2) {
1646 #if COMPILE_TEMPLATE_MMX
1648 "xor %%"REG_a
", %%"REG_a
" \n\t"
1649 "pcmpeqw %%mm7, %%mm7 \n\t"
1650 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
1653 PREFETCH
" 64(%0, %%"REG_a
", 4) \n\t"
1654 "movq (%0, %%"REG_a
", 4), %%mm0 \n\t" // YUYV YUYV(0)
1655 "movq 8(%0, %%"REG_a
", 4), %%mm1 \n\t" // YUYV YUYV(4)
1656 "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
1657 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
1658 "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
1659 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4)
1660 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
1661 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
1662 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1663 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
1665 MOVNTQ
" %%mm2, (%1, %%"REG_a
", 2) \n\t"
1667 "movq 16(%0, %%"REG_a
", 4), %%mm1 \n\t" // YUYV YUYV(8)
1668 "movq 24(%0, %%"REG_a
", 4), %%mm2 \n\t" // YUYV YUYV(12)
1669 "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
1670 "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
1671 "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
1672 "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12)
1673 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
1674 "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
1675 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
1676 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
1678 MOVNTQ
" %%mm3, 8(%1, %%"REG_a
", 2) \n\t"
1680 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
1681 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
1682 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
1683 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
1684 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
1685 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
1686 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
1687 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
1689 MOVNTQ
" %%mm0, (%3, %%"REG_a
") \n\t"
1690 MOVNTQ
" %%mm2, (%2, %%"REG_a
") \n\t"
1692 "add $8, %%"REG_a
" \n\t"
1693 "cmp %4, %%"REG_a
" \n\t"
1695 ::"r"(src
), "r"(ydst
), "r"(udst
), "r"(vdst
), "g" (chromWidth
)
1696 : "memory", "%"REG_a
1703 "xor %%"REG_a
", %%"REG_a
" \n\t"
1706 PREFETCH
" 64(%0, %%"REG_a
", 4) \n\t"
1707 "movq (%0, %%"REG_a
", 4), %%mm0 \n\t" // YUYV YUYV(0)
1708 "movq 8(%0, %%"REG_a
", 4), %%mm1 \n\t" // YUYV YUYV(4)
1709 "movq 16(%0, %%"REG_a
", 4), %%mm2 \n\t" // YUYV YUYV(8)
1710 "movq 24(%0, %%"REG_a
", 4), %%mm3 \n\t" // YUYV YUYV(12)
1711 "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
1712 "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
1713 "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
1714 "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
1715 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
1716 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
1718 MOVNTQ
" %%mm0, (%1, %%"REG_a
", 2) \n\t"
1719 MOVNTQ
" %%mm2, 8(%1, %%"REG_a
", 2) \n\t"
1721 "add $8, %%"REG_a
" \n\t"
1722 "cmp %4, %%"REG_a
" \n\t"
1725 ::"r"(src
), "r"(ydst
), "r"(udst
), "r"(vdst
), "g" (chromWidth
)
1726 : "memory", "%"REG_a
1730 for (i
=0; i
<chromWidth
; i
++) {
1731 ydst
[2*i
+0] = src
[4*i
+0];
1732 udst
[i
] = src
[4*i
+1];
1733 ydst
[2*i
+1] = src
[4*i
+2];
1734 vdst
[i
] = src
[4*i
+3];
1739 for (i
=0; i
<chromWidth
; i
++) {
1740 ydst
[2*i
+0] = src
[4*i
+0];
1741 ydst
[2*i
+1] = src
[4*i
+2];
1744 udst
+= chromStride
;
1745 vdst
+= chromStride
;
1749 #if COMPILE_TEMPLATE_MMX
1750 __asm__
volatile(EMMS
" \n\t"
1756 static inline void RENAME(planar2x
)(const uint8_t *src
, uint8_t *dst
, long srcWidth
, long srcHeight
, long srcStride
, long dstStride
)
1763 for (x
=0; x
<srcWidth
-1; x
++) {
1764 dst
[2*x
+1]= (3*src
[x
] + src
[x
+1])>>2;
1765 dst
[2*x
+2]= ( src
[x
] + 3*src
[x
+1])>>2;
1767 dst
[2*srcWidth
-1]= src
[srcWidth
-1];
1771 for (y
=1; y
<srcHeight
; y
++) {
1772 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
1773 const x86_reg mmxSize
= srcWidth
&~15;
1775 "mov %4, %%"REG_a
" \n\t"
1776 "movq "MANGLE(mmx_ff
)", %%mm0 \n\t"
1777 "movq (%0, %%"REG_a
"), %%mm4 \n\t"
1778 "movq %%mm4, %%mm2 \n\t"
1779 "psllq $8, %%mm4 \n\t"
1780 "pand %%mm0, %%mm2 \n\t"
1781 "por %%mm2, %%mm4 \n\t"
1782 "movq (%1, %%"REG_a
"), %%mm5 \n\t"
1783 "movq %%mm5, %%mm3 \n\t"
1784 "psllq $8, %%mm5 \n\t"
1785 "pand %%mm0, %%mm3 \n\t"
1786 "por %%mm3, %%mm5 \n\t"
1788 "movq (%0, %%"REG_a
"), %%mm0 \n\t"
1789 "movq (%1, %%"REG_a
"), %%mm1 \n\t"
1790 "movq 1(%0, %%"REG_a
"), %%mm2 \n\t"
1791 "movq 1(%1, %%"REG_a
"), %%mm3 \n\t"
1792 PAVGB
" %%mm0, %%mm5 \n\t"
1793 PAVGB
" %%mm0, %%mm3 \n\t"
1794 PAVGB
" %%mm0, %%mm5 \n\t"
1795 PAVGB
" %%mm0, %%mm3 \n\t"
1796 PAVGB
" %%mm1, %%mm4 \n\t"
1797 PAVGB
" %%mm1, %%mm2 \n\t"
1798 PAVGB
" %%mm1, %%mm4 \n\t"
1799 PAVGB
" %%mm1, %%mm2 \n\t"
1800 "movq %%mm5, %%mm7 \n\t"
1801 "movq %%mm4, %%mm6 \n\t"
1802 "punpcklbw %%mm3, %%mm5 \n\t"
1803 "punpckhbw %%mm3, %%mm7 \n\t"
1804 "punpcklbw %%mm2, %%mm4 \n\t"
1805 "punpckhbw %%mm2, %%mm6 \n\t"
1807 MOVNTQ
" %%mm5, (%2, %%"REG_a
", 2) \n\t"
1808 MOVNTQ
" %%mm7, 8(%2, %%"REG_a
", 2) \n\t"
1809 MOVNTQ
" %%mm4, (%3, %%"REG_a
", 2) \n\t"
1810 MOVNTQ
" %%mm6, 8(%3, %%"REG_a
", 2) \n\t"
1812 "movq %%mm5, (%2, %%"REG_a
", 2) \n\t"
1813 "movq %%mm7, 8(%2, %%"REG_a
", 2) \n\t"
1814 "movq %%mm4, (%3, %%"REG_a
", 2) \n\t"
1815 "movq %%mm6, 8(%3, %%"REG_a
", 2) \n\t"
1817 "add $8, %%"REG_a
" \n\t"
1818 "movq -1(%0, %%"REG_a
"), %%mm4 \n\t"
1819 "movq -1(%1, %%"REG_a
"), %%mm5 \n\t"
1821 :: "r" (src
+ mmxSize
), "r" (src
+ srcStride
+ mmxSize
),
1822 "r" (dst
+ mmxSize
*2), "r" (dst
+ dstStride
+ mmxSize
*2),
1827 const x86_reg mmxSize
=1;
1829 dst
[0 ]= (3*src
[0] + src
[srcStride
])>>2;
1830 dst
[dstStride
]= ( src
[0] + 3*src
[srcStride
])>>2;
1833 for (x
=mmxSize
-1; x
<srcWidth
-1; x
++) {
1834 dst
[2*x
+1]= (3*src
[x
+0] + src
[x
+srcStride
+1])>>2;
1835 dst
[2*x
+dstStride
+2]= ( src
[x
+0] + 3*src
[x
+srcStride
+1])>>2;
1836 dst
[2*x
+dstStride
+1]= ( src
[x
+1] + 3*src
[x
+srcStride
])>>2;
1837 dst
[2*x
+2]= (3*src
[x
+1] + src
[x
+srcStride
])>>2;
1839 dst
[srcWidth
*2 -1 ]= (3*src
[srcWidth
-1] + src
[srcWidth
-1 + srcStride
])>>2;
1840 dst
[srcWidth
*2 -1 + dstStride
]= ( src
[srcWidth
-1] + 3*src
[srcWidth
-1 + srcStride
])>>2;
1850 for (x
=0; x
<srcWidth
-1; x
++) {
1851 dst
[2*x
+1]= (3*src
[x
] + src
[x
+1])>>2;
1852 dst
[2*x
+2]= ( src
[x
] + 3*src
[x
+1])>>2;
1854 dst
[2*srcWidth
-1]= src
[srcWidth
-1];
1856 for (x
=0; x
<srcWidth
; x
++) {
1862 #if COMPILE_TEMPLATE_MMX
1863 __asm__
volatile(EMMS
" \n\t"
1870 * Height should be a multiple of 2 and width should be a multiple of 16.
1871 * (If this is a problem for anyone then tell me, and I will fix it.)
1872 * Chrominance data is only taken from every second line, others are ignored.
1873 * FIXME: Write HQ version.
1875 static inline void RENAME(uyvytoyv12
)(const uint8_t *src
, uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
,
1876 long width
, long height
,
1877 long lumStride
, long chromStride
, long srcStride
)
1880 const x86_reg chromWidth
= width
>>1;
1881 for (y
=0; y
<height
; y
+=2) {
1882 #if COMPILE_TEMPLATE_MMX
1884 "xor %%"REG_a
", %%"REG_a
" \n\t"
1885 "pcmpeqw %%mm7, %%mm7 \n\t"
1886 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
1889 PREFETCH
" 64(%0, %%"REG_a
", 4) \n\t"
1890 "movq (%0, %%"REG_a
", 4), %%mm0 \n\t" // UYVY UYVY(0)
1891 "movq 8(%0, %%"REG_a
", 4), %%mm1 \n\t" // UYVY UYVY(4)
1892 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
1893 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4)
1894 "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0)
1895 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4)
1896 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
1897 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
1898 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
1899 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
1901 MOVNTQ
" %%mm2, (%1, %%"REG_a
", 2) \n\t"
1903 "movq 16(%0, %%"REG_a
", 4), %%mm1 \n\t" // UYVY UYVY(8)
1904 "movq 24(%0, %%"REG_a
", 4), %%mm2 \n\t" // UYVY UYVY(12)
1905 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8)
1906 "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12)
1907 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8)
1908 "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12)
1909 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
1910 "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
1911 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
1912 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
1914 MOVNTQ
" %%mm3, 8(%1, %%"REG_a
", 2) \n\t"
1916 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
1917 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
1918 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
1919 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
1920 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
1921 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
1922 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
1923 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
1925 MOVNTQ
" %%mm0, (%3, %%"REG_a
") \n\t"
1926 MOVNTQ
" %%mm2, (%2, %%"REG_a
") \n\t"
1928 "add $8, %%"REG_a
" \n\t"
1929 "cmp %4, %%"REG_a
" \n\t"
1931 ::"r"(src
), "r"(ydst
), "r"(udst
), "r"(vdst
), "g" (chromWidth
)
1932 : "memory", "%"REG_a
1939 "xor %%"REG_a
", %%"REG_a
" \n\t"
1942 PREFETCH
" 64(%0, %%"REG_a
", 4) \n\t"
1943 "movq (%0, %%"REG_a
", 4), %%mm0 \n\t" // YUYV YUYV(0)
1944 "movq 8(%0, %%"REG_a
", 4), %%mm1 \n\t" // YUYV YUYV(4)
1945 "movq 16(%0, %%"REG_a
", 4), %%mm2 \n\t" // YUYV YUYV(8)
1946 "movq 24(%0, %%"REG_a
", 4), %%mm3 \n\t" // YUYV YUYV(12)
1947 "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
1948 "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
1949 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
1950 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
1951 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
1952 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
1954 MOVNTQ
" %%mm0, (%1, %%"REG_a
", 2) \n\t"
1955 MOVNTQ
" %%mm2, 8(%1, %%"REG_a
", 2) \n\t"
1957 "add $8, %%"REG_a
" \n\t"
1958 "cmp %4, %%"REG_a
" \n\t"
1961 ::"r"(src
), "r"(ydst
), "r"(udst
), "r"(vdst
), "g" (chromWidth
)
1962 : "memory", "%"REG_a
1966 for (i
=0; i
<chromWidth
; i
++) {
1967 udst
[i
] = src
[4*i
+0];
1968 ydst
[2*i
+0] = src
[4*i
+1];
1969 vdst
[i
] = src
[4*i
+2];
1970 ydst
[2*i
+1] = src
[4*i
+3];
1975 for (i
=0; i
<chromWidth
; i
++) {
1976 ydst
[2*i
+0] = src
[4*i
+1];
1977 ydst
[2*i
+1] = src
[4*i
+3];
1980 udst
+= chromStride
;
1981 vdst
+= chromStride
;
1985 #if COMPILE_TEMPLATE_MMX
1986 __asm__
volatile(EMMS
" \n\t"
1993 * Height should be a multiple of 2 and width should be a multiple of 2.
1994 * (If this is a problem for anyone then tell me, and I will fix it.)
1995 * Chrominance data is only taken from every second line,
1996 * others are ignored in the C version.
1997 * FIXME: Write HQ version.
1999 static inline void RENAME(rgb24toyv12
)(const uint8_t *src
, uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
,
2000 long width
, long height
,
2001 long lumStride
, long chromStride
, long srcStride
)
2004 const x86_reg chromWidth
= width
>>1;
2005 #if COMPILE_TEMPLATE_MMX
2006 for (y
=0; y
<height
-2; y
+=2) {
2008 for (i
=0; i
<2; i
++) {
2010 "mov %2, %%"REG_a
" \n\t"
2011 "movq "MANGLE(ff_bgr2YCoeff
)", %%mm6 \n\t"
2012 "movq "MANGLE(ff_w1111
)", %%mm5 \n\t"
2013 "pxor %%mm7, %%mm7 \n\t"
2014 "lea (%%"REG_a
", %%"REG_a
", 2), %%"REG_d
" \n\t"
2017 PREFETCH
" 64(%0, %%"REG_d
") \n\t"
2018 "movd (%0, %%"REG_d
"), %%mm0 \n\t"
2019 "movd 3(%0, %%"REG_d
"), %%mm1 \n\t"
2020 "punpcklbw %%mm7, %%mm0 \n\t"
2021 "punpcklbw %%mm7, %%mm1 \n\t"
2022 "movd 6(%0, %%"REG_d
"), %%mm2 \n\t"
2023 "movd 9(%0, %%"REG_d
"), %%mm3 \n\t"
2024 "punpcklbw %%mm7, %%mm2 \n\t"
2025 "punpcklbw %%mm7, %%mm3 \n\t"
2026 "pmaddwd %%mm6, %%mm0 \n\t"
2027 "pmaddwd %%mm6, %%mm1 \n\t"
2028 "pmaddwd %%mm6, %%mm2 \n\t"
2029 "pmaddwd %%mm6, %%mm3 \n\t"
2030 #ifndef FAST_BGR2YV12
2031 "psrad $8, %%mm0 \n\t"
2032 "psrad $8, %%mm1 \n\t"
2033 "psrad $8, %%mm2 \n\t"
2034 "psrad $8, %%mm3 \n\t"
2036 "packssdw %%mm1, %%mm0 \n\t"
2037 "packssdw %%mm3, %%mm2 \n\t"
2038 "pmaddwd %%mm5, %%mm0 \n\t"
2039 "pmaddwd %%mm5, %%mm2 \n\t"
2040 "packssdw %%mm2, %%mm0 \n\t"
2041 "psraw $7, %%mm0 \n\t"
2043 "movd 12(%0, %%"REG_d
"), %%mm4 \n\t"
2044 "movd 15(%0, %%"REG_d
"), %%mm1 \n\t"
2045 "punpcklbw %%mm7, %%mm4 \n\t"
2046 "punpcklbw %%mm7, %%mm1 \n\t"
2047 "movd 18(%0, %%"REG_d
"), %%mm2 \n\t"
2048 "movd 21(%0, %%"REG_d
"), %%mm3 \n\t"
2049 "punpcklbw %%mm7, %%mm2 \n\t"
2050 "punpcklbw %%mm7, %%mm3 \n\t"
2051 "pmaddwd %%mm6, %%mm4 \n\t"
2052 "pmaddwd %%mm6, %%mm1 \n\t"
2053 "pmaddwd %%mm6, %%mm2 \n\t"
2054 "pmaddwd %%mm6, %%mm3 \n\t"
2055 #ifndef FAST_BGR2YV12
2056 "psrad $8, %%mm4 \n\t"
2057 "psrad $8, %%mm1 \n\t"
2058 "psrad $8, %%mm2 \n\t"
2059 "psrad $8, %%mm3 \n\t"
2061 "packssdw %%mm1, %%mm4 \n\t"
2062 "packssdw %%mm3, %%mm2 \n\t"
2063 "pmaddwd %%mm5, %%mm4 \n\t"
2064 "pmaddwd %%mm5, %%mm2 \n\t"
2065 "add $24, %%"REG_d
" \n\t"
2066 "packssdw %%mm2, %%mm4 \n\t"
2067 "psraw $7, %%mm4 \n\t"
2069 "packuswb %%mm4, %%mm0 \n\t"
2070 "paddusb "MANGLE(ff_bgr2YOffset
)", %%mm0 \n\t"
2072 MOVNTQ
" %%mm0, (%1, %%"REG_a
") \n\t"
2073 "add $8, %%"REG_a
" \n\t"
2075 : : "r" (src
+width
*3), "r" (ydst
+width
), "g" ((x86_reg
)-width
)
2076 : "%"REG_a
, "%"REG_d
2083 "mov %4, %%"REG_a
" \n\t"
2084 "movq "MANGLE(ff_w1111
)", %%mm5 \n\t"
2085 "movq "MANGLE(ff_bgr2UCoeff
)", %%mm6 \n\t"
2086 "pxor %%mm7, %%mm7 \n\t"
2087 "lea (%%"REG_a
", %%"REG_a
", 2), %%"REG_d
" \n\t"
2088 "add %%"REG_d
", %%"REG_d
" \n\t"
2091 PREFETCH
" 64(%0, %%"REG_d
") \n\t"
2092 PREFETCH
" 64(%1, %%"REG_d
") \n\t"
2093 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
2094 "movq (%0, %%"REG_d
"), %%mm0 \n\t"
2095 "movq (%1, %%"REG_d
"), %%mm1 \n\t"
2096 "movq 6(%0, %%"REG_d
"), %%mm2 \n\t"
2097 "movq 6(%1, %%"REG_d
"), %%mm3 \n\t"
2098 PAVGB
" %%mm1, %%mm0 \n\t"
2099 PAVGB
" %%mm3, %%mm2 \n\t"
2100 "movq %%mm0, %%mm1 \n\t"
2101 "movq %%mm2, %%mm3 \n\t"
2102 "psrlq $24, %%mm0 \n\t"
2103 "psrlq $24, %%mm2 \n\t"
2104 PAVGB
" %%mm1, %%mm0 \n\t"
2105 PAVGB
" %%mm3, %%mm2 \n\t"
2106 "punpcklbw %%mm7, %%mm0 \n\t"
2107 "punpcklbw %%mm7, %%mm2 \n\t"
2109 "movd (%0, %%"REG_d
"), %%mm0 \n\t"
2110 "movd (%1, %%"REG_d
"), %%mm1 \n\t"
2111 "movd 3(%0, %%"REG_d
"), %%mm2 \n\t"
2112 "movd 3(%1, %%"REG_d
"), %%mm3 \n\t"
2113 "punpcklbw %%mm7, %%mm0 \n\t"
2114 "punpcklbw %%mm7, %%mm1 \n\t"
2115 "punpcklbw %%mm7, %%mm2 \n\t"
2116 "punpcklbw %%mm7, %%mm3 \n\t"
2117 "paddw %%mm1, %%mm0 \n\t"
2118 "paddw %%mm3, %%mm2 \n\t"
2119 "paddw %%mm2, %%mm0 \n\t"
2120 "movd 6(%0, %%"REG_d
"), %%mm4 \n\t"
2121 "movd 6(%1, %%"REG_d
"), %%mm1 \n\t"
2122 "movd 9(%0, %%"REG_d
"), %%mm2 \n\t"
2123 "movd 9(%1, %%"REG_d
"), %%mm3 \n\t"
2124 "punpcklbw %%mm7, %%mm4 \n\t"
2125 "punpcklbw %%mm7, %%mm1 \n\t"
2126 "punpcklbw %%mm7, %%mm2 \n\t"
2127 "punpcklbw %%mm7, %%mm3 \n\t"
2128 "paddw %%mm1, %%mm4 \n\t"
2129 "paddw %%mm3, %%mm2 \n\t"
2130 "paddw %%mm4, %%mm2 \n\t"
2131 "psrlw $2, %%mm0 \n\t"
2132 "psrlw $2, %%mm2 \n\t"
2134 "movq "MANGLE(ff_bgr2VCoeff
)", %%mm1 \n\t"
2135 "movq "MANGLE(ff_bgr2VCoeff
)", %%mm3 \n\t"
2137 "pmaddwd %%mm0, %%mm1 \n\t"
2138 "pmaddwd %%mm2, %%mm3 \n\t"
2139 "pmaddwd %%mm6, %%mm0 \n\t"
2140 "pmaddwd %%mm6, %%mm2 \n\t"
2141 #ifndef FAST_BGR2YV12
2142 "psrad $8, %%mm0 \n\t"
2143 "psrad $8, %%mm1 \n\t"
2144 "psrad $8, %%mm2 \n\t"
2145 "psrad $8, %%mm3 \n\t"
2147 "packssdw %%mm2, %%mm0 \n\t"
2148 "packssdw %%mm3, %%mm1 \n\t"
2149 "pmaddwd %%mm5, %%mm0 \n\t"
2150 "pmaddwd %%mm5, %%mm1 \n\t"
2151 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0
2152 "psraw $7, %%mm0 \n\t"
2154 #if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW
2155 "movq 12(%0, %%"REG_d
"), %%mm4 \n\t"
2156 "movq 12(%1, %%"REG_d
"), %%mm1 \n\t"
2157 "movq 18(%0, %%"REG_d
"), %%mm2 \n\t"
2158 "movq 18(%1, %%"REG_d
"), %%mm3 \n\t"
2159 PAVGB
" %%mm1, %%mm4 \n\t"
2160 PAVGB
" %%mm3, %%mm2 \n\t"
2161 "movq %%mm4, %%mm1 \n\t"
2162 "movq %%mm2, %%mm3 \n\t"
2163 "psrlq $24, %%mm4 \n\t"
2164 "psrlq $24, %%mm2 \n\t"
2165 PAVGB
" %%mm1, %%mm4 \n\t"
2166 PAVGB
" %%mm3, %%mm2 \n\t"
2167 "punpcklbw %%mm7, %%mm4 \n\t"
2168 "punpcklbw %%mm7, %%mm2 \n\t"
2170 "movd 12(%0, %%"REG_d
"), %%mm4 \n\t"
2171 "movd 12(%1, %%"REG_d
"), %%mm1 \n\t"
2172 "movd 15(%0, %%"REG_d
"), %%mm2 \n\t"
2173 "movd 15(%1, %%"REG_d
"), %%mm3 \n\t"
2174 "punpcklbw %%mm7, %%mm4 \n\t"
2175 "punpcklbw %%mm7, %%mm1 \n\t"
2176 "punpcklbw %%mm7, %%mm2 \n\t"
2177 "punpcklbw %%mm7, %%mm3 \n\t"
2178 "paddw %%mm1, %%mm4 \n\t"
2179 "paddw %%mm3, %%mm2 \n\t"
2180 "paddw %%mm2, %%mm4 \n\t"
2181 "movd 18(%0, %%"REG_d
"), %%mm5 \n\t"
2182 "movd 18(%1, %%"REG_d
"), %%mm1 \n\t"
2183 "movd 21(%0, %%"REG_d
"), %%mm2 \n\t"
2184 "movd 21(%1, %%"REG_d
"), %%mm3 \n\t"
2185 "punpcklbw %%mm7, %%mm5 \n\t"
2186 "punpcklbw %%mm7, %%mm1 \n\t"
2187 "punpcklbw %%mm7, %%mm2 \n\t"
2188 "punpcklbw %%mm7, %%mm3 \n\t"
2189 "paddw %%mm1, %%mm5 \n\t"
2190 "paddw %%mm3, %%mm2 \n\t"
2191 "paddw %%mm5, %%mm2 \n\t"
2192 "movq "MANGLE(ff_w1111
)", %%mm5 \n\t"
2193 "psrlw $2, %%mm4 \n\t"
2194 "psrlw $2, %%mm2 \n\t"
2196 "movq "MANGLE(ff_bgr2VCoeff
)", %%mm1 \n\t"
2197 "movq "MANGLE(ff_bgr2VCoeff
)", %%mm3 \n\t"
2199 "pmaddwd %%mm4, %%mm1 \n\t"
2200 "pmaddwd %%mm2, %%mm3 \n\t"
2201 "pmaddwd %%mm6, %%mm4 \n\t"
2202 "pmaddwd %%mm6, %%mm2 \n\t"
2203 #ifndef FAST_BGR2YV12
2204 "psrad $8, %%mm4 \n\t"
2205 "psrad $8, %%mm1 \n\t"
2206 "psrad $8, %%mm2 \n\t"
2207 "psrad $8, %%mm3 \n\t"
2209 "packssdw %%mm2, %%mm4 \n\t"
2210 "packssdw %%mm3, %%mm1 \n\t"
2211 "pmaddwd %%mm5, %%mm4 \n\t"
2212 "pmaddwd %%mm5, %%mm1 \n\t"
2213 "add $24, %%"REG_d
" \n\t"
2214 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2
2215 "psraw $7, %%mm4 \n\t"
2217 "movq %%mm0, %%mm1 \n\t"
2218 "punpckldq %%mm4, %%mm0 \n\t"
2219 "punpckhdq %%mm4, %%mm1 \n\t"
2220 "packsswb %%mm1, %%mm0 \n\t"
2221 "paddb "MANGLE(ff_bgr2UVOffset
)", %%mm0 \n\t"
2222 "movd %%mm0, (%2, %%"REG_a
") \n\t"
2223 "punpckhdq %%mm0, %%mm0 \n\t"
2224 "movd %%mm0, (%3, %%"REG_a
") \n\t"
2225 "add $4, %%"REG_a
" \n\t"
2227 : : "r" (src
+chromWidth
*6), "r" (src
+srcStride
+chromWidth
*6), "r" (udst
+chromWidth
), "r" (vdst
+chromWidth
), "g" (-chromWidth
)
2228 : "%"REG_a
, "%"REG_d
2231 udst
+= chromStride
;
2232 vdst
+= chromStride
;
2236 __asm__
volatile(EMMS
" \n\t"
2242 for (; y
<height
; y
+=2) {
2244 for (i
=0; i
<chromWidth
; i
++) {
2245 unsigned int b
= src
[6*i
+0];
2246 unsigned int g
= src
[6*i
+1];
2247 unsigned int r
= src
[6*i
+2];
2249 unsigned int Y
= ((RY
*r
+ GY
*g
+ BY
*b
)>>RGB2YUV_SHIFT
) + 16;
2250 unsigned int V
= ((RV
*r
+ GV
*g
+ BV
*b
)>>RGB2YUV_SHIFT
) + 128;
2251 unsigned int U
= ((RU
*r
+ GU
*g
+ BU
*b
)>>RGB2YUV_SHIFT
) + 128;
2261 Y
= ((RY
*r
+ GY
*g
+ BY
*b
)>>RGB2YUV_SHIFT
) + 16;
2267 for (i
=0; i
<chromWidth
; i
++) {
2268 unsigned int b
= src
[6*i
+0];
2269 unsigned int g
= src
[6*i
+1];
2270 unsigned int r
= src
[6*i
+2];
2272 unsigned int Y
= ((RY
*r
+ GY
*g
+ BY
*b
)>>RGB2YUV_SHIFT
) + 16;
2280 Y
= ((RY
*r
+ GY
*g
+ BY
*b
)>>RGB2YUV_SHIFT
) + 16;
2283 udst
+= chromStride
;
2284 vdst
+= chromStride
;
2290 static void RENAME(interleaveBytes
)(const uint8_t *src1
, const uint8_t *src2
, uint8_t *dest
,
2291 long width
, long height
, long src1Stride
,
2292 long src2Stride
, long dstStride
)
2296 for (h
=0; h
< height
; h
++) {
2299 #if COMPILE_TEMPLATE_MMX
2300 #if COMPILE_TEMPLATE_SSE2
2302 "xor %%"REG_a
", %%"REG_a
" \n\t"
2304 PREFETCH
" 64(%1, %%"REG_a
") \n\t"
2305 PREFETCH
" 64(%2, %%"REG_a
") \n\t"
2306 "movdqa (%1, %%"REG_a
"), %%xmm0 \n\t"
2307 "movdqa (%1, %%"REG_a
"), %%xmm1 \n\t"
2308 "movdqa (%2, %%"REG_a
"), %%xmm2 \n\t"
2309 "punpcklbw %%xmm2, %%xmm0 \n\t"
2310 "punpckhbw %%xmm2, %%xmm1 \n\t"
2311 "movntdq %%xmm0, (%0, %%"REG_a
", 2) \n\t"
2312 "movntdq %%xmm1, 16(%0, %%"REG_a
", 2) \n\t"
2313 "add $16, %%"REG_a
" \n\t"
2314 "cmp %3, %%"REG_a
" \n\t"
2316 ::"r"(dest
), "r"(src1
), "r"(src2
), "r" ((x86_reg
)width
-15)
2317 : "memory", "%"REG_a
""
2321 "xor %%"REG_a
", %%"REG_a
" \n\t"
2323 PREFETCH
" 64(%1, %%"REG_a
") \n\t"
2324 PREFETCH
" 64(%2, %%"REG_a
") \n\t"
2325 "movq (%1, %%"REG_a
"), %%mm0 \n\t"
2326 "movq 8(%1, %%"REG_a
"), %%mm2 \n\t"
2327 "movq %%mm0, %%mm1 \n\t"
2328 "movq %%mm2, %%mm3 \n\t"
2329 "movq (%2, %%"REG_a
"), %%mm4 \n\t"
2330 "movq 8(%2, %%"REG_a
"), %%mm5 \n\t"
2331 "punpcklbw %%mm4, %%mm0 \n\t"
2332 "punpckhbw %%mm4, %%mm1 \n\t"
2333 "punpcklbw %%mm5, %%mm2 \n\t"
2334 "punpckhbw %%mm5, %%mm3 \n\t"
2335 MOVNTQ
" %%mm0, (%0, %%"REG_a
", 2) \n\t"
2336 MOVNTQ
" %%mm1, 8(%0, %%"REG_a
", 2) \n\t"
2337 MOVNTQ
" %%mm2, 16(%0, %%"REG_a
", 2) \n\t"
2338 MOVNTQ
" %%mm3, 24(%0, %%"REG_a
", 2) \n\t"
2339 "add $16, %%"REG_a
" \n\t"
2340 "cmp %3, %%"REG_a
" \n\t"
2342 ::"r"(dest
), "r"(src1
), "r"(src2
), "r" ((x86_reg
)width
-15)
2343 : "memory", "%"REG_a
2346 for (w
= (width
&(~15)); w
< width
; w
++) {
2347 dest
[2*w
+0] = src1
[w
];
2348 dest
[2*w
+1] = src2
[w
];
2351 for (w
=0; w
< width
; w
++) {
2352 dest
[2*w
+0] = src1
[w
];
2353 dest
[2*w
+1] = src2
[w
];
2360 #if COMPILE_TEMPLATE_MMX
2369 static inline void RENAME(vu9_to_vu12
)(const uint8_t *src1
, const uint8_t *src2
,
2370 uint8_t *dst1
, uint8_t *dst2
,
2371 long width
, long height
,
2372 long srcStride1
, long srcStride2
,
2373 long dstStride1
, long dstStride2
)
2377 w
=width
/2; h
=height
/2;
2378 #if COMPILE_TEMPLATE_MMX
2382 ::"m"(*(src1
+srcStride1
)),"m"(*(src2
+srcStride2
)):"memory");
2385 const uint8_t* s1
=src1
+srcStride1
*(y
>>1);
2386 uint8_t* d
=dst1
+dstStride1
*y
;
2388 #if COMPILE_TEMPLATE_MMX
2389 for (;x
<w
-31;x
+=32) {
2391 PREFETCH
" 32%1 \n\t"
2392 "movq %1, %%mm0 \n\t"
2393 "movq 8%1, %%mm2 \n\t"
2394 "movq 16%1, %%mm4 \n\t"
2395 "movq 24%1, %%mm6 \n\t"
2396 "movq %%mm0, %%mm1 \n\t"
2397 "movq %%mm2, %%mm3 \n\t"
2398 "movq %%mm4, %%mm5 \n\t"
2399 "movq %%mm6, %%mm7 \n\t"
2400 "punpcklbw %%mm0, %%mm0 \n\t"
2401 "punpckhbw %%mm1, %%mm1 \n\t"
2402 "punpcklbw %%mm2, %%mm2 \n\t"
2403 "punpckhbw %%mm3, %%mm3 \n\t"
2404 "punpcklbw %%mm4, %%mm4 \n\t"
2405 "punpckhbw %%mm5, %%mm5 \n\t"
2406 "punpcklbw %%mm6, %%mm6 \n\t"
2407 "punpckhbw %%mm7, %%mm7 \n\t"
2408 MOVNTQ
" %%mm0, %0 \n\t"
2409 MOVNTQ
" %%mm1, 8%0 \n\t"
2410 MOVNTQ
" %%mm2, 16%0 \n\t"
2411 MOVNTQ
" %%mm3, 24%0 \n\t"
2412 MOVNTQ
" %%mm4, 32%0 \n\t"
2413 MOVNTQ
" %%mm5, 40%0 \n\t"
2414 MOVNTQ
" %%mm6, 48%0 \n\t"
2415 MOVNTQ
" %%mm7, 56%0"
2421 for (;x
<w
;x
++) d
[2*x
]=d
[2*x
+1]=s1
[x
];
2424 const uint8_t* s2
=src2
+srcStride2
*(y
>>1);
2425 uint8_t* d
=dst2
+dstStride2
*y
;
2427 #if COMPILE_TEMPLATE_MMX
2428 for (;x
<w
-31;x
+=32) {
2430 PREFETCH
" 32%1 \n\t"
2431 "movq %1, %%mm0 \n\t"
2432 "movq 8%1, %%mm2 \n\t"
2433 "movq 16%1, %%mm4 \n\t"
2434 "movq 24%1, %%mm6 \n\t"
2435 "movq %%mm0, %%mm1 \n\t"
2436 "movq %%mm2, %%mm3 \n\t"
2437 "movq %%mm4, %%mm5 \n\t"
2438 "movq %%mm6, %%mm7 \n\t"
2439 "punpcklbw %%mm0, %%mm0 \n\t"
2440 "punpckhbw %%mm1, %%mm1 \n\t"
2441 "punpcklbw %%mm2, %%mm2 \n\t"
2442 "punpckhbw %%mm3, %%mm3 \n\t"
2443 "punpcklbw %%mm4, %%mm4 \n\t"
2444 "punpckhbw %%mm5, %%mm5 \n\t"
2445 "punpcklbw %%mm6, %%mm6 \n\t"
2446 "punpckhbw %%mm7, %%mm7 \n\t"
2447 MOVNTQ
" %%mm0, %0 \n\t"
2448 MOVNTQ
" %%mm1, 8%0 \n\t"
2449 MOVNTQ
" %%mm2, 16%0 \n\t"
2450 MOVNTQ
" %%mm3, 24%0 \n\t"
2451 MOVNTQ
" %%mm4, 32%0 \n\t"
2452 MOVNTQ
" %%mm5, 40%0 \n\t"
2453 MOVNTQ
" %%mm6, 48%0 \n\t"
2454 MOVNTQ
" %%mm7, 56%0"
2460 for (;x
<w
;x
++) d
[2*x
]=d
[2*x
+1]=s2
[x
];
2462 #if COMPILE_TEMPLATE_MMX
2471 static inline void RENAME(yvu9_to_yuy2
)(const uint8_t *src1
, const uint8_t *src2
, const uint8_t *src3
,
2473 long width
, long height
,
2474 long srcStride1
, long srcStride2
,
2475 long srcStride3
, long dstStride
)
2479 w
=width
/2; h
=height
;
2481 const uint8_t* yp
=src1
+srcStride1
*y
;
2482 const uint8_t* up
=src2
+srcStride2
*(y
>>2);
2483 const uint8_t* vp
=src3
+srcStride3
*(y
>>2);
2484 uint8_t* d
=dst
+dstStride
*y
;
2486 #if COMPILE_TEMPLATE_MMX
2489 PREFETCH
" 32(%1, %0) \n\t"
2490 PREFETCH
" 32(%2, %0) \n\t"
2491 PREFETCH
" 32(%3, %0) \n\t"
2492 "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
2493 "movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */
2494 "movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */
2495 "movq %%mm0, %%mm3 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
2496 "movq %%mm1, %%mm4 \n\t" /* U0U1U2U3U4U5U6U7 */
2497 "movq %%mm2, %%mm5 \n\t" /* V0V1V2V3V4V5V6V7 */
2498 "punpcklbw %%mm1, %%mm1 \n\t" /* U0U0 U1U1 U2U2 U3U3 */
2499 "punpcklbw %%mm2, %%mm2 \n\t" /* V0V0 V1V1 V2V2 V3V3 */
2500 "punpckhbw %%mm4, %%mm4 \n\t" /* U4U4 U5U5 U6U6 U7U7 */
2501 "punpckhbw %%mm5, %%mm5 \n\t" /* V4V4 V5V5 V6V6 V7V7 */
2503 "movq %%mm1, %%mm6 \n\t"
2504 "punpcklbw %%mm2, %%mm1 \n\t" /* U0V0 U0V0 U1V1 U1V1*/
2505 "punpcklbw %%mm1, %%mm0 \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
2506 "punpckhbw %%mm1, %%mm3 \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
2507 MOVNTQ
" %%mm0, (%4, %0, 8) \n\t"
2508 MOVNTQ
" %%mm3, 8(%4, %0, 8) \n\t"
2510 "punpckhbw %%mm2, %%mm6 \n\t" /* U2V2 U2V2 U3V3 U3V3*/
2511 "movq 8(%1, %0, 4), %%mm0 \n\t"
2512 "movq %%mm0, %%mm3 \n\t"
2513 "punpcklbw %%mm6, %%mm0 \n\t" /* Y U2 Y V2 Y U2 Y V2*/
2514 "punpckhbw %%mm6, %%mm3 \n\t" /* Y U3 Y V3 Y U3 Y V3*/
2515 MOVNTQ
" %%mm0, 16(%4, %0, 8) \n\t"
2516 MOVNTQ
" %%mm3, 24(%4, %0, 8) \n\t"
2518 "movq %%mm4, %%mm6 \n\t"
2519 "movq 16(%1, %0, 4), %%mm0 \n\t"
2520 "movq %%mm0, %%mm3 \n\t"
2521 "punpcklbw %%mm5, %%mm4 \n\t"
2522 "punpcklbw %%mm4, %%mm0 \n\t" /* Y U4 Y V4 Y U4 Y V4*/
2523 "punpckhbw %%mm4, %%mm3 \n\t" /* Y U5 Y V5 Y U5 Y V5*/
2524 MOVNTQ
" %%mm0, 32(%4, %0, 8) \n\t"
2525 MOVNTQ
" %%mm3, 40(%4, %0, 8) \n\t"
2527 "punpckhbw %%mm5, %%mm6 \n\t"
2528 "movq 24(%1, %0, 4), %%mm0 \n\t"
2529 "movq %%mm0, %%mm3 \n\t"
2530 "punpcklbw %%mm6, %%mm0 \n\t" /* Y U6 Y V6 Y U6 Y V6*/
2531 "punpckhbw %%mm6, %%mm3 \n\t" /* Y U7 Y V7 Y U7 Y V7*/
2532 MOVNTQ
" %%mm0, 48(%4, %0, 8) \n\t"
2533 MOVNTQ
" %%mm3, 56(%4, %0, 8) \n\t"
2536 : "r"(yp
), "r" (up
), "r"(vp
), "r"(d
)
2541 const long x2
= x
<<2;
2544 d
[8*x
+2] = yp
[x2
+1];
2546 d
[8*x
+4] = yp
[x2
+2];
2548 d
[8*x
+6] = yp
[x2
+3];
2552 #if COMPILE_TEMPLATE_MMX
2561 static void RENAME(extract_even
)(const uint8_t *src
, uint8_t *dst
, x86_reg count
)
2567 #if COMPILE_TEMPLATE_MMX
2571 "pcmpeqw %%mm7, %%mm7 \n\t"
2572 "psrlw $8, %%mm7 \n\t"
2574 "movq -30(%1, %0, 2), %%mm0 \n\t"
2575 "movq -22(%1, %0, 2), %%mm1 \n\t"
2576 "movq -14(%1, %0, 2), %%mm2 \n\t"
2577 "movq -6(%1, %0, 2), %%mm3 \n\t"
2578 "pand %%mm7, %%mm0 \n\t"
2579 "pand %%mm7, %%mm1 \n\t"
2580 "pand %%mm7, %%mm2 \n\t"
2581 "pand %%mm7, %%mm3 \n\t"
2582 "packuswb %%mm1, %%mm0 \n\t"
2583 "packuswb %%mm3, %%mm2 \n\t"
2584 MOVNTQ
" %%mm0,-15(%2, %0) \n\t"
2585 MOVNTQ
" %%mm2,- 7(%2, %0) \n\t"
2589 : "r"(src
), "r"(dst
)
2595 dst
[count
]= src
[2*count
];
2600 static void RENAME(extract_even2
)(const uint8_t *src
, uint8_t *dst0
, uint8_t *dst1
, x86_reg count
)
2606 #if COMPILE_TEMPLATE_MMX
2610 "pcmpeqw %%mm7, %%mm7 \n\t"
2611 "psrlw $8, %%mm7 \n\t"
2613 "movq -28(%1, %0, 4), %%mm0 \n\t"
2614 "movq -20(%1, %0, 4), %%mm1 \n\t"
2615 "movq -12(%1, %0, 4), %%mm2 \n\t"
2616 "movq -4(%1, %0, 4), %%mm3 \n\t"
2617 "pand %%mm7, %%mm0 \n\t"
2618 "pand %%mm7, %%mm1 \n\t"
2619 "pand %%mm7, %%mm2 \n\t"
2620 "pand %%mm7, %%mm3 \n\t"
2621 "packuswb %%mm1, %%mm0 \n\t"
2622 "packuswb %%mm3, %%mm2 \n\t"
2623 "movq %%mm0, %%mm1 \n\t"
2624 "movq %%mm2, %%mm3 \n\t"
2625 "psrlw $8, %%mm0 \n\t"
2626 "psrlw $8, %%mm2 \n\t"
2627 "pand %%mm7, %%mm1 \n\t"
2628 "pand %%mm7, %%mm3 \n\t"
2629 "packuswb %%mm2, %%mm0 \n\t"
2630 "packuswb %%mm3, %%mm1 \n\t"
2631 MOVNTQ
" %%mm0,- 7(%3, %0) \n\t"
2632 MOVNTQ
" %%mm1,- 7(%2, %0) \n\t"
2636 : "r"(src
), "r"(dst0
), "r"(dst1
)
2642 dst0
[count
]= src
[4*count
+0];
2643 dst1
[count
]= src
[4*count
+2];
2648 static void RENAME(extract_even2avg
)(const uint8_t *src0
, const uint8_t *src1
, uint8_t *dst0
, uint8_t *dst1
, x86_reg count
)
2659 "pcmpeqw %%mm7, %%mm7 \n\t"
2660 "psrlw $8, %%mm7 \n\t"
2662 "movq -28(%1, %0, 4), %%mm0 \n\t"
2663 "movq -20(%1, %0, 4), %%mm1 \n\t"
2664 "movq -12(%1, %0, 4), %%mm2 \n\t"
2665 "movq -4(%1, %0, 4), %%mm3 \n\t"
2666 PAVGB
" -28(%2, %0, 4), %%mm0 \n\t"
2667 PAVGB
" -20(%2, %0, 4), %%mm1 \n\t"
2668 PAVGB
" -12(%2, %0, 4), %%mm2 \n\t"
2669 PAVGB
" - 4(%2, %0, 4), %%mm3 \n\t"
2670 "pand %%mm7, %%mm0 \n\t"
2671 "pand %%mm7, %%mm1 \n\t"
2672 "pand %%mm7, %%mm2 \n\t"
2673 "pand %%mm7, %%mm3 \n\t"
2674 "packuswb %%mm1, %%mm0 \n\t"
2675 "packuswb %%mm3, %%mm2 \n\t"
2676 "movq %%mm0, %%mm1 \n\t"
2677 "movq %%mm2, %%mm3 \n\t"
2678 "psrlw $8, %%mm0 \n\t"
2679 "psrlw $8, %%mm2 \n\t"
2680 "pand %%mm7, %%mm1 \n\t"
2681 "pand %%mm7, %%mm3 \n\t"
2682 "packuswb %%mm2, %%mm0 \n\t"
2683 "packuswb %%mm3, %%mm1 \n\t"
2684 MOVNTQ
" %%mm0,- 7(%4, %0) \n\t"
2685 MOVNTQ
" %%mm1,- 7(%3, %0) \n\t"
2689 : "r"(src0
), "r"(src1
), "r"(dst0
), "r"(dst1
)
2695 dst0
[count
]= (src0
[4*count
+0]+src1
[4*count
+0])>>1;
2696 dst1
[count
]= (src0
[4*count
+2]+src1
[4*count
+2])>>1;
2701 static void RENAME(extract_odd2
)(const uint8_t *src
, uint8_t *dst0
, uint8_t *dst1
, x86_reg count
)
2707 #if COMPILE_TEMPLATE_MMX
2711 "pcmpeqw %%mm7, %%mm7 \n\t"
2712 "psrlw $8, %%mm7 \n\t"
2714 "movq -28(%1, %0, 4), %%mm0 \n\t"
2715 "movq -20(%1, %0, 4), %%mm1 \n\t"
2716 "movq -12(%1, %0, 4), %%mm2 \n\t"
2717 "movq -4(%1, %0, 4), %%mm3 \n\t"
2718 "psrlw $8, %%mm0 \n\t"
2719 "psrlw $8, %%mm1 \n\t"
2720 "psrlw $8, %%mm2 \n\t"
2721 "psrlw $8, %%mm3 \n\t"
2722 "packuswb %%mm1, %%mm0 \n\t"
2723 "packuswb %%mm3, %%mm2 \n\t"
2724 "movq %%mm0, %%mm1 \n\t"
2725 "movq %%mm2, %%mm3 \n\t"
2726 "psrlw $8, %%mm0 \n\t"
2727 "psrlw $8, %%mm2 \n\t"
2728 "pand %%mm7, %%mm1 \n\t"
2729 "pand %%mm7, %%mm3 \n\t"
2730 "packuswb %%mm2, %%mm0 \n\t"
2731 "packuswb %%mm3, %%mm1 \n\t"
2732 MOVNTQ
" %%mm0,- 7(%3, %0) \n\t"
2733 MOVNTQ
" %%mm1,- 7(%2, %0) \n\t"
2737 : "r"(src
), "r"(dst0
), "r"(dst1
)
2744 dst0
[count
]= src
[4*count
+0];
2745 dst1
[count
]= src
[4*count
+2];
2750 static void RENAME(extract_odd2avg
)(const uint8_t *src0
, const uint8_t *src1
, uint8_t *dst0
, uint8_t *dst1
, x86_reg count
)
2761 "pcmpeqw %%mm7, %%mm7 \n\t"
2762 "psrlw $8, %%mm7 \n\t"
2764 "movq -28(%1, %0, 4), %%mm0 \n\t"
2765 "movq -20(%1, %0, 4), %%mm1 \n\t"
2766 "movq -12(%1, %0, 4), %%mm2 \n\t"
2767 "movq -4(%1, %0, 4), %%mm3 \n\t"
2768 PAVGB
" -28(%2, %0, 4), %%mm0 \n\t"
2769 PAVGB
" -20(%2, %0, 4), %%mm1 \n\t"
2770 PAVGB
" -12(%2, %0, 4), %%mm2 \n\t"
2771 PAVGB
" - 4(%2, %0, 4), %%mm3 \n\t"
2772 "psrlw $8, %%mm0 \n\t"
2773 "psrlw $8, %%mm1 \n\t"
2774 "psrlw $8, %%mm2 \n\t"
2775 "psrlw $8, %%mm3 \n\t"
2776 "packuswb %%mm1, %%mm0 \n\t"
2777 "packuswb %%mm3, %%mm2 \n\t"
2778 "movq %%mm0, %%mm1 \n\t"
2779 "movq %%mm2, %%mm3 \n\t"
2780 "psrlw $8, %%mm0 \n\t"
2781 "psrlw $8, %%mm2 \n\t"
2782 "pand %%mm7, %%mm1 \n\t"
2783 "pand %%mm7, %%mm3 \n\t"
2784 "packuswb %%mm2, %%mm0 \n\t"
2785 "packuswb %%mm3, %%mm1 \n\t"
2786 MOVNTQ
" %%mm0,- 7(%4, %0) \n\t"
2787 MOVNTQ
" %%mm1,- 7(%3, %0) \n\t"
2791 : "r"(src0
), "r"(src1
), "r"(dst0
), "r"(dst1
)
2799 dst0
[count
]= (src0
[4*count
+0]+src1
[4*count
+0])>>1;
2800 dst1
[count
]= (src0
[4*count
+2]+src1
[4*count
+2])>>1;
2805 static void RENAME(yuyvtoyuv420
)(uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
, const uint8_t *src
,
2806 long width
, long height
,
2807 long lumStride
, long chromStride
, long srcStride
)
2810 const long chromWidth
= -((-width
)>>1);
2812 for (y
=0; y
<height
; y
++) {
2813 RENAME(extract_even
)(src
, ydst
, width
);
2815 RENAME(extract_odd2avg
)(src
-srcStride
, src
, udst
, vdst
, chromWidth
);
2823 #if COMPILE_TEMPLATE_MMX
2832 static void RENAME(yuyvtoyuv422
)(uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
, const uint8_t *src
,
2833 long width
, long height
,
2834 long lumStride
, long chromStride
, long srcStride
)
2837 const long chromWidth
= -((-width
)>>1);
2839 for (y
=0; y
<height
; y
++) {
2840 RENAME(extract_even
)(src
, ydst
, width
);
2841 RENAME(extract_odd2
)(src
, udst
, vdst
, chromWidth
);
2848 #if COMPILE_TEMPLATE_MMX
2857 static void RENAME(uyvytoyuv420
)(uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
, const uint8_t *src
,
2858 long width
, long height
,
2859 long lumStride
, long chromStride
, long srcStride
)
2862 const long chromWidth
= -((-width
)>>1);
2864 for (y
=0; y
<height
; y
++) {
2865 RENAME(extract_even
)(src
+1, ydst
, width
);
2867 RENAME(extract_even2avg
)(src
-srcStride
, src
, udst
, vdst
, chromWidth
);
2875 #if COMPILE_TEMPLATE_MMX
2884 static void RENAME(uyvytoyuv422
)(uint8_t *ydst
, uint8_t *udst
, uint8_t *vdst
, const uint8_t *src
,
2885 long width
, long height
,
2886 long lumStride
, long chromStride
, long srcStride
)
2889 const long chromWidth
= -((-width
)>>1);
2891 for (y
=0; y
<height
; y
++) {
2892 RENAME(extract_even
)(src
+1, ydst
, width
);
2893 RENAME(extract_even2
)(src
, udst
, vdst
, chromWidth
);
2900 #if COMPILE_TEMPLATE_MMX
2909 static inline void RENAME(rgb2rgb_init
)(void)
2911 rgb15to16
= RENAME(rgb15to16
);
2912 rgb15tobgr24
= RENAME(rgb15tobgr24
);
2913 rgb15to32
= RENAME(rgb15to32
);
2914 rgb16tobgr24
= RENAME(rgb16tobgr24
);
2915 rgb16to32
= RENAME(rgb16to32
);
2916 rgb16to15
= RENAME(rgb16to15
);
2917 rgb24tobgr16
= RENAME(rgb24tobgr16
);
2918 rgb24tobgr15
= RENAME(rgb24tobgr15
);
2919 rgb24tobgr32
= RENAME(rgb24tobgr32
);
2920 rgb32to16
= RENAME(rgb32to16
);
2921 rgb32to15
= RENAME(rgb32to15
);
2922 rgb32tobgr24
= RENAME(rgb32tobgr24
);
2923 rgb24to15
= RENAME(rgb24to15
);
2924 rgb24to16
= RENAME(rgb24to16
);
2925 rgb24tobgr24
= RENAME(rgb24tobgr24
);
2926 shuffle_bytes_2103
= RENAME(shuffle_bytes_2103
);
2927 rgb32tobgr16
= RENAME(rgb32tobgr16
);
2928 rgb32tobgr15
= RENAME(rgb32tobgr15
);
2929 yv12toyuy2
= RENAME(yv12toyuy2
);
2930 yv12touyvy
= RENAME(yv12touyvy
);
2931 yuv422ptoyuy2
= RENAME(yuv422ptoyuy2
);
2932 yuv422ptouyvy
= RENAME(yuv422ptouyvy
);
2933 yuy2toyv12
= RENAME(yuy2toyv12
);
2934 planar2x
= RENAME(planar2x
);
2935 rgb24toyv12
= RENAME(rgb24toyv12
);
2936 interleaveBytes
= RENAME(interleaveBytes
);
2937 vu9_to_vu12
= RENAME(vu9_to_vu12
);
2938 yvu9_to_yuy2
= RENAME(yvu9_to_yuy2
);
2940 uyvytoyuv420
= RENAME(uyvytoyuv420
);
2941 uyvytoyuv422
= RENAME(uyvytoyuv422
);
2942 yuyvtoyuv420
= RENAME(yuyvtoyuv420
);
2943 yuyvtoyuv422
= RENAME(yuyvtoyuv422
);