1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * yuv-converter.cpp: YUV2RGB converters for the pipeline
6 * Moonlight List (moonlight-list@lists.ximian.com)
8 * Copyright 2008 Novell, Inc. (http://www.novell.com)
10 * See the LICENSE file included with the distribution for details.
20 #include "yuv-converter.h"
22 /* R = 1.164 * (Y - 16) + 1.596 * (V - 128)
23 * G = 1.164 * (Y - 16) - 0.813 * (V - 128) - 0.391 * (U - 128)
24 * B = 1.164 * (Y - 16) + 2.018 * (U - 128)
26 * R V coefficient = 1.596*64 = 102 = 0x66
27 * G V coefficient = 0.813*64 = 52 = 0x34 (-ve) == 0xFFCC
28 * G U coefficient = 0.391*64 = 25 = 0x19 (-ve) == 0xFFE7
29 * B U coefficient = 2.018*64 = 129 = 0x81
30 * Y coefficient = 1.164*64 = 74 = 0x4a
/* Fixed-point conversion constants (scaled by 64, see the coefficient list
 * above), replicated across a 64-bit word so one load fills every SIMD lane. */
33 #define RED_V_C 0x0066006600660066ULL /* R V coefficient 0x66 in four 16-bit lanes */
34 #define GREEN_V_C 0xffccffccffccffccULL /* G V coefficient 0xFFCC (negative) in four 16-bit lanes */
35 #define GREEN_U_C 0xffe7ffe7ffe7ffe7ULL /* G U coefficient 0xFFE7 (negative) in four 16-bit lanes */
36 #define BLUE_U_C 0x0081008100810081ULL /* B U coefficient 0x81 in four 16-bit lanes */
37 #define Y_C 0x004a004a004a004aULL /* Y coefficient 0x4a in four 16-bit lanes */
38 #define UV_128 0x0080008000800080ULL /* 128 in four 16-bit lanes */
39 #define Y_16 0x1010101010101010ULL /* 16 in each of the eight bytes */
40 #define ALPHA_MASK 0xFFFFFFFFFFFFFFFFULL /* every bit set */
42 #if HAVE_SSE2 || HAVE_MMX
43 static const guint64 simd_table
[16] __attribute__ ((aligned (16))) = {
51 ALPHA_MASK
, ALPHA_MASK
,
54 #define PREFETCH(memory) do { \
55 __asm__ __volatile__ ( \
60 #if defined(__x86_64__)
61 #define ALIGN_CMP_REG "rax"
63 #define ALIGN_CMP_REG "eax"
/*
 * CALC_COLOR_MODIFIERS: compute the chroma-only parts of the conversion
 * (Dred, Dgreen, Dblue) from a register's worth of U and V samples and keep
 * them in SIMD registers 2 (red), 3 (green) and 1 (blue) as well as in
 * coeff_storage.
 *
 *   mov_instr      move mnemonic; the callers pass "movdqa" or "movq"
 *   reg_type       register name prefix; the callers pass "xmm" or "mm"
 *   alignment      mask ANDed with the address in u ("15" or "7" at the callers)
 *   align_reg      general-purpose scratch register name (ALIGN_CMP_REG)
 *   u, v           pointers to the U and V samples (asm operands %0 and %1)
 *   coeff_storage  result buffer (operand %2): the [lo] results are stored at
 *                  byte offsets 0/16/32 and the [hi] results at 48/64/80, in
 *                  red/green/blue order
 *
 * Operand %3 is &simd_table.
 * NOTE(review): the table offsets used here (0, 16, 32, 48, 80) presumably
 * select RED_V_C, GREEN_V_C, GREEN_U_C, BLUE_U_C and UV_128 - confirm against
 * the simd_table initializer.
 * NOTE(review): presumably the aligned case recomputes everything from fresh
 * U/V samples and the other case only moves the previously stored [hi]
 * results into the [lo] slots - confirm against the branch instructions.
 * NOTE(review): only align_reg is declared as clobbered, although the
 * statement writes SIMD registers 1-7 and stores through coeff_storage.
 */
66 #define CALC_COLOR_MODIFIERS(mov_instr, reg_type, alignment, align_reg, u, v, coeff_storage) do { \
67 __asm__ __volatile__ ( \
68 "mov %0, %%"align_reg";" /* align_reg = address held in u */ \
69 "and $"alignment", %%"align_reg";" /* keep only the address bits selected by alignment */ \
70 "test %%"align_reg", %%"align_reg";" /* zero flag is set when those bits are all clear */ \
73 mov_instr " 48(%2), %%"reg_type"2;" /* load the stored Dred[hi] */ \
74 mov_instr " 64(%2), %%"reg_type"3;" /* load the stored Dgreen[hi] */ \
75 mov_instr " 80(%2), %%"reg_type"1;" /* load the stored Dblue[hi] */ \
77 mov_instr " %%"reg_type"2, (%2);" /* store it in the slot read back as Dred */ \
78 mov_instr " %%"reg_type"3, 16(%2);" /* store it in the slot read back as Dgreen */ \
79 mov_instr " %%"reg_type"1, 32(%2);" /* store it in the slot read back as Dblue */ \
84 "pxor %%"reg_type"7, %%"reg_type"7;" /* r7 = 0, used to widen bytes to words */ \
86 mov_instr " (%0), %%"reg_type"1;" /* r1 = U samples */ \
87 mov_instr " (%1), %%"reg_type"2;" /* r2 = V samples */ \
89 mov_instr " %%"reg_type"1, %%"reg_type"5;" /* r5 = copy of U */ \
90 mov_instr " %%"reg_type"2, %%"reg_type"6;" /* r6 = copy of V */ \
92 "punpckhbw %%"reg_type"7, %%"reg_type"5;" /* r5 = U[hi] as 16-bit words */ \
93 "punpckhbw %%"reg_type"7, %%"reg_type"6;" /* r6 = V[hi] as 16-bit words */ \
95 "punpcklbw %%"reg_type"7, %%"reg_type"1;" /* r1 = U[lo] as 16-bit words */ \
96 "punpcklbw %%"reg_type"7, %%"reg_type"2;" /* r2 = V[lo] as 16-bit words */ \
98 mov_instr " 80(%3), %%"reg_type"7;" /* r7 = the value subtracted below */ \
100 "psubsw %%"reg_type"7, %%"reg_type"5;" /* U[hi] = U[hi] - 128 */ \
101 "psubsw %%"reg_type"7, %%"reg_type"6;" /* V[hi] = V[hi] - 128 */ \
103 "psubsw %%"reg_type"7, %%"reg_type"1;" /* U[lo] = U[lo] - 128 */ \
104 "psubsw %%"reg_type"7, %%"reg_type"2;" /* V[lo] = V[lo] - 128 */ \
106 mov_instr " %%"reg_type"5, %%"reg_type"3;" /* r3 = U[hi], becomes Ugreen[hi] */ \
107 mov_instr " %%"reg_type"6, %%"reg_type"4;" /* r4 = V[hi], becomes Vgreen[hi] */ \
109 mov_instr " 32(%3), %%"reg_type"7;" /* r7 = U multiplier for green */ \
110 "pmullw %%"reg_type"7, %%"reg_type"3;" /* calculate Ugreen[hi] */ \
111 "psraw $6, %%"reg_type"3;" /* Ugreen[hi] = Ugreen[hi] / 64 */ \
112 mov_instr " 16(%3), %%"reg_type"7;" /* r7 = V multiplier for green */ \
113 "pmullw %%"reg_type"7, %%"reg_type"4;" /* calculate Vgreen[hi] */ \
114 "psraw $6, %%"reg_type"4;" /* Vgreen[hi] = Vgreen[hi] / 64 */ \
115 "paddsw %%"reg_type"4, %%"reg_type"3;" /* Dgreen[hi] = Ugreen[hi] + Vgreen[hi] */ \
117 mov_instr " %%"reg_type"3, 64(%2);" /* store Dgreen[hi] now: r3 is reused for the [lo] half below */ \
119 mov_instr " %%"reg_type"1, %%"reg_type"3;" /* r3 = U[lo], becomes Ugreen[lo] */ \
120 mov_instr " %%"reg_type"2, %%"reg_type"4;" /* r4 = V[lo], becomes Vgreen[lo] */ \
122 mov_instr " 32(%3), %%"reg_type"7;" /* r7 = U multiplier for green */ \
123 "pmullw %%"reg_type"7, %%"reg_type"3;" /* calculate Ugreen[lo] */ \
124 "psraw $6, %%"reg_type"3;" /* Ugreen[lo] = Ugreen[lo] / 64 */ \
125 mov_instr " 16(%3), %%"reg_type"7;" /* r7 = V multiplier for green */ \
126 "pmullw %%"reg_type"7, %%"reg_type"4;" /* calculate Vgreen[lo] */ \
127 "psraw $6, %%"reg_type"4;" /* Vgreen[lo] = Vgreen[lo] / 64 */ \
128 "paddsw %%"reg_type"4, %%"reg_type"3;" /* Dgreen[lo] = Ugreen[lo] + Vgreen[lo] */ \
130 mov_instr " 48(%3), %%"reg_type"7;" /* r7 = U multiplier for blue */ \
131 "pmullw %%"reg_type"7, %%"reg_type"5;" /* calculate Dblue[hi] */ \
132 "psraw $6, %%"reg_type"5;" /* Dblue[hi] = Dblue[hi] / 64 */ \
133 "pmullw %%"reg_type"7, %%"reg_type"1;" /* calculate Dblue[lo] */ \
134 "psraw $6, %%"reg_type"1;" /* Dblue[lo] = Dblue[lo] / 64 */ \
136 mov_instr " (%3), %%"reg_type"7;" /* r7 = V multiplier for red */ \
137 "pmullw %%"reg_type"7, %%"reg_type"6;" /* calculate Dred[hi] */ \
138 "psraw $6, %%"reg_type"6;" /* Dred[hi] = Dred[hi] / 64 */ \
139 "pmullw %%"reg_type"7, %%"reg_type"2;" /* calculate Dred[lo] */ \
140 "psraw $6, %%"reg_type"2;" /* Dred[lo] = Dred[lo] / 64 */ \
142 mov_instr " %%"reg_type"6, 48(%2);" /* backup Dred[hi] */ \
143 mov_instr " %%"reg_type"5, 80(%2);" /* backup Dblue[hi] */ \
145 mov_instr " %%"reg_type"2, 0(%2);" /* backup Dred[lo] */ \
146 mov_instr " %%"reg_type"3, 16(%2);" /* backup Dgreen[lo] */ \
147 mov_instr " %%"reg_type"1, 32(%2);" /* backup Dblue[lo] */ \
149 : : "r" (u), "r" (v), "r" (coeff_storage), "r" (&simd_table) : "%"align_reg); \
/*
 * RESTORE_COLOR_MODIFIERS: reload Dred, Dgreen and Dblue from byte offsets
 * 0, 16 and 32 of coeff_storage into SIMD registers 2, 3 and 1, the registers
 * YUV2RGB_INTEL_SIMD reads them from (and overwrites).
 *
 *   mov_instr      move mnemonic; the callers pass "movdqa" or "movq"
 *   reg_type       register name prefix; the callers pass "xmm" or "mm"
 *   coeff_storage  buffer previously filled by CALC_COLOR_MODIFIERS (operand %0)
 *
 * NOTE(review): the loaded registers are not declared as clobbered.
 */
152 #define RESTORE_COLOR_MODIFIERS(mov_instr, reg_type, coeff_storage) do { \
153 __asm__ __volatile__ ( \
154 mov_instr " (%0), %%"reg_type"2;" /* restore Dred */ \
155 mov_instr " 16(%0), %%"reg_type"3;" /* restore Dgreen */ \
156 mov_instr " 32(%0), %%"reg_type"1;" /* restore Dblue */ \
157 : : "r" (coeff_storage)); \
/*
 * YUV2RGB_INTEL_SIMD: convert one SIMD register's worth of Y samples to BGRA
 * pixels and store them as four registers' worth of output.
 *
 * On entry SIMD registers 2, 3 and 1 must hold Dred, Dgreen and Dblue (as left
 * by CALC_COLOR_MODIFIERS or RESTORE_COLOR_MODIFIERS); they are overwritten.
 *
 *   mov_instr          move mnemonic ("movdqa" or "movq" at the wrappers)
 *   reg_type           register name prefix ("xmm" or "mm" at the wrappers)
 *   output_offset1..3  byte offsets from dest of the 2nd, 3rd and 4th stores
 *   y_plane            pointer to the Y samples (asm operand %0)
 *   dest               pointer to the output pixels (asm operand %1)
 *
 * Operand %2 is &simd_table; offsets 64, 96 and 112 are read from it.
 * NOTE(review): those offsets presumably select Y_C, Y_16 and ALPHA_MASK -
 * confirm against the simd_table initializer.
 * NOTE(review): no outputs or clobbers are declared, although the statement
 * writes SIMD registers 0-7 and stores through dest.
 *
 * The byte-layout diagrams in the comments use the indices of the 8-byte
 * (MMX) case.
 */
160 #define YUV2RGB_INTEL_SIMD(mov_instr, reg_type, output_offset1, output_offset2, output_offset3, y_plane, dest) do { \
161 __asm__ __volatile__ ( \
162 mov_instr " (%0), %%"reg_type"0;" /* Load Y plane into r0 */ \
163 mov_instr " 96(%2), %%"reg_type"7;" /* Load 16 into r7 */ \
164 "psubusb %%"reg_type"7, %%"reg_type"0;" /* Y = Y - 16, saturating at 0 */ \
166 mov_instr " %%"reg_type"0, %%"reg_type"4;" /* r4 == r0 */ \
168 "psllw $8, %%"reg_type"0;" /* r0 [00 Y0 00 Y2 ...] */ \
169 "psrlw $8, %%"reg_type"0;" /* r0 [Y0 00 Y2 00 ...] even samples as words */ \
170 "psrlw $8, %%"reg_type"4;" /* r4 [Y1 00 Y3 00 ...] odd samples as words */ \
172 mov_instr " 64(%2), %%"reg_type"7;" /* r7 = Y multiplier (Yc) */ \
173 "pmullw %%"reg_type"7, %%"reg_type"0;" /* calculate Y*Yc[even] */ \
174 "pmullw %%"reg_type"7, %%"reg_type"4;" /* calculate Y*Yc[odd] */ \
175 "psraw $6, %%"reg_type"0;" /* Yyc[even] = Yyc[even] / 64 */ \
176 "psraw $6, %%"reg_type"4;" /* Yyc[odd] = Yyc[odd] / 64 */ \
178 mov_instr " %%"reg_type"2, %%"reg_type"6;" /* r6 = copy of DR for the odd pixels */ \
179 mov_instr " %%"reg_type"3, %%"reg_type"7;" /* r7 = copy of DG for the odd pixels */ \
180 mov_instr " %%"reg_type"1, %%"reg_type"5;" /* r5 = copy of DB for the odd pixels */ \
182 "paddsw %%"reg_type"0, %%"reg_type"2;" /* CY[even] + DR */ \
183 "paddsw %%"reg_type"0, %%"reg_type"3;" /* CY[even] + DG */ \
184 "paddsw %%"reg_type"0, %%"reg_type"1;" /* CY[even] + DB */ \
186 "paddsw %%"reg_type"4, %%"reg_type"6;" /* CY[odd] + DR */ \
187 "paddsw %%"reg_type"4, %%"reg_type"7;" /* CY[odd] + DG */ \
188 "paddsw %%"reg_type"4, %%"reg_type"5;" /* CY[odd] + DB */ \
190 "packuswb %%"reg_type"2, %%"reg_type"2;" /* Clamp RGB to [0-255] */ \
191 "packuswb %%"reg_type"3, %%"reg_type"3;" \
192 "packuswb %%"reg_type"1, %%"reg_type"1;" \
194 "packuswb %%"reg_type"6, %%"reg_type"6;" \
195 "packuswb %%"reg_type"7, %%"reg_type"7;" \
196 "packuswb %%"reg_type"5, %%"reg_type"5;" \
198 "punpcklbw %%"reg_type"6, %%"reg_type"2;" /* r2 [R0 R1 R2 R3 ...] */ \
199 "punpcklbw %%"reg_type"7, %%"reg_type"3;" /* r3 [G0 G1 G2 G3 ...] */ \
200 "punpcklbw %%"reg_type"5, %%"reg_type"1;" /* r1 [B0 B1 B2 B3 ...] */ \
202 mov_instr " %%"reg_type"2, %%"reg_type"5;" /* copy RGB: r5 = R */ \
203 mov_instr " %%"reg_type"3, %%"reg_type"7;" /* r7 = G */ \
204 mov_instr " %%"reg_type"1, %%"reg_type"6;" /* r6 = B */ \
206 mov_instr " 112(%2), %%"reg_type"4;" /* r4 = filler for the 4th byte of each pixel (FF below) */ \
207 "punpcklbw %%"reg_type"2, %%"reg_type"1;" /* r1 [B0 R0 B1 R1 ...] */ \
208 "punpcklbw %%"reg_type"4, %%"reg_type"3;" /* r3 [G0 FF G1 FF ...] */ \
210 mov_instr " %%"reg_type"1, %%"reg_type"0;" /* r0 = copy of r1 [B0 R0 B1 R1 ...] */ \
212 "punpcklbw %%"reg_type"3, %%"reg_type"1;" /* r1 [B0 G0 R0 FF B1 G1 R1 FF ...] */ \
213 "punpckhbw %%"reg_type"3, %%"reg_type"0;" /* r0 [B2 G2 R2 FF B3 G3 R3 FF ...] */ \
215 mov_instr " %%"reg_type"1, (%1);" /* output BGRA */ \
216 mov_instr " %%"reg_type"0, "output_offset1"(%1);" /* second group of output pixels */ \
218 "punpckhbw %%"reg_type"5, %%"reg_type"6;" /* r6 = B/R interleave of the upper half of the pixels */ \
219 "punpckhbw %%"reg_type"4, %%"reg_type"7;" /* r7 = G/FF interleave of the upper half of the pixels */ \
221 mov_instr " %%"reg_type"6, %%"reg_type"0;" /* r0 = copy of r6 */ \
223 "punpcklbw %%"reg_type"7, %%"reg_type"6;" /* r6 = third group of BGRA pixels */ \
224 "punpckhbw %%"reg_type"7, %%"reg_type"0;" /* r0 = fourth group of BGRA pixels */ \
226 mov_instr " %%"reg_type"6, "output_offset2"(%1);" /* store the third group */ \
227 mov_instr " %%"reg_type"0, "output_offset3"(%1);" /* store the fourth group */ \
228 : : "r" (y_plane), "r" (dest), "r" (&simd_table)); \
/* SSE2 flavour: 16-byte xmm registers moved with movdqa; the four output
 * stores land at byte offsets 0, 16, 32 and 48 from dest. */
233 #define YUV2RGB_SSE(y_plane, dest) YUV2RGB_INTEL_SIMD("movdqa", "xmm", "16", "32", "48", y_plane, dest)
/* MMX flavour: 8-byte mm registers moved with movq; the four output stores
 * land at byte offsets 0, 8, 16 and 24 from dest. */
237 #define YUV2RGB_MMX(y_plane, dest) YUV2RGB_INTEL_SIMD("movq", "mm", "8", "16", "24", y_plane, dest)
/*
 * YUV444ToBGRA: scalar conversion of one pixel.
 *
 *   Y, U, V  the sample values for the pixel
 *   dst      output pixel; byte 0 receives blue, byte 1 green, byte 2 red
 *
 * The arithmetic is fixed point scaled by 256: the + 128 rounds before the
 * >> 8, and CLAMP limits each channel to 0..255.
 */
240 static inline void YUV444ToBGRA(guint8 Y
, guint8 U
, guint8 V
, guint8
*dst
)
/* red: depends on Y and V only */
242 dst
[2] = CLAMP((298 * (Y
- 16) + 409 * (V
- 128) + 128) >> 8, 0, 255);
/* green: depends on Y, U and V */
243 dst
[1] = CLAMP((298 * (Y
- 16) - 100 * (U
- 128) - 208 * (V
- 128) + 128) >> 8, 0, 255);
/* blue: depends on Y and U only */
244 dst
[0] = CLAMP((298 * (Y
- 16) + 516 * (U
- 128) + 128) >> 8, 0, 255);
/*
 * Supports: reports whether this converter accepts the given format pair.
 * The result is true when neither input nor output is MoonPixelFormatNone;
 * no other property of the two formats is examined here.
 */
253 YUVConverterInfo::Supports (MoonPixelFormat input
, MoonPixelFormat output
)
255 return input
!= MoonPixelFormatNone
&& output
!= MoonPixelFormatNone
;
/*
 * Create: factory method. Returns a newly allocated YUVConverter constructed
 * with the given media and stream.
 */
259 YUVConverterInfo::Create (Media
* media
, VideoStream
* stream
)
261 return new YUVConverter (media
, stream
);
/*
 * Constructor: forwards media and stream to IImageConverter with the
 * Type::YUVCONVERTER tag, determines whether the MMX and SSE2 code paths may
 * be used (have_mmx / have_sse2) and allocates the rgb_uv scratch buffer.
 */
268 YUVConverter::YUVConverter (Media
* media
, VideoStream
* stream
) : IImageConverter (Type::YUVCONVERTER
, media
, stream
)
270 #if defined(__amd64__) && defined(__x86_64__)
281 __asm__
__volatile__ (
/* 0x200000 is bit 21. NOTE(review): presumably this flips and re-reads the
 * EFLAGS ID bit to find out whether CPUID is available - confirm. */
285 "xorl $0x200000, %%eax;"
291 "andl $0x200000, %%eax;"
299 __asm__
__volatile__ (
/* eax = 1. NOTE(review): presumably selects the CPUID feature leaf whose
 * result ends up in features - confirm. */
300 "movl $0x0000001, %%eax;"
/* bit 23 of features */
310 have_mmx
= features
& 0x00800000;
/* bit 26 of features */
311 have_sse2
= features
& 0x04000000;
/* 96 bytes, 16-byte aligned: room for the six 16-byte slots at offsets 0..80
 * that CALC_COLOR_MODIFIERS writes. posix_memalign returns non-zero on
 * failure. */
318 if (posix_memalign ((void **)(&rgb_uv
), 16, 96))
322 YUVConverter::~YUVConverter ()
/*
 * Open: validates the configured formats.
 * Returns MEDIA_CONVERTER_ERROR (after emitting a warning) when input_format
 * or output_format is MoonPixelFormatNone, MEDIA_SUCCESS otherwise.
 */
328 YUVConverter::Open ()
/* the input format must have been set */
330 if (input_format
== MoonPixelFormatNone
) {
331 Media::Warning (MEDIA_CONVERTER_ERROR
, "Invalid input format.");
332 return MEDIA_CONVERTER_ERROR
;
/* the output format must have been set */
335 if (output_format
== MoonPixelFormatNone
) {
336 Media::Warning (MEDIA_CONVERTER_ERROR
, "Invalid output format.");
337 return MEDIA_CONVERTER_ERROR
;
340 return MEDIA_SUCCESS
;
/*
 * Convert: converts planar YUV data to 4-byte BGRA pixels, two rows per pass.
 *
 *   src        src[0] is the Y plane, src[1] the U plane, src[2] the V plane
 *   srcStride  srcStride[0] is the distance in bytes between Y rows
 *   srcSlideY  NOTE(review): no use of this parameter is apparent - confirm
 *   srcSlideH  number of rows to convert
 *   dest       dest[0] is the output buffer
 *   dstStride  dstStride[0] is the distance in bytes between output rows
 *
 * Each U/V sample is shared by a 2x2 block of pixels: two neighbouring pixels
 * on each of the two rows handled per pass.
 *
 * Returns MEDIA_SUCCESS, or MEDIA_OUT_OF_MEMORY when the rgb_uv scratch buffer
 * cannot be allocated.
 *
 * NOTE(review): the row loops run height >> 1 times, so the last row of an
 * odd height is not visited by them.
 */
344 YUVConverter::Convert (guint8
*src
[], int srcStride
[], int srcSlideY
, int srcSlideH
, guint8
* dest
[], int dstStride
[])
/* current pair of luma rows: row1 and the row directly below it */
346 guint8
*y_row1
= src
[0];
347 guint8
*y_row2
= src
[0]+srcStride
[0];
/* chroma planes, read once per pair of luma rows */
349 guint8
*u_plane
= src
[1];
350 guint8
*v_plane
= src
[2];
/* output rows matching y_row1 and y_row2 */
352 guint8
*dest_row1
= dest
[0];
353 guint8
*dest_row2
= dest
[0]+dstStride
[0];
/* dstStride[0] is in bytes and every output pixel takes 4 of them */
357 int width
= dstStride
[0] >> 2;
358 int height
= srcSlideH
;
/* a Y stride different from the pixel width means the rows carry padding */
362 if (width
!= srcStride
[0]) {
363 pad
= (srcStride
[0] - width
);
365 g_warning ("This video has padding that prevents us from doing aligned SIMD operations on it.");
/* allocate the scratch buffer on demand if it is not present yet */
370 if (rgb_uv
== NULL
&& posix_memalign ((void **)(&rgb_uv
), 16, 96) != 0) {
371 g_warning ("Could not allocate memory for YUVConverter");
372 return MEDIA_OUT_OF_MEMORY
;
/* SSE2 path: every inner iteration consumes 16 Y samples per row plus 8 U and
 * 8 V samples, and produces 64 output bytes per row */
376 if (have_sse2
&& aligned
) {
377 for (i
= 0; i
< height
>> 1; i
++, y_row1
+= srcStride
[0], y_row2
+= srcStride
[0], dest_row1
+= dstStride
[0], dest_row2
+= dstStride
[0]) {
378 for (j
= 0; j
< width
>> 4; j
++, y_row1
+= 16, y_row2
+= 16, u_plane
+= 8, v_plane
+= 8, dest_row1
+= 64, dest_row2
+= 64) {
/* chroma terms into registers 2/3/1 and into rgb_uv */
380 CALC_COLOR_MODIFIERS("movdqa", "xmm", "15", ALIGN_CMP_REG
, u_plane
, v_plane
, rgb_uv
);
/* first row; this overwrites the chroma registers */
382 YUV2RGB_SSE(y_row1
, dest_row1
);
/* reload the same chroma terms for the second row */
385 RESTORE_COLOR_MODIFIERS("movdqa", "xmm", rgb_uv
);
387 YUV2RGB_SSE(y_row2
, dest_row2
);
/* MMX path: every inner iteration consumes 8 Y samples per row plus 4 U and
 * 4 V samples, and produces 32 output bytes per row */
397 if (have_mmx
&& aligned
) {
398 for (i
= 0; i
< height
>> 1; i
++, y_row1
+= srcStride
[0], y_row2
+= srcStride
[0], dest_row1
+= dstStride
[0], dest_row2
+= dstStride
[0]) {
399 for (j
= 0; j
< width
>> 3; j
++, y_row1
+= 8, y_row2
+= 8, u_plane
+= 4, v_plane
+= 4, dest_row1
+= 32, dest_row2
+= 32) {
/* chroma terms into registers 2/3/1 and into rgb_uv */
401 CALC_COLOR_MODIFIERS("movq", "mm", "7", ALIGN_CMP_REG
, u_plane
, v_plane
, rgb_uv
);
/* first row; this overwrites the chroma registers */
403 YUV2RGB_MMX(y_row1
, dest_row1
);
/* reload the same chroma terms for the second row */
406 RESTORE_COLOR_MODIFIERS("movq", "mm", rgb_uv
);
408 YUV2RGB_MMX(y_row2
, dest_row2
);
/* leave MMX state so that x87 floating point code can run again */
415 __asm__
__volatile__ ("emms");
/* scalar path: every inner iteration converts a 2x2 block of pixels that
 * shares a single U/V sample */
418 for (i
= 0; i
< height
>> 1; i
++, y_row1
+= srcStride
[0], y_row2
+= srcStride
[0], dest_row1
+= dstStride
[0], dest_row2
+= dstStride
[0]) {
419 for (j
= 0; j
< width
>> 1; j
++, dest_row1
+= 8, dest_row2
+= 8, y_row1
+= 2, y_row2
+= 2, u_plane
+= 1, v_plane
+= 1) {
/* the two pixels on the upper row */
420 YUV444ToBGRA (*y_row1
, *u_plane
, *v_plane
, dest_row1
);
421 YUV444ToBGRA (y_row1
[1], *u_plane
, *v_plane
, (dest_row1
+4));
/* the two pixels on the lower row */
423 YUV444ToBGRA (*y_row2
, *u_plane
, *v_plane
, dest_row2
);
424 YUV444ToBGRA (y_row2
[1], *u_plane
, *v_plane
, (dest_row2
+4));
437 return MEDIA_SUCCESS
;