2 Copyright (C) 2003 Commonwealth Scientific and Industrial Research
3 Organisation (CSIRO) Australia
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 - Neither the name of CSIRO Australia nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ORGANISATION OR
24 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 * YUV to RGB conversion using x86 CPU extensions
36 #include "oggplay_private.h"
37 #include "oggplay_yuv2rgb_template.h"
41 # if defined(_M_AMD64)
42 /* MSVC with x64 doesn't support inline assembler */
43 #include <emmintrin.h>
45 #include "yuv2rgb_x86_vs.h"
46 #elif defined(__GNUC__)
47 #include "yuv2rgb_x86.h"
53 unsigned long long uq
[2];
61 } ATTR_ALIGN(16) simd_t
;
/*
 * 64-bit packed constants. Apart from Y_16 and ALFA, each literal repeats a
 * single 16-bit value in all four 16-bit lanes of the quadword.
 * NOTE(review): the *_Co values look like fixed-point colour-conversion
 * coefficients; the scaling they assume is defined by the kernels in the
 * included yuv2rgb headers, not here -- confirm before changing any of them.
 */
63 #define UV_128 0x0080008000800080LL /* 0x0080 (128) in every 16-bit lane */
64 #define Y_16 0x1010101010101010LL /* 0x10 (16) in every byte */
65 #define Y_Co 0x253f253f253f253fLL /* 0x253f in every 16-bit lane */
66 #define GU_Co 0xf37df37df37df37dLL /* 0xf37d in every 16-bit lane */
67 #define GV_Co 0xe5fce5fce5fce5fcLL /* 0xe5fc in every 16-bit lane */
68 #define BU_Co 0x4093409340934093LL /* 0x4093 in every 16-bit lane */
69 #define RV_Co 0x3312331233123312LL /* 0x3312 in every 16-bit lane */
70 #define Y_MASK 0x00ff00ff00ff00ffLL /* selects the low byte of every 16-bit lane */
71 #define ALFA 0xffffffffffffffffLL /* all 64 bits set */
74 * coefficients and constants for yuv to rgb SIMD conversion
76 static const simd_t simd_table
[9] = {
88 /* MMX intrinsics are not supported by VS in x64 mode, so disable them */
89 #if !(defined(_MSC_VER) && defined(_M_AMD64))
91 * the conversion functions using MMX instructions
94 /* Template for the MMX conversion functions: forwards to YUV_CONVERT,
 * fixing its 4th, 5th and 6th arguments to 8, 32 and 8 and passing FUNC,
 * CONVERT, CONV_BY_PIXEL, UV_SHIFT and UV_VERT_SUB through unchanged.
 * NOTE(review): the meaning of the three fixed values is defined by
 * YUV_CONVERT in oggplay_yuv2rgb_template.h -- confirm there. */
95 #define YUV_CONVERT_MMX(FUNC, CONVERT, CONV_BY_PIXEL, UV_SHIFT, UV_VERT_SUB) YUV_CONVERT(FUNC, CONVERT, CONV_BY_PIXEL, 8, 32, 8, UV_SHIFT, UV_VERT_SUB)
/* CLEANUP expands to a call of emms(). */
97 #define CLEANUP emms()
/* 32-bit output macros bound to the movq instruction and the mm register
 * prefix, with the trailing arguments 8, 16 and 24. */
98 #define OUT_RGBA_32 OUTPUT_RGBA_32(movq, mm, 8, 16, 24)
99 #define OUT_ARGB_32 OUTPUT_ARGB_32(movq, mm, 8, 16, 24)
100 #define OUT_BGRA_32 OUTPUT_BGRA_32(movq, mm, 8, 16, 24)
/* MOVNTQ is bound to MMX_MOVNTQ here; the SSE and SSE2 sections further
 * down bind it to SSE_MOVNTQ and SSE2_MOVNTQ respectively. */
101 #define MOVNTQ MMX_MOVNTQ
104 #define CONVERT(OUTPUT_FUNC) LOAD_YUV(movq, movd, mm) \
105 YUV_2_RGB(movq, mm) \
108 /* conversion function for yuv444 */
109 #define CONVERT_444(OUTPUT_FUNC) LOAD_YUV444(movq, mm) \
110 YUV444_2_RGB(movq, mm) \
113 /* MMX yuv420 functions: one YUV_CONVERT_MMX instantiation per output
 * format (rgba, bgra, argb). Each passes CONVERT() with the matching
 * OUT_*_32 macro, the matching VANILLA_*_OUT macro as CONV_BY_PIXEL,
 * UV_SHIFT = 4 and UV_VERT_SUB = 2.
 * NOTE(review): UV_VERT_SUB = 2 presumably reflects the vertical chroma
 * subsampling of 4:2:0 -- confirm against YUV_CONVERT. */
114 YUV_CONVERT_MMX(yuv420_to_rgba_mmx
, CONVERT(OUT_RGBA_32
), VANILLA_RGBA_OUT
, 4, 2)
115 YUV_CONVERT_MMX(yuv420_to_bgra_mmx
, CONVERT(OUT_BGRA_32
), VANILLA_BGRA_OUT
, 4, 2)
116 YUV_CONVERT_MMX(yuv420_to_argb_mmx
, CONVERT(OUT_ARGB_32
), VANILLA_ARGB_OUT
, 4, 2)
118 /* MMX yuv422 functions: same arguments as the MMX yuv420 instantiations
 * (CONVERT() kernel, UV_SHIFT = 4) except that UV_VERT_SUB is 1 instead
 * of 2. One instantiation per output format (rgba, bgra, argb). */
119 YUV_CONVERT_MMX(yuv422_to_rgba_mmx
, CONVERT(OUT_RGBA_32
), VANILLA_RGBA_OUT
, 4, 1)
120 YUV_CONVERT_MMX(yuv422_to_bgra_mmx
, CONVERT(OUT_BGRA_32
), VANILLA_BGRA_OUT
, 4, 1)
121 YUV_CONVERT_MMX(yuv422_to_argb_mmx
, CONVERT(OUT_ARGB_32
), VANILLA_ARGB_OUT
, 4, 1)
123 /* MMX yuv444 functions: these use the CONVERT_444() kernel instead of
 * CONVERT(), with UV_SHIFT = 8 and UV_VERT_SUB = 1. One instantiation
 * per output format (rgba, bgra, argb). */
124 YUV_CONVERT_MMX(yuv444_to_rgba_mmx
, CONVERT_444(OUT_RGBA_32
), VANILLA_RGBA_OUT
, 8, 1)
125 YUV_CONVERT_MMX(yuv444_to_bgra_mmx
, CONVERT_444(OUT_BGRA_32
), VANILLA_BGRA_OUT
, 8, 1)
126 YUV_CONVERT_MMX(yuv444_to_argb_mmx
, CONVERT_444(OUT_ARGB_32
), VANILLA_ARGB_OUT
, 8, 1)
130 /* SSE conversion functions: no separate template macro is defined for
 * them. The *_sse functions are generated with YUV_CONVERT_MMX again,
 * after binding MOVNTQ to SSE_MOVNTQ.
 * NOTE(review): MOVNTQ is already defined in the MMX section, so an
 * #undef is expected before this line -- verify it is present. */
131 #define MOVNTQ SSE_MOVNTQ
133 /* SSE yuv420 functions: instantiated through YUV_CONVERT_MMX with the
 * same arguments as the *_mmx yuv420 functions (CONVERT() kernel,
 * UV_SHIFT = 4, UV_VERT_SUB = 2); only the generated names carry the
 * _sse suffix. MOVNTQ is bound to SSE_MOVNTQ at this point. */
134 YUV_CONVERT_MMX(yuv420_to_rgba_sse
, CONVERT(OUT_RGBA_32
), VANILLA_RGBA_OUT
, 4, 2)
135 YUV_CONVERT_MMX(yuv420_to_bgra_sse
, CONVERT(OUT_BGRA_32
), VANILLA_BGRA_OUT
, 4, 2)
136 YUV_CONVERT_MMX(yuv420_to_argb_sse
, CONVERT(OUT_ARGB_32
), VANILLA_ARGB_OUT
, 4, 2)
138 /* SSE yuv422 functions: instantiated through YUV_CONVERT_MMX with the
 * CONVERT() kernel, UV_SHIFT = 4 and UV_VERT_SUB = 1 (the yuv420
 * variants pass 2). One instantiation per output format. */
139 YUV_CONVERT_MMX(yuv422_to_rgba_sse
, CONVERT(OUT_RGBA_32
), VANILLA_RGBA_OUT
, 4, 1)
140 YUV_CONVERT_MMX(yuv422_to_bgra_sse
, CONVERT(OUT_BGRA_32
), VANILLA_BGRA_OUT
, 4, 1)
141 YUV_CONVERT_MMX(yuv422_to_argb_sse
, CONVERT(OUT_ARGB_32
), VANILLA_ARGB_OUT
, 4, 1)
143 /* SSE yuv444 functions: instantiated through YUV_CONVERT_MMX with the
 * CONVERT_444() kernel, UV_SHIFT = 8 and UV_VERT_SUB = 1. One
 * instantiation per output format (rgba, bgra, argb). */
144 YUV_CONVERT_MMX(yuv444_to_rgba_sse
, CONVERT_444(OUT_RGBA_32
), VANILLA_RGBA_OUT
, 8, 1)
145 YUV_CONVERT_MMX(yuv444_to_bgra_sse
, CONVERT_444(OUT_BGRA_32
), VANILLA_BGRA_OUT
, 8, 1)
146 YUV_CONVERT_MMX(yuv444_to_argb_sse
, CONVERT_444(OUT_ARGB_32
), VANILLA_ARGB_OUT
, 8, 1)
159 * the conversion functions using SSE2 instructions
162 /* Template for the SSE2 conversion functions: forwards to YUV_CONVERT,
 * fixing its 4th, 5th and 6th arguments to 16, 64 and 16 (the MMX
 * template passes 8, 32 and 8) and passing the remaining parameters
 * through unchanged. */
163 #define YUV_CONVERT_SSE2(FUNC, CONVERT, CONV_BY_PIX, UV_SHIFT, UV_VERT_SUB) YUV_CONVERT(FUNC, CONVERT, CONV_BY_PIX, 16, 64, 16, UV_SHIFT, UV_VERT_SUB)
165 /* Output macros, bound here to the movdqa instruction and the xmm
 * register prefix with trailing arguments 16, 32 and 48.
 * NOTE(review): these names and MOVNTQ are also defined in the MMX
 * section, so matching #undef lines are expected before this point --
 * verify they are present. */
166 #define OUT_RGBA_32 OUTPUT_RGBA_32(movdqa, xmm, 16, 32, 48)
167 #define OUT_ARGB_32 OUTPUT_ARGB_32(movdqa, xmm, 16, 32, 48)
168 #define OUT_BGRA_32 OUTPUT_BGRA_32(movdqa, xmm, 16, 32, 48)
169 #define MOVNTQ SSE2_MOVNTQ
172 #if defined(_MSC_VER) && defined(_M_AMD64)
173 /* yuv420, yuv422 conversion function */
174 #define CONVERT(OUTPUT_FUNC) __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; \
175 LOAD_YUV_PLANAR_2(movdqu, xmm) \
176 YUV_2_RGB(movdqa, xmm) \
179 /* yuv420, yuv422 conversion function */
180 #define CONVERT(OUTPUT_FUNC) LOAD_YUV(movdqu, movq, xmm) \
181 YUV_2_RGB(movdqa, xmm) \
184 /* conversion function for yuv444 */
185 #define CONVERT_444(OUTPUT_FUNC) LOAD_YUV444(movdqu, xmm) \
186 YUV444_2_RGB(movdqa, xmm) \
190 /* SSE2 yuv420 functions: one YUV_CONVERT_SSE2 instantiation per output
 * format (rgba, bgra, argb). Each passes CONVERT() with the matching
 * OUT_*_32 macro, the matching VANILLA_*_OUT macro as CONV_BY_PIX,
 * UV_SHIFT = 8 and UV_VERT_SUB = 2. */
191 YUV_CONVERT_SSE2(yuv420_to_rgba_sse2
, CONVERT(OUT_RGBA_32
), VANILLA_RGBA_OUT
, 8, 2)
192 YUV_CONVERT_SSE2(yuv420_to_bgra_sse2
, CONVERT(OUT_BGRA_32
), VANILLA_BGRA_OUT
, 8, 2)
193 YUV_CONVERT_SSE2(yuv420_to_argb_sse2
, CONVERT(OUT_ARGB_32
), VANILLA_ARGB_OUT
, 8, 2)
195 /* SSE2 yuv422 functions: same arguments as the SSE2 yuv420
 * instantiations (CONVERT() kernel, UV_SHIFT = 8) except that
 * UV_VERT_SUB is 1 instead of 2. One instantiation per output format. */
196 YUV_CONVERT_SSE2(yuv422_to_rgba_sse2
, CONVERT(OUT_RGBA_32
), VANILLA_RGBA_OUT
, 8, 1)
197 YUV_CONVERT_SSE2(yuv422_to_bgra_sse2
, CONVERT(OUT_BGRA_32
), VANILLA_BGRA_OUT
, 8, 1)
198 YUV_CONVERT_SSE2(yuv422_to_argb_sse2
, CONVERT(OUT_ARGB_32
), VANILLA_ARGB_OUT
, 8, 1)
200 /* SSE2 yuv444 functions: these use the CONVERT_444() kernel instead of
 * CONVERT(), with UV_SHIFT = 16 and UV_VERT_SUB = 1. One instantiation
 * per output format (rgba, bgra, argb). */
201 YUV_CONVERT_SSE2(yuv444_to_rgba_sse2
, CONVERT_444(OUT_RGBA_32
), VANILLA_RGBA_OUT
, 16, 1)
202 YUV_CONVERT_SSE2(yuv444_to_bgra_sse2
, CONVERT_444(OUT_BGRA_32
), VANILLA_BGRA_OUT
, 16, 1)
203 YUV_CONVERT_SSE2(yuv444_to_argb_sse2
, CONVERT_444(OUT_ARGB_32
), VANILLA_ARGB_OUT
, 16, 1)