1 // VirtualDub - Video processing and capture application
2 // Graphics support library
3 // Copyright (C) 1998-2007 Avery Lee
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2 of the License, or
8 // (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 // - VDPixmapBlt is from VirtualDub
21 // - sse2 yv12 to yuy2 conversion by Haali
22 // (- vd.cpp/h should be renamed to something more sensible already :)
// MSVC warning C4799 is "function has no EMMS instruction". It is disabled
// for this file; NOTE(review): presumably the MMX routines below leave
// EMMS to their callers -- confirm against the call sites.
28 #pragma warning(disable : 4799) // no emms... blahblahblah
// yuvtoyuy2row_MMX
//
// Row routine taking a destination row pointer, separate Y/U/V source row
// pointers and a width (see the parameter list). The conversion body is
// not visible in this excerpt; only the argument loads are.
//
// The function is __declspec(naked), so the compiler emits no prologue or
// epilogue and the asm body is responsible for the whole stack frame.
31 void __declspec(naked
) yuvtoyuy2row_MMX(BYTE
* dst
, BYTE
* srcy
, BYTE
* srcu
, BYTE
* srcv
, DWORD width
)
// Argument loads. dst is read from [esp+20], i.e. 16 bytes above the
// [esp+4] slot a first stack argument occupies right after the call.
// NOTE(review): presumably four registers are pushed before this point
// (those lines are not visible here) -- confirm.
// Resulting register map:
//   edi = dst, ebp = srcy, ebx = srcu, esi = srcv, ecx = width
39 mov edi
, [esp
+20] // dst
40 mov ebp
, [esp
+24] // srcy
41 mov ebx
, [esp
+28] // srcu
42 mov esi
, [esp
+32] // srcv
43 mov ecx
, [esp
+36] // width
// yuvtoyuy2row_avg_MMX
//
// Variant of the row routine that takes an extra pitchuv argument. The
// visible loads fetch chroma bytes at srcu + pitchuv and srcv + pitchuv,
// and the in-body comment gives a byte-average identity, so this looks
// like a version that averages chroma from two rows. Most of the loop
// body is not visible in this excerpt -- TODO confirm.
//
// Declared __declspec(naked): no compiler-generated prologue/epilogue.
76 void __declspec(naked
) yuvtoyuy2row_avg_MMX(BYTE
* dst
, BYTE
* srcy
, BYTE
* srcu
, BYTE
* srcv
, DWORD width
, DWORD pitchuv
)
// 64-bit constant with 0x7f in each of its eight bytes.
// NOTE(review): presumably ANDed in after a 64-bit right shift by one so
// that the bit shifted in from the neighbouring byte is discarded when
// forming the per-byte (x^y)>>1 term of the identity quoted below; the
// instructions that use it are not visible here.
78 static const __int64 mask
= 0x7f7f7f7f7f7f7f7fi
64;
// Argument loads. Offsets start at [esp+20], 16 bytes above the first
// argument's slot at entry. NOTE(review): presumably four registers were
// pushed before this point (not visible here) -- confirm.
// Resulting register map:
//   edi = dst, ebp = srcy, ebx = srcu, esi = srcv,
//   ecx = width, eax = pitchuv
88 mov edi
, [esp
+20] // dst
89 mov ebp
, [esp
+24] // srcy
90 mov ebx
, [esp
+28] // srcu
91 mov esi
, [esp
+32] // srcv
92 mov ecx
, [esp
+36] // width
93 mov eax
, [esp
+40] // pitchuv
// Loop head; the jnz at the end of the visible body branches back here.
97 yuvtoyuy2row_avg_loop
:
// Load 4 bytes from srcu + pitchuv into the low half of mm2 ...
103 movd mm2
, [ebx
+ eax
]
// ... and interleave them byte-wise with the bytes at srcv + pitchuv.
104 punpcklbw mm2
, [esi
+ eax
]
107 // (x+y)>>1 == (x&y)+((x^y)>>1)
// Repeat while the preceding flag-setting instruction (not visible in
// this excerpt) left the zero flag clear.
129 jnz yuvtoyuy2row_avg_loop
// yv12_yuy2_row_sse2
//
// Naked helper with no C-level parameters; it works on registers.
// As used by the visible instructions:
//   ebx + eax*2 : Y source bytes
//   edx + eax   : U source bytes
//   esi + eax   : V source bytes
//   edi + eax*4 : output bytes
// eax is the running index; where it is initialised and advanced is not
// visible in this excerpt. The callers in this file load ebx/edx/esi/edi
// from their Y/U/V/YUY2 arguments before calling this routine.
//
// The visible instructions read 32 Y bytes, 16 U bytes and 16 V bytes and
// write 64 output bytes. All loads use movdqa and all stores use movntdq,
// both of which fault on memory operands that are not 16-byte aligned.
139 void __declspec(naked
) yv12_yuy2_row_sse2() {
// Two aligned 16-byte loads of luma.
149 movdqa xmm0
, [ebx
+ eax
*2] // YYYYYYYY
150 movdqa xmm1
, [ebx
+ eax
*2 + 16] // YYYYYYYY
// Aligned 16-byte loads of each chroma plane.
152 movdqa xmm2
, [edx
+ eax
] // UUUUUUUU
153 movdqa xmm3
, [esi
+ eax
] // VVVVVVVV
// Interleave the low 8 U bytes with the low 8 V bytes.
158 punpcklbw xmm2
, xmm3
// VUVUVUVU
// Same for the high 8 bytes. NOTE(review): xmm4 is presumably a copy of
// the U register made on a line not shown here -- confirm.
159 punpckhbw xmm4
, xmm3
// VUVUVUVU
// Interleave the low 8 Y bytes with the interleaved chroma bytes.
161 punpcklbw xmm0
, xmm2
// VYUYVYUY
// Non-temporal 16-byte stores of the four result registers. xmm5 and xmm6
// are produced by instructions that are not visible in this excerpt.
166 movntdq
[edi
+ eax
*4], xmm0
167 movntdq
[edi
+ eax
*4 + 16], xmm5
168 movntdq
[edi
+ eax
*4 + 32], xmm1
169 movntdq
[edi
+ eax
*4 + 48], xmm6
// yv12_yuy2_row_sse2_linear
//
// Naked, register-based helper shaped like yv12_yuy2_row_sse2, except that
// the chroma registers are averaged (pavgb) with the chroma bytes located
// ebp bytes further on in each plane before interleaving. The callers in
// this file load ebp from their halfstride argument, so this blends the
// current chroma row with the next one.
//
// Visible register usage:
//   ebx + eax*2 : Y source bytes
//   edx + ebp   : U bytes one chroma stride ahead
//   esi + ebp   : V bytes one chroma stride ahead
//   edi + eax*4 : output bytes
// NOTE(review): the pavgb memory operands shown carry no eax index; the
// initial chroma loads and any pointer adjustment happen on lines not
// visible in this excerpt -- confirm how the index is applied.
180 void __declspec(naked
) yv12_yuy2_row_sse2_linear() {
// Two aligned 16-byte loads of luma.
191 movdqa xmm0
, [ebx
+ eax
*2] // YYYYYYYY
192 movdqa xmm1
, [ebx
+ eax
*2 + 16] // YYYYYYYY
// pavgb: per-byte unsigned average with rounding, (a + b + 1) >> 1.
// With a memory operand the address must be 16-byte aligned.
196 pavgb xmm2
, [edx
+ ebp
] // UUUUUUUU
197 pavgb xmm3
, [esi
+ ebp
] // VVVVVVVV
// Interleave the low 8 averaged U bytes with the low 8 averaged V bytes.
202 punpcklbw xmm2
, xmm3
// VUVUVUVU
// Same for the high 8 bytes. NOTE(review): xmm4 is presumably a copy of
// the averaged U register made on a line not shown here -- confirm.
203 punpckhbw xmm4
, xmm3
// VUVUVUVU
// Interleave the low 8 Y bytes with the interleaved chroma bytes.
205 punpcklbw xmm0
, xmm2
// VYUYVYUY
// Non-temporal 16-byte stores (destination must be 16-byte aligned).
// xmm5 and xmm6 are produced by instructions not visible in this excerpt.
210 movntdq
[edi
+ eax
*4], xmm0
211 movntdq
[edi
+ eax
*4 + 16], xmm5
212 movntdq
[edi
+ eax
*4 + 32], xmm1
213 movntdq
[edi
+ eax
*4 + 48], xmm6
// yv12_yuy2_row_sse2_linear_interlaced
//
// Same shape as yv12_yuy2_row_sse2_linear, but the chroma bytes averaged
// in are taken ebp*2 bytes further on instead of ebp bytes. The callers
// in this file load ebp from their halfstride argument, so this averages
// with the chroma row two rows ahead.
//
// Visible register usage:
//   ebx + eax*2 : Y source bytes
//   edx + ebp*2 : U bytes two chroma strides ahead
//   esi + ebp*2 : V bytes two chroma strides ahead
//   edi + eax*4 : output bytes
// NOTE(review): the initial chroma loads and the handling of the eax
// index for the pavgb operands are not visible in this excerpt.
226 void __declspec(naked
) yv12_yuy2_row_sse2_linear_interlaced() {
// Two aligned 16-byte loads of luma.
237 movdqa xmm0
, [ebx
+ eax
*2] // YYYYYYYY
238 movdqa xmm1
, [ebx
+ eax
*2 + 16] // YYYYYYYY
// pavgb: per-byte unsigned average with rounding, (a + b + 1) >> 1.
// With a memory operand the address must be 16-byte aligned.
242 pavgb xmm2
, [edx
+ ebp
*2] // UUUUUUUU
243 pavgb xmm3
, [esi
+ ebp
*2] // VVVVVVVV
// Interleave the low 8 averaged U bytes with the low 8 averaged V bytes.
248 punpcklbw xmm2
, xmm3
// VUVUVUVU
// Same for the high 8 bytes. NOTE(review): xmm4 is presumably a copy of
// the averaged U register made on a line not shown here -- confirm.
249 punpckhbw xmm4
, xmm3
// VUVUVUVU
// Interleave the low 8 Y bytes with the interleaved chroma bytes.
251 punpcklbw xmm0
, xmm2
// VYUYVYUY
// Non-temporal 16-byte stores (destination must be 16-byte aligned).
// xmm5 and xmm6 are produced by instructions not visible in this excerpt.
256 movntdq
[edi
+ eax
*4], xmm0
257 movntdq
[edi
+ eax
*4 + 16], xmm5
258 movntdq
[edi
+ eax
*4 + 32], xmm1
259 movntdq
[edi
+ eax
*4 + 48], xmm6
// yv12_yuy2_sse2
//
// Frame-level driver: loads the plane pointers and strides into registers
// and calls the register-based row helpers (yv12_yuy2_row_sse2 and
// yv12_yuy2_row_sse2_linear) once per output row.
//
// Parameters (from the signature):
//   Y, U, V    source plane pointers
//   halfstride loaded into ebp and commented "uv_stride"; the Y pointer
//              is advanced by 2 * halfstride per row
//   halfwidth  loaded into ecx and commented "uv_width"
//   height     row count; its stack slot [esp+40] is modified in place
//   YUY2       destination pointer
//   d_stride   destination stride (its use is not visible in this excerpt)
//
// Declared __declspec(naked): no compiler-generated prologue/epilogue.
// The conditional branches, the U/V/destination pointer updates and the
// return sequence are on lines not visible here.
272 void __declspec(naked
) yv12_yuy2_sse2(const BYTE
*Y
, const BYTE
*U
, const BYTE
*V
,
273 int halfstride
, unsigned halfwidth
, unsigned height
,
274 BYTE
*YUY2
, int d_stride
)
// Argument loads. Y is read from [esp+20], 16 bytes above the first
// argument's slot at entry. NOTE(review): presumably four registers were
// pushed before this point (not visible here) -- confirm.
// Register map expected by the row helpers:
//   ebx = Y, edx = U, esi = V, edi = YUY2, ebp = halfstride, ecx = halfwidth
282 mov ebx
, [esp
+ 20] // Y
283 mov edx
, [esp
+ 24] // U
284 mov esi
, [esp
+ 28] // V
285 mov edi
, [esp
+ 44] // D
286 mov ebp
, [esp
+ 32] // uv_stride
287 mov ecx
, [esp
+ 36] // uv_width
// Compare height against 2; the branch that acts on this is not visible.
294 cmp dword ptr
[esp
+ 40], 2
// Take two rows off the in-place height counter.
298 sub dword ptr
[esp
+ 40], 2
// Convert a row using the chroma row as is (no averaging).
299 call yv12_yuy2_row_sse2
// Y pointer += 2 * halfstride (lea does not modify flags).
301 lea ebx
, [ebx
+ ebp
*2]
// Convert a row with chroma averaged against the row one stride ahead.
304 call yv12_yuy2_row_sse2_linear
// Y pointer += 2 * halfstride.
309 lea ebx
, [ebx
+ ebp
*2]
// Re-test the height counter against 2; the branch is not visible here.
312 cmp dword ptr
[esp
+ 40], 2
// Plain (non-averaged) row conversion.
316 call yv12_yuy2_row_sse2
// One more row accounted for in the in-place height counter.
318 dec dword ptr
[esp
+ 40]
// Y pointer += 2 * halfstride.
321 lea ebx
, [ebx
+ ebp
*2]
// Plain (non-averaged) row conversion.
323 call yv12_yuy2_row_sse2
// yv12_yuy2_sse2_interlaced
//
// Interlaced counterpart of yv12_yuy2_sse2: same parameter list and
// register setup, but the height counter is compared and reduced in steps
// of 4, and the averaging rows use yv12_yuy2_row_sse2_linear_interlaced
// (which averages with the chroma row two strides ahead).
//
// Parameters (from the signature):
//   Y, U, V    source plane pointers
//   halfstride loaded into ebp and commented "uv_stride"; the Y pointer
//              is advanced by 2 * halfstride per row
//   halfwidth  loaded into ecx and commented "uv_width"
//   height     row count; its stack slot [esp+40] is modified in place
//   YUY2       destination pointer
//   d_stride   destination stride (its use is not visible in this excerpt)
//
// Declared __declspec(naked): no compiler-generated prologue/epilogue.
// The conditional branches, the U/V/destination pointer updates and the
// end of the function are on lines not visible here.
335 void __declspec(naked
) yv12_yuy2_sse2_interlaced(const BYTE
*Y
, const BYTE
*U
, const BYTE
*V
,
336 int halfstride
, unsigned halfwidth
, unsigned height
,
337 BYTE
*YUY2
, int d_stride
)
// Argument loads. Y is read from [esp+20], 16 bytes above the first
// argument's slot at entry. NOTE(review): presumably four registers were
// pushed before this point (not visible here) -- confirm.
// Register map expected by the row helpers:
//   ebx = Y, edx = U, esi = V, edi = YUY2, ebp = halfstride, ecx = halfwidth
345 mov ebx
, [esp
+ 20] // Y
346 mov edx
, [esp
+ 24] // U
347 mov esi
, [esp
+ 28] // V
348 mov edi
, [esp
+ 44] // D
349 mov ebp
, [esp
+ 32] // uv_stride
350 mov ecx
, [esp
+ 36] // uv_width
// Compare height against 4; the branch that acts on this is not visible.
357 cmp dword ptr
[esp
+ 40], 4
// Take four rows off the in-place height counter.
361 sub dword ptr
[esp
+ 40], 4
362 call yv12_yuy2_row_sse2
// first row, first field
// Y pointer += 2 * halfstride (lea does not modify flags).
364 lea ebx
, [ebx
+ ebp
*2]
370 call yv12_yuy2_row_sse2
// first row, second field
// Y pointer += 2 * halfstride.
372 lea ebx
, [ebx
+ ebp
*2]
378 call yv12_yuy2_row_sse2_linear_interlaced
// second row, first field
// Y pointer += 2 * halfstride.
383 lea ebx
, [ebx
+ ebp
*2]
386 call yv12_yuy2_row_sse2_linear_interlaced
// second row, second field
// Y pointer += 2 * halfstride.
391 lea ebx
, [ebx
+ ebp
*2]
// Re-test the height counter against 4; the branch is not visible here.
394 cmp dword ptr
[esp
+ 40], 4
// Four plain (non-averaged) row conversions follow, each separated by a
// Y pointer advance. NOTE(review): presumably these handle the final rows,
// where no further chroma row exists to average with -- confirm against
// the branch structure, which is not visible in this excerpt.
398 call yv12_yuy2_row_sse2
400 lea ebx
, [ebx
+ ebp
*2]
406 call yv12_yuy2_row_sse2
408 lea ebx
, [ebx
+ ebp
*2]
414 call yv12_yuy2_row_sse2
416 lea ebx
, [ebx
+ ebp
*2]
422 call yv12_yuy2_row_sse2