2 * Copyright (C) 2003-2006 Gabest
3 * http://www.gabest.org
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with GNU Make; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
23 #include "MemSubPic.h"
24 #include "color_conv_table.h"
31 static void SaveRect2File(const CRect
& cRect
, const char * filename
)
33 std::ofstream
os(filename
);
34 os
<<cRect
.left
<<","<<cRect
.top
<<","<<cRect
.right
<<","<<cRect
.bottom
;
36 static void SaveAxxx2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
38 std::ofstream
axxx(filename
);
39 int w
= cRect
.Width(), h
= cRect
.Height();
41 BYTE
* top
= (BYTE
*)spd
.bits
+ spd
.pitch
*cRect
.top
+ cRect
.left
*4;
42 BYTE
* bottom
= top
+ spd
.pitch
*h
;
44 for(; top
< bottom
; top
+= spd
.pitch
) {
47 for(; s
< e
; s
+=4) { // ARGB ARGB -> AxYU AxYV
48 axxx
<<(int)s
[0]<<","<<(int)s
[1]<<","<<(int)s
[2]<<","<<(int)s
[3];
61 static void SaveArgb2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
63 SaveAxxx2File(spd
, cRect
, filename
);
65 static void SaveAyuv2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
67 SaveAxxx2File(spd
, cRect
, filename
);
69 static void SaveNvxx2File(SubPicDesc
& spd
, const CRect
& cRect
, const char * filename
)
71 std::ofstream
os(filename
);
72 int w
= cRect
.Width(), h
= cRect
.Height();
74 BYTE
* top
= (BYTE
*)spd
.bits
;
75 BYTE
* bottom
= top
+ spd
.pitch
*h
;
77 for(; top
< bottom
; top
+= spd
.pitch
) {
81 BYTE
* sY
= s
+ spd
.pitch
*spd
.h
;
82 BYTE
* sU
= sY
+ spd
.pitch
*spd
.h
;
84 for(; s
< e
; s
++, sY
++, sU
+=2,sV
+=2) {
85 os
<<(int)s
[0]<<","<<(int)sY
[0]<<","<<(int)sU
[0]<<","<<(int)sV
[0];
99 #define ONCER(expr) {\
100 static bool entered=false;\
112 // alpha blend functions
114 #include "xy_intrinsics.h"
115 #include "../dsutil/vd.h"
118 static void AlphaBlt_YUY2_MMX(int w
, int h
, BYTE
* d
, int dstpitch
, PCUINT8 s
, int srcpitch
)
120 for(int j
= 0; j
< h
; j
++, s
+= srcpitch
, d
+= dstpitch
)
124 PCUINT8 s2end
= s2
+ w
*4;
125 DWORD
* d2
= (DWORD
*)d
;
127 int last_a
= w
>0?s2
[3]:0;
128 for(; s2
< s2end
; s2
+= 8, d2
++)
130 ia
= (last_a
+ 2*s2
[3] + s2
[7])>>2;
134 //int y1 = (BYTE)(((((*d2&0xff))*s2[3])>>8) + s2[1]); // + y1;
135 //int u = (BYTE)((((((*d2>>8)&0xff))*ia)>>8) + s2[0]); // + u;
136 //int y2 = (BYTE)((((((*d2>>16)&0xff))*s2[7])>>8) + s2[5]); // + y2;
137 //int v = (BYTE)((((((*d2>>24)&0xff))*ia)>>8) + s2[4]); // + v;
138 //*d2 = (v<<24)|(y2<<16)|(u<<8)|y1;
140 ia
= (ia
<<24)|(s2
[7]<<16)|(ia
<<8)|s2
[3];
141 c
= (s2
[4]<<24)|(s2
[5]<<16)|(s2
[0]<<8)|s2
[1]; // (v<<24)|(y2<<16)|(u<<8)|y1;
152 psraw mm4
, 1 //or else, overflow because psraw shift in sign bit
166 void AlphaBlt_YUY2_C(int w
, int h
, BYTE
* d
, int dstpitch
, PCUINT8 s
, int srcpitch
)
168 for(int j
= 0; j
< h
; j
++, s
+= srcpitch
, d
+= dstpitch
)
172 PCUINT8 s2end
= s2
+ w
*4;
173 DWORD
* d2
= (DWORD
*)d
;
175 int last_a
= w
>0?s2
[3]:0;
176 for(; s2
< s2end
; s2
+= 8, d2
++)
178 ia
= (last_a
+ 2*s2
[3] + s2
[7])>>2;
182 DWORD y1
= (BYTE
)(((((*d2
&0xff))*s2
[3])>>8) + s2
[1]); // + y1;
183 DWORD u
= (BYTE
)((((((*d2
>>8)&0xff))*ia
)>>8) + s2
[0]); // + u;
184 DWORD y2
= (BYTE
)((((((*d2
>>16)&0xff))*s2
[7])>>8) + s2
[5]); // + y2;
185 DWORD v
= (BYTE
)((((((*d2
>>24)&0xff))*ia
)>>8) + s2
[4]); // + v;
186 *d2
= (v
<<24)|(y2
<<16)|(u
<<8)|y1
;
197 CMemSubPic::CMemSubPic(SubPicDesc
& spd
, int alpha_blt_dst_type
)
198 : m_spd(spd
), m_alpha_blt_dst_type(alpha_blt_dst_type
)
200 m_maxsize
.SetSize(spd
.w
, spd
.h
);
201 // m_rcDirty.SetRect(0, 0, spd.w, spd.h);
202 CRect
allSpd(0,0,spd
.w
, spd
.h
);
203 m_rectListDirty
.AddTail(allSpd
);
206 CMemSubPic::~CMemSubPic()
208 delete [] m_spd
.bits
, m_spd
.bits
= NULL
;
213 STDMETHODIMP_(void*) CMemSubPic::GetObject() const
215 return (void*)&m_spd
;
// Fills the caller's descriptor `spd` field by field from the stored m_spd.
// NOTE(review): this extract does not show every line of the body; only the
// assignments below are visible.
218 STDMETHODIMP
CMemSubPic::GetDesc(SubPicDesc
& spd
) const
220 spd
.type
= m_spd
.type
;
224 spd
.pitch
= m_spd
.pitch
;
// The plane pointers are copied as-is, so `spd` refers to the same pixel memory as m_spd.
225 spd
.bits
= m_spd
.bits
;
226 spd
.bitsU
= m_spd
.bitsU
;
227 spd
.bitsV
= m_spd
.bitsV
;
// vidrect is taken from the member m_vidrect, not from m_spd.
228 spd
.vidrect
= m_vidrect
;
232 STDMETHODIMP
CMemSubPic::CopyTo(ISubPicEx
* pSubPic
)
235 if(FAILED(hr
= __super::CopyTo(pSubPic
))) {
240 if(FAILED(GetDesc(src
)) || FAILED(pSubPic
->GetDesc(dst
))) {
243 while(!m_rectListDirty
.IsEmpty())
245 CRect
& cRect
= m_rectListDirty
.GetHead();
246 int w
= cRect
.Width(), h
= cRect
.Height();
247 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*cRect
.top
+ cRect
.left
*4;
248 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*cRect
.top
+ cRect
.left
*4;
249 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
// Clears the pixels of every rectangle in m_rectListDirty and empties the list.
// `color` is the fill value used for the non-planar formats.
// NOTE(review): several lines of this body are not visible in this extract.
255 STDMETHODIMP
CMemSubPic::ClearDirtyRect(DWORD color
)
257 if(m_rectListDirty
.IsEmpty()) {
260 while(!m_rectListDirty
.IsEmpty())
262 //pDirtyRect = m_rectListDirty.RemoveHead();
// Rectangles are consumed from the tail of the list.
// NOTE(review): the result of RemoveTail() is bound to a non-const reference;
// if RemoveTail() returns by value this relies on a compiler extension - confirm.
263 CRect
& dirtyRect
= m_rectListDirty
.RemoveTail();
// p = first byte of the rectangle: row offset (pitch * top) plus
// column offset (left * bytes per pixel, i.e. bpp >> 3).
264 BYTE
* p
= (BYTE
*)m_spd
.bits
+ m_spd
.pitch
*(dirtyRect
.top
) + dirtyRect
.left
*(m_spd
.bpp
>>3);
265 int w
= dirtyRect
.Width();
// Every format except MSP_AYUV_PLANAR: fill each row of the rectangle with `color`.
266 if(m_spd
.type
!=MSP_AYUV_PLANAR
)
268 for(int j
= 0, h
= dirtyRect
.Height(); j
< h
; j
++, p
+= m_spd
.pitch
)
271 memsetd(p
, color
, w
*4); // nya
// Second row loop (presumably the MSP_AYUV_PLANAR branch - the joining lines
// are not visible): zero `w` bytes in the regions located 1x, 2x and 3x
// (h * pitch) bytes after p.
// What is written at p itself is not visible in this extract.
289 for(int j
= 0, h
= dirtyRect
.Height(); j
< h
; j
++, p
+= m_spd
.pitch
)
291 // memsetd(p, 0, m_rcDirty.Width());
292 //DbgLog((LOG_TRACE, 3, "w:%d", w));
293 //w = pDirtyRect->Width();
295 memset(p
+m_spd
.h
*m_spd
.pitch
, 0, w
);
296 memset(p
+m_spd
.h
*m_spd
.pitch
*2, 0, w
);
297 memset(p
+m_spd
.h
*m_spd
.pitch
*3, 0, w
);
301 m_rectListDirty
.RemoveAll();
305 STDMETHODIMP
CMemSubPic::Lock(SubPicDesc
& spd
)
// Chooses the unlock routine from the source format (m_spd.type) and the
// format the subpicture will be blended onto (m_alpha_blt_dst_type), and
// forwards dirtyRectList to it.
// NOTE(review): the operators joining the four groups of the first condition
// and whatever is returned when no branch matches are not visible in this
// extract; the groups are presumably OR-ed.
310 STDMETHODIMP
CMemSubPic::Unlock( CAtlList
<CRect
>* dirtyRectList
)
312 int src_type
= m_spd
.type
;
313 int dst_type
= m_alpha_blt_dst_type
;
// Pairs handled by UnlockOther:
//   RGBA        -> RGB32 / RGB24 / RGB16 / RGB15
//   XY_AUYV     -> YUY2
//   AYUV        -> AYUV
//   AYUV_PLANAR -> IYUV / YV12 / P010 / P016 / NV12 / NV21
314 if( (src_type
==MSP_RGBA
&& (dst_type
== MSP_RGB32
||
315 dst_type
== MSP_RGB24
||
316 dst_type
== MSP_RGB16
||
317 dst_type
== MSP_RGB15
))
319 (src_type
==MSP_XY_AUYV
&& dst_type
== MSP_YUY2
)//ToDo: fix me MSP_AYUV
321 (src_type
==MSP_AYUV
&& dst_type
== MSP_AYUV
)
323 (src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_IYUV
||
324 dst_type
== MSP_YV12
||
325 dst_type
== MSP_P010
||
326 dst_type
== MSP_P016
||
327 dst_type
== MSP_NV12
||
328 dst_type
== MSP_NV21
)))
330 return UnlockOther(dirtyRectList
);
// An RGBA source with any of the listed YUV targets goes to UnlockRGBA_YUV.
332 else if(src_type
==MSP_RGBA
&& (dst_type
== MSP_YUY2
||
333 dst_type
== MSP_AYUV
|| //ToDo: fix me MSP_AYUV
334 dst_type
== MSP_IYUV
||
335 dst_type
== MSP_YV12
||
336 dst_type
== MSP_NV12
||
337 dst_type
== MSP_NV21
||
338 dst_type
== MSP_P010
||
339 dst_type
== MSP_P016
))
341 return UnlockRGBA_YUV(dirtyRectList
);
346 HRESULT
CMemSubPic::UnlockOther(CAtlList
<CRect
>* dirtyRectList
)
348 SetDirtyRectEx(dirtyRectList
);
349 if(m_rectListDirty
.IsEmpty()) {
353 POSITION pos
= m_rectListDirty
.GetHeadPosition();
356 const CRect
& cRect
= m_rectListDirty
.GetNext(pos
);
357 int w
= cRect
.Width(), h
= cRect
.Height();
363 BYTE
* top
= (BYTE
*)m_spd
.bits
+ m_spd
.pitch
*(cRect
.top
) + cRect
.left
*4;
364 BYTE
* bottom
= top
+ m_spd
.pitch
*h
;
365 if(m_alpha_blt_dst_type
== MSP_RGB16
)
367 for(; top
< bottom
; top
+= m_spd
.pitch
)
369 DWORD
* s
= (DWORD
*)top
;
373 *s
= ((*s
>>3)&0x1f000000)|((*s
>>8)&0xf800)|((*s
>>5)&0x07e0)|((*s
>>3)&0x001f);
374 // *s = (*s&0xff000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
378 else if(m_alpha_blt_dst_type
== MSP_RGB15
)
380 for(; top
< bottom
; top
+= m_spd
.pitch
)
382 DWORD
* s
= (DWORD
*)top
;
386 *s
= ((*s
>>3)&0x1f000000)|((*s
>>9)&0x7c00)|((*s
>>6)&0x03e0)|((*s
>>3)&0x001f);
387 // *s = (*s&0xff000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
391 else if(m_alpha_blt_dst_type
== MSP_YUY2
)
393 XY_DO_ONCE( xy_logger::write_file("G:\\b1_ul", top
, m_spd
.pitch
*(h
-1)) );
395 for(BYTE
* tempTop
=top
; tempTop
< bottom
; tempTop
+= m_spd
.pitch
)
399 BYTE last_v
= s
[0], last_u
=s
[2];
400 for(; s
< e
; s
+=8) // AUYV AUYV -> AxYU AxYV
403 s
[4] = (last_v
+ 2*s
[0] + s
[4] + 2)>>2;
406 s
[0] = (last_u
+ 2*s
[2] + s
[6] + 2)>>2;
411 XY_DO_ONCE( xy_logger::write_file("G:\\a1_ul", top
, m_spd
.pitch
*(h
-1)) );
413 else if(m_alpha_blt_dst_type
== MSP_YV12
|| m_alpha_blt_dst_type
== MSP_IYUV
)
417 else if ( m_alpha_blt_dst_type
== MSP_P010
|| m_alpha_blt_dst_type
== MSP_P016
418 || m_alpha_blt_dst_type
== MSP_NV12
)
420 SubsampleAndInterlace(cRect
, true);
422 else if( m_alpha_blt_dst_type
== MSP_NV21
)
424 SubsampleAndInterlace(cRect
, false);
430 HRESULT
CMemSubPic::UnlockRGBA_YUV(CAtlList
<CRect
>* dirtyRectList
)
433 ONCER( SaveRect2File(dirtyRectList
->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect") );
434 ONCER( SaveArgb2File(m_spd
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.argb") );
436 SetDirtyRectEx(dirtyRectList
);
438 ONCER( SaveRect2File(dirtyRectList
->GetHead(), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.rect2") );
439 if(m_rectListDirty
.IsEmpty()) {
443 POSITION pos
= m_rectListDirty
.GetHeadPosition();
446 const CRect
& cRect
= m_rectListDirty
.GetNext(pos
);
447 int w
= cRect
.Width(), h
= cRect
.Height();
453 BYTE
* top
= (BYTE
*)m_spd
.bits
+ m_spd
.pitch
*cRect
.top
+ cRect
.left
*4;
454 BYTE
* bottom
= top
+ m_spd
.pitch
*h
;
456 if( m_alpha_blt_dst_type
== MSP_YUY2
||
457 m_alpha_blt_dst_type
== MSP_YV12
||
458 m_alpha_blt_dst_type
== MSP_IYUV
||
459 m_alpha_blt_dst_type
== MSP_P010
||
460 m_alpha_blt_dst_type
== MSP_P016
||
461 m_alpha_blt_dst_type
== MSP_NV12
||
462 m_alpha_blt_dst_type
== MSP_NV21
) {
463 for(; top
< bottom
; top
+= m_spd
.pitch
) {
466 DWORD last_yuv
= ColorConvTable::PreMulArgb2Ayuv(s
[3], s
[2], s
[1], s
[0]);
467 for(; s
< e
; s
+=8) { // ARGB ARGB -> AxYU AxYV
468 if((s
[3]+s
[7]+(last_yuv
>>24)) < 0xff*3) {
469 DWORD tmp1
= ColorConvTable::PreMulArgb2Ayuv(s
[3], s
[2], s
[1], s
[0]);
470 DWORD tmp2
= ColorConvTable::PreMulArgb2Ayuv(s
[7], s
[6], s
[5], s
[4]);
472 s
[1] = (tmp1
>>16)&0xff;
473 s
[5] = (tmp2
>>16)&0xff;
475 s
[0] = (((last_yuv
>>8)&0xff) + 2*((tmp1
>>8)&0xff) + ((tmp2
>>8)&0xff) + 2)/4;
476 s
[4] = ((last_yuv
&0xff) + 2*(tmp1
&0xff) + (tmp2
&0xff) + 2)/4;
479 last_yuv
= ColorConvTable::PreMulArgb2Ayuv(s
[7], s
[6], s
[5], s
[4]);
487 else if(m_alpha_blt_dst_type
== MSP_AYUV
) {
488 for(; top
< bottom
; top
+= m_spd
.pitch
) {
491 for(; s
< e
; s
+=4) { // ARGB -> AYUV
493 *((DWORD
*)s
) = ColorConvTable::PreMulArgb2Ayuv(s
[3], s
[2], s
[1], s
[0]);
503 ONCER( SaveAxxx2File(m_spd
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.axuv") );
507 void CMemSubPic::SubsampleAndInterlace( const CRect
& cRect
, bool u_first
)
509 //fix me: check alignment and log error
510 int w
= cRect
.Width(), h
= cRect
.Height();
511 BYTE
* u_plan
= reinterpret_cast<BYTE
*>(m_spd
.bits
) + m_spd
.pitch
*m_spd
.h
*2;
512 BYTE
* u_start
= u_plan
+ m_spd
.pitch
*(cRect
.top
)+ cRect
.left
;
513 BYTE
* v_start
= u_start
+ m_spd
.pitch
*m_spd
.h
;
523 //Workaround for alignment
524 if ( ((m_spd
.pitch
|w
) &15) == 0 && (g_cpuid
.m_flags
& CCpuID::sse2
) )
527 SubsampleAndInterlace(dst
, u_start
, v_start
, h
, w
, m_spd
.pitch
);
531 SubsampleAndInterlaceC(dst
, u_start
, v_start
, h
, w
, m_spd
.pitch
);
// Blends this subpicture onto pTarget. This method only dispatches: it picks
// a blend routine from the source format (m_spd.type) and the target format
// (pTarget->type) and forwards pSrc, pDst and pTarget unchanged.
// NOTE(review): the body of the null-pointer check, the operators joining the
// groups of the first condition, and the result for an unsupported pair are
// not visible in this extract.
535 STDMETHODIMP
CMemSubPic::AlphaBlt( const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
// All three pointers are required.
537 if(!pSrc
|| !pDst
|| !pTarget
) {
540 int src_type
= m_spd
.type
;
541 int dst_type
= pTarget
->type
;
// Pairs handled by AlphaBltOther:
//   RGBA        -> RGB32 / RGB24 / RGB16 / RGB15 / RGBA / YUY2 / AYUV
//   XY_AUYV     -> YUY2
//   AYUV        -> AYUV
//   AYUV_PLANAR -> IYUV / YV12
543 if( (src_type
==MSP_RGBA
&& (dst_type
== MSP_RGB32
||
544 dst_type
== MSP_RGB24
||
545 dst_type
== MSP_RGB16
||
546 dst_type
== MSP_RGB15
||
547 dst_type
== MSP_RGBA
||
548 dst_type
== MSP_YUY2
||//ToDo: fix me MSP_RGBA changed into AxYU AxYV after unlock, may be confusing
549 dst_type
== MSP_AYUV
))
551 (src_type
==MSP_XY_AUYV
&& dst_type
== MSP_YUY2
)//ToDo: fix me MSP_AYUV
553 (src_type
==MSP_AYUV
&& dst_type
== MSP_AYUV
)
555 (src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_IYUV
||
556 dst_type
== MSP_YV12
)) )
558 return AlphaBltOther(pSrc
, pDst
, pTarget
);
// AYUV_PLANAR source -> NV12 / NV21 target
560 else if ( src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_NV12
||
561 dst_type
== MSP_NV21
) )
563 return AlphaBltAnv12_Nv12(pSrc
, pDst
, pTarget
);
// AYUV_PLANAR source -> P010 / P016 target
566 else if( src_type
==MSP_AYUV_PLANAR
&& (dst_type
== MSP_P010
||
567 dst_type
== MSP_P016
) )
569 return AlphaBltAnv12_P010(pSrc
, pDst
, pTarget
);
// RGBA source -> IYUV / YV12 target
571 else if( src_type
==MSP_RGBA
&& (dst_type
== MSP_IYUV
||
572 dst_type
== MSP_YV12
))
574 return AlphaBltAxyuAxyv_Yv12(pSrc
, pDst
, pTarget
);
// RGBA source -> NV12 / NV21 target
576 else if( src_type
==MSP_RGBA
&& (dst_type
== MSP_NV12
||
577 dst_type
== MSP_NV21
))
579 return AlphaBltAxyuAxyv_Nv12(pSrc
, pDst
, pTarget
);
// RGBA source -> P010 / P016 target
581 else if( src_type
==MSP_RGBA
&& (dst_type
== MSP_P010
||
582 dst_type
== MSP_P016
))
584 return AlphaBltAxyuAxyv_P010(pSrc
, pDst
, pTarget
);
589 HRESULT
CMemSubPic::AlphaBltOther(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
591 const SubPicDesc
& src
= m_spd
;
592 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
594 CRect
rs(*pSrc
), rd(*pDst
);
598 rd
.bottom
= dst
.h
- rd
.bottom
;
599 rd
.top
= dst
.h
- rd
.top
;
601 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
604 int w
= rs
.Width(), h
= rs
.Height();
605 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);//rs.left*4
606 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*rd
.top
+ ((rd
.left
*dst
.bpp
)>>3);
607 if(rd
.top
> rd
.bottom
)
609 if(dst
.type
== MSP_RGB32
|| dst
.type
== MSP_RGB24
610 || dst
.type
== MSP_RGB16
|| dst
.type
== MSP_RGB15
611 || dst
.type
== MSP_YUY2
|| dst
.type
== MSP_AYUV
)
613 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + (rd
.left
*dst
.bpp
>>3);
615 else if(dst
.type
== MSP_YV12
|| dst
.type
== MSP_IYUV
)
617 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + (rd
.left
*8>>3);
623 dst
.pitch
= -dst
.pitch
;
625 DbgLog((LOG_TRACE
, 5, TEXT("w=%d h=%d"), w
, h
));
629 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
632 BYTE
* s2end
= s2
+ w
*4;
633 DWORD
* d2
= (DWORD
*)d
;
634 for(; s2
< s2end
; s2
+= 4, d2
++)
638 DWORD bd
=0x00000100 -( (DWORD
) s2
[3]);
639 DWORD B
= ((*((DWORD
*)s2
)&0x000000ff)<<8)/bd
;
640 DWORD V
= ((*((DWORD
*)s2
)&0x0000ff00)/bd
)<<8;
641 DWORD R
= (((*((DWORD
*)s2
)&0x00ff0000)>>8)/bd
)<<16;
643 | (0xff000000-(*((DWORD
*)s2
)&0xff000000))&0xff000000;
649 case MSP_AYUV
: //ToDo: fix me MSP_VUYA indeed?
650 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
653 BYTE
* s2end
= s2
+ w
*4;
654 DWORD
* d2
= (DWORD
*)d
;
655 for(; s2
< s2end
; s2
+= 4, d2
++)
658 DWORD ia
= 256-s2
[3];
660 *d2
= ((((*d2
&0x00ff00ff)*s2
[3])>>8) + (((*((DWORD
*)s2
)&0x00ff00ff)*ia
)>>8)&0x00ff00ff)
661 | ((((*d2
&0x0000ff00)*s2
[3])>>8) + (((*((DWORD
*)s2
)&0x0000ff00)*ia
)>>8)&0x0000ff00);
666 *d2
= (((((*d2
&0x00ff00ff)*s2
[3])>>8) + (*((DWORD
*)s2
)&0x00ff00ff))&0x00ff00ff)
667 | (((((*d2
&0x0000ff00)*s2
[3])>>8) + (*((DWORD
*)s2
)&0x0000ff00))&0x0000ff00);
674 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
677 BYTE
* s2end
= s2
+ w
*4;
679 for(; s2
< s2end
; s2
+= 4, d2
+= 3)
683 d2
[0] = ((d2
[0]*s2
[3])>>8) + s2
[0];
684 d2
[1] = ((d2
[1]*s2
[3])>>8) + s2
[1];
685 d2
[2] = ((d2
[2]*s2
[3])>>8) + s2
[2];
691 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
694 BYTE
* s2end
= s2
+ w
*4;
696 for(; s2
< s2end
; s2
+= 4, d2
++)
700 *d2
= (WORD
)((((((*d2
&0xf81f)*s2
[3])>>5) + (*(DWORD
*)s2
&0xf81f))&0xf81f)
701 | (((((*d2
&0x07e0)*s2
[3])>>5) + (*(DWORD
*)s2
&0x07e0))&0x07e0));
702 /* *d2 = (WORD)((((((*d2&0xf800)*s2[3])>>8) + (*(DWORD*)s2&0xf800))&0xf800)
703 | (((((*d2&0x07e0)*s2[3])>>8) + (*(DWORD*)s2&0x07e0))&0x07e0)
704 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
711 for(int j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
)
714 BYTE
* s2end
= s2
+ w
*4;
716 for(; s2
< s2end
; s2
+= 4, d2
++)
720 *d2
= (WORD
)((((((*d2
&0x7c1f)*s2
[3])>>5) + (*(DWORD
*)s2
&0x7c1f))&0x7c1f)
721 | (((((*d2
&0x03e0)*s2
[3])>>5) + (*(DWORD
*)s2
&0x03e0))&0x03e0));
722 /* *d2 = (WORD)((((((*d2&0x7c00)*s2[3])>>8) + (*(DWORD*)s2&0x7c00))&0x7c00)
723 | (((((*d2&0x03e0)*s2[3])>>8) + (*(DWORD*)s2&0x03e0))&0x03e0)
724 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
731 AlphaBlt_YUY2(w
, h
, d
, dst
.pitch
, s
, src
.pitch
);
736 //dst.pitch = abs(dst.pitch);
740 dst
.pitchUV
= abs(dst
.pitch
)/2;
742 if(!dst
.bitsU
|| !dst
.bitsV
)
744 dst
.bitsU
= (BYTE
*)dst
.bits
+ abs(dst
.pitch
)*dst
.h
;
745 dst
.bitsV
= dst
.bitsU
+ dst
.pitchUV
*dst
.h
/2;
746 if(dst
.type
== MSP_YV12
)
749 dst
.bitsU
= dst
.bitsV
;
754 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
755 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
756 if(rd
.top
> rd
.bottom
)
758 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
759 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
760 dst
.pitchUV
= -dst
.pitchUV
;
763 BYTE
* src_origin
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ rs
.left
;
766 ss
[0] = src_origin
+ src
.pitch
*src
.h
*2;//U
767 ss
[1] = src_origin
+ src
.pitch
*src
.h
*3;//V
769 AlphaBltYv12Luma( d
, dst
.pitch
, w
, h
, src_origin
+ src
.pitch
*src
.h
, src_origin
, src
.pitch
);
771 AlphaBltYv12Chroma( dd
[0], dst
.pitchUV
, w
, h2
, ss
[0], src_origin
, src
.pitch
);
772 AlphaBltYv12Chroma( dd
[1], dst
.pitchUV
, w
, h2
, ss
[1], src_origin
, src
.pitch
);
784 //emms takes 40 CPU cycles
789 HRESULT
CMemSubPic::AlphaBltAxyuAxyv_P010(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
791 const SubPicDesc
& src
= m_spd
;
792 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
794 CRect
rs(*pSrc
), rd(*pDst
);
798 rd
.bottom
= dst
.h
- rd
.bottom
;
799 rd
.top
= dst
.h
- rd
.top
;
802 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
806 int w
= rs
.Width(), h
= rs
.Height();
809 BYTE
* s
= static_cast<BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
810 BYTE
* d
= static_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*rd
.top
+ rd
.left
*2;
812 if(rd
.top
> rd
.bottom
) {
813 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + rd
.left
;
815 dst
.pitch
= -dst
.pitch
;
818 for(ptrdiff_t i
=0; i
<h
; i
++, s
+= src
.pitch
, d
+= dst
.pitch
)
821 BYTE
* s2end
= s2
+ w
*4;
822 WORD
* d2
= reinterpret_cast<WORD
*>(d
);
823 for(; s2
< s2end
; s2
+= 4, d2
++)
826 d2
[0] = ((d2
[0]*s2
[3])>>8) + (s2
[1]<<8);
835 dst
.pitchUV
= abs(dst
.pitch
);
837 if(!dst
.bitsU
|| !dst
.bitsV
)
839 dst
.bitsU
= static_cast<BYTE
*>(dst
.bits
) + abs(dst
.pitch
)*dst
.h
;
840 dst
.bitsV
= dst
.bitsU
+ 2;
842 BYTE
* ddUV
= dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
*2;
843 if(rd
.top
> rd
.bottom
)
845 ddUV
= dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
*2;
846 dst
.pitchUV
= -dst
.pitchUV
;
849 s
= static_cast<BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
852 int pitch
= src
.pitch
;
853 for(int j
= 0; j
< h2
; j
++, s
+= 2*src
.pitch
, d
+= dst
.pitchUV
)
856 WORD
* d2
=reinterpret_cast<WORD
*>(d
);
857 WORD
* d2_end
= reinterpret_cast<WORD
*>(d
+2*w
);
858 DWORD last_alpha
= s2
[3]+s2
[3+src
.pitch
];
859 for( ; d2
<d2_end
; s2
+=8, d2
+=2)
863 (s2
[3] + s2
[3+src
.pitch
])*2 +
864 s2
[3+4]+ s2
[3+4+src
.pitch
]);
865 last_alpha
= s2
[3+4]+ s2
[3+4+src
.pitch
];
868 d2
[0] = (((d2
[0])*ia
)>>11) + ((s2
[0] + s2
[0+src
.pitch
])<<7);
869 d2
[1] = (((d2
[1])*ia
)>>11) + ((s2
[4] + s2
[4+src
.pitch
])<<7);
877 HRESULT
CMemSubPic::AlphaBltAxyuAxyv_Yv12(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
879 const SubPicDesc
& src
= m_spd
;
880 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
882 CRect
rs(*pSrc
), rd(*pDst
);
886 rd
.bottom
= dst
.h
- rd
.bottom
;
887 rd
.top
= dst
.h
- rd
.top
;
890 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
894 int w
= rs
.Width(), h
= rs
.Height();
896 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
897 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*rd
.top
+ rd
.left
;
899 if(rd
.top
> rd
.bottom
) {
900 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + rd
.left
;
902 dst
.pitch
= -dst
.pitch
;
905 for(ptrdiff_t j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
) {
907 BYTE
* s2end
= s2
+ w
*4;
909 for(; s2
< s2end
; s2
+= 4, d2
++) {
911 d2
[0] = ((d2
[0]*s2
[3])>>8) + s2
[1];
915 dst
.pitch
= abs(dst
.pitch
);
920 dst
.pitchUV
= dst
.pitch
/2;
924 ss
[0] = (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ rs
.left
*4;
927 if(!dst
.bitsU
|| !dst
.bitsV
) {
928 dst
.bitsU
= (BYTE
*)dst
.bits
+ dst
.pitch
*dst
.h
;
929 dst
.bitsV
= dst
.bitsU
+ dst
.pitchUV
*dst
.h
/2;
931 if(dst
.type
== MSP_YV12
) {
933 dst
.bitsU
= dst
.bitsV
;
939 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
940 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
/2;
942 if(rd
.top
> rd
.bottom
) {
943 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
944 dd
[1] = dst
.bitsV
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
/2;
945 dst
.pitchUV
= -dst
.pitchUV
;
948 for(ptrdiff_t i
= 0; i
< 2; i
++) {
952 for(ptrdiff_t j
= 0; j
< h2
; j
++, s
+= src
.pitch
*2, d
+= dst
.pitchUV
, a
+= src
.pitch
*2) {
954 BYTE
* s2end
= s2
+ w
*4;
958 DWORD last_alpha
= a2
[0]+a2
[0+src
.pitch
];
959 for(; s2
< s2end
; s2
+= 8, d2
++, a2
+= 8) {
960 unsigned int ia
= (last_alpha
+ 2*(a2
[0]+a2
[0+src
.pitch
]) + a2
[4] + a2
[4+src
.pitch
] + 4 )>>3;
961 last_alpha
= a2
[4] + a2
[4+src
.pitch
];
963 *d2
= ((*d2
*ia
)>>8) + ((s2
[0]+s2
[src
.pitch
])>>1);
972 HRESULT
CMemSubPic::AlphaBltAxyuAxyv_Nv12(const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
974 ONCER( SaveArgb2File(*pTarget
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12") );
975 const SubPicDesc
& src
= m_spd
;
976 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
978 CRect
rs(*pSrc
), rd(*pDst
);
982 rd
.bottom
= dst
.h
- rd
.bottom
;
983 rd
.top
= dst
.h
- rd
.top
;
986 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
990 int w
= rs
.Width(), h
= rs
.Height();
992 BYTE
* s
= (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ ((rs
.left
*src
.bpp
)>>3);
993 BYTE
* d
= (BYTE
*)dst
.bits
+ dst
.pitch
*rd
.top
+ rd
.left
;
995 if(rd
.top
> rd
.bottom
) {
996 d
= (BYTE
*)dst
.bits
+ dst
.pitch
*(rd
.top
-1) + rd
.left
;
998 dst
.pitch
= -dst
.pitch
;
1001 for(ptrdiff_t j
= 0; j
< h
; j
++, s
+= src
.pitch
, d
+= dst
.pitch
) {
1003 BYTE
* s2end
= s2
+ w
*4;
1005 for(; s2
< s2end
; s2
+= 4, d2
++) {
1007 d2
[0] = ((d2
[0]*s2
[3])>>8) + s2
[1];
1011 dst
.pitch
= abs(dst
.pitch
);
1016 dst
.pitchUV
= dst
.pitch
;
1020 ss
[0] = (BYTE
*)src
.bits
+ src
.pitch
*rs
.top
+ rs
.left
*4;
1023 if(!dst
.bitsU
|| !dst
.bitsV
) {
1024 dst
.bitsU
= (BYTE
*)dst
.bits
+ dst
.pitch
*dst
.h
;
1025 dst
.bitsV
= dst
.bitsU
+ 1;
1027 if(dst
.type
== MSP_NV21
) {
1028 BYTE
* p
= dst
.bitsU
;
1029 dst
.bitsU
= dst
.bitsV
;
1035 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*rd
.top
/2 + rd
.left
;
1038 if(rd
.top
> rd
.bottom
) {
1039 dd
[0] = dst
.bitsU
+ dst
.pitchUV
*(rd
.top
/2-1) + rd
.left
;
1041 dst
.pitchUV
= -dst
.pitchUV
;
1044 for(ptrdiff_t i
= 0; i
< 2; i
++) {
1048 for(ptrdiff_t j
= 0; j
< h2
; j
++, s
+= src
.pitch
*2, d
+= dst
.pitchUV
, a
+= src
.pitch
*2) {
1050 BYTE
* s2end
= s2
+ w
*4;
1053 DWORD last_alpha
= a2
[0]+a2
[0+src
.pitch
];
1054 for(; s2
< s2end
; s2
+= 8, d2
+=2, a2
+= 8) {
1055 unsigned int ia
= (last_alpha
+2*(a2
[0]+a2
[0+src
.pitch
])+a2
[4]+a2
[4+src
.pitch
]+4)>>3;
1056 last_alpha
= a2
[4]+a2
[4+src
.pitch
];
1058 *d2
= ((*d2
*ia
)>>8) + ((s2
[0]+s2
[src
.pitch
])>>1);
1064 ONCER( SaveArgb2File(*pTarget
, CRect(CPoint(0,0), m_size
), "F:/mplayer_MinGW_full/MinGW/home/Administrator/xy_vsfilter/debug.nv12_2") );
1068 HRESULT
CMemSubPic::AlphaBltAnv12_P010( const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
1070 //fix me: check colorspace and log error
1071 const SubPicDesc
& src
= m_spd
;
1072 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
1074 CRect
rs(*pSrc
), rd(*pDst
);
1078 rd
.bottom
= dst
.h
- rd
.bottom
;
1079 rd
.top
= dst
.h
- rd
.top
;
1081 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
1082 return E_INVALIDARG
;
1084 int w
= rs
.Width(), h
= rs
.Height();
1085 bool bottom_down
= rd
.top
> rd
.bottom
;
1091 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*rd
.top
+ rd
.left
*2;
1092 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*rd
.top
/2 + rd
.left
*2;
1096 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*(rd
.top
-1) + rd
.left
*2;
1097 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*(rd
.top
/2-1) + rd
.left
*2;
1098 dst
.pitch
= -dst
.pitch
;
1100 ASSERT(dst
.pitchUV
==0 || dst
.pitchUV
==abs(dst
.pitch
));
1102 const BYTE
* sa
= reinterpret_cast<const BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ rs
.left
;
1103 const BYTE
* sy
= sa
+ src
.pitch
*src
.h
;
1104 const BYTE
* s_uv
= sy
+ src
.pitch
*src
.h
;//UV
1105 return AlphaBltAnv12_P010(sa
, sy
, s_uv
, src
.pitch
, d
, dUV
, dst
.pitch
, w
, h
);
1108 HRESULT
CMemSubPic::AlphaBltAnv12_Nv12( const RECT
* pSrc
, const RECT
* pDst
, SubPicDesc
* pTarget
)
1110 //fix me: check colorspace and log error
1111 const SubPicDesc
& src
= m_spd
;
1112 SubPicDesc dst
= *pTarget
; // copy, because we might modify it
1114 CRect
rs(*pSrc
), rd(*pDst
);
1118 rd
.bottom
= dst
.h
- rd
.bottom
;
1119 rd
.top
= dst
.h
- rd
.top
;
1121 if(rs
.Width() != rd
.Width() || rs
.Height() != abs(rd
.Height())) {
1122 return E_INVALIDARG
;
1124 int w
= rs
.Width(), h
= rs
.Height();
1125 bool bottom_down
= rd
.top
> rd
.bottom
;
1131 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*rd
.top
+ rd
.left
;
1132 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*rd
.top
/2 + rd
.left
;
1136 d
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*(rd
.top
-1) + rd
.left
;
1137 dUV
= reinterpret_cast<BYTE
*>(dst
.bits
) + dst
.pitch
*dst
.h
+ dst
.pitch
*(rd
.top
/2-1) + rd
.left
;
1138 dst
.pitch
= -dst
.pitch
;
1140 ASSERT(dst
.pitchUV
==0 || dst
.pitchUV
==abs(dst
.pitch
));
1142 const BYTE
* sa
= reinterpret_cast<const BYTE
*>(src
.bits
) + src
.pitch
*rs
.top
+ rs
.left
;
1143 const BYTE
* sy
= sa
+ src
.pitch
*src
.h
;
1144 const BYTE
* s_uv
= sy
+ src
.pitch
*src
.h
;//UV
1146 return AlphaBltAnv12_Nv12(sa
, sy
, s_uv
, src
.pitch
, d
, dUV
, dst
.pitch
, w
, h
);
// Widens the rectangles in dirtyRectList to the alignment required by the
// current source/target format, then passes the list to the base class.
// The rectangles are modified in place (they are accessed by reference), so
// the caller's list is changed. A NULL list skips the adjustment.
// NOTE(review): the loop headers that walk the list, and possibly further
// adjustments, are not visible in this extract.
1149 STDMETHODIMP
CMemSubPic::SetDirtyRectEx(CAtlList
<CRect
>* dirtyRectList
)
1151 //if(m_spd.type == MSP_YUY2 || m_spd.type == MSP_YV12 || m_spd.type == MSP_IYUV || m_spd.type == MSP_AYUV)
1152 if(dirtyRectList
!=NULL
)
1154 POSITION pos
= dirtyRectList
->GetHeadPosition();
// Planar source, or a IYUV/YV12/P010/P016/NV12/NV21 target:
1155 if(m_spd
.type
== MSP_AYUV_PLANAR
|| m_alpha_blt_dst_type
==MSP_IYUV
|| m_alpha_blt_dst_type
==MSP_YV12
1156 || m_alpha_blt_dst_type
==MSP_P010
|| m_alpha_blt_dst_type
==MSP_P016
1157 || m_alpha_blt_dst_type
==MSP_NV12
|| m_alpha_blt_dst_type
==MSP_NV21
)
1161 CRect
& cRectSrc
= dirtyRectList
->GetNext(pos
);
// left is rounded down and right rounded up to a multiple of 16 ...
1162 cRectSrc
.left
&= ~15;
1163 cRectSrc
.right
= (cRectSrc
.right
+15)&~15;
// ... but right never exceeds the surface width.
1164 if(cRectSrc
.right
>m_spd
.w
)
1166 cRectSrc
.right
= m_spd
.w
;
// bottom is rounded up to an even value.
1169 cRectSrc
.bottom
= (cRectSrc
.bottom
+1)&~1;
// XY_AUYV source or YUY2 target: left/right are aligned to multiples of 4.
1172 else if(m_spd
.type
== MSP_XY_AUYV
|| m_alpha_blt_dst_type
==MSP_YUY2
)
1176 CRect
& cRectSrc
= dirtyRectList
->GetNext(pos
);
1177 cRectSrc
.left
&= ~3;
1178 cRectSrc
.right
= (cRectSrc
.right
+3)&~3;
1182 return __super::SetDirtyRectEx(dirtyRectList
);
1189 void CMemSubPic::AlphaBltYv12Luma(byte
* dst
, int dst_pitch
,
1191 const byte
* sub
, const byte
* alpha
, int sub_pitch
)
1194 ((reinterpret_cast<intptr_t>(alpha
) ^ reinterpret_cast<intptr_t>(sub
))
1195 |(reinterpret_cast<intptr_t>(alpha
) ^ reinterpret_cast<intptr_t>(dst
))
1196 | static_cast<intptr_t>(sub_pitch
)
1197 | static_cast<intptr_t>(dst_pitch
) ) & 15 )==0
1198 && w
> 32 && (g_cpuid
.m_flags
& CCpuID::sse2
))
1200 int head
= (16 - (reinterpret_cast<intptr_t>(alpha
)&15))&15;
1201 int tail
= (w
-head
) & 15;
1202 int w1
= w
- head
- tail
;
1203 for(int i
=0; i
<h
; i
++, dst
+= dst_pitch
, alpha
+= sub_pitch
, sub
+= sub_pitch
)
1205 const BYTE
* sa
= alpha
;
1206 const BYTE
* s2
= sub
;
1207 const BYTE
* s2end_mod16
= s2
+ w1
;
1208 const BYTE
* s2end
= s2
+ w
;
1211 for( ; (reinterpret_cast<intptr_t>(s2
)&15) != 0; s2
++, sa
++, d2
++)
1215 d2
[0] = ((d2
[0]*sa
[0])>>8) + s2
[0];
1218 for(; s2
< s2end_mod16
; s2
+=16, sa
+=16, d2
+=16)
1220 pix_alpha_blend_yv12_luma_sse2(d2
, sa
, s2
);
1222 for(; s2
< s2end
; s2
++, sa
++, d2
++)
1226 d2
[0] = ((d2
[0]*sa
[0])>>8) + s2
[0];
1231 else //fix me: only a workaround for non-mod-16 size video
1233 CMemSubPic::AlphaBltYv12LumaC(dst
, dst_pitch
, w
, h
, sub
, alpha
, sub_pitch
);
1237 void CMemSubPic::AlphaBltYv12LumaC( byte
* dst
, int dst_pitch
, int w
, int h
, const byte
* sub
, const byte
* alpha
, int sub_pitch
)
1239 for(int i
=0; i
<h
; i
++, dst
+= dst_pitch
, alpha
+= sub_pitch
, sub
+= sub_pitch
)
1241 const BYTE
* sa
= alpha
;
1242 const BYTE
* s2
= sub
;
1243 const BYTE
* s2end
= s2
+ w
;
1245 for(; s2
< s2end
; s2
+=1, sa
+=1, d2
+=1)
1249 // d2[0] = (((d2[0]-0x10)*s2[3])>>8) + s2[1];
1250 d2
[0] = ((d2
[0]*sa
[0])>>8) + s2
[0];
1256 void CMemSubPic::AlphaBltYv12Chroma(byte
* dst_uv
, int dst_pitch
,
1257 int w
, int chroma_h
,
1258 const byte
* src_uv
, const byte
* src_a
, int src_pitch
)
1261 ((reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(src_uv
))
1262 |(reinterpret_cast<intptr_t>(src_a
) ^ (2*reinterpret_cast<intptr_t>(dst_uv
)))
1263 | static_cast<intptr_t>(src_pitch
)
1264 | (2*static_cast<intptr_t>(dst_pitch
)) ) & 15) ==0 &&
1265 w
> 16 && (g_cpuid
.m_flags
& CCpuID::sse2
))
1267 int head
= (16 - (reinterpret_cast<intptr_t>(src_a
)&15))&15;
1268 int tail
= (w
-head
) & 15;
1269 int w00
= w
- head
- tail
;
1271 int pitch
= src_pitch
;
1272 for(int j
= 0; j
< chroma_h
; j
++, src_uv
+= src_pitch
*2, src_a
+= src_pitch
*2, dst_uv
+= dst_pitch
)
1274 hleft_vmid_mix_uv_yv12_c2(dst_uv
, head
, src_uv
, src_a
, src_pitch
);
1275 hleft_vmid_mix_uv_yv12_sse2(dst_uv
+(head
>>1), w00
, src_uv
+head
, src_a
+head
, src_pitch
, head
>0 ? -1 : 0);
1276 hleft_vmid_mix_uv_yv12_c2(dst_uv
+((head
+w00
)>>1), tail
, src_uv
+head
+w00
, src_a
+head
+w00
, src_pitch
, (w00
+head
)>0 ? -1 : 0);
1279 else//fix me: only a workaround for non-mod-16 size video
1281 AlphaBltYv12ChromaC(dst_uv
, dst_pitch
, w
, chroma_h
, src_uv
, src_a
, src_pitch
);
1285 void CMemSubPic::AlphaBltYv12ChromaC( byte
* dst
, int dst_pitch
, int w
, int chroma_h
, const byte
* sub_chroma
, const byte
* alpha
, int sub_pitch
)
1287 for(int j
= 0; j
< chroma_h
; j
++, sub_chroma
+= sub_pitch
*2, alpha
+= sub_pitch
*2, dst
+= dst_pitch
)
1289 hleft_vmid_mix_uv_yv12_c(dst
, w
, sub_chroma
, alpha
, sub_pitch
);
// Blends a subtitle picture made of 8-bit planes (alpha src_a, luma src_y,
// chroma src_uv, all using src_pitch) onto a destination whose luma samples
// are accessed as 16-bit WORDs (dst_y) and whose chroma rows start at dst_uv.
//
// Luma formula (scalar form, visible in _XY_MIX_ONE and the fallback loop):
//     d = ((d * alpha) >> 8) + (src << 8)
// i.e. the 8-bit subtitle luma is placed in the high byte of the 16-bit sample.
//
// When SSE2 is reported by g_cpuid the function uses a head / 16-pixel SSE2
// body / tail split if the pointers and pitches pass the alignment test, and
// a per-pixel loop otherwise. The call to AlphaBltAnv12_P010_C at the end is
// presumably the branch taken when SSE2 is unavailable - confirm against the
// full file.
//
// NOTE(review): several original lines (braces, the switch case bodies, the
// remainder of the guard conditions, the definitions of h2 and d, and the
// return of the SSE2 branch) are not visible in this excerpt.
1293 HRESULT
CMemSubPic::AlphaBltAnv12_P010( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
,
1294 BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
1296 if ( g_cpuid
.m_flags
& CCpuID::sse2
)
// sa walks the alpha plane for the luma pass; src_a itself is kept for the
// chroma pass further down.
1298 const BYTE
* sa
= src_a
;
// Alignment test: the XORs are zero in the low 4 bits only when src_a, src_y
// and dst_y share the same offset modulo 16, and the pitches contribute zero
// only when they are multiples of 16.
1300 ((reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(src_y
))
1301 |(reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(dst_y
))
1302 | static_cast<intptr_t>(src_pitch
)
1303 | static_cast<intptr_t>(dst_pitch
) ) & 15 )==0 &&
// head = number of leading pixels before src_a reaches a 16-byte boundary.
// Binary '-' binds tighter than '&', so this parses as ((16 - addr) & 15) & 15;
// that still yields the same value as the fully parenthesised form
// (16 - (addr & 15)) & 15 used for the chroma pass below.
1306 int head
= (16 - reinterpret_cast<intptr_t>(src_a
)&15)&15;
// tail = pixels left over after the head and the whole 16-pixel groups.
1307 int tail
= (w
- head
) & 15;
// Luma pass, one row per iteration.
1309 for(int i
=0; i
<h
; i
++, sa
+= src_pitch
, src_y
+= src_pitch
, dst_y
+= dst_pitch
)
1311 const BYTE
* sa2
= sa
;
1312 const BYTE
* s2
= src_y
;
// NOTE(review): the end marker is computed from the row start (w & ~15)
// although s2 is advanced by the head pixels before the 16-pixel loop runs.
// With head > 0 the last 16-pixel step may therefore reach past head + the
// aligned width (e.g. w = 32, head = 8) - verify against the full function.
1313 const BYTE
* s2end_mod16
= s2
+ (w
&~15);
1315 WORD
* d_w
=reinterpret_cast<WORD
*>(dst_y
);
// Head pixels: a fall-through switch that executes _XY_MIX_ONE once per
// remaining head pixel (case bodies between the lines shown are not visible).
1317 switch( head
)//important: it is safe since w > 16
// _XY_MIX_ONE blends a single pixel (skipped when alpha is 0xff) and then
// advances the alpha, destination and source pointers by one element.
1320 #define _XY_MIX_ONE if(sa2[0] < 0xff) { d_w[0] = ((d_w[0]*sa2[0])>>8) + (s2[0]<<8); } sa2++;d_w++;s2++;
1348 case 1://fall through on purpose
// Body: 16 pixels per step; d_w advances 16 WORDs (32 bytes) per step.
1351 for(; s2
< s2end_mod16
; s2
+=16, sa2
+=16, d_w
+=16)
1353 mix_16_y_p010_sse2( reinterpret_cast<BYTE
*>(d_w
), s2
, sa2
);
// Tail pixels: same fall-through scheme as the head.
1355 switch( tail
)//important: it is safe since w > 16
1385 case 1://fall through on purpose
// Fallback luma pass: per-pixel scalar blend over the full width.
1390 else //fix me: only a workaround for non-mod-16 size video
1392 for(int i
=0; i
<h
; i
++, sa
+= src_pitch
, src_y
+= src_pitch
, dst_y
+= dst_pitch
)
1394 const BYTE
* sa2
= sa
;
1395 const BYTE
* s2
= src_y
;
1396 const BYTE
* s2end
= s2
+ w
;
1397 WORD
* d_w
= reinterpret_cast<WORD
*>(dst_y
);
1398 for(; s2
< s2end
; s2
+=1, sa2
+=1, d_w
+=1)
1402 d_w
[0] = ((d_w
[0]*sa2
[0])>>8) + (s2
[0]<<8);
// Chroma pass. Same style of alignment test as for luma, this time comparing
// src_a with src_uv and dst_uv. h2 and d are defined on lines not visible in
// this excerpt.
1411 ((reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(src_uv
))
1412 |(reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(dst_uv
))
1413 | static_cast<intptr_t>(src_pitch
)
1414 | static_cast<intptr_t>(dst_pitch
) ) & 15) ==0 &&
1417 int head
= (16-(reinterpret_cast<intptr_t>(src_a
)&15))&15;
1418 int tail
= (w
-head
) & 15;
// w00 = width of the middle part handled by the SSE2 helper.
1419 int w00
= w
- head
- tail
;
1421 ASSERT(w
>0);//the calls to mix may fail if w==0
// Per chroma row: src_uv advances one source row, src_a advances two source
// rows, d advances one destination row.
1422 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
// head part (scalar helper)
1424 hleft_vmid_mix_uv_p010_c2(d
, head
, src_uv
, src_a
, src_pitch
);
// middle part (SSE2 helper); the destination offset is twice the source
// offset. The last argument is -1 when pixels precede this segment, else 0 -
// presumably it tells the helper a left neighbour exists; confirm in the helper.
1425 hleft_vmid_mix_uv_p010_sse2(d
+2*head
, w00
, src_uv
+head
, src_a
+head
, src_pitch
, head
>0 ? -1 : 0);
// tail part (scalar helper)
1426 hleft_vmid_mix_uv_p010_c2(d
+2*(head
+w00
), tail
, src_uv
+head
+w00
, src_a
+head
+w00
, src_pitch
, (w00
+head
)>0 ? -1 : 0);
// Fallback chroma pass: scalar helper over the full width of every row.
1431 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1433 hleft_vmid_mix_uv_p010_c(d
, w
, src_uv
, src_a
, src_pitch
);
// Delegate to the plain C++ implementation with the unchanged arguments.
1444 return AlphaBltAnv12_P010_C(src_a
, src_y
, src_uv
, src_pitch
, dst_y
, dst_uv
, dst_pitch
, w
, h
);
// Plain C++ version of the P010 blend: takes the same arguments as
// AlphaBltAnv12_P010 and uses no SSE2 helpers.
//
// Luma: every pixel of every row is combined with the scalar formula
//     d = ((d * alpha) >> 8) + (src << 8)
// where the destination is accessed as 16-bit WORDs.
// Chroma: each row is handed to hleft_vmid_mix_uv_p010_c.
//
// NOTE(review): braces, any per-pixel guard around the luma assignment, the
// definitions of h2 and d, and the return statement are on lines not visible
// in this excerpt.
1448 HRESULT
CMemSubPic::AlphaBltAnv12_P010_C( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
, BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
// sa walks the alpha plane during the luma pass so that src_a still points
// at the first alpha row when the chroma pass starts.
1450 const BYTE
* sa
= src_a
;
// Luma pass, one row per iteration.
1451 for(int i
=0; i
<h
; i
++, sa
+= src_pitch
, src_y
+= src_pitch
, dst_y
+= dst_pitch
)
1453 const BYTE
* sa2
= sa
;
1454 const BYTE
* s2
= src_y
;
1455 const BYTE
* s2end
= s2
+ w
;
1456 WORD
* d2
= reinterpret_cast<WORD
*>(dst_y
);
// One pixel per step: source/alpha advance one byte, destination one WORD.
1457 for(; s2
< s2end
; s2
+=1, sa2
+=1, d2
+=1)
1461 d2
[0] = ((d2
[0]*sa2
[0])>>8) + (s2
[0]<<8);
// Chroma pass: src_uv advances one source row, src_a two source rows and d
// one destination row per iteration.
1468 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1470 hleft_vmid_mix_uv_p010_c(d
, w
, src_uv
, src_a
, src_pitch
);
// Blends a subtitle picture (alpha src_a, luma src_y, chroma src_uv, all
// using src_pitch) onto a destination with a luma plane at dst_y and chroma
// rows starting at dst_uv.
//
// Luma is delegated entirely to AlphaBltYv12Luma. Chroma uses a head / SSE2
// body / tail split when the alignment test passes, w > 16 and SSE2 is
// reported by g_cpuid; the second chroma loop, which calls the scalar helper
// for the full width, is presumably the else branch - confirm against the
// full file.
//
// NOTE(review): braces, the definitions of h2 and d, and the return statement
// are on lines not visible in this excerpt.
1475 HRESULT
CMemSubPic::AlphaBltAnv12_Nv12( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
,
1476 BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
// Luma pass.
1478 AlphaBltYv12Luma( dst_y
, dst_pitch
, w
, h
, src_y
, src_a
, src_pitch
);
// Alignment test: the low 4 bits are zero only when src_a, src_uv and dst_uv
// share the same offset modulo 16 and both pitches are multiples of 16.
1482 ((reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(src_uv
))
1483 |(reinterpret_cast<intptr_t>(src_a
) ^ reinterpret_cast<intptr_t>(dst_uv
))
1484 | static_cast<intptr_t>(src_pitch
)
1485 | static_cast<intptr_t>(dst_pitch
) ) & 15) ==0 &&
1486 w
> 16 && (g_cpuid
.m_flags
& CCpuID::sse2
) )
// head = leading pixels before src_a reaches a 16-byte boundary,
// tail = pixels left after the whole 16-pixel groups,
// w00  = width of the middle part handled by the SSE2 helper.
1490 int head
= (16-(reinterpret_cast<intptr_t>(src_a
)&15))&15;
1491 int tail
= (w
-head
) & 15;
1492 int w00
= w
- head
- tail
;
1494 ASSERT(w
>0);//the calls to mix may fail if w==0
// Per chroma row: src_uv advances one source row, src_a advances two source
// rows, d advances one destination row.
1495 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
// head part (scalar helper)
1497 hleft_vmid_mix_uv_nv12_c2(d
, head
, src_uv
, src_a
, src_pitch
);
// middle part (SSE2 helper). The last argument is -1 when pixels precede this
// segment, else 0 - presumably it tells the helper a left neighbour exists;
// confirm in the helper.
1498 hleft_vmid_mix_uv_nv12_sse2(d
+head
, w00
, src_uv
+head
, src_a
+head
, src_pitch
, head
>0 ? -1 : 0);
// tail part (scalar helper)
1499 hleft_vmid_mix_uv_nv12_c2(d
+head
+w00
, tail
, src_uv
+head
+w00
, src_a
+head
+w00
, src_pitch
, (w00
+head
)>0 ? -1 : 0);
// Scalar chroma pass over the full width of every row.
1509 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1511 hleft_vmid_mix_uv_nv12_c(d
, w
, src_uv
, src_a
, src_pitch
);
// Plain C++ version of AlphaBltAnv12_Nv12 with the same parameter list.
// Luma is delegated to AlphaBltYv12LumaC; every chroma row is handed to the
// scalar helper hleft_vmid_mix_uv_nv12_c.
//
// NOTE(review): braces, the definitions of h2 and d, and the return statement
// are on lines not visible in this excerpt.
1517 HRESULT
CMemSubPic::AlphaBltAnv12_Nv12_C( const BYTE
* src_a
, const BYTE
* src_y
, const BYTE
* src_uv
, int src_pitch
, BYTE
* dst_y
, BYTE
* dst_uv
, int dst_pitch
, int w
, int h
)
// Luma pass.
1519 AlphaBltYv12LumaC( dst_y
, dst_pitch
, w
, h
, src_y
, src_a
, src_pitch
);
// Chroma pass: src_uv advances one source row, src_a two source rows and d
// one destination row per iteration.
1522 for(int j
= 0; j
< h2
; j
++, src_uv
+= src_pitch
, src_a
+= src_pitch
*2, d
+= dst_pitch
)
1524 hleft_vmid_mix_uv_nv12_c(d
, w
, src_uv
, src_a
, src_pitch
);
// Steps through h rows two at a time (i += 2) and calls
// hleft_vmid_subsample_and_interlace_2_line_sse2 with dst, u, v, w and pitch
// on each step. Note the parameter order: height comes before width.
//
// NOTE(review): the rest of the loop body (original lines following the call,
// presumably where dst/u/v are advanced) is not visible in this excerpt.
1529 void CMemSubPic::SubsampleAndInterlace( BYTE
* dst
, const BYTE
* u
, const BYTE
* v
, int h
, int w
, int pitch
)
// One iteration per pair of source rows.
1531 for (int i
=0;i
<h
;i
+=2)
1533 hleft_vmid_subsample_and_interlace_2_line_sse2(dst
, u
, v
, w
, pitch
);
// Same loop as SubsampleAndInterlace, but each step calls the plain C++
// helper hleft_vmid_subsample_and_interlace_2_line_c instead of the SSE2 one.
// Note the parameter order: height comes before width.
//
// NOTE(review): the rest of the loop body (original lines following the call,
// presumably where dst/u/v are advanced) is not visible in this excerpt.
1540 void CMemSubPic::SubsampleAndInterlaceC( BYTE
* dst
, const BYTE
* u
, const BYTE
* v
, int h
, int w
, int pitch
)
// One iteration per pair of source rows.
1542 for (int i
=0;i
<h
;i
+=2)
1544 hleft_vmid_subsample_and_interlace_2_line_c(dst
, u
, v
, w
, pitch
);
// Dispatcher for the YUY2 alpha blend: forwards its arguments unchanged to
// either AlphaBlt_YUY2_C or AlphaBlt_YUY2_MMX.
//
// NOTE(review): whatever selects between the two calls (original lines
// 1553/1555/1557, presumably a preprocessor or runtime condition) is not
// visible in this excerpt - confirm against the full file.
1551 void CMemSubPic::AlphaBlt_YUY2(int w
, int h
, BYTE
* d
, int dstpitch
, PCUINT8 s
, int srcpitch
)
// plain C++ implementation
1554 AlphaBlt_YUY2_C(w
, h
, d
, dstpitch
, s
, srcpitch
);
// MMX implementation
1556 AlphaBlt_YUY2_MMX(w
, h
, d
, dstpitch
, s
, srcpitch
);
1561 // CMemSubPicAllocator
// Constructs the allocator: passes maxsize (plus two false flags) to the
// CSubPicExAllocatorImpl base, stores alpha_blt_dst_type and maxsize in the
// corresponding members, and then picks m_type from alpha_blt_dst_type.
//
// NOTE(review): the case labels of the switch, any default branch and any use
// of the `type` parameter (default -1 per the inline comment) are on lines
// not visible in this excerpt.
1564 CMemSubPicAllocator::CMemSubPicAllocator(int alpha_blt_dst_type
, SIZE maxsize
, int type
/*=-1*/)
1565 : CSubPicExAllocatorImpl(maxsize
, false, false)
1566 , m_alpha_blt_dst_type(alpha_blt_dst_type
)
1567 , m_maxsize(maxsize
)
// Map the blend destination type onto the internal subpicture layout.
1572 switch(alpha_blt_dst_type
)
1575 m_type
= MSP_XY_AUYV
;
1586 m_type
= MSP_AYUV_PLANAR
;
1595 // ISubPicAllocatorImpl
1597 bool CMemSubPicAllocator::AllocEx(bool fStatic
, ISubPicEx
** ppSubPic
)
1603 spd
.w
= m_maxsize
.cx
;
1604 spd
.h
= m_maxsize
.cy
;
1606 spd
.pitch
= (spd
.w
*spd
.bpp
)>>3;
1608 spd
.bits
= DNew BYTE
[spd
.pitch
*spd
.h
];
1612 *ppSubPic
= DNew
CMemSubPic(spd
, m_alpha_blt_dst_type
);
1616 (*ppSubPic
)->AddRef();