X64 transport [Part 5] (Update plugins.cpp)
[xy_vsfilter.git] / src / subpic / SimpleSubpicImpl.cpp
blob8cc9a9f26c41ffbe7fb06ed53abac29297751d46
1 #include "stdafx.h"
2 #include "SimpleSubpicImpl.h"
3 #include "ISimpleSubPic.h"
4 #include "xy_intrinsics.h"
5 #include "../subtitles/xy_malloc.h"
6 #include "MemSubPic.h"
8 //////////////////////////////////////////////////////////////////////////
9 //
10 // SimpleSubpic
13 SimpleSubpic::SimpleSubpic( IXySubRenderFrame*sub_render_frame, int alpha_blt_dst_type )
14 : CUnknown(NAME("SimpleSubpic"), NULL)
15 , m_sub_render_frame(sub_render_frame)
16 , m_alpha_blt_dst_type(alpha_blt_dst_type)
18 ConvertColorSpace();
21 SimpleSubpic::~SimpleSubpic()
23 for(unsigned i=0;i<m_buffers.GetCount();i++)
24 xy_free(m_buffers.GetAt(i));
27 STDMETHODIMP SimpleSubpic::NonDelegatingQueryInterface( REFIID riid, void** ppv )
29 return
30 QI(ISimpleSubPic)
31 __super::NonDelegatingQueryInterface(riid, ppv);
34 STDMETHODIMP SimpleSubpic::AlphaBlt( SubPicDesc* target )
36 ASSERT(target!=NULL);
37 HRESULT hr = S_FALSE;
38 int count = m_bitmap.GetCount();
39 for(int i=0;i<count;i++)
41 switch(target->type)
43 case MSP_NV12:
44 case MSP_NV21:
45 hr = AlphaBltAnv12_Nv12(target, m_bitmap.GetAt(i));
46 break;
47 case MSP_P010:
48 case MSP_P016:
49 hr = AlphaBltAnv12_P010(target, m_bitmap.GetAt(i));
50 break;
51 default:
52 hr = AlphaBlt(target, m_bitmap.GetAt(i));
53 break;
56 if (FAILED(hr))
58 return hr;
62 return hr;
65 HRESULT SimpleSubpic::AlphaBltAnv12_P010( SubPicDesc* target, const Bitmap& src )
67 //fix me: check colorspace and log error
68 SubPicDesc dst = *target; // copy, because we might modify it
70 CRect rd(src.pos, src.size);
71 if(dst.h < 0)
73 dst.h = -dst.h;
74 rd.bottom = dst.h - rd.bottom;
75 rd.top = dst.h - rd.top;
78 int w = src.size.cx, h = src.size.cy;
79 bool bottom_down = rd.top > rd.bottom;
81 BYTE* d = NULL;
82 BYTE* dUV = NULL;
83 if(!bottom_down)
85 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left*2;
86 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*rd.top/2 + rd.left*2;
88 else
90 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + rd.left*2;
91 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*(rd.top/2-1) + rd.left*2;
92 dst.pitch = -dst.pitch;
94 ASSERT(dst.pitchUV==0 || dst.pitchUV==abs(dst.pitch));
96 enum PLANS{A=0,Y,UV};
97 const BYTE* sa = reinterpret_cast<const BYTE*>(src.extra.plans[A]);
98 const BYTE* sy = reinterpret_cast<const BYTE*>(src.extra.plans[Y]);
99 const BYTE* s_uv = reinterpret_cast<const BYTE*>(src.extra.plans[UV]);
100 return CMemSubPic::AlphaBltAnv12_P010(sa, sy, s_uv, src.pitch, d, dUV, dst.pitch, w, h);
103 HRESULT SimpleSubpic::AlphaBltAnv12_Nv12( SubPicDesc* target, const Bitmap& src )
105 //fix me: check colorspace and log error
106 SubPicDesc dst = *target; // copy, because we might modify it
108 CRect rd(src.pos, src.size);
109 if(dst.h < 0)
111 dst.h = -dst.h;
112 rd.bottom = dst.h - rd.bottom;
113 rd.top = dst.h - rd.top;
116 int w = src.size.cx, h = src.size.cy;
117 bool bottom_down = rd.top > rd.bottom;
119 BYTE* d = NULL;
120 BYTE* dUV = NULL;
121 if (!bottom_down)
123 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left;
124 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*rd.top/2 + rd.left;
126 else
128 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + rd.left;
129 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*(rd.top/2-1) + rd.left;
130 dst.pitch = -dst.pitch;
132 ASSERT(dst.pitchUV==0 || dst.pitchUV==abs(dst.pitch));
134 enum PLANS{A=0,Y,UV};
135 const BYTE* sa = reinterpret_cast<const BYTE*>(src.extra.plans[A]);
136 const BYTE* sy = reinterpret_cast<const BYTE*>(src.extra.plans[Y]);
137 const BYTE* s_uv = reinterpret_cast<const BYTE*>(src.extra.plans[UV]);
138 return CMemSubPic::AlphaBltAnv12_Nv12(sa, sy, s_uv, src.pitch, d, dUV, dst.pitch, w, h);
141 HRESULT SimpleSubpic::AlphaBlt( SubPicDesc* target, const Bitmap& src )
143 SubPicDesc dst = *target; // copy, because we might modify it
145 CRect rd(src.pos, src.size);
146 if(dst.h < 0)
148 dst.h = -dst.h;
149 rd.bottom = dst.h - rd.bottom;
150 rd.top = dst.h - rd.top;
153 int w = src.size.cx, h = src.size.cy;
154 const BYTE* s = reinterpret_cast<const BYTE*>(src.pixels);
155 BYTE* d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + ((rd.left*dst.bpp)>>3);
157 if(rd.top > rd.bottom)
159 if(dst.type == MSP_RGB32 || dst.type == MSP_RGB24
160 || dst.type == MSP_RGB16 || dst.type == MSP_RGB15
161 || dst.type == MSP_YUY2 || dst.type == MSP_AYUV)
163 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + (rd.left*dst.bpp>>3);
165 else if(dst.type == MSP_YV12 || dst.type == MSP_IYUV)
167 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + (rd.left*8>>3);
169 else
171 return E_NOTIMPL;
173 dst.pitch = -dst.pitch;
175 DbgLog((LOG_TRACE, 5, TEXT("w=%d h=%d"), w, h));
176 switch(dst.type)
178 case MSP_RGBA:
179 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
181 const BYTE* s2 = s;
182 const BYTE* s2end = s2 + w*4;
183 DWORD* d2 = reinterpret_cast<DWORD*>(d);
184 for(; s2 < s2end; s2 += 4, d2++)
186 if(s2[3] < 0xff)
188 DWORD bd =0x00000100 -( (DWORD) s2[3]);
189 DWORD B = ((*((DWORD*)s2)&0x000000ff)<<8)/bd;
190 DWORD V = ((*((DWORD*)s2)&0x0000ff00)/bd)<<8;
191 DWORD R = (((*((DWORD*)s2)&0x00ff0000)>>8)/bd)<<16;
192 *d2 = B | V | R
193 | (0xff000000-(*((DWORD*)s2)&0xff000000))&0xff000000;
197 break;
198 case MSP_RGB32:
199 case MSP_AYUV: //ToDo: fix me MSP_VUYA indeed?
200 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
202 const BYTE* s2 = s;
203 const BYTE* s2end = s2 + w*4;
204 DWORD* d2 = reinterpret_cast<DWORD*>(d);
205 for(; s2 < s2end; s2 += 4, d2++)
207 #ifdef _WIN64
208 DWORD ia = 256-s2[3];
209 if(s2[3] < 0xff) {
210 *d2 = ((((*d2&0x00ff00ff)*s2[3])>>8) + (((*((DWORD*)s2)&0x00ff00ff)*ia)>>8)&0x00ff00ff)
211 | ((((*d2&0x0000ff00)*s2[3])>>8) + (((*((DWORD*)s2)&0x0000ff00)*ia)>>8)&0x0000ff00);
213 #else
214 if(s2[3] < 0xff)
216 *d2 = (((((*d2&0x00ff00ff)*s2[3])>>8) + (*((DWORD*)s2)&0x00ff00ff))&0x00ff00ff)
217 | (((((*d2&0x0000ff00)*s2[3])>>8) + (*((DWORD*)s2)&0x0000ff00))&0x0000ff00);
219 #endif
222 break;
223 case MSP_RGB24:
224 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
226 const BYTE* s2 = s;
227 const BYTE* s2end = s2 + w*4;
228 BYTE* d2 = d;
229 for(; s2 < s2end; s2 += 4, d2 += 3)
231 if(s2[3] < 0xff)
233 d2[0] = ((d2[0]*s2[3])>>8) + s2[0];
234 d2[1] = ((d2[1]*s2[3])>>8) + s2[1];
235 d2[2] = ((d2[2]*s2[3])>>8) + s2[2];
239 break;
240 case MSP_RGB16:
241 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
243 const BYTE* s2 = s;
244 const BYTE* s2end = s2 + w*4;
245 WORD* d2 = reinterpret_cast<WORD*>(d);
246 for(; s2 < s2end; s2 += 4, d2++)
248 if(s2[3] < 0x1f)
250 *d2 = (WORD)((((((*d2&0xf81f)*s2[3])>>5) + (*(DWORD*)s2&0xf81f))&0xf81f)
251 | (((((*d2&0x07e0)*s2[3])>>5) + (*(DWORD*)s2&0x07e0))&0x07e0));
252 /* *d2 = (WORD)((((((*d2&0xf800)*s2[3])>>8) + (*(DWORD*)s2&0xf800))&0xf800)
253 | (((((*d2&0x07e0)*s2[3])>>8) + (*(DWORD*)s2&0x07e0))&0x07e0)
254 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
259 break;
260 case MSP_RGB15:
261 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
263 const BYTE* s2 = s;
264 const BYTE* s2end = s2 + w*4;
265 WORD* d2 = reinterpret_cast<WORD*>(d);
266 for(; s2 < s2end; s2 += 4, d2++)
268 if(s2[3] < 0x1f)
270 *d2 = (WORD)((((((*d2&0x7c1f)*s2[3])>>5) + (*(DWORD*)s2&0x7c1f))&0x7c1f)
271 | (((((*d2&0x03e0)*s2[3])>>5) + (*(DWORD*)s2&0x03e0))&0x03e0));
272 /* *d2 = (WORD)((((((*d2&0x7c00)*s2[3])>>8) + (*(DWORD*)s2&0x7c00))&0x7c00)
273 | (((((*d2&0x03e0)*s2[3])>>8) + (*(DWORD*)s2&0x03e0))&0x03e0)
274 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
279 break;
280 case MSP_YUY2:
281 CMemSubPic::AlphaBlt_YUY2(w,h,d,dst.pitch,s, src.pitch);
282 break;
283 case MSP_YV12:
284 case MSP_IYUV:
286 //dst.pitch = abs(dst.pitch);
287 int h2 = h/2;
288 if(!dst.pitchUV)
290 dst.pitchUV = abs(dst.pitch)/2;
292 if(!dst.bitsU || !dst.bitsV)
294 dst.bitsU = reinterpret_cast<BYTE*>(dst.bits) + abs(dst.pitch)*dst.h;
295 dst.bitsV = dst.bitsU + dst.pitchUV*dst.h/2;
296 if(dst.type == MSP_YV12)
298 BYTE* p = dst.bitsU;
299 dst.bitsU = dst.bitsV;
300 dst.bitsV = p;
303 BYTE* dd[2];
304 dd[0] = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left/2;
305 dd[1] = dst.bitsV + dst.pitchUV*rd.top/2 + rd.left/2;
306 if(rd.top > rd.bottom)
308 dd[0] = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left/2;
309 dd[1] = dst.bitsV + dst.pitchUV*(rd.top/2-1) + rd.left/2;
310 dst.pitchUV = -dst.pitchUV;
313 enum PLANS{A=0,Y,U,V};
314 const BYTE* sa = reinterpret_cast<const BYTE*>(src.extra.plans[A]);
315 const BYTE* sy = reinterpret_cast<const BYTE*>(src.extra.plans[Y]);
316 const BYTE* su = reinterpret_cast<const BYTE*>(src.extra.plans[U]);
317 const BYTE* sv = reinterpret_cast<const BYTE*>(src.extra.plans[V]);
318 CMemSubPic::AlphaBltYv12Luma( d, dst.pitch, w, h, sy, sa, src.pitch );
319 CMemSubPic::AlphaBltYv12Chroma( dd[0], dst.pitchUV, w, h2, su, sa, src.pitch);
320 CMemSubPic::AlphaBltYv12Chroma( dd[1], dst.pitchUV, w, h2, sv, sa, src.pitch);
321 #ifndef _WIN64
322 // TODOX64 : fixme!
323 _mm_empty();
324 #endif
326 break;
327 default:
328 return E_NOTIMPL;
329 break;
332 //emmsÒª40¸öcpuÖÜÆÚ
333 //__asm emms;
334 return S_OK;
337 HRESULT SimpleSubpic::ConvertColorSpace()
339 int count = 0;
340 HRESULT hr = m_sub_render_frame->GetBitmapCount(&count);
341 if (FAILED(hr) || count==0)
343 return hr;
345 int xy_color_space = 0;
346 hr = m_sub_render_frame->GetXyColorSpace(&xy_color_space);
347 if (FAILED(hr))
349 return hr;
351 m_bitmap.SetCount(count);
352 m_buffers.SetCount(count);
353 for (int i=0;i<count;i++)
355 m_buffers.GetAt(i) = NULL;//safe
357 Bitmap &bitmap = m_bitmap.GetAt(i);
358 hr = m_sub_render_frame->GetBitmap(i, &bitmap.id, &bitmap.pos, &bitmap.size, &bitmap.pixels, &bitmap.pitch);
359 if (FAILED(hr))
361 return hr;
363 if (xy_color_space==XY_CS_AYUV_PLANAR)
365 hr = m_sub_render_frame->GetBitmapExtra(i, &bitmap.extra);
366 if (FAILED(hr))
368 return hr;
372 int w = bitmap.size.cx, h = bitmap.size.cy;
373 if (w<=0 || h<=0)
375 continue;
378 const BYTE* top = reinterpret_cast<const BYTE*>(bitmap.pixels);
379 const BYTE* bottom = top + bitmap.pitch*h;
380 if(m_alpha_blt_dst_type == MSP_RGB16)
382 ASSERT(xy_color_space==XY_CS_ARGB);
384 BYTE* dst = reinterpret_cast<BYTE*>(xy_malloc(bitmap.pitch*h, (bitmap.pos.x*4)&15));
385 m_buffers.GetAt(i) = dst;
386 bitmap.pixels = dst;
387 for(; top < bottom ; top += bitmap.pitch, dst += bitmap.pitch)
389 const DWORD* s = reinterpret_cast<const DWORD*>(top);
390 const DWORD* e = s + w;
391 DWORD* dst2 = reinterpret_cast<DWORD*>(dst);
392 for(; s < e; s++, dst2++)
394 *dst2 = ((*s>>3)&0x1f000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
395 // *s = (*s&0xff000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
399 else if(m_alpha_blt_dst_type == MSP_RGB15)
401 ASSERT(xy_color_space==XY_CS_ARGB);
403 BYTE* dst = reinterpret_cast<BYTE*>(xy_malloc(bitmap.pitch*h, (bitmap.pos.x*4)&15));
404 m_buffers.GetAt(i) = dst;
405 bitmap.pixels = dst;
406 for(; top < bottom; top += bitmap.pitch, dst += bitmap.pitch)
408 const DWORD* s = reinterpret_cast<const DWORD*>(top);
409 const DWORD* e = s + w;
410 DWORD* dst2 = reinterpret_cast<DWORD*>(dst);
411 for(; s < e; s++, dst2++)
413 *dst2 = ((*s>>3)&0x1f000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
414 // *s = (*s&0xff000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
418 else if(m_alpha_blt_dst_type == MSP_YUY2)
420 ASSERT(xy_color_space==XY_CS_AUYV);
421 XY_DO_ONCE( xy_logger::write_file("G:\\b1_ul", top, bitmap.pitch*(h-1)) );
423 BYTE* dst = reinterpret_cast<BYTE*>(xy_malloc(bitmap.pitch*h, (bitmap.pos.x*4)&15));
424 m_buffers.GetAt(i) = dst;
425 memcpy(dst, bitmap.pixels, bitmap.pitch*h);
426 bitmap.pixels = dst;
427 for(BYTE* tempTop=dst; tempTop < dst+bitmap.pitch*h ; tempTop += bitmap.pitch)
429 BYTE* s = tempTop;
430 BYTE* e = s + w*4;
431 BYTE last_v = s[0], last_u=s[2];
432 for(; s < e; s+=8) // AUYV AUYV -> AxYU AxYV
434 BYTE tmp = s[4];
435 s[4] = (last_v + 2*s[0] + s[4] + 2)>>2;
436 last_v = tmp;
438 s[0] = (last_u + 2*s[2] + s[6] + 2)>>2;
439 last_u = s[6];
442 XY_DO_ONCE( xy_logger::write_file("G:\\a1_ul", dst, bitmap.pitch*(h-1)) );
444 else if(m_alpha_blt_dst_type == MSP_YV12 || m_alpha_blt_dst_type == MSP_IYUV )
446 ASSERT(xy_color_space==XY_CS_AYUV_PLANAR);
447 //nothing to do
449 else if ( m_alpha_blt_dst_type == MSP_P010 || m_alpha_blt_dst_type == MSP_P016
450 || m_alpha_blt_dst_type == MSP_NV12 )
452 ASSERT(xy_color_space==XY_CS_AYUV_PLANAR);
453 SubsampleAndInterlace(i, &bitmap, true);
455 else if( m_alpha_blt_dst_type == MSP_NV21 )
457 ASSERT(xy_color_space==XY_CS_AYUV_PLANAR);
458 SubsampleAndInterlace(i, &bitmap, false);
461 return S_OK;
464 void SimpleSubpic::SubsampleAndInterlace( int index, Bitmap*bitmap, bool u_first )
466 ASSERT(bitmap!=NULL);
467 //fix me: check alignment and log error
468 int w = bitmap->size.cx, h = bitmap->size.cy;
469 ASSERT(h%2==0);
470 const BYTE* u_start = reinterpret_cast<const BYTE*>(bitmap->extra.plans[2]);
471 const BYTE* v_start = reinterpret_cast<const BYTE*>(bitmap->extra.plans[3]);
473 BYTE* dst = reinterpret_cast<BYTE*>(xy_malloc(bitmap->pitch*h/2, bitmap->pos.x&15));
474 m_buffers.GetAt(index) = dst;
475 bitmap->extra.plans[2] = dst;
477 if(!u_first)
479 const BYTE* tmp = v_start;
480 v_start = u_start;
481 u_start = tmp;
484 //Todo: fix me.
485 //Walkarround for alignment
486 if ( ((bitmap->pitch | (int)u_start | (int)v_start)&15) == 0 && (g_cpuid.m_flags & CCpuID::sse2) )
488 for (int i=0;i<h;i+=2)
490 int w16 = w&~15;
491 hleft_vmid_subsample_and_interlace_2_line_sse2(dst, u_start, v_start, w16, bitmap->pitch);
492 ASSERT(w>0);
493 hleft_vmid_subsample_and_interlace_2_line_c(dst+w16, u_start+w16, v_start+w16, w&15, bitmap->pitch, -1);
494 u_start += 2*bitmap->pitch;
495 v_start += 2*bitmap->pitch;
496 dst += bitmap->pitch;
499 else
501 for (int i=0;i<h;i+=2)
503 hleft_vmid_subsample_and_interlace_2_line_c(dst, u_start, v_start, w, bitmap->pitch);
504 u_start += 2*bitmap->pitch;
505 v_start += 2*bitmap->pitch;
506 dst += bitmap->pitch;