Support unrar64.dll
[xy_vsfilter.git] / src / subpic / SimpleSubpicImpl.cpp
blobc034b9e67f096d7c4783753a96235212c19a96ac
1 #include "stdafx.h"
2 #include "SimpleSubpicImpl.h"
3 #include "ISimpleSubPic.h"
4 #include "xy_intrinsics.h"
5 #include "../subtitles/xy_malloc.h"
6 #include "MemSubPic.h"
8 //////////////////////////////////////////////////////////////////////////
9 //
10 // SimpleSubpic
13 SimpleSubpic::SimpleSubpic( IXySubRenderFrame*sub_render_frame, int alpha_blt_dst_type )
14 : CUnknown(NAME("SimpleSubpic"), NULL)
15 , m_sub_render_frame(sub_render_frame)
16 , m_alpha_blt_dst_type(alpha_blt_dst_type)
18 ConvertColorSpace();
21 SimpleSubpic::~SimpleSubpic()
23 for(unsigned i=0;i<m_buffers.GetCount();i++)
24 xy_free(m_buffers.GetAt(i));
27 STDMETHODIMP SimpleSubpic::NonDelegatingQueryInterface( REFIID riid, void** ppv )
29 return
30 QI(ISimpleSubPic)
31 __super::NonDelegatingQueryInterface(riid, ppv);
34 STDMETHODIMP SimpleSubpic::AlphaBlt( SubPicDesc* target )
36 ASSERT(target!=NULL);
37 HRESULT hr = S_FALSE;
38 int count = m_bitmap.GetCount();
39 for(int i=0;i<count;i++)
41 switch(target->type)
43 case MSP_NV12:
44 case MSP_NV21:
45 hr = AlphaBltAnv12_Nv12(target, m_bitmap.GetAt(i));
46 break;
47 case MSP_P010:
48 case MSP_P016:
49 hr = AlphaBltAnv12_P010(target, m_bitmap.GetAt(i));
50 break;
51 default:
52 hr = AlphaBlt(target, m_bitmap.GetAt(i));
53 break;
56 if (FAILED(hr))
58 return hr;
62 return hr;
65 HRESULT SimpleSubpic::AlphaBltAnv12_P010( SubPicDesc* target, const Bitmap& src )
67 //fix me: check colorspace and log error
68 SubPicDesc dst = *target; // copy, because we might modify it
70 CRect rd(src.pos, src.size);
71 if(dst.h < 0)
73 dst.h = -dst.h;
74 rd.bottom = dst.h - rd.bottom;
75 rd.top = dst.h - rd.top;
78 int w = src.size.cx, h = src.size.cy;
79 bool bottom_down = rd.top > rd.bottom;
81 BYTE* d = NULL;
82 BYTE* dUV = NULL;
83 if(!bottom_down)
85 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left*2;
86 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*rd.top/2 + rd.left*2;
88 else
90 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + rd.left*2;
91 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*(rd.top/2-1) + rd.left*2;
92 dst.pitch = -dst.pitch;
94 ASSERT(dst.pitchUV==0 || dst.pitchUV==abs(dst.pitch));
96 enum PLANS{A=0,Y,UV};
97 const BYTE* sa = reinterpret_cast<const BYTE*>(src.extra.plans[A]);
98 const BYTE* sy = reinterpret_cast<const BYTE*>(src.extra.plans[Y]);
99 const BYTE* s_uv = reinterpret_cast<const BYTE*>(src.extra.plans[UV]);
100 return CMemSubPic::AlphaBltAnv12_P010(sa, sy, s_uv, src.pitch, d, dUV, dst.pitch, w, h);
103 HRESULT SimpleSubpic::AlphaBltAnv12_Nv12( SubPicDesc* target, const Bitmap& src )
105 //fix me: check colorspace and log error
106 SubPicDesc dst = *target; // copy, because we might modify it
108 CRect rd(src.pos, src.size);
109 if(dst.h < 0)
111 dst.h = -dst.h;
112 rd.bottom = dst.h - rd.bottom;
113 rd.top = dst.h - rd.top;
116 int w = src.size.cx, h = src.size.cy;
117 bool bottom_down = rd.top > rd.bottom;
119 BYTE* d = NULL;
120 BYTE* dUV = NULL;
121 if (!bottom_down)
123 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + rd.left;
124 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*rd.top/2 + rd.left;
126 else
128 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + rd.left;
129 dUV = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*dst.h + dst.pitch*(rd.top/2-1) + rd.left;
130 dst.pitch = -dst.pitch;
132 ASSERT(dst.pitchUV==0 || dst.pitchUV==abs(dst.pitch));
134 enum PLANS{A=0,Y,UV};
135 const BYTE* sa = reinterpret_cast<const BYTE*>(src.extra.plans[A]);
136 const BYTE* sy = reinterpret_cast<const BYTE*>(src.extra.plans[Y]);
137 const BYTE* s_uv = reinterpret_cast<const BYTE*>(src.extra.plans[UV]);
138 return CMemSubPic::AlphaBltAnv12_Nv12(sa, sy, s_uv, src.pitch, d, dUV, dst.pitch, w, h);
// Blends a single subtitle bitmap (src) into the surface described by *target.
// The blend routine is chosen by dst.type; all work is done on a local copy of
// the descriptor because height, pitch and plane pointers get adjusted.
// Returns S_OK after blending, or E_NOTIMPL when the target type (or a flipped
// layout of that type) is not handled.
141 HRESULT SimpleSubpic::AlphaBlt( SubPicDesc* target, const Bitmap& src )
143 SubPicDesc dst = *target; // copy, because we might modify it
145 CRect rd(src.pos, src.size);
// A negative height is normalised and the rectangle is mirrored vertically, so
// rd.top > rd.bottom afterwards identifies the flipped case (presumably a
// bottom-up surface - confirm with the callers).
146 if(dst.h < 0)
148 dst.h = -dst.h;
149 rd.bottom = dst.h - rd.bottom;
150 rd.top = dst.h - rd.top;
153 int w = src.size.cx, h = src.size.cy;
154 const BYTE* s = reinterpret_cast<const BYTE*>(src.pixels);
// Default start address: row rd.top, column rd.left converted to bytes via bpp.
155 BYTE* d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*rd.top + ((rd.left*dst.bpp)>>3);
// Flipped case: start one row above rd.top and step with a negated pitch.
// Only the types listed below are supported here; anything else is E_NOTIMPL.
157 if(rd.top > rd.bottom)
159 if(dst.type == MSP_RGB32 || dst.type == MSP_RGB24
160 || dst.type == MSP_RGB16 || dst.type == MSP_RGB15
161 || dst.type == MSP_YUY2 || dst.type == MSP_AYUV)
163 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + (rd.left*dst.bpp>>3);
165 else if(dst.type == MSP_YV12 || dst.type == MSP_IYUV)
// Planar targets: the first plane is addressed with 8 bits per pixel.
167 d = reinterpret_cast<BYTE*>(dst.bits) + dst.pitch*(rd.top-1) + (rd.left*8>>3);
169 else
171 return E_NOTIMPL;
173 dst.pitch = -dst.pitch;
175 DbgLog((LOG_TRACE, 5, TEXT("w=%d h=%d"), w, h));
176 switch(dst.type)
// MSP_RGBA: the destination pixel is overwritten, not mixed with its old value.
// Each colour byte is scaled by 256/(256 - s2[3]) and the top byte is written
// as 0xff minus the source top byte. Pixels with s2[3] == 0xff are skipped.
178 case MSP_RGBA:
179 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
181 const BYTE* s2 = s;
182 const BYTE* s2end = s2 + w*4;
183 DWORD* d2 = reinterpret_cast<DWORD*>(d);
184 for(; s2 < s2end; s2 += 4, d2++)
186 if(s2[3] < 0xff)
// bd is at least 2 here because s2[3] is below 0xff, so the divisions are safe.
188 DWORD bd =0x00000100 -( (DWORD) s2[3]);
189 DWORD B = ((*((DWORD*)s2)&0x000000ff)<<8)/bd;
190 DWORD V = ((*((DWORD*)s2)&0x0000ff00)/bd)<<8;
191 DWORD R = (((*((DWORD*)s2)&0x00ff0000)>>8)/bd)<<16;
192 *d2 = B | V | R
193 | (0xff000000-(*((DWORD*)s2)&0xff000000))&0xff000000;
197 break;
// MSP_RGB32: dst = dst*s2[3]/256 + src, computed on two channels at a time
// (mask 0x00ff00ff) plus the middle channel (mask 0x0000ff00). The source
// colour is added unscaled, so s2[3] acts as the weight kept of the
// destination (presumably premultiplied source with inverted alpha - confirm).
// The top byte of the result is always zero.
198 case MSP_RGB32:
199 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
201 const BYTE* s2 = s;
202 const BYTE* s2end = s2 + w*4;
203 DWORD* d2 = (DWORD*)d;
204 for(; s2 < s2end; s2 += 4, d2++)
206 if(s2[3] < 0xff)
208 *d2 = (((((*d2&0x00ff00ff)*s2[3])>>8) + (*((DWORD*)s2)&0x00ff00ff))&0x00ff00ff)
209 | (((((*d2&0x0000ff00)*s2[3])>>8) + (*((DWORD*)s2)&0x0000ff00))&0x0000ff00);
213 break;
// MSP_AYUV: same arithmetic as MSP_RGB32, but the destination's top byte is
// carried over unchanged.
214 case MSP_AYUV:
215 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
217 const BYTE* s2 = s;
218 const BYTE* s2end = s2 + w*4;
219 DWORD* d2 = (DWORD*)d;
220 for(; s2 < s2end; s2 += 4, d2++)
222 if(s2[3] < 0xff)
224 *d2 = (((((*d2&0x00ff00ff)*s2[3])>>8) + (*((DWORD*)s2)&0x00ff00ff))&0x00ff00ff)
225 | (((((*d2&0x0000ff00)*s2[3])>>8) + (*((DWORD*)s2)&0x0000ff00))&0x0000ff00)
226 | (*d2&0xff000000);
230 break;
// MSP_RGB24: same per-byte formula; the source advances 4 bytes per pixel and
// the destination 3.
231 case MSP_RGB24:
232 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
234 const BYTE* s2 = s;
235 const BYTE* s2end = s2 + w*4;
236 BYTE* d2 = d;
237 for(; s2 < s2end; s2 += 4, d2 += 3)
239 if(s2[3] < 0xff)
241 d2[0] = ((d2[0]*s2[3])>>8) + s2[0];
242 d2[1] = ((d2[1]*s2[3])>>8) + s2[1];
243 d2[2] = ((d2[2]*s2[3])>>8) + s2[2];
247 break;
// MSP_RGB16: the weight in s2[3] is treated as a 5-bit value (compared with
// 0x1f, products shifted by 5) and the source colour is read from the low 16
// bits of the source DWORD using the 565 masks. NOTE(review): this relies on
// the source having been repacked beforehand - confirm against the producer.
248 case MSP_RGB16:
249 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
251 const BYTE* s2 = s;
252 const BYTE* s2end = s2 + w*4;
253 WORD* d2 = reinterpret_cast<WORD*>(d);
254 for(; s2 < s2end; s2 += 4, d2++)
256 if(s2[3] < 0x1f)
258 *d2 = (WORD)((((((*d2&0xf81f)*s2[3])>>5) + (*(DWORD*)s2&0xf81f))&0xf81f)
259 | (((((*d2&0x07e0)*s2[3])>>5) + (*(DWORD*)s2&0x07e0))&0x07e0));
260 /* *d2 = (WORD)((((((*d2&0xf800)*s2[3])>>8) + (*(DWORD*)s2&0xf800))&0xf800)
261 | (((((*d2&0x07e0)*s2[3])>>8) + (*(DWORD*)s2&0x07e0))&0x07e0)
262 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
267 break;
// MSP_RGB15: as MSP_RGB16 but with the 555 masks.
268 case MSP_RGB15:
269 for(int j = 0; j < h; j++, s += src.pitch, d += dst.pitch)
271 const BYTE* s2 = s;
272 const BYTE* s2end = s2 + w*4;
273 WORD* d2 = reinterpret_cast<WORD*>(d);
274 for(; s2 < s2end; s2 += 4, d2++)
276 if(s2[3] < 0x1f)
278 *d2 = (WORD)((((((*d2&0x7c1f)*s2[3])>>5) + (*(DWORD*)s2&0x7c1f))&0x7c1f)
279 | (((((*d2&0x03e0)*s2[3])>>5) + (*(DWORD*)s2&0x03e0))&0x03e0));
280 /* *d2 = (WORD)((((((*d2&0x7c00)*s2[3])>>8) + (*(DWORD*)s2&0x7c00))&0x7c00)
281 | (((((*d2&0x03e0)*s2[3])>>8) + (*(DWORD*)s2&0x03e0))&0x03e0)
282 | (((((*d2&0x001f)*s2[3])>>8) + (*(DWORD*)s2&0x001f))&0x001f));
287 break;
// MSP_YUY2: handed to CMemSubPic::AlphaBlt_YUY2 with the pointers and pitches
// computed above.
288 case MSP_YUY2:
289 CMemSubPic::AlphaBlt_YUY2(w,h,d,dst.pitch,s, src.pitch);
290 break;
// MSP_YV12 / MSP_IYUV: three-plane target. Missing chroma information is
// derived from the first plane: chroma pitch is half the absolute pitch, the
// first chroma plane follows the dst.h rows of the first plane and the second
// follows the first; for MSP_YV12 the two chroma pointers are swapped.
291 case MSP_YV12:
292 case MSP_IYUV:
294 //dst.pitch = abs(dst.pitch);
// Chroma is blended over half as many rows as luma.
295 int h2 = h/2;
296 if(!dst.pitchUV)
298 dst.pitchUV = abs(dst.pitch)/2;
300 if(!dst.bitsU || !dst.bitsV)
302 dst.bitsU = reinterpret_cast<BYTE*>(dst.bits) + abs(dst.pitch)*dst.h;
303 dst.bitsV = dst.bitsU + dst.pitchUV*dst.h/2;
304 if(dst.type == MSP_YV12)
306 BYTE* p = dst.bitsU;
307 dst.bitsU = dst.bitsV;
308 dst.bitsV = p;
// dd[0]/dd[1]: start addresses inside the U and V planes, at half the luma
// coordinates. In the flipped case they start one chroma row higher and the
// chroma pitch is negated as well.
311 BYTE* dd[2];
312 dd[0] = dst.bitsU + dst.pitchUV*rd.top/2 + rd.left/2;
313 dd[1] = dst.bitsV + dst.pitchUV*rd.top/2 + rd.left/2;
314 if(rd.top > rd.bottom)
316 dd[0] = dst.bitsU + dst.pitchUV*(rd.top/2-1) + rd.left/2;
317 dd[1] = dst.bitsV + dst.pitchUV*(rd.top/2-1) + rd.left/2;
318 dst.pitchUV = -dst.pitchUV;
// The source planes come from src.extra.plans[] in the order A, Y, U, V.
321 enum PLANS{A=0,Y,U,V};
322 const BYTE* sa = reinterpret_cast<const BYTE*>(src.extra.plans[A]);
323 const BYTE* sy = reinterpret_cast<const BYTE*>(src.extra.plans[Y]);
324 const BYTE* su = reinterpret_cast<const BYTE*>(src.extra.plans[U]);
325 const BYTE* sv = reinterpret_cast<const BYTE*>(src.extra.plans[V]);
326 CMemSubPic::AlphaBltYv12Luma( d, dst.pitch, w, h, sy, sa, src.pitch );
327 CMemSubPic::AlphaBltYv12Chroma( dd[0], dst.pitchUV, w, h2, su, sa, src.pitch);
328 CMemSubPic::AlphaBltYv12Chroma( dd[1], dst.pitchUV, w, h2, sv, sa, src.pitch);
// _mm_empty() is only issued on non-64-bit builds (see the TODO below).
329 #ifndef _WIN64
330 // TODOX64 : fixme!
331 _mm_empty();
332 #endif
334 break;
335 default:
336 return E_NOTIMPL;
337 break;
340 //emms takes about 40 CPU cycles
341 //__asm emms;
342 return S_OK;
345 HRESULT SimpleSubpic::ConvertColorSpace()
347 int count = 0;
348 HRESULT hr = m_sub_render_frame->GetBitmapCount(&count);
349 if (FAILED(hr) || count==0)
351 return hr;
353 int xy_color_space = 0;
354 hr = m_sub_render_frame->GetXyColorSpace(&xy_color_space);
355 if (FAILED(hr))
357 return hr;
359 m_bitmap.SetCount(count);
360 m_buffers.SetCount(count);
361 for (int i=0;i<count;i++)
363 m_buffers.GetAt(i) = NULL;//safe
365 Bitmap &bitmap = m_bitmap.GetAt(i);
366 hr = m_sub_render_frame->GetBitmap(i, &bitmap.id, &bitmap.pos, &bitmap.size, &bitmap.pixels, &bitmap.pitch);
367 if (FAILED(hr))
369 return hr;
371 if (xy_color_space==XY_CS_AYUV_PLANAR)
373 hr = m_sub_render_frame->GetBitmapExtra(i, &bitmap.extra);
374 if (FAILED(hr))
376 return hr;
380 int w = bitmap.size.cx, h = bitmap.size.cy;
381 if (w<=0 || h<=0)
383 continue;
386 const BYTE* top = reinterpret_cast<const BYTE*>(bitmap.pixels);
387 const BYTE* bottom = top + bitmap.pitch*h;
388 if(m_alpha_blt_dst_type == MSP_RGB16)
390 ASSERT(xy_color_space==XY_CS_ARGB);
392 BYTE* dst = reinterpret_cast<BYTE*>(xy_malloc(bitmap.pitch*h, (bitmap.pos.x*4)&15));
393 m_buffers.GetAt(i) = dst;
394 bitmap.pixels = dst;
395 for(; top < bottom ; top += bitmap.pitch, dst += bitmap.pitch)
397 const DWORD* s = reinterpret_cast<const DWORD*>(top);
398 const DWORD* e = s + w;
399 DWORD* dst2 = reinterpret_cast<DWORD*>(dst);
400 for(; s < e; s++, dst2++)
402 *dst2 = ((*s>>3)&0x1f000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
403 // *s = (*s&0xff000000)|((*s>>8)&0xf800)|((*s>>5)&0x07e0)|((*s>>3)&0x001f);
407 else if(m_alpha_blt_dst_type == MSP_RGB15)
409 ASSERT(xy_color_space==XY_CS_ARGB);
411 BYTE* dst = reinterpret_cast<BYTE*>(xy_malloc(bitmap.pitch*h, (bitmap.pos.x*4)&15));
412 m_buffers.GetAt(i) = dst;
413 bitmap.pixels = dst;
414 for(; top < bottom; top += bitmap.pitch, dst += bitmap.pitch)
416 const DWORD* s = reinterpret_cast<const DWORD*>(top);
417 const DWORD* e = s + w;
418 DWORD* dst2 = reinterpret_cast<DWORD*>(dst);
419 for(; s < e; s++, dst2++)
421 *dst2 = ((*s>>3)&0x1f000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
422 // *s = (*s&0xff000000)|((*s>>9)&0x7c00)|((*s>>6)&0x03e0)|((*s>>3)&0x001f);
426 else if(m_alpha_blt_dst_type == MSP_YUY2)
428 ASSERT(xy_color_space==XY_CS_AUYV);
429 XY_DO_ONCE( xy_logger::write_file("G:\\b1_ul", top, bitmap.pitch*(h-1)) );
431 BYTE* dst = reinterpret_cast<BYTE*>(xy_malloc(bitmap.pitch*h, (bitmap.pos.x*4)&15));
432 m_buffers.GetAt(i) = dst;
433 memcpy(dst, bitmap.pixels, bitmap.pitch*h);
434 bitmap.pixels = dst;
435 for(BYTE* tempTop=dst; tempTop < dst+bitmap.pitch*h ; tempTop += bitmap.pitch)
437 BYTE* s = tempTop;
438 BYTE* e = s + w*4;
439 BYTE last_v = s[0], last_u=s[2];
440 for(; s < e; s+=8) // AUYV AUYV -> AxYU AxYV
442 BYTE tmp = s[4];
443 s[4] = (last_v + 2*s[0] + s[4] + 2)>>2;
444 last_v = tmp;
446 s[0] = (last_u + 2*s[2] + s[6] + 2)>>2;
447 last_u = s[6];
450 XY_DO_ONCE( xy_logger::write_file("G:\\a1_ul", dst, bitmap.pitch*(h-1)) );
452 else if(m_alpha_blt_dst_type == MSP_YV12 || m_alpha_blt_dst_type == MSP_IYUV )
454 ASSERT(xy_color_space==XY_CS_AYUV_PLANAR);
455 //nothing to do
457 else if ( m_alpha_blt_dst_type == MSP_P010 || m_alpha_blt_dst_type == MSP_P016
458 || m_alpha_blt_dst_type == MSP_NV12 )
460 ASSERT(xy_color_space==XY_CS_AYUV_PLANAR);
461 SubsampleAndInterlace(i, &bitmap, true);
463 else if( m_alpha_blt_dst_type == MSP_NV21 )
465 ASSERT(xy_color_space==XY_CS_AYUV_PLANAR);
466 SubsampleAndInterlace(i, &bitmap, false);
469 return S_OK;
472 void SimpleSubpic::SubsampleAndInterlace( int index, Bitmap*bitmap, bool u_first )
474 ASSERT(bitmap!=NULL);
475 //fix me: check alignment and log error
476 int w = bitmap->size.cx, h = bitmap->size.cy;
477 ASSERT(h%2==0);
478 const BYTE* u_start = reinterpret_cast<const BYTE*>(bitmap->extra.plans[2]);
479 const BYTE* v_start = reinterpret_cast<const BYTE*>(bitmap->extra.plans[3]);
481 BYTE* dst = reinterpret_cast<BYTE*>(xy_malloc(bitmap->pitch*h/2, bitmap->pos.x&15));
482 m_buffers.GetAt(index) = dst;
483 bitmap->extra.plans[2] = dst;
485 if(!u_first)
487 const BYTE* tmp = v_start;
488 v_start = u_start;
489 u_start = tmp;
492 //Todo: fix me.
493 //Walkarround for alignment
494 if ( ((bitmap->pitch | (int)u_start | (int)v_start)&15) == 0 && (g_cpuid.m_flags & CCpuID::sse2) )
496 for (int i=0;i<h;i+=2)
498 int w16 = w&~15;
499 hleft_vmid_subsample_and_interlace_2_line_sse2(dst, u_start, v_start, w16, bitmap->pitch);
500 ASSERT(w>0);
501 hleft_vmid_subsample_and_interlace_2_line_c(dst+w16, u_start+w16, v_start+w16, w&15, bitmap->pitch, -1);
502 u_start += 2*bitmap->pitch;
503 v_start += 2*bitmap->pitch;
504 dst += bitmap->pitch;
507 else
509 for (int i=0;i<h;i+=2)
511 hleft_vmid_subsample_and_interlace_2_line_c(dst, u_start, v_start, w, bitmap->pitch);
512 u_start += 2*bitmap->pitch;
513 v_start += 2*bitmap->pitch;
514 dst += bitmap->pitch;