1 ; Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al.
2 ; http://www.avisynth.org
4 ; This program is free software; you can redistribute it and/or modify
5 ; it under the terms of the GNU General Public License as published by
6 ; the Free Software Foundation; either version 2 of the License, or
7 ; (at your option) any later version.
9 ; This program is distributed in the hope that it will be useful,
10 ; but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 ; GNU General Public License for more details.
14 ; You should have received a copy of the GNU General Public License
15 ; along with this program; if not, write to the Free Software
16 ; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
17 ; http://www.gnu.org/copyleft/gpl.html .
19 ; Linking Avisynth statically or dynamically with other modules is making a
20 ; combined work based on Avisynth. Thus, the terms and conditions of the GNU
21 ; General Public License cover the whole combination.
23 ; As a special exception, the copyright holders of Avisynth give you
24 ; permission to link Avisynth with independent modules that communicate with
25 ; Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
26 ; terms of these independent modules, and to copy and distribute the
27 ; resulting combined work under terms of your choice, provided that
28 ; every copy of the combined work is accompanied by a complete copy of
29 ; the source code of Avisynth (the version of Avisynth used to produce the
30 ; combined work), being distributed under the terms of the GNU General
31 ; Public License plus this exception. An independent module is a module
32 ; which is not derived from or based on Avisynth, such as 3rd-party filters,
33 ; import and export plugins, or graphical user interfaces.
39 ; alignment has to be 'page' so that I can use 'align 32' below
; Segment attributes as declared here: page-aligned, public, class 'CODE'.
; Note that, despite the "64" in its name, the segment is declared use32.
41 _TEXT64
segment page public use32
'CODE'
43 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; 64-bit constants used by the conversion code.  The conversion macros address
; them as [edx+ofs_<name>], so the order and the 8-byte size of the entries
; must stay in step with the ofs_* equates.
;
; The x... entries are named after their own hex value:
;   x0000_0000_0010_0010  16 in the two low words (psubw'd from the Y words)
;   x0080_0080_0080_0080  128 in all four words   (psubw'd from the U/V words)
;   x00FF_00FF_00FF_00FF  low-byte mask for each word (used with pand)
;   x00002000_00002000    2000h in both dwords (paddd'ed after the cy multiply)
;   xFF000000_FF000000    top byte of both dwords (por'ed in as the alpha byte)
;
; cy / crv / cgu_cgv / cbu are pmaddwd operands; per dword (high word:low word):
;   cy       0000:4A85   (     0 : 19077)
;   crv      3313:0000   ( 13075 :     0)
;   cgu_cgv  E5FC:F377   ( -6660 : -3209)
;   cbu      0000:408D   (     0 : 16525)
; The conversion code's comments show chroma pairs with U in the low word and
; V in the high word, so crv scales V, cbu scales U and cgu_cgv scales both.
; NOTE(review): these look like fixed-point Rec.601 coefficients -- confirm.
49 x0000_0000_0010_0010
dq 00000000000100010h
50 x0080_0080_0080_0080
dq 00080008000800080h
51 x00FF_00FF_00FF_00FF
dq 000FF00FF00FF00FFh
52 x00002000_00002000
dq 00000200000002000h
53 xFF000000_FF000000
dq 0FF000000FF000000h
54 cy
dq 000004A8500004A85h
55 crv
dq 03313000033130000h
56 cgu_cgv
dq 0E5FCF377E5FCF377h
57 cbu
dq 00000408D0000408Dh
59 yuv2rgb_constants_rec709:
; Byte offsets of the constants from the start of a constant table.  The
; conversion code addresses every constant as [edx+ofs_<name>], which lets the
; same instructions work with whichever table edx has been pointed at
; (yuv2rgb_constants or yuv2rgb_constants_rec709).
; The values step by 8 because each table entry is one dq (8 bytes), and they
; follow the order in which the x... constants are laid out in the table.
71 ofs_x0000_0000_0010_0010
= 0
72 ofs_x0080_0080_0080_0080
= 8
73 ofs_x00FF_00FF_00FF_00FF
= 16
74 ofs_x00002000_00002000
= 24
75 ofs_xFF000000_FF000000
= 32
81 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
87 pand mma
,[edx+ofs_x00FF_00FF_00FF_00FF
]
95 YUV2RGB_INNER_LOOP
MACRO uyvy
,rgb32
,no_next_pixel
97 ;; This YUV422->RGB conversion code uses only four MMX registers per
98 ;; source dword, so I convert two dwords in parallel. Lines corresponding
99 ;; to the "second pipe" are indented an extra space. There's almost no
100 ;; overlap, except at the end and in the three lines marked ***.
101 ;; Revised 4 July 2002 to set the rgb32 alpha channel to "on" by default, plus other small memory optimizations.
;;
;; Macro parameters:
;;   uyvy          - forwarded unchanged to the GET_Y / GET_UV helper macros.
;;   rgb32         - presumably selects 32-bit versus 24-bit output stores;
;;                   TODO confirm against the conditional assembly directives.
;;   no_next_pixel - the callers pass 0 or 1.  With 1 the third source load
;;                   reads [esi+4] (the second dword again) instead of [esi+8].
;;
;; Registers read by this macro:
;;   esi - source pointer (dwords at [esi], [esi+4] and [esi+8] or [esi+4])
;;   edx - base of the constant table; constants are fetched as [edx+ofs_*]
;;   edi - destination pointer; all stores use negative offsets from edi
;; First pipe works in mm0-mm3, second pipe in mm4-mm7.

;; load the two source dwords, one per pipe
103 movd mm0
, dword ptr [esi]
104 movd mm5
, dword ptr [esi+4]
106 GET_Y mm0
,&uyvy
; mm0 = __________Y1__Y0
108 GET_UV mm1
,&uyvy
; mm1 = __________V0__U0
110 movq mm2
,mm5
; *** avoid reload from [esi+4]
;; subtract 16 from each of the two luma words
112 psubw mm0
, qword ptr [edx+ofs_x0000_0000_0010_0010
]
;; third source dword: [esi+8], or [esi+4] when no_next_pixel = 1
113 movd mm6
, dword ptr [esi+8-4*(no_next_pixel
)]
114 GET_UV mm2
,&uyvy
; mm2 = __________V2__U2
115 psubw mm4
, qword ptr [edx+ofs_x0000_0000_0010_0010
]
;; subtract 128 from the chroma words
118 psubw mm1
, qword ptr [edx+ofs_x0080_0080_0080_0080
]
121 psubw mm5
, qword ptr [edx+ofs_x0080_0080_0080_0080
]
;; spread the luma words into the low word of each dword; cy has a zero
;; high word, so the pmaddwd below ignores whatever lands in the high words
122 punpcklwd mm0
,mm2
; mm0 = ______Y1______Y0
124 pmaddwd mm0
, qword ptr [edx+ofs_cy
]
127 pmaddwd mm4
, qword ptr [edx+ofs_cy
]
129 paddw mm1
,mm2
; mm1 = __V1__U1__V0__U0 * 2
;; add 2000h to each scaled luma dword (presumably rounding ahead of a
;; 14-bit right shift -- confirm)
130 paddd mm0
,[edx+ofs_x00002000_00002000
]
133 paddd mm4
,[edx+ofs_x00002000_00002000
]
;; chroma contributions: one pmaddwd per colour component, each followed
;; by an arithmetic shift right of 14 bits
136 pmaddwd mm1
,[edx+ofs_crv
]
139 pmaddwd mm5
,[edx+ofs_crv
]
140 psrad mm1
,14 ; mm1 = RRRRRRRRrrrrrrrr
142 pmaddwd mm2
,[edx+ofs_cgu_cgv
]
145 pmaddwd mm6
,[edx+ofs_cgu_cgv
]
146 psrad mm2
,14 ; mm2 = GGGGGGGGgggggggg
148 pmaddwd mm3
,[edx+ofs_cbu
]
151 pmaddwd mm7
,[edx+ofs_cbu
]
157 psrad mm3
,14 ; mm3 = BBBBBBBBbbbbbbbb
;; narrow the dword results to signed words and interleave the components
161 packssdw mm3
,mm2
; mm3 = GGGGggggBBBBbbbb
163 packssdw mm1
,mm0
; mm1 = ________RRRRrrrr
164 packssdw mm5
,mm0
; *** avoid pxor mm4,mm4
167 punpcklwd mm2
,mm1
; mm2 = RRRRBBBBrrrrbbbb
169 punpckhwd mm3
,mm1
; mm3 = ____GGGG____gggg
173 punpcklwd mm0
,mm3
; mm0 = ____rrrrggggbbbb
179 punpckhwd mm2
,mm3
; mm2 = ____RRRRGGGGBBBB
;; saturate the words to unsigned bytes: two pixels per quadword
181 packuswb mm0
,mm2
; mm0 = __RRGGBB__rrggbb <- ta dah!
185 por mm0
, [edx+ofs_xFF000000_FF000000
] ; set alpha channels "on"
186 por mm4
, [edx+ofs_xFF000000_FF000000
]
;; output stores, all relative to edi
187 movq
[edi-16],mm0
; store the quadwords independently
190 psrlq mm0
,8 ; pack the two quadwords into 12 bytes
191 psllq mm4
,8 ; (note: the two shifts above leave
192 movd
dword ptr [edi-12],mm0
; mm0,4 = __RRGGBBrrggbb__)
195 movd
dword ptr [edi-8],mm4
197 movd
dword ptr [edi-4],mm4
202 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
204 YUV2RGB_PROC
MACRO procname
,uyvy
,rgb32
;; Generates one conversion procedure named <procname>.  uyvy and rgb32 are
;; passed straight through to YUV2RGB_INNER_LOOP.
;; The offsets listed below are relative to esp on entry; the code adds 12 to
;; them, presumably for three registers pushed before these loads -- confirm.
208 ;;void __cdecl procname(
209 ;; [esp+ 4] const BYTE* src,
210 ;; [esp+ 8] BYTE* dst,
211 ;; [esp+12] const BYTE* src_end,
212 ;; [esp+16] int src_pitch,
213 ;; [esp+20] int row_size,
214 ;; [esp+24] bool rec709);
;; esi = src_end argument
223 mov esi,[esp+12+12] ; read source bottom-up
;; edx = constant table base: yuv2rgb_constants, or yuv2rgb_constants_rec709
;; depending on bit 0 of the rec709 argument tested here
226 mov edx,offset yuv2rgb_constants
227 test byte ptr [esp+24+12],1
229 mov edx,offset yuv2rgb_constants_rec709
;; the inner loop is expanded twice: once with no_next_pixel = 0 and once
;; with no_next_pixel = 1, where the third source load re-reads [esi+4]
;; instead of [esi+8]
237 YUV2RGB_INNER_LOOP uyvy
,rgb32
,0
240 YUV2RGB_INNER_LOOP uyvy
,rgb32
,1
256 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Instantiate the conversion procedures.  Arguments are (procname, uyvy, rgb32):
; both use uyvy = 0; mmx_YUY2toRGB24 passes rgb32 = 0, mmx_YUY2toRGB32 passes
; rgb32 = 1.
258 YUV2RGB_PROC mmx_YUY2toRGB24
,0,0
259 YUV2RGB_PROC mmx_YUY2toRGB32
,0,1
261 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;