4 ;;; Extended MMX prediction composition
5 ;;; routines handling the four different interpolation cases.
..
7 ;;; Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk>
10 ;;; This program is free software; you can redistribute it and/or
11 ;;; modify it under the terms of the GNU General Public License
12 ;;; as published by the Free Software Foundation; either version 2
13 ;;; of the License, or (at your option) any later version.
15 ;;; This program is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;;; GNU General Public License for more details.
20 ;;; You should have received a copy of the GNU General Public License
21 ;;; along with this program; if not, write to the Free Software
22 ;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
28 ;;; The no interpolation case.
..
30 global predcomp_00_mmx
32 ;;; void predcomp_<ix><iy>_mmx(char *src, char *dst, int lx, int w, int h, int addflag);
34 ;;; ix - Interpolation in x, iy - Interpolation in y
43 ;;; mm1 = one's mask for src
44 ;;; mm0 = zero mask for src...
;;; predcomp_00_mmx body (no-interpolation case): loads the six arguments
;;; from the frame and reads each source row as two 8-byte MMX loads
;;; (offsets 0 and 8 from psrc).
;;; NOTE(review): the leading listing numbers skip (50 -> 64, 75 -> 100, ...),
;;; so the instructions, labels and epilogue between the lines shown here
;;; are not visible; comments below describe only what each line itself does.
50 push ebp ; save the caller's frame pointer
64 mov ebx, [ebp+8] ; ebx = psrc (assumes ebp was set from esp on a line not shown)
65 mov eax, [ebp+12] ; eax = pdst
66 mov edx, [ebp+16] ; edx = lx, the value added to the row pointer at line 123
67 mov edi, [ebp+20] ; edi = w
68 mov ecx, [ebp+24] ; ecx = h, counted down at line 126
69 mov esi, [ebp+28] ; esi = addflag
70 ;; Extend addflag into bit-mask (the instructions that do this are not shown here)
72 jmp predrow00m ; enter the row code at predrow00m; original note: "align for speed" (presumably the jump skips alignment padding before the label - confirm)
75 movq mm4, [ebx] ; load source bytes 0-7 of the current row
100 movq mm4, [ebx+8] ; load source bytes 8-15 of the current row (second half, not the first 8 bytes)
123 add eax, edx ; pdst += lx: step the destination pointer to the next row
126 sub ecx, 1 ; h -= 1; sets flags, presumably for a loop branch that is not shown
140 ;;; The x-axis interpolation case...
;;; Same argument loading as the other predcomp_*_mmx entry points; each row
;;; is handled as two 8-byte halves (source offsets 0 and 8).
;;; NOTE(review): the leading listing numbers skip, so the loads of mm2 that
;;; feed lines 181 and 217, any unpack/shift steps, the stores and the loop
;;; branch are not visible here.
142 global predcomp_10_mmx
147 push ebp ; save the caller's frame pointer
161 mov ebx, [ebp+8] ; ebx = psrc (assumes ebp was set from esp on a line not shown)
162 mov eax, [ebp+12] ; eax = pdst
163 mov edx, [ebp+16] ; edx = lx, the value added to the row pointer at line 243
164 mov edi, [ebp+20] ; edi = w
165 mov ecx, [ebp+24] ; ecx = h, counted down at line 246
166 mov esi, [ebp+28] ; esi = addflag
167 ;; Extend addflag into bit-mask (the instructions that do this are not shown here)
169 jmp predrow10m ; enter the row code at predrow10m; original note: "align for speed" (presumably the jump skips alignment padding before the label - confirm)
172 movq mm4, [ebx] ; load source bytes 0-7 of the current row
181 paddw mm4, mm2 ; mm4 += mm2 in 16-bit lanes: the sum step of averaging mm4/mm5 with mm2/mm3 (how mm2 was loaded is not shown)
191 movq mm2, [eax] ; load bytes 0-7 of the current dst row (original note: "Add")
195 paddw mm4, mm2 ; mm4 += mm2 in 16-bit lanes; mm2 was reloaded from dst at line 191, so this presumably folds dst into the result - confirm against the elided lines
208 movq mm4, [ebx+8] ; load source bytes 8-15 of the current row (second half, not the first 8 bytes)
217 paddw mm4, mm2 ; mm4 += mm2 in 16-bit lanes: the sum step of the average for the second half (how mm2 was loaded is not shown)
227 movq mm2, [eax+8] ; load bytes 8-15 of the current dst row (original note: "Add")
231 paddw mm4, mm2 ; mm4 += mm2 in 16-bit lanes; mm2 was reloaded from dst at line 227, so this presumably folds dst into the result - confirm
243 add eax, edx ; pdst += lx: step the destination pointer to the next row
246 sub ecx, 1 ; h -= 1; sets flags, presumably for a loop branch that is not shown
259 ;;; The y-axis interpolation case...
;;; Same argument loading as the other predcomp_*_mmx entry points; each row
;;; is handled as two 8-byte halves. psrc is stepped forward by lx to reach
;;; the following row and stepped back before the second half is read.
;;; NOTE(review): the leading listing numbers skip, so the loads of mm2 that
;;; feed lines 302 and 340, any unpack/shift steps, the stores, the dst
;;; pointer update and the loop branch are not visible here.
261 global predcomp_01_mmx
266 push ebp ; save the caller's frame pointer
280 mov ebx, [ebp+8] ; ebx = psrc (assumes ebp was set from esp on a line not shown)
281 mov eax, [ebp+12] ; eax = pdst
282 mov edx, [ebp+16] ; edx = lx, the value added to / subtracted from psrc below
283 mov edi, [ebp+20] ; edi = w
284 mov ecx, [ebp+24] ; ecx = h, counted down at line 368
285 mov esi, [ebp+28] ; esi = addflag
287 jmp predrow01m ; enter the row code at predrow01m; original note: "align for speed" (presumably the jump skips alignment padding before the label - confirm)
291 movq mm4, [ebx] ; load source bytes 0-7 of the current row
293 add ebx, edx ; psrc += lx: point at the following source row
302 paddw mm4, mm2 ; mm4 += mm2 in 16-bit lanes: the sum step of averaging mm4/mm5 with mm2/mm3 (how mm2 was loaded is not shown)
312 movq mm2, [eax] ; load bytes 0-7 of the current dst row (original note: "Add")
316 paddw mm4, mm2 ; mm4 += mm2 in 16-bit lanes; mm2 was reloaded from dst at line 312, so this presumably folds dst into the result - confirm
329 sub ebx, edx ; psrc -= lx: undo the step from line 293, back to the current row
330 movq mm4, [ebx+8] ; load source bytes 8-15 of the current row (second half, not the first 8 bytes)
332 add ebx, edx ; psrc += lx: point at the following source row again
340 paddw mm4, mm2 ; mm4 += mm2 in 16-bit lanes: the sum step of the average for the second half (how mm2 was loaded is not shown)
350 movq mm2, [eax+8] ; load bytes 8-15 of the current dst row (original note: "Add")
354 paddw mm4, mm2 ; mm4 += mm2 in 16-bit lanes; mm2 was reloaded from dst at line 350, so this presumably folds dst into the result - confirm
368 sub ecx, 1 ; h -= 1; sets flags, presumably for a loop branch that is not shown
382 ;;; The x-axis and y-axis interpolation case...
;;; Same argument loading as the other predcomp_*_mmx entry points; each row
;;; is handled as two 8-byte halves, combining the current source row with
;;; the following one (psrc is stepped by lx between the loads).
;;; NOTE(review): the leading listing numbers skip, so the x-offset loads,
;;; the remaining unpack/add/shift steps, the stores and the loop branch are
;;; not visible here.
384 global predcomp_11_mmx
391 push ebp ; save the caller's frame pointer
409 mov ebx, [ebp+8] ; ebx = psrc (assumes ebp was set from esp on a line not shown)
410 mov eax, [ebp+12] ; eax = pdst
411 mov edx, [ebp+16] ; edx = lx, the value added to / subtracted from the row pointers below
412 mov edi, [ebp+20] ; edi = w
413 mov ecx, [ebp+24] ; ecx = h, counted down at line 533
414 mov esi, [ebp+28] ; esi = addflag
415 ;; Extend addflag into bit-mask (the instructions that do this are not shown here)
418 jmp predrow11 ; enter the row code at predrow11; original note: "align for speed" (presumably the jump skips alignment padding before the label - confirm)
421 movq mm4, [ebx] ; load source bytes 0-7 of the current row; per the original note, mm4 and mm6 accumulate the partial interpolation sums
433 add ebx, edx ; psrc += lx: point at the following source row
435 movq mm5, [ebx] ; load source bytes 0-7 of the following row (psrc was advanced at line 433)
437 punpcklbw mm5, mm0 ; interleave the low 4 bytes of mm5 with mm0; with mm0 = 0 (as the register notes state) this widens them to 16-bit lanes for accumulation
458 movq mm5, [eax] ; load bytes 0-7 of the current dst row (original note: "Add")
462 paddw mm4, mm5 ; mm4 += mm5 in 16-bit lanes; mm5 was reloaded from dst at line 458, so this presumably folds dst into the result - confirm
476 sub ebx, edx ; psrc -= lx: undo the step from line 433, back to the current row
478 movq mm4, [ebx+8] ; load source bytes 8-15 of the current row; mm4 and mm6 again accumulate the partial interpolation sums
490 add ebx, edx ; psrc += lx: point at the following source row again
492 movq mm5, [ebx+8] ; load source bytes 8-15 of the following row (second half, not the first 8 bytes)
494 punpcklbw mm5, mm0 ; interleave the low 4 bytes of mm5 with mm0; with mm0 = 0 (as the register notes state) this widens them to 16-bit lanes for accumulation
515 movq mm5, [eax+8] ; load bytes 8-15 of the current dst row (original note: "Add and average")
519 paddw mm4, mm5 ; mm4 += mm5 in 16-bit lanes; mm5 was reloaded from dst at line 515, so this presumably folds dst into the result - confirm
530 add eax, edx ; pdst += lx: step the destination pointer to the next row
533 sub ecx, 1 ; h -= 1; sets flags, presumably for a loop branch that is not shown