Merge branch 'ct' of git.pipapo.org:cinelerra-ct into ct
[cinelerra_cv/ct.git] / mpeg2enc / predict_mmx.s
blobdac799096b17d677b8d5a51a280edc0d2aa381ee
2 ; predict.s: MMX-optimized block summing and differencing routines
4 ; Believed to be original Copyright (C) 2000 Brent Byeler
6 ; This program is free software; you can redistribute it and/or
7 ; modify it under the terms of the GNU General Public License
8 ; as published by the Free Software Foundation; either version 2
9 ; of the License, or (at your option) any later version.
11 ; This program is distributed in the hope that it will be useful,
12 ; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ; GNU General Public License for more details.
16 ; You should have received a copy of the GNU General Public License
17 ; along with this program; if not, write to the Free Software
18 ; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
;-----------------------------------------------------------------------
; void sub_pred_mmx(unsigned char *pred,
;                   unsigned char *cur,
;                   int lx, short *blk)
;
; Prediction error for eight rows of eight pixels:
;     blk[8*r + c] = cur[r*lx + c] - pred[r*lx + c]     (r, c = 0..7)
;
; In:    32-bit code, arguments read from the stack through ebp:
;          [ebp+8] = pred   [ebp+12] = cur
;          [ebp+16] = lx    [ebp+20] = blk
; Out:   64 signed 16-bit differences stored contiguously at blk
; Clobb: mm0-mm3, mm7, flags.  eax, ebx, ecx, esi, edi and ebp are
;        saved on entry and restored on exit.  emms is executed before
;        returning.
;-----------------------------------------------------------------------

        align   32
        global  sub_pred_mmx
sub_pred_mmx:
        push    ebp                     ; save caller's frame pointer
        mov     ebp, esp                ; ebp addresses the arguments
        push    eax
        push    ebx
        push    ecx
        push    esi
        push    edi

        mov     ebx, [ebp+8]            ; ebx = pred (current row)
        mov     eax, [ebp+12]           ; eax = cur  (current row)
        mov     edi, [ebp+16]           ; edi = lx, byte step between rows
        mov     ecx, [ebp+20]           ; ecx = blk  (output cursor)
        pxor    mm7, mm7                ; mm7 = 0, used to widen bytes to words
        mov     esi, 8                  ; esi = rows still to process

.next_row:
        movq    mm0, [eax]              ; 8 bytes of cur
        movq    mm2, [ebx]              ; 8 bytes of pred
        add     eax, edi                ; step both inputs to the next row
        add     ebx, edi

        movq    mm1, mm0                ; keep copies for the high halves
        movq    mm3, mm2
        punpcklbw mm0, mm7              ; mm0 = cur[0..3]  as words
        punpcklbw mm2, mm7              ; mm2 = pred[0..3] as words
        punpckhbw mm1, mm7              ; mm1 = cur[4..7]  as words
        punpckhbw mm3, mm7              ; mm3 = pred[4..7] as words

        psubw   mm0, mm2                ; cur - pred, columns 0..3
        psubw   mm1, mm3                ; cur - pred, columns 4..7

        movq    [ecx], mm0              ; one output row = 8 shorts = 16 bytes
        movq    [ecx+8], mm1
        add     ecx, 16

        dec     esi
        jnz     .next_row               ; counter runs 8 -> 0, so this loops 8 times

        emms                            ; clear mmx registers

        pop     edi
        pop     esi
        pop     ecx
        pop     ebx
        pop     eax
        pop     ebp                     ; restore caller's frame pointer
        ret
;-----------------------------------------------------------------------
; void add_pred_mmx(unsigned char *pred,
;                   unsigned char *cur,
;                   int lx, short *blk)
;
; Add prediction and prediction error for eight rows of eight pixels
; and store the result saturated to 0...255:
;     cur[r*lx + c] = clamp(blk[8*r + c] + pred[r*lx + c])   (r, c = 0..7)
; The sum is formed with a 16-bit wrapping add (paddw); packuswb then
; clamps each signed 16-bit sum to an unsigned byte.
;
; In:    32-bit code, arguments read from the stack through ebp:
;          [ebp+8] = pred   [ebp+12] = cur
;          [ebp+16] = lx    [ebp+20] = blk
; Out:   8 bytes written per row at cur, rows lx bytes apart
; Clobb: mm0-mm3, mm7, flags.  eax, ebx, ecx, esi, edi and ebp are
;        saved on entry and restored on exit.  emms is executed before
;        returning.
;-----------------------------------------------------------------------

        align   32
        global  add_pred_mmx
add_pred_mmx:
        push    ebp                     ; save caller's frame pointer
        mov     ebp, esp                ; ebp addresses the arguments
        push    eax
        push    ebx
        push    ecx
        push    esi
        push    edi

        mov     ebx, [ebp+8]            ; ebx = pred (current row)
        mov     eax, [ebp+12]           ; eax = cur  (output row)
        mov     edi, [ebp+16]           ; edi = lx, byte step between rows
        mov     ecx, [ebp+20]           ; ecx = blk  (input cursor)
        pxor    mm7, mm7                ; mm7 = 0, used to widen bytes to words
        mov     esi, 8                  ; esi = rows still to process

.next_row:
        movq    mm2, [ebx]              ; 8 bytes of pred
        add     ebx, edi                ; pred -> next row
        movq    mm3, mm2                ; copy for the high half
        punpcklbw mm2, mm7              ; mm2 = pred[0..3] as words
        punpckhbw mm3, mm7              ; mm3 = pred[4..7] as words

        movq    mm0, [ecx]              ; blk columns 0..3
        movq    mm1, [ecx+8]            ; blk columns 4..7
        add     ecx, 16                 ; one input row = 8 shorts = 16 bytes

        paddw   mm0, mm2                ; blk + pred, columns 0..3
        paddw   mm1, mm3                ; blk + pred, columns 4..7
        packuswb mm0, mm1               ; clamp 8 sums to 0..255 and pack to bytes

        movq    [eax], mm0
        add     eax, edi                ; cur -> next row

        dec     esi
        jnz     .next_row               ; counter runs 8 -> 0, so this loops 8 times

        emms                            ; clear mmx registers

        pop     edi
        pop     esi
        pop     ecx
        pop     ebx
        pop     eax
        pop     ebp                     ; restore caller's frame pointer
        ret