2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
16 /****************************************************************************
18 ****************************************************************************/
/*
 * ROUNDUP32(X)
 *
 * Converts X to unsigned long, adds 31 and clears the low five bits, i.e.
 * rounds the value up to the next multiple of 32 (a value that is already a
 * multiple of 32 is returned unchanged).  The result has type unsigned long.
 *
 * The argument is parenthesized so that the cast applies to the whole
 * argument expression, and the mask is built from ~31UL so that it is as
 * wide as unsigned long and does not discard the upper bits where unsigned
 * long is wider than 32 bits.
 *
 * NOTE(review): if this is applied to pointers, it still truncates on
 * targets where unsigned long is narrower than a pointer -- confirm the
 * intended targets.
 */
#define ROUNDUP32(X) ( ( ( (unsigned long) (X) ) + 31 ) & ( ~31UL ) )
22 /****************************************************************************
24 ****************************************************************************/
/*
 * Declared here, defined in another translation unit.  Takes three int
 * pointers; presumably each receives a flag saying whether the matching
 * instruction set (MMX / XMM / WMT) is available -- confirm against the
 * definition, which is not visible in this file.
 */
extern void vpx_get_processor_flags(int *mmx_enabled,
                                    int *xmm_enabled,
                                    int *wmt_enabled);
27 /****************************************************************************
28 * Exported Global Variables
29 ****************************************************************************/
30 void (*temp_filter
)(pre_proc_instance
*ppi
, unsigned char *s
, unsigned char *d
, int bytes
, int strength
);
32 /****************************************************************************
34 * ROUTINE : temp_filter_wmt
36 * INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
37 * unsigned char *s : Pointer to source frame.
38 * unsigned char *d : Pointer to destination frame.
39 * int bytes : Number of bytes to filter.
40 * int strength : Strength of filter to apply.
46 * FUNCTION : Performs a closeness-adjusted temporal blur
48 * SPECIAL NOTES : Destination frame can be same as source frame.
50 ****************************************************************************/
53 pre_proc_instance
*ppi
,
61 unsigned char *frameptr
= ppi
->frame_buffer
;
63 __declspec(align(16)) unsigned short threes
[] = { 3, 3, 3, 3, 3, 3, 3, 3};
64 __declspec(align(16)) unsigned short sixteens
[] = {16, 16, 16, 16, 16, 16, 16, 16};
75 for (i
= 0; i
< 8; i
++)
77 *frameptr
= s
[byte
+i
];
83 while (frame
< FRAMECOUNT
);
85 for (i
= 0; i
< 8; i
++)
86 d
[byte
+i
] = s
[byte
+i
];
96 int offset2
= (ppi
->frame
% FRAMECOUNT
);
100 __declspec(align(16)) unsigned short counts
[8];
101 __declspec(align(16)) unsigned short sums
[8];
105 mov edi
, s
// source pixels
106 pxor xmm1
, xmm1
// accumulator
110 mov esi
, frameptr
// frame buffer pointer
111 pxor xmm2
, xmm2
// count
113 movq xmm3
, QWORD PTR
[edi
]
115 movq QWORD PTR
[esi
+8*eax
], xmm3
117 punpcklbw xmm3
, xmm2
// xmm3 source pixels
121 movq xmm4
, QWORD PTR
[esi
] // get frame buffer values
122 punpcklbw xmm4
, xmm7
// xmm4 frame buffer pixels
123 movdqa xmm6
, xmm4
// save the pixel values
124 psubsw xmm4
, xmm3
// subtracted pixel values
125 pmullw xmm4
, xmm4
// square xmm4
127 psrlw xmm4
, xmm5
// should be strength
128 pmullw xmm4
, threes
// 3 * modifier
129 movdqa xmm5
, sixteens
// 16s
130 psubusw xmm5
, xmm4
// 16 - modifiers
131 movdqa xmm4
, xmm5
// save the modifiers
132 pmullw xmm4
, xmm6
// multiplier values
133 paddusw xmm1
, xmm4
// accumulator
134 paddusw xmm2
, xmm5
// count
135 add esi
, 8 // next frame
136 dec ecx
// next set of eight pixels
140 psrlw xmm2
, 1 // divide count by 2 for rounding
141 paddusw xmm1
, xmm2
// rounding added in
148 for (i
= 0; i
< 8; i
++)
150 int blurvalue
= sums
[i
] * ppi
->fixed_divide
[counts
[i
]];
159 while (byte
< bytes
);
166 /****************************************************************************
168 * ROUTINE : temp_filter_mmx
170 * INPUTS : pre_proc_instance *ppi : Pointer to pre-processor instance.
171 * unsigned char *s : Pointer to source frame.
172 * unsigned char *d : Pointer to destination frame.
173 * int bytes : Number of bytes to filter.
174 * int strength : Strength of filter to apply.
180 * FUNCTION : Performs a closeness-adjusted temporal blur
182 * SPECIAL NOTES : Destination frame can be same as source frame.
184 ****************************************************************************/
187 pre_proc_instance
*ppi
,
195 unsigned char *frameptr
= ppi
->frame_buffer
;
197 __declspec(align(16)) unsigned short threes
[] = { 3, 3, 3, 3};
198 __declspec(align(16)) unsigned short sixteens
[] = {16, 16, 16, 16};
209 for (i
= 0; i
< 4; i
++)
211 *frameptr
= s
[byte
+i
];
217 while (frame
< FRAMECOUNT
);
219 for (i
= 0; i
< 4; i
++)
220 d
[byte
+i
] = s
[byte
+i
];
225 while (byte
< bytes
);
230 int offset2
= (ppi
->frame
% FRAMECOUNT
);
234 __declspec(align(16)) unsigned short counts
[8];
235 __declspec(align(16)) unsigned short sums
[8];
240 mov edi
, s
// source pixels
241 pxor mm1
, mm1
// accumulator
244 mov esi
, frameptr
// frame buffer pointer
245 pxor mm2
, mm2
// count
247 movd mm3
, DWORD PTR
[edi
]
248 movd DWORD PTR
[esi
+4*eax
], mm3
250 punpcklbw mm3
, mm2
// mm3 source pixels
254 movd mm4
, DWORD PTR
[esi
] // get frame buffer values
255 punpcklbw mm4
, mm7
// mm4 frame buffer pixels
256 movq mm6
, mm4
// save the pixel values
257 psubsw mm4
, mm3
// subtracted pixel values
258 pmullw mm4
, mm4
// square mm4
260 psrlw mm4
, mm5
// should be strength
261 pmullw mm4
, threes
// 3 * modifier
262 movq mm5
, sixteens
// 16s
263 psubusw mm5
, mm4
// 16 - modifiers
264 movq mm4
, mm5
// save the modifiers
265 pmullw mm4
, mm6
// multiplier values
266 paddusw mm1
, mm4
// accumulator
267 paddusw mm2
, mm5
// count
268 add esi
, 4 // next frame
269 dec ecx
// next set of eight pixels
273 psrlw mm2
, 1 // divide count by 2 for rounding
274 paddusw mm1
, mm2
// rounding added in
282 for (i
= 0; i
< 4; i
++)
284 int blurvalue
= sums
[i
] * ppi
->fixed_divide
[counts
[i
]];
293 while (byte
< bytes
);