Merge "respect alignment in arm asm files"
[libvpx.git] / vp8 / common / loopfilter_filters.c
blob6940529241c77a3cdd313a5c37e6151e77959ed3
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
12 #include <stdlib.h>
13 #include "loopfilter.h"
14 #include "onyxc_int.h"
/* Short alias for unsigned char. */
typedef unsigned char uc;
/*
 * Saturate an int to the signed 8-bit range.
 *
 * t: value to clamp.
 *
 * Returns t limited to [-128, 127], as a signed char.
 */
static __inline signed char vp8_signed_char_clamp(int t)
{
    if (t < -128)
    {
        return -128;
    }

    if (t > 127)
    {
        return 127;
    }

    return (signed char) t;
}
/*
 * Should we apply any filter at all ( 11111111 yes, 00000000 no).
 *
 * limit:  largest absolute difference allowed between neighbouring samples
 *         on the same side of the edge.
 * flimit: edge limit; the across-edge measure |p0-q0|*2 + |p1-q1|/2 must not
 *         exceed flimit*2 + limit.
 * p3..p0: the four samples on one side of the edge, p0 being the one that is
 *         compared directly with q0.
 * q0..q3: the four samples on the other side.
 *
 * Returns -1 (all bits set) when every check passes, 0 when any check fails.
 */
static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
                                            unsigned char p3, unsigned char p2,
                                            unsigned char p1, unsigned char p0,
                                            unsigned char q0, unsigned char q1,
                                            unsigned char q2, unsigned char q3)
{
    signed char mask = 0;

    /* (condition) * -1 is 0 or -1, so any failed check sets every bit */
    mask |= (abs(p3 - p2) > limit) * -1;
    mask |= (abs(p2 - p1) > limit) * -1;
    mask |= (abs(p1 - p0) > limit) * -1;
    mask |= (abs(q1 - q0) > limit) * -1;
    mask |= (abs(q2 - q1) > limit) * -1;
    mask |= (abs(q3 - q2) > limit) * -1;
    mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit) * -1;

    /* invert so that all ones means "no check failed" */
    return ~mask;
}
/*
 * Is there high variance internal edge ( 11111111 yes, 00000000 no).
 *
 * thresh: a difference strictly greater than this counts as high variance.
 * p1, p0: the two samples compared on one side of the edge.
 * q0, q1: the two samples compared on the other side.
 *
 * Returns -1 (all bits set) when |p1-p0| or |q1-q0| exceeds thresh,
 * 0 otherwise.
 */
static __inline signed char vp8_hevmask(signed char thresh,
                                        unsigned char p1, unsigned char p0,
                                        unsigned char q0, unsigned char q1)
{
    signed char hev = 0;

    /* (condition) * -1 is 0 or -1 */
    hev |= (abs(p1 - p0) > thresh) * -1;
    hev |= (abs(q1 - q0) > thresh) * -1;

    return hev;
}
/*
 * Filter the two samples on each side of an edge, in place.
 *
 * mask:     -1 to filter, 0 to leave the samples untouched (the filter value
 *           is ANDed with it).
 * hev:      -1 when the edge has high variance, 0 otherwise.  With hev set
 *           the outer-tap difference feeds the inner adjustment and the
 *           outer samples receive no adjustment; with hev clear it is the
 *           other way round.
 * op1, op0: the two samples on one side of the edge (op0 is paired with oq0).
 * oq0, oq1: the two samples on the other side.
 */
static __inline void vp8_filter(signed char mask, signed char hev,
                                unsigned char *op1, unsigned char *op0,
                                unsigned char *oq0, unsigned char *oq1)
{
    signed char ps0, qs0;
    signed char ps1, qs1;
    signed char filter_value, Filter1, Filter2;
    signed char u;

    /* XOR with 0x80 re-centres the unsigned samples for signed arithmetic */
    ps1 = (signed char) * op1 ^ 0x80;
    ps0 = (signed char) * op0 ^ 0x80;
    qs0 = (signed char) * oq0 ^ 0x80;
    qs1 = (signed char) * oq1 ^ 0x80;

    /* add outer taps if we have high edge variance */
    filter_value = vp8_signed_char_clamp(ps1 - qs1);
    filter_value &= hev;

    /* inner taps */
    filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
    filter_value &= mask;

    /* save bottom 3 bits so that we round one side +4 and the other +3
     * if it equals 4 we'll set to adjust by -1 to account for the fact
     * we'd round 3 the other way
     */
    Filter1 = vp8_signed_char_clamp(filter_value + 4);
    Filter2 = vp8_signed_char_clamp(filter_value + 3);
    Filter1 >>= 3;
    Filter2 >>= 3;
    u = vp8_signed_char_clamp(qs0 - Filter1);
    *oq0 = u ^ 0x80;
    u = vp8_signed_char_clamp(ps0 + Filter2);
    *op0 = u ^ 0x80;
    filter_value = Filter1;

    /* outer tap adjustments: half the inner adjustment, rounded, and only
     * where there is no high edge variance
     */
    filter_value += 1;
    filter_value >>= 1;
    filter_value &= ~hev;

    u = vp8_signed_char_clamp(qs1 - filter_value);
    *oq1 = u ^ 0x80;
    u = vp8_signed_char_clamp(ps1 + filter_value);
    *op1 = u ^ 0x80;
}
/*
 * Apply the 4-tap loop filter along an edge, stepping one sample at a time
 * through consecutive addresses.
 *
 * s:      points at the first sample on the q side of the edge; the samples
 *         across the edge are read at s[-4*p] .. s[3*p].
 * p:      pitch, the distance between successive samples across the edge.
 * flimit: per-position edge limits, indexed 0 .. count*8-1.
 * limit:  per-position interior limits, indexed likewise.
 * thresh: per-position high-edge-variance thresholds, indexed likewise.
 * count:  number of groups of 8 positions to process.  The loop body runs
 *         before the count is tested, so at least one position is always
 *         processed.
 */
void vp8_loop_filter_horizontal_edge_c
(
    unsigned char *s,
    int p, /* pitch */
    const signed char *flimit,
    const signed char *limit,
    const signed char *thresh,
    int count
)
{
    signed char hev = 0; /* high edge variance */
    signed char mask = 0;
    int i = 0;

    /* loop filter designed to work using chars so that we can make maximum use
     * of 8 bit simd instructions.
     */
    do
    {
        mask = vp8_filter_mask(limit[i], flimit[i],
                               s[-4*p], s[-3*p], s[-2*p], s[-1*p],
                               s[0*p], s[1*p], s[2*p], s[3*p]);

        hev = vp8_hevmask(thresh[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);

        vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);

        ++s;
    }
    while (++i < count * 8);
}
/*
 * Apply the 4-tap loop filter along an edge, stepping by the pitch from one
 * position to the next.
 *
 * s:      points at the first sample on the q side of the edge; the samples
 *         across the edge are read at s[-4] .. s[3].
 * p:      pitch, added to s after each position.
 * flimit: per-position edge limits, indexed 0 .. count*8-1.
 * limit:  per-position interior limits, indexed likewise.
 * thresh: per-position high-edge-variance thresholds, indexed likewise.
 * count:  number of groups of 8 positions to process.  The loop body runs
 *         before the count is tested, so at least one position is always
 *         processed.
 */
void vp8_loop_filter_vertical_edge_c
(
    unsigned char *s,
    int p,
    const signed char *flimit,
    const signed char *limit,
    const signed char *thresh,
    int count
)
{
    signed char hev = 0; /* high edge variance */
    signed char mask = 0;
    int i = 0;

    /* loop filter designed to work using chars so that we can make maximum use
     * of 8 bit simd instructions.
     */
    do
    {
        mask = vp8_filter_mask(limit[i], flimit[i],
                               s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);

        hev = vp8_hevmask(thresh[i], s[-2], s[-1], s[0], s[1]);

        vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);

        s += p;
    }
    while (++i < count * 8);
}
/*
 * Filter the three samples on each side of an edge, in place.
 *
 * mask:       -1 to filter, 0 to leave the samples untouched (the filter
 *             value is ANDed with it).
 * hev:        -1 when the edge has high variance, 0 otherwise.  With hev set
 *             only the +4/+3 rounded adjustment of op0/oq0 takes effect;
 *             with hev clear only the wider 27/18/9 taps take effect.
 * op2 .. op0: the three samples on one side of the edge (op0 is paired with
 *             oq0).
 * oq0 .. oq2: the three samples on the other side.
 */
static __inline void vp8_mbfilter(signed char mask, signed char hev,
                                  unsigned char *op2, unsigned char *op1, unsigned char *op0,
                                  unsigned char *oq0, unsigned char *oq1, unsigned char *oq2)
{
    signed char s, u;
    signed char filter_value, Filter1, Filter2;

    /* XOR with 0x80 re-centres the unsigned samples for signed arithmetic */
    signed char ps2 = (signed char) * op2 ^ 0x80;
    signed char ps1 = (signed char) * op1 ^ 0x80;
    signed char ps0 = (signed char) * op0 ^ 0x80;
    signed char qs0 = (signed char) * oq0 ^ 0x80;
    signed char qs1 = (signed char) * oq1 ^ 0x80;
    signed char qs2 = (signed char) * oq2 ^ 0x80;

    /* outer-tap difference plus three times the inner difference, kept only
     * where the filter mask is set (unlike the 4-tap filter, the outer taps
     * are always included here)
     */
    filter_value = vp8_signed_char_clamp(ps1 - qs1);
    filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
    filter_value &= mask;

    /* this first adjustment only takes effect with high edge variance */
    Filter2 = filter_value;
    Filter2 &= hev;

    /* save bottom 3 bits so that we round one side +4 and the other +3 */
    Filter1 = vp8_signed_char_clamp(Filter2 + 4);
    Filter2 = vp8_signed_char_clamp(Filter2 + 3);
    Filter1 >>= 3;
    Filter2 >>= 3;
    qs0 = vp8_signed_char_clamp(qs0 - Filter1);
    ps0 = vp8_signed_char_clamp(ps0 + Filter2);

    /* only apply wider filter if not high edge variance */
    filter_value &= ~hev;
    Filter2 = filter_value;

    /* roughly 3/7th difference across boundary */
    u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
    s = vp8_signed_char_clamp(qs0 - u);
    *oq0 = s ^ 0x80;
    s = vp8_signed_char_clamp(ps0 + u);
    *op0 = s ^ 0x80;

    /* roughly 2/7th difference across boundary */
    u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
    s = vp8_signed_char_clamp(qs1 - u);
    *oq1 = s ^ 0x80;
    s = vp8_signed_char_clamp(ps1 + u);
    *op1 = s ^ 0x80;

    /* roughly 1/7th difference across boundary */
    u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
    s = vp8_signed_char_clamp(qs2 - u);
    *oq2 = s ^ 0x80;
    s = vp8_signed_char_clamp(ps2 + u);
    *op2 = s ^ 0x80;
}
/*
 * Apply the 6-tap (macroblock) loop filter along an edge, stepping one
 * sample at a time through consecutive addresses.
 *
 * s:      points at the first sample on the q side of the edge; the mask is
 *         computed from s[-4*p] .. s[3*p] and the filter rewrites
 *         s[-3*p] .. s[2*p].
 * p:      pitch, the distance between successive samples across the edge.
 * flimit: per-position edge limits, indexed 0 .. count*8-1.
 * limit:  per-position interior limits, indexed likewise.
 * thresh: per-position high-edge-variance thresholds, indexed likewise.
 * count:  number of groups of 8 positions to process.  The loop body runs
 *         before the count is tested, so at least one position is always
 *         processed.
 */
void vp8_mbloop_filter_horizontal_edge_c
(
    unsigned char *s,
    int p,
    const signed char *flimit,
    const signed char *limit,
    const signed char *thresh,
    int count
)
{
    signed char hev = 0; /* high edge variance */
    signed char mask = 0;
    int i = 0;

    /* loop filter designed to work using chars so that we can make maximum use
     * of 8 bit simd instructions.
     */
    do
    {
        mask = vp8_filter_mask(limit[i], flimit[i],
                               s[-4*p], s[-3*p], s[-2*p], s[-1*p],
                               s[0*p], s[1*p], s[2*p], s[3*p]);

        hev = vp8_hevmask(thresh[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);

        vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);

        ++s;
    }
    while (++i < count * 8);
}
/*
 * Apply the 6-tap (macroblock) loop filter along an edge, stepping by the
 * pitch from one position to the next.
 *
 * s:      points at the first sample on the q side of the edge; the mask is
 *         computed from s[-4] .. s[3] and the filter rewrites s[-3] .. s[2].
 * p:      pitch, added to s after each position.
 * flimit: per-position edge limits, indexed 0 .. count*8-1.
 * limit:  per-position interior limits, indexed likewise.
 * thresh: per-position high-edge-variance thresholds, indexed likewise.
 * count:  number of groups of 8 positions to process.  The loop body runs
 *         before the count is tested, so at least one position is always
 *         processed.
 */
void vp8_mbloop_filter_vertical_edge_c
(
    unsigned char *s,
    int p,
    const signed char *flimit,
    const signed char *limit,
    const signed char *thresh,
    int count
)
{
    signed char hev = 0; /* high edge variance */
    signed char mask = 0;
    int i = 0;

    do
    {
        mask = vp8_filter_mask(limit[i], flimit[i],
                               s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);

        hev = vp8_hevmask(thresh[i], s[-2], s[-1], s[0], s[1]);

        vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);

        s += p;
    }
    while (++i < count * 8);
}
/*
 * Should we apply any filter at all ( 11111111 yes, 00000000 no).
 *
 * limit:  added to the threshold (see below).
 * flimit: edge limit; doubled and added to limit to form the threshold.
 * p1, p0: the two samples on one side of the edge (p0 is compared with q0).
 * q0, q1: the two samples on the other side.
 *
 * Returns -1 (all bits set) when |p0-q0|*2 + |p1-q1|/2 <= flimit*2 + limit,
 * 0 otherwise.
 */
static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit,
                                                   unsigned char p1, unsigned char p0,
                                                   unsigned char q0, unsigned char q1)
{
    /* limit is used in the threshold, so it needs no (void) cast.  Such a
     * cast placed here would be a statement ahead of the declaration below,
     * which C89 compilers reject (MSVC reports it as C2143).
     */
    signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit) * -1;
    return mask;
}
/*
 * Adjust the sample on each side of an edge (op0 and oq0), in place.
 *
 * mask:     -1 to filter, 0 to leave the samples untouched (the filter value
 *           is ANDed with it).
 * op1, op0: the two samples on one side of the edge; op1 is only read.
 * oq0, oq1: the two samples on the other side; oq1 is only read.
 */
static __inline void vp8_simple_filter(signed char mask,
                                       unsigned char *op1, unsigned char *op0,
                                       unsigned char *oq0, unsigned char *oq1)
{
    signed char filter_value, Filter1, Filter2;

    /* XOR with 0x80 re-centres the unsigned samples for signed arithmetic */
    signed char p1 = (signed char) * op1 ^ 0x80;
    signed char p0 = (signed char) * op0 ^ 0x80;
    signed char q0 = (signed char) * oq0 ^ 0x80;
    signed char q1 = (signed char) * oq1 ^ 0x80;
    signed char u;

    /* outer-tap difference plus three times the inner difference */
    filter_value = vp8_signed_char_clamp(p1 - q1);
    filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
    filter_value &= mask;

    /* save bottom 3 bits so that we round one side +4 and the other +3 */
    Filter1 = vp8_signed_char_clamp(filter_value + 4);
    Filter1 >>= 3;
    u = vp8_signed_char_clamp(q0 - Filter1);
    *oq0 = u ^ 0x80;

    Filter2 = vp8_signed_char_clamp(filter_value + 3);
    Filter2 >>= 3;
    u = vp8_signed_char_clamp(p0 + Filter2);
    *op0 = u ^ 0x80;
}
/*
 * Apply the simple loop filter along an edge, stepping one sample at a time
 * through consecutive addresses.
 *
 * s:      points at the first sample on the q side of the edge; the samples
 *         s[-2*p], s[-1*p], s[0*p] and s[1*p] are read and the middle two
 *         may be rewritten.
 * p:      pitch, the distance between successive samples across the edge.
 * flimit: per-position edge limits, indexed 0 .. count*8-1.
 * limit:  per-position limits, indexed likewise.
 * thresh: unused.
 * count:  number of groups of 8 positions to process.  The loop body runs
 *         before the count is tested, so at least one position is always
 *         processed.
 */
void vp8_loop_filter_simple_horizontal_edge_c
(
    unsigned char *s,
    int p,
    const signed char *flimit,
    const signed char *limit,
    const signed char *thresh,
    int count
)
{
    signed char mask = 0;
    int i = 0;
    (void) thresh;

    do
    {
        mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
        vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
        ++s;
    }
    while (++i < count * 8);
}
/*
 * Apply the simple loop filter along an edge, stepping by the pitch from one
 * position to the next.
 *
 * s:      points at the first sample on the q side of the edge; the samples
 *         s[-2], s[-1], s[0] and s[1] are read and the middle two may be
 *         rewritten.
 * p:      pitch, added to s after each position.
 * flimit: per-position edge limits, indexed 0 .. count*8-1.
 * limit:  per-position limits, indexed likewise.
 * thresh: unused.
 * count:  number of groups of 8 positions to process.  The loop body runs
 *         before the count is tested, so at least one position is always
 *         processed.
 */
void vp8_loop_filter_simple_vertical_edge_c
(
    unsigned char *s,
    int p,
    const signed char *flimit,
    const signed char *limit,
    const signed char *thresh,
    int count
)
{
    signed char mask = 0;
    int i = 0;
    (void) thresh;

    do
    {
        mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]);
        vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
        s += p;
    }
    while (++i < count * 8);
}