Merge "respect alignment in arm asm files"
[libvpx.git] / vp8 / common / reconinter.c
blob3b0405ca1c640319773fe27e607e4f53b22cb6e3
1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
12 #include "vpx_ports/config.h"
13 #include "recon.h"
14 #include "subpixel.h"
15 #include "blockd.h"
16 #include "reconinter.h"
17 #if CONFIG_RUNTIME_CPU_DETECT
18 #include "onyxc_int.h"
19 #endif
/* Define MUST_BE_ALIGNED on systems where unaligned int reads and writes
 * are not allowed, e.g. some ARM architectures.
 */
24 /*#define MUST_BE_ALIGNED*/
/* Indices into x->block[] visited by vp8_build_inter4x4_predictors_mb when
 * mbmi.partitioning < 3; each listed block is handed to the 8x8 predictor
 * (build_inter_predictors4b) with a pitch of 16.
 * NOTE(review): presumably the top-left 4x4 block of each 8x8 luma quadrant,
 * with luma blocks laid out four per row - confirm against the BLOCKD layout.
 */
27 static const int bbb[4] = {0, 2, 8, 10};
/* Copy a 16x16 block of bytes from src to dst.
 *
 * src        - first byte of the top row of the source block
 * src_stride - distance in bytes between the starts of consecutive source rows
 * dst        - first byte of the top row of the destination block
 * dst_stride - distance in bytes between the starts of consecutive
 *              destination rows
 *
 * Each row is copied one byte at a time.  The pixel data is never accessed
 * through an int pointer, so neither src nor dst needs any particular
 * alignment and the result does not depend on sizeof(int); the routine is
 * therefore correct whether or not MUST_BE_ALIGNED is defined.
 */
void vp8_copy_mem16x16_c(
    unsigned char *src,
    int src_stride,
    unsigned char *dst,
    int dst_stride)
{
    int r;

    for (r = 0; r < 16; r++)
    {
        int c;

        for (c = 0; c < 16; c++)
            dst[c] = src[c];

        src += src_stride;
        dst += dst_stride;
    }
}
/* Copy an 8x8 block of bytes from src to dst.
 *
 * src        - first byte of the top row of the source block
 * src_stride - distance in bytes between the starts of consecutive source rows
 * dst        - first byte of the top row of the destination block
 * dst_stride - distance in bytes between the starts of consecutive
 *              destination rows
 *
 * Each row is copied one byte at a time.  The pixel data is never accessed
 * through an int pointer, so neither src nor dst needs any particular
 * alignment and the result does not depend on sizeof(int); the routine is
 * therefore correct whether or not MUST_BE_ALIGNED is defined.
 */
void vp8_copy_mem8x8_c(
    unsigned char *src,
    int src_stride,
    unsigned char *dst,
    int dst_stride)
{
    int r;

    for (r = 0; r < 8; r++)
    {
        int c;

        for (c = 0; c < 8; c++)
            dst[c] = src[c];

        src += src_stride;
        dst += dst_stride;
    }
}
/* Copy a block 8 bytes wide and 4 rows high from src to dst.
 *
 * src        - first byte of the top row of the source block
 * src_stride - distance in bytes between the starts of consecutive source rows
 * dst        - first byte of the top row of the destination block
 * dst_stride - distance in bytes between the starts of consecutive
 *              destination rows
 *
 * Each row is copied one byte at a time.  The pixel data is never accessed
 * through an int pointer, so neither src nor dst needs any particular
 * alignment and the result does not depend on sizeof(int); the routine is
 * therefore correct whether or not MUST_BE_ALIGNED is defined.
 */
void vp8_copy_mem8x4_c(
    unsigned char *src,
    int src_stride,
    unsigned char *dst,
    int dst_stride)
{
    int r;

    for (r = 0; r < 4; r++)
    {
        int c;

        for (c = 0; c < 8; c++)
            dst[c] = src[c];

        src += src_stride;
        dst += dst_stride;
    }
}
136 void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf)
138 int r;
139 unsigned char *ptr_base;
140 unsigned char *ptr;
141 unsigned char *pred_ptr = d->predictor;
143 ptr_base = *(d->base_pre);
145 if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
147 ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
148 sppf(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
150 else
152 ptr_base += d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
153 ptr = ptr_base;
155 for (r = 0; r < 4; r++)
157 #ifdef MUST_BE_ALIGNED
158 pred_ptr[0] = ptr[0];
159 pred_ptr[1] = ptr[1];
160 pred_ptr[2] = ptr[2];
161 pred_ptr[3] = ptr[3];
162 #else
163 *(int *)pred_ptr = *(int *)ptr ;
164 #endif
165 pred_ptr += pitch;
166 ptr += d->pre_stride;
171 static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
173 unsigned char *ptr_base;
174 unsigned char *ptr;
175 unsigned char *pred_ptr = d->predictor;
177 ptr_base = *(d->base_pre);
178 ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
180 if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
182 x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
184 else
186 RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, pred_ptr, pitch);
190 static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
192 unsigned char *ptr_base;
193 unsigned char *ptr;
194 unsigned char *pred_ptr = d->predictor;
196 ptr_base = *(d->base_pre);
197 ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
199 if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
201 x->subpixel_predict8x4(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
203 else
205 RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, d->pre_stride, pred_ptr, pitch);
210 /*encoder only*/
211 void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
213 int i;
215 if (x->mode_info_context->mbmi.mode != SPLITMV)
217 unsigned char *uptr, *vptr;
218 unsigned char *upred_ptr = &x->predictor[256];
219 unsigned char *vpred_ptr = &x->predictor[320];
221 int mv_row = x->block[16].bmi.mv.as_mv.row;
222 int mv_col = x->block[16].bmi.mv.as_mv.col;
223 int offset;
224 int pre_stride = x->block[16].pre_stride;
226 offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
227 uptr = x->pre.u_buffer + offset;
228 vptr = x->pre.v_buffer + offset;
230 if ((mv_row | mv_col) & 7)
232 x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
233 x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
235 else
237 RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, upred_ptr, 8);
238 RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vpred_ptr, 8);
241 else
243 for (i = 16; i < 24; i += 2)
245 BLOCKD *d0 = &x->block[i];
246 BLOCKD *d1 = &x->block[i+1];
248 if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
249 build_inter_predictors2b(x, d0, 8);
250 else
252 vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
253 vp8_build_inter_predictors_b(d1, 8, x->subpixel_predict);
259 /*encoder only*/
260 void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)
262 unsigned char *ptr_base;
263 unsigned char *ptr;
264 unsigned char *pred_ptr = x->predictor;
265 int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
266 int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
267 int pre_stride = x->block[0].pre_stride;
269 ptr_base = x->pre.y_buffer;
270 ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
272 if ((mv_row | mv_col) & 7)
274 x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
276 else
278 RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
282 void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
283 unsigned char *dst_y,
284 unsigned char *dst_u,
285 unsigned char *dst_v,
286 int dst_ystride,
287 int dst_uvstride)
289 int offset;
290 unsigned char *ptr;
291 unsigned char *uptr, *vptr;
293 int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
294 int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
296 unsigned char *ptr_base = x->pre.y_buffer;
297 int pre_stride = x->block[0].pre_stride;
299 ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
301 if ((mv_row | mv_col) & 7)
303 x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y, dst_ystride);
305 else
307 RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_y, dst_ystride);
310 mv_row = x->block[16].bmi.mv.as_mv.row;
311 mv_col = x->block[16].bmi.mv.as_mv.col;
312 pre_stride >>= 1;
313 offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
314 uptr = x->pre.u_buffer + offset;
315 vptr = x->pre.v_buffer + offset;
317 if ((mv_row | mv_col) & 7)
319 x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, dst_u, dst_uvstride);
320 x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, dst_v, dst_uvstride);
322 else
324 RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, dst_u, dst_uvstride);
325 RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, dst_v, dst_uvstride);
330 void vp8_build_inter4x4_predictors_mb(MACROBLOCKD *x)
332 int i;
334 if (x->mode_info_context->mbmi.partitioning < 3)
336 for (i = 0; i < 4; i++)
338 BLOCKD *d = &x->block[bbb[i]];
339 build_inter_predictors4b(x, d, 16);
342 else
344 for (i = 0; i < 16; i += 2)
346 BLOCKD *d0 = &x->block[i];
347 BLOCKD *d1 = &x->block[i+1];
349 if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
350 build_inter_predictors2b(x, d0, 16);
351 else
353 vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
354 vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
361 for (i = 16; i < 24; i += 2)
363 BLOCKD *d0 = &x->block[i];
364 BLOCKD *d1 = &x->block[i+1];
366 if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
367 build_inter_predictors2b(x, d0, 8);
368 else
370 vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
371 vp8_build_inter_predictors_b(d1, 8, x->subpixel_predict);
376 void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
378 if (x->mode_info_context->mbmi.mode != SPLITMV)
380 vp8_build_inter16x16_predictors_mb(x, x->predictor, &x->predictor[256],
381 &x->predictor[320], 16, 8);
383 else
385 vp8_build_inter4x4_predictors_mb(x);
389 void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
391 int i, j;
393 if (x->mode_info_context->mbmi.mode == SPLITMV)
395 for (i = 0; i < 2; i++)
397 for (j = 0; j < 2; j++)
399 int yoffset = i * 8 + j * 2;
400 int uoffset = 16 + i * 2 + j;
401 int voffset = 20 + i * 2 + j;
403 int temp;
405 temp = x->block[yoffset ].bmi.mv.as_mv.row
406 + x->block[yoffset+1].bmi.mv.as_mv.row
407 + x->block[yoffset+4].bmi.mv.as_mv.row
408 + x->block[yoffset+5].bmi.mv.as_mv.row;
410 if (temp < 0) temp -= 4;
411 else temp += 4;
413 x->block[uoffset].bmi.mv.as_mv.row = temp / 8;
415 if (fullpixel)
416 x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & 0xfffffff8;
418 temp = x->block[yoffset ].bmi.mv.as_mv.col
419 + x->block[yoffset+1].bmi.mv.as_mv.col
420 + x->block[yoffset+4].bmi.mv.as_mv.col
421 + x->block[yoffset+5].bmi.mv.as_mv.col;
423 if (temp < 0) temp -= 4;
424 else temp += 4;
426 x->block[uoffset].bmi.mv.as_mv.col = temp / 8;
428 if (fullpixel)
429 x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & 0xfffffff8;
431 x->block[voffset].bmi.mv.as_mv.row = x->block[uoffset].bmi.mv.as_mv.row ;
432 x->block[voffset].bmi.mv.as_mv.col = x->block[uoffset].bmi.mv.as_mv.col ;
436 else
438 int mvrow = x->mode_info_context->mbmi.mv.as_mv.row;
439 int mvcol = x->mode_info_context->mbmi.mv.as_mv.col;
441 if (mvrow < 0)
442 mvrow -= 1;
443 else
444 mvrow += 1;
446 if (mvcol < 0)
447 mvcol -= 1;
448 else
449 mvcol += 1;
451 mvrow /= 2;
452 mvcol /= 2;
454 for (i = 0; i < 8; i++)
456 x->block[ 16 + i].bmi.mv.as_mv.row = mvrow;
457 x->block[ 16 + i].bmi.mv.as_mv.col = mvcol;
459 if (fullpixel)
461 x->block[ 16 + i].bmi.mv.as_mv.row = mvrow & 0xfffffff8;
462 x->block[ 16 + i].bmi.mv.as_mv.col = mvcol & 0xfffffff8;