2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 EXPORT |vp8_recon16x16mb_neon|
17 AREA ||.text||
, CODE
, READONLY
, ALIGN=2
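19 ; r0 unsigned char *pred_ptr,
20 ; r1 diff_ptr (16-bit diff data, read with vld1.16),
21 ; r2 unsigned char *dst_ptr,
22 ; r3 ystride (added to dst_ptr after each store; halved for the U/V stores),
23 ; stack unsigned char *udst_ptr,
24 ; stack unsigned char *vdst_ptr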
26 |vp8_recon16x16mb_neon|
PROC
27 mov r12
, #
4 ;loop counter for Y loop
30 vld1.u8
{q12, q13}, [r0
]! ;load data from pred_ptr
31 vld1.16
{q8, q9}, [r1
]! ;load data from diff_ptr
32 vld1.u8
{q14, q15}, [r0
]!
33 vld1.16
{q10, q11}, [r1
]!
35 vmovl.u8 q0
, d24
;modify Pred data from 8 bits to 16 bits
42 vld1.16
{q12, q13}, [r1
]!
44 vld1.16
{q14, q15}, [r1
]!
50 vadd.s16 q0
, q0
, q8
;add Diff data and Pred data together
59 vqmovun.s16 d0
, q0
;CLAMP() saturation
65 vst1.u8
{q0}, [r2
], r3
;store result
67 vst1.u8
{q1}, [r2
], r3
69 vst1.u8
{q2}, [r2
], r3
72 moveq r12
, #
2 ;loop counter for UV loop
74 vst1.u8
{q3}, [r2
], r3
75 bne recon16x16mb_loop_y
77 mov r3
, r3
, lsr #
1 ;uv_stride = ystride>>1
78 ldr r2
, [sp] ;load udst_ptr (first stack argument; r2 is the store address below)
81 vld1.u8
{q12, q13}, [r0
]! ;load data from pred_ptr
82 vld1.16
{q8, q9}, [r1
]! ;load data from diff_ptr
83 vld1.u8
{q14, q15}, [r0
]!
84 vld1.16
{q10, q11}, [r1
]!
86 vmovl.u8 q0
, d24
;modify Pred data from 8 bits to 16 bits
93 vld1.16
{q12, q13}, [r1
]!
95 vld1.16
{q14, q15}, [r1
]!
97 vadd.s16 q0
, q0
, q8
;add Diff data and Pred data together
105 vqmovun.s16 d0
, q0
;CLAMP() saturation
110 vst1.u8
{d0}, [r2
], r3
;store result
112 vst1.u8
{d1}, [r2
], r3
114 vst1.u8
{d2}, [r2
], r3
116 vst1.u8
{d3}, [r2
], r3
118 vst1.u8
{d4}, [r2
], r3
121 vst1.u8
{d5}, [r2
], r3
122 vst1.u8
{d6}, [r2
], r3
123 vst1.u8
{d7}, [r2
], r3
125 ldrne r2
, [sp, #
4] ;load vdst_ptr (second stack argument)
126 bne recon16x16mb_loop_uv