2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 EXPORT |vp8_variance16x16_armv6|
18 AREA ||.text||
, CODE
, READONLY
, ALIGN=2
;-----------------------------------------------------------------------
; vp8_variance16x16_armv6
;
; Accumulates, over a 16-row by 16-pixel block, the signed sum of
; (src - ref) and the sum of squared differences (sse), then returns
; sse - ((sum * sum) >> 8).
;
; In:
;   r0    unsigned char *src_ptr
;   r1    int source_stride      (added to src_ptr after every row)
;   r2    unsigned char *ref_ptr
;   r3    int ref_stride         (added to ref_ptr after every row)
;   stack unsigned int *sse      (first stacked argument)
;
; Out:
;   r0    sse - ((sum * sum) >> 8)
;   *sse  sum of squared differences
;
; Register roles:
;   r8        running signed sum of differences
;   r11       running sum of squared differences
;   r12       rows still to process
;   lr        constant zero (operand for sel / usad8)
;   r4-r7, r9, r10   scratch
;   r4-r12 and lr are saved on entry and restored on exit.
;-----------------------------------------------------------------------
|vp8_variance16x16_armv6| PROC

    stmfd   sp!, {r4-r12, lr}

    mov     r12, #16            ; rows remaining (= block height)
    mov     r8, #0              ; sum = 0
    mov     r11, #0             ; sse = 0
    mov     lr, #0              ; constant zero, never modified below

loop
    ; ---- pixels 0..3 of the row ----
    ldr     r4, [r0, #0x0]      ; load 4 src pixels
    ldr     r5, [r2, #0x0]      ; load 4 ref pixels

    usub8   r6, r4, r5          ; per-byte src - ref, GE[i] set where src >= ref
    sel     r7, r6, lr          ; keep lanes with src >= ref, zero the others
    usub8   r9, r5, r4          ; per-byte ref - src, GE[i] set where ref >= src
    sel     r6, r9, lr          ; keep lanes with ref >= src, zero the others

    ; calculate partial sums
    usad8   r4, r7, lr          ; sum of the positive differences
    usad8   r5, r6, lr          ; sum of the negative-difference magnitudes
    orr     r6, r6, r7          ; |src - ref| for all 4 pixels

    ; flags are not consumed here, so plain add/sub like the other groups
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; bytes 0 and 2 -> two halfwords
    uxtb16  r10, r6, ror #8     ; bytes 1 and 3 -> two halfwords
    smlad   r11, r5, r5, r11    ; sse += b0*b0 + b2*b2

    ; ---- pixels 4..7 of the row ----
    ldr     r4, [r0, #0x4]      ; load 4 src pixels
    ldr     r5, [r2, #0x4]      ; load 4 ref pixels
    smlad   r11, r10, r10, r11  ; sse += b1*b1 + b3*b3 (previous group)

    usub8   r6, r4, r5          ; per-byte src - ref
    sel     r7, r6, lr          ; lanes with src >= ref
    usub8   r9, r5, r4          ; per-byte ref - src
    sel     r6, r9, lr          ; lanes with ref >= src

    ; calculate partial sums
    usad8   r4, r7, lr          ; sum of the positive differences
    usad8   r5, r6, lr          ; sum of the negative-difference magnitudes
    orr     r6, r6, r7          ; |src - ref| for all 4 pixels

    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; bytes 0 and 2 -> two halfwords
    uxtb16  r10, r6, ror #8     ; bytes 1 and 3 -> two halfwords
    smlad   r11, r5, r5, r11    ; sse += b0*b0 + b2*b2

    ; ---- pixels 8..11 of the row ----
    ldr     r4, [r0, #0x8]      ; load 4 src pixels
    ldr     r5, [r2, #0x8]      ; load 4 ref pixels
    smlad   r11, r10, r10, r11  ; sse += b1*b1 + b3*b3 (previous group)

    usub8   r6, r4, r5          ; per-byte src - ref
    sel     r7, r6, lr          ; lanes with src >= ref
    usub8   r9, r5, r4          ; per-byte ref - src
    sel     r6, r9, lr          ; lanes with ref >= src

    ; calculate partial sums
    usad8   r4, r7, lr          ; sum of the positive differences
    usad8   r5, r6, lr          ; sum of the negative-difference magnitudes
    orr     r6, r6, r7          ; |src - ref| for all 4 pixels

    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; bytes 0 and 2 -> two halfwords
    uxtb16  r10, r6, ror #8     ; bytes 1 and 3 -> two halfwords
    smlad   r11, r5, r5, r11    ; sse += b0*b0 + b2*b2

    ; ---- pixels 12..15 of the row ----
    ldr     r4, [r0, #0xc]      ; load 4 src pixels
    ldr     r5, [r2, #0xc]      ; load 4 ref pixels
    smlad   r11, r10, r10, r11  ; sse += b1*b1 + b3*b3 (previous group)

    usub8   r6, r4, r5          ; per-byte src - ref
    add     r0, r0, r1          ; src_ptr -> next row (row data already loaded)
    sel     r7, r6, lr          ; lanes with src >= ref
    usub8   r9, r5, r4          ; per-byte ref - src
    add     r2, r2, r3          ; ref_ptr -> next row
    sel     r6, r9, lr          ; lanes with ref >= src

    ; calculate partial sums
    usad8   r4, r7, lr          ; sum of the positive differences
    usad8   r5, r6, lr          ; sum of the negative-difference magnitudes
    orr     r6, r6, r7          ; |src - ref| for all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; bytes 0 and 2 -> two halfwords
    uxtb16  r10, r6, ror #8     ; bytes 1 and 3 -> two halfwords
    smlad   r11, r5, r5, r11    ; sse += b0*b0 + b2*b2
    smlad   r11, r10, r10, r11  ; sse += b1*b1 + b3*b3

    subs    r12, r12, #1        ; one more row finished
    bne     loop                ; repeat until all 16 rows are done

    ; ---- results ----
    ldr     r6, [sp, #0x28]     ; sse pointer: 10 saved registers * 4 bytes above sp
    mul     r0, r8, r8          ; sum * sum (low 32 bits)
    str     r11, [r6]           ; store sse
    ; |sum| can reach 255 * 256 = 65280, so sum * sum can exceed 2^31 while
    ; still fitting in 32 unsigned bits: shift logically, not arithmetically.
    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))

    ldmfd   sp!, {r4-r12, pc}