Adds armv6 optimized variance calculation
[libvpx.git] / vp8 / encoder / arm / armv6 / vp8_variance16x16_armv6.asm
blob8d7258af7d5fcd80cc9e3fdc9778ae02b6b13d10
;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
; Symbol made visible to the linker by this file.
    EXPORT  |vp8_variance16x16_armv6|

    ARM                         ; assemble as ARM (not Thumb) instructions
    REQUIRE8                    ; code requires 8-byte stack alignment
    PRESERVE8                   ; code preserves 8-byte stack alignment

    AREA    ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_variance16x16_armv6
;
; Walks a 16x16 block of pixels, accumulating
;     sum = SUM(src[i] - ref[i])          (signed)
;     sse = SUM((src[i] - ref[i])^2)
; then stores sse through the pointer passed on the stack and returns
;     sse - ((sum * sum) >> 8)
;
; In:
;   r0      unsigned char *src_ptr
;   r1      int            source_stride
;   r2      unsigned char *ref_ptr
;   r3      int            recon_stride
;   stack   unsigned int  *sse
; Out:
;   r0      sse - ((sum * sum) >> 8)
;   *sse    sum of squared differences
;
; Register roles inside the loop:
;   r4, r5      loaded pixels, then per-group partial sums
;   r6, r7, r9  per-byte differences
;   r8          running sum (signed)
;   r10         bytes 1 and 3 of the differences, widened to halfwords
;   r11         running sse
;   r12         rows remaining
;   lr          constant zero
;
; r4-r12 and lr are saved on entry and restored on exit.
;-----------------------------------------------------------------------
|vp8_variance16x16_armv6| PROC

    stmfd   sp!, {r4-r12, lr}   ; 10 registers = 40 bytes pushed
    mov     r12, #16            ; set loop counter to 16 (=block height)
    mov     r8, #0              ; initialize sum = 0
    mov     r11, #0             ; initialize sse = 0

loop
    ; 1st 4 pixels
    ldr     r4, [r0, #0x0]      ; load 4 src pixels
    ldr     r5, [r2, #0x0]      ; load 4 ref pixels

    mov     lr, #0              ; constant zero (operand for sel / usad8)

    usub8   r6, r4, r5          ; per-byte src - ref, sets GE where src >= ref
    sel     r7, r6, lr          ; keep bytes with positive difference, else 0
    usub8   r9, r5, r4          ; per-byte ref - src, sets GE where ref >= src
    sel     r6, r9, lr          ; keep magnitudes of negative differences, else 0

    ; calculate partial sums
    usad8   r4, r7, lr          ; sum of positive differences
    usad8   r5, r6, lr          ; sum of negative difference magnitudes
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; bytes 0 and 2 -> two halfwords
    uxtb16  r10, r6, ror #8     ; bytes 1 and 3 -> two halfwords
    smlad   r11, r5, r5, r11    ; sse += d0*d0 + d2*d2

    ; 2nd 4 pixels
    ldr     r4, [r0, #0x4]      ; load 4 src pixels
    ldr     r5, [r2, #0x4]      ; load 4 ref pixels
    smlad   r11, r10, r10, r11  ; sse += d1*d1 + d3*d3 (previous group)

    usub8   r6, r4, r5          ; per-byte src - ref, sets GE where src >= ref
    sel     r7, r6, lr          ; keep bytes with positive difference, else 0
    usub8   r9, r5, r4          ; per-byte ref - src, sets GE where ref >= src
    sel     r6, r9, lr          ; keep magnitudes of negative differences, else 0

    ; calculate partial sums
    usad8   r4, r7, lr          ; sum of positive differences
    usad8   r5, r6, lr          ; sum of negative difference magnitudes
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; bytes 0 and 2 -> two halfwords
    uxtb16  r10, r6, ror #8     ; bytes 1 and 3 -> two halfwords
    smlad   r11, r5, r5, r11    ; sse += d0*d0 + d2*d2

    ; 3rd 4 pixels
    ldr     r4, [r0, #0x8]      ; load 4 src pixels
    ldr     r5, [r2, #0x8]      ; load 4 ref pixels
    smlad   r11, r10, r10, r11  ; sse += d1*d1 + d3*d3 (previous group)

    usub8   r6, r4, r5          ; per-byte src - ref, sets GE where src >= ref
    sel     r7, r6, lr          ; keep bytes with positive difference, else 0
    usub8   r9, r5, r4          ; per-byte ref - src, sets GE where ref >= src
    sel     r6, r9, lr          ; keep magnitudes of negative differences, else 0

    ; calculate partial sums
    usad8   r4, r7, lr          ; sum of positive differences
    usad8   r5, r6, lr          ; sum of negative difference magnitudes
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; bytes 0 and 2 -> two halfwords
    uxtb16  r10, r6, ror #8     ; bytes 1 and 3 -> two halfwords
    smlad   r11, r5, r5, r11    ; sse += d0*d0 + d2*d2

    ; 4th 4 pixels
    ldr     r4, [r0, #0xc]      ; load 4 src pixels
    ldr     r5, [r2, #0xc]      ; load 4 ref pixels
    smlad   r11, r10, r10, r11  ; sse += d1*d1 + d3*d3 (previous group)

    usub8   r6, r4, r5          ; per-byte src - ref, sets GE where src >= ref
    add     r0, r0, r1          ; set src_ptr to next row
    sel     r7, r6, lr          ; keep bytes with positive difference, else 0
    usub8   r9, r5, r4          ; per-byte ref - src, sets GE where ref >= src
    add     r2, r2, r3          ; set ref_ptr to next row
    sel     r6, r9, lr          ; keep magnitudes of negative differences, else 0

    ; calculate partial sums
    usad8   r4, r7, lr          ; sum of positive differences
    usad8   r5, r6, lr          ; sum of negative difference magnitudes
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; bytes 0 and 2 -> two halfwords
    uxtb16  r10, r6, ror #8     ; bytes 1 and 3 -> two halfwords
    smlad   r11, r5, r5, r11    ; sse += d0*d0 + d2*d2
    smlad   r11, r10, r10, r11  ; sse += d1*d1 + d3*d3


    subs    r12, r12, #1        ; one row done

    bne     loop

    ; return stuff
    ldr     r6, [sp, #0x28]     ; get address of sse (above the 40 bytes pushed on entry)
    mul     r0, r8, r8          ; sum * sum (low 32 bits)
    str     r11, [r6]           ; store sse
    ; |sum| can reach 255 * 256 = 65280, so sum * sum can reach 0xFE010000.
    ; That still fits in 32 bits as an unsigned value but has bit 31 set,
    ; so the shift must be logical: ASR would sign-extend and corrupt the
    ; result for |sum| >= 46341.
    sub     r0, r11, r0, LSR #8 ; return (sse - ((sum * sum) >> 8))

    ldmfd   sp!, {r4-r12, pc}

    ENDP