Merge "Skip computation of distortion in vp8_pick_inter_mode if active_map is used"
[libvpx.git] / vp8 / common / ppc / recon_altivec.asm
blobdd39e05a83663080a6f4f7b5ea210a55a0822862
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
    .globl recon4b_ppc
    .globl recon2b_ppc
    .globl recon_b_ppc
.macro row_of16 Diff Pred Dst Stride
    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15
    addi    \Pred, \Pred, 16        ;# next pred
    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7
    lvx     v3,  0, \Diff           ;# v3 = d0..d7
    vaddshs v2, v2, v3              ;# v2 = r0..r7
    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff           ;# v3 = d8..d15
    addi    \Diff, \Diff, 32        ;# next diff
    vaddshs v3, v3, v1              ;# v3 = r8..r15
    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15
    stvx    v2,  0, \Dst            ;# to dst
    add     \Dst, \Dst, \Stride     ;# next dst
.endm
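;# In scalar C terms (an illustrative sketch, not code from this tree),
;# one expansion of row_of16 reconstructs 16 pels: vmrghb/vmrglb with the
;# zero vector v0 widen the predictor bytes to shorts, vaddshs adds the
;# signed diffs with saturation, and vpkshus packs back to bytes clamped
;# to 0..255.
;#
;#     for (c = 0; c < 16; c++) {
;#         int v = pred[c] + diff[c];                  /* widen + vaddshs */
;#         dst[c] = (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v);
;#     }                                               /* vpkshus clamp   */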
    .text
    .align 2
;# r3 = short *diff_ptr,
;# r4 = unsigned char *pred_ptr,
;# r5 = unsigned char *dst_ptr,
;# r6 = int stride
recon4b_ppc:
    mfspr   r0, 256                 ;# get old VRSAVE
    stw     r0, -8(r1)              ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000
    mtspr   256, r0                 ;# set VRSAVE

    vxor    v0, v0, v0              ;# v0 = 0, for zero-extending bytes
    li      r8, 16                  ;# byte offset to diffs 8..15 of a row

    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6

    lwz     r12, -8(r1)             ;# restore old VRSAVE from stack
    mtspr   256, r12                ;# reset old VRSAVE

    blr
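;# For reference, a scalar C equivalent of recon4b_ppc (a sketch derived
;# from the register comments above; the C name and parameter names are
;# illustrative): four rows of 16 pels, diff and pred rows 16 entries apart.
;#
;#     void recon4b(short *diff_ptr, unsigned char *pred_ptr,
;#                  unsigned char *dst_ptr, int stride)
;#     {
;#         int r, c;
;#         for (r = 0; r < 4; r++) {
;#             for (c = 0; c < 16; c++) {
;#                 int v = pred_ptr[c] + diff_ptr[c];
;#                 dst_ptr[c] = (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v);
;#             }
;#             diff_ptr += 16;                 /* addi Diff, Diff, 32   */
;#             pred_ptr += 16;                 /* addi Pred, Pred, 16   */
;#             dst_ptr  += stride;             /* add  Dst, Dst, Stride */
;#         }
;#     }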
.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels
    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15
    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7
    lvx     v3,  0, \Diff           ;# v3 = d0..d7
    vaddshs v2, v2, v3              ;# v2 = r0..r7
    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff           ;# v3 = d8..d15
    vaddshs v3, v3, v1              ;# v3 = r8..r15
    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15
    stvx    v2,  0, r10             ;# 2 rows to dst from buf
    lwz     r0, 0(r10)
.if \write_first_four_pels
    stw     r0, 0(\Dst)
.else
    stwux   r0, \Dst, \Stride
.endif
    lwz     r0, 4(r10)
    stw     r0, 4(\Dst)
    lwz     r0, 8(r10)
    stwux   r0, \Dst, \Stride       ;# advance dst to next row
    lwz     r0, 12(r10)
    stw     r0, 4(\Dst)
.endm
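;# The word-copy tail above scatters one packed vector into two 8-pel
;# destination rows via the stack buffer at r10. In C terms (a sketch;
;# buf is illustrative, and the .if variant that pre-advances dst on the
;# second invocation is ignored):
;#
;#     unsigned char buf[16];                  /* stvx target       */
;#     memcpy(dst,          buf,     8);       /* row 0: r0..r7     */
;#     memcpy(dst + stride, buf + 8, 8);       /* row 1: r8..r15    */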
    .align 2
;# r3 = short *diff_ptr,
;# r4 = unsigned char *pred_ptr,
;# r5 = unsigned char *dst_ptr,
;# r6 = int stride
recon2b_ppc:
    mfspr   r0, 256                 ;# get old VRSAVE
    stw     r0, -8(r1)              ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000
    mtspr   256, r0                 ;# set VRSAVE

    vxor    v0, v0, v0              ;# v0 = 0, for zero-extending bytes
    li      r8, 16                  ;# byte offset to the second 8 diffs

    la      r10, -48(r1)            ;# buf

    two_rows_of8 r3, r4, r5, r6, 1

    addi    r4, r4, 16              ;# next pred
    addi    r3, r3, 32              ;# next diff

    two_rows_of8 r3, r4, r5, r6, 0

    lwz     r12, -8(r1)             ;# restore old VRSAVE from stack
    mtspr   256, r12                ;# reset old VRSAVE

    blr
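;# Scalar C equivalent of recon2b_ppc (a sketch; the C name and parameter
;# names are illustrative): four rows of 8 pels, diff and pred rows laid
;# out contiguously 8 entries apart.
;#
;#     void recon2b(short *diff_ptr, unsigned char *pred_ptr,
;#                  unsigned char *dst_ptr, int stride)
;#     {
;#         int r, c;
;#         for (r = 0; r < 4; r++) {
;#             for (c = 0; c < 8; c++) {
;#                 int v = pred_ptr[c] + diff_ptr[c];
;#                 dst_ptr[c] = (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v);
;#             }
;#             diff_ptr += 8;
;#             pred_ptr += 8;
;#             dst_ptr  += stride;
;#         }
;#     }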
;# Gather two 4-short diff rows into the buffer at r10 and load them as
;# one vector. Assumes the caller has already loaded the first word of
;# the current row into r0 (lwz/lwzu from r3).
.macro get_two_diff_rows
    stw     r0,  0(r10)
    lwz     r0,  4(r3)
    stw     r0,  4(r10)
    lwzu    r0, 32(r3)
    stw     r0,  8(r10)
    lwz     r0,  4(r3)
    stw     r0, 12(r10)
    lvx     v3,  0, r10
.endm
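;# In C terms (a sketch; buf is illustrative): two 4-short diff rows,
;# 16 shorts (32 bytes) apart, are gathered into the contiguous stack
;# buffer so a single lvx can load all 8 diffs as one vector.
;#
;#     short buf[8];                           /* at r10            */
;#     memcpy(buf,     diff,      8);          /* row n:   4 diffs  */
;#     memcpy(buf + 4, diff + 16, 8);          /* row n+1: 4 diffs  */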
    .align 2
;# r3 = short *diff_ptr,
;# r4 = unsigned char *pred_ptr,
;# r5 = unsigned char *dst_ptr,
;# r6 = int stride
recon_b_ppc:
    mfspr   r0, 256                 ;# get old VRSAVE
    stw     r0, -8(r1)              ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000
    mtspr   256, r0                 ;# set VRSAVE

    vxor    v0, v0, v0              ;# v0 = 0, for zero-extending bytes

    la      r10, -48(r1)            ;# buf

    lwz     r0,  0(r4)              ;# gather 4 rows of 4 pred pels
    stw     r0,  0(r10)             ;#   (pred rows are 16 bytes apart)
    lwz     r0, 16(r4)
    stw     r0,  4(r10)
    lwz     r0, 32(r4)
    stw     r0,  8(r10)
    lwz     r0, 48(r4)
    stw     r0, 12(r10)

    lvx     v1, 0, r10              ;# v1 = pred = p0..p15

    lwz     r0, 0(r3)               ;# v3 = d0..d7

    get_two_diff_rows

    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7
    vaddshs v2, v2, v3              ;# v2 = r0..r7

    lwzu    r0, 32(r3)              ;# v3 = d8..d15

    get_two_diff_rows

    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
    vaddshs v3, v3, v1              ;# v3 = r8..r15

    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15
    stvx    v2, 0, r10              ;# 16 pels to dst from buf

    lwz     r0,  0(r10)             ;# scatter the four 4-pel rows to dst
    stw     r0,  0(r5)
    lwz     r0,  4(r10)
    stwux   r0, r5, r6
    lwz     r0,  8(r10)
    stwux   r0, r5, r6
    lwz     r0, 12(r10)
    stwx    r0, r5, r6

    lwz     r12, -8(r1)             ;# restore old VRSAVE from stack
    mtspr   256, r12                ;# reset old VRSAVE

    blr
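;# Scalar C equivalent of recon_b_ppc (a sketch; the C name and parameter
;# names are illustrative): four rows of 4 pels, with diff and pred rows
;# 16 entries apart (a 4x4 sub-block of a 16-wide macroblock buffer).
;#
;#     void recon_b(short *diff_ptr, unsigned char *pred_ptr,
;#                  unsigned char *dst_ptr, int stride)
;#     {
;#         int r, c;
;#         for (r = 0; r < 4; r++) {
;#             for (c = 0; c < 4; c++) {
;#                 int v = pred_ptr[c] + diff_ptr[c];
;#                 dst_ptr[c] = (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v);
;#             }
;#             diff_ptr += 16;
;#             pred_ptr += 16;
;#             dst_ptr  += stride;
;#         }
;#     }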