;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
;# Entry points exported from this file.
    .globl vp8_short_fdct4x4_ppc
    .globl vp8_short_fdct8x4_ppc
;# load_c V, LABEL, OFF, R0, R1
;#   V     vector register that receives the constant
;#   LABEL symbol naming a table of 16-byte rows
;#   OFF   byte offset into the table (literal 0 or a GPR)
;#   R0    scratch GPR (upper half of the address)
;#   R1    GPR left holding the address of LABEL; the code after the
;#         load_c invocations keeps indexing through it.
;#
;# NOTE(review): the macro body and .endm were absent from the reviewed
;#   text. The three instructions below are reconstructed from how the call
;#   sites use V and R1 - confirm against the upstream file.
.macro load_c V, LABEL, OFF, R0, R1
    lis     \R0, \LABEL@ha      ;# R0 = high-adjusted half of &LABEL
    la      \R1, \LABEL@l(\R0)  ;# R1 = &LABEL
    lvx     \V, \OFF, \R1       ;# V  = 16 bytes at LABEL + OFF
.endm
;# Forward and inverse DCTs are nearly identical; the only differences are
;#   in normalization (fwd is twice unitary, inv is half unitary)
;#   and that they are of course transposes of each other.
;#
;#   The following three accomplish most of the implementation and
;#   are used only by ppc_idct.c and ppc_fdct.c.
;#
;# Setup sequence: saves VRSAVE in r11, reserves 32 bytes of stack and loads
;#   the constants used by the transform macros:
;#     v0-v3  the four rows of dct_tab
;#     v4,v5  the two rows of ppc_dctperm_tab
;#     v6     the first row of round_tab (horizontal rounding)
;#     r6     16, the byte offset of the second row of a table
;#     r9     address of round_tab (left there by the last load_c)
;#
;# NOTE(review): no macro header or label encloses this sequence in the
;#   reviewed text - presumably it is the body of a setup macro; confirm.
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xfffc    ;# flag v0-v13 (upper 14 bits) as in use
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1, -32(r1)         ;# create space on the stack

    li      r6, 16              ;# offset of the second 16-byte table row

    load_c  v0, dct_tab, 0, r9, r10
    lvx     v1, r6, r10         ;# second row of dct_tab
    addi    r10, r10, 32
    lvx     v2, 0, r10          ;# third row of dct_tab
    lvx     v3, r6, r10         ;# fourth row of dct_tab

    load_c  v4, ppc_dctperm_tab, 0, r9, r10
    load_c  v5, ppc_dctperm_tab, r6, r9, r10

    load_c  v6, round_tab, 0, r10, r9
;# Teardown sequence: releases the 32 bytes of stack reserved by the setup
;#   code and restores the VRSAVE value that setup kept in r11.
;# NOTE(review): no macro header or label encloses this sequence in the
;#   reviewed text - presumably it is the body of a teardown macro; confirm.
    addi    r1, r1, 32          ;# recover stack

    mtspr   256, r11            ;# reset old VRSAVE
;# Do horiz xf on two rows of coeffs  v8 = a0 a1 a2 a3  b0 b1 b2 b3.
;#   a/A are the even rows 0,2   b/B are the odd rows 1,3
;#   For fwd transform, indices are horizontal positions, then frequencies.
;#   For inverse transform, frequencies then positions.
;#   The two resulting  A0..A3  B0..B3  are later combined
;#   and vertically transformed.
;#
;# In:      v8      two input rows (eight halfwords)
;#          v0-v3   coefficient rows
;#          v4,v5   permute controls
;#          v6      rounding constant, added in by the first multiply-sum
;#          v7      per-word right-shift count
;# Out:     \Dst    transformed rows, interleaved
;# Clobber: v9, v10, v11
.macro two_rows_horiz Dst
    vperm   v9, v8, v8, v4      ;# v9 = a2 a3 a0 a1  b2 b3 b0 b1

    vmsumshm v10, v0, v8, v6
    vmsumshm v10, v1, v9, v10
    vsraw   v10, v10, v7        ;# v10 = A0 A1  B0 B1

    vmsumshm v11, v2, v8, v6
    vmsumshm v11, v3, v9, v11
    vsraw   v11, v11, v7        ;# v11 = A2 A3  B2 B3

    vpkuwum v10, v10, v11       ;# v10  = A0 A1  B0 B1  A2 A3  B2 B3
    vperm   \Dst, v10, v10, v5  ;# Dest = A0 B0  A1 B1  A2 B2  A3 B3
.endm
;# Vertical xf on two rows. DCT values in comments are for inverse transform;
;#   forward transform uses transpose.
;#
;# In:      \Ceven, \Codd  coefficient vectors; words 0 and 1 of each are used
;#          v12, v13       the two outputs of the horizontal pass
;#          v6             rounding constant, added in by the first multiply-sum
;#          v7             per-word right-shift count
;# Out:     v8             two output rows
;# Clobber: v9, v10
;#
;# Each half must be shifted before v8 is reused: without the two vsraw
;#   instructions the first sum is overwritten by the next vspltw and the
;#   final pack reads a v10 this macro never wrote.
.macro two_rows_vert Ceven, Codd
    vspltw  v8, \Ceven, 0       ;# v8 = c00 c10  or  c02 c12 four times
    vspltw  v9, \Codd,  0       ;# v9 = c20 c30  or  c22 c32 ""
    vmsumshm v8, v8, v12, v6
    vmsumshm v8, v9, v13, v8
    vsraw   v10, v8, v7         ;# keep the first result row in v10

    vspltw  v8, \Codd,  1       ;# v8 = c01 c11  or  c03 c13
    vspltw  v9, \Ceven, 1       ;# v9 = c21 c31  or  c23 c33
    vmsumshm v8, v8, v12, v6
    vmsumshm v8, v9, v13, v8
    vsraw   v8, v8, v7          ;# second result row

    vpkuwum v8, v10, v8         ;# v8 = rows 0,1  or 2,3
.endm
;# two_rows_h Dest
;#   Horizontal pass over two input rows; the result is left in \Dest.
;#
;# NOTE(review): the body and .endm of this macro were absent from the
;#   reviewed text. The instructions that gather two input rows into v8 are
;#   NOT reproduced here and must be restored from the upstream file before
;#   this assembles into working code. Only the final step is inferred: the
;#   call sites document \Dest in the interleaved layout that
;#   two_rows_horiz produces.
.macro two_rows_h Dest
    two_rows_horiz \Dest
.endm
;# vp8_short_fdct4x4_ppc - exported 4x4 forward DCT entry point.
;#
;# NOTE(review): the reviewed text shows only part of this routine. No
;#   setup, input loads, vertical pass, output store, teardown or return
;#   instruction is visible; restore them from the upstream file. The
;#   argument registers cannot be determined from what is shown.
vp8_short_fdct4x4_ppc:

    vspltisw v7, 14             ;# == 14, fits in 5 signed bits

    two_rows_h v12              ;# v12 = H00 H10  H01 H11  H02 H12  H03 H13

    two_rows_h v13              ;# v13 = H20 H30  H21 H31  H22 H32  H23 H33

    lvx     v6, r6, r9          ;# v6 = Vround
    vspltisw v7, -16            ;# == 16 == -16, only low 5 bits matter

    ;# NOTE(review): as shown, execution falls through into the next
    ;#   routine - the remainder of this function is missing here.
;# vp8_short_fdct8x4_ppc - exported 8x4 forward DCT entry point.
;#   The visible code runs the horizontal pass twice, reloading the
;#   horizontal rounding constant and shift count before the second run.
;#
;# NOTE(review): the reviewed text shows only part of this routine. No
;#   setup, input loads, vertical passes, output stores, teardown or return
;#   instruction is visible; restore them from the upstream file. The
;#   argument registers cannot be determined from what is shown.
vp8_short_fdct8x4_ppc:

    vspltisw v7, 14             ;# == 14, fits in 5 signed bits

    two_rows_h v12              ;# v12 = H00 H10  H01 H11  H02 H12  H03 H13

    two_rows_h v13              ;# v13 = H20 H30  H21 H31  H22 H32  H23 H33

    lvx     v6, r6, r9          ;# v6 = Vround
    vspltisw v7, -16            ;# == 16 == -16, only low 5 bits matter

    ;# NOTE(review): instructions are missing here in the reviewed text.

    lvx     v6, 0, r9           ;# v6 = Hround

    vspltisw v7, 14             ;# == 14, fits in 5 signed bits

    two_rows_h v12              ;# v12 = H00 H10  H01 H11  H02 H12  H03 H13

    two_rows_h v13              ;# v13 = H20 H30  H21 H31  H22 H32  H23 H33

    lvx     v6, r6, r9          ;# v6 = Vround
    vspltisw v7, -16            ;# == 16 == -16, only low 5 bits matter

    ;# NOTE(review): the remainder of this function is missing in the
    ;#   reviewed text.
;# Constant tables fetched with load_c / lvx. Each row is 16 bytes, and each
;#   table is 16-byte aligned because lvx ignores the low four address bits.
;# NOTE(review): the section directive and the three labels were absent from
;#   the reviewed text; the labels are restored from the load_c call sites.
;#   Confirm the intended section.
    .data

    .align 4                    ;# 2^4 = 16-byte boundary
ppc_dctperm_tab:
    .byte 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11   ;# swap halfword pairs
    .byte 0,1,4,5, 2,3,6,7, 8,9,12,13, 10,11,14,15   ;# interleave A and B

    .align 4
dct_tab:
    .short  23170, 23170,-12540,-30274, 23170, 23170,-12540,-30274
    .short  23170, 23170, 30274, 12540, 23170, 23170, 30274, 12540

    .short  23170,-23170, 30274,-12540, 23170,-23170, 30274,-12540
    .short -23170, 23170, 12540,-30274,-23170, 23170, 12540,-30274

    .align 4
round_tab:
    .long (1 << (14-1)), (1 << (14-1)), (1 << (14-1)), (1 << (14-1))   ;# Hround, used with shift 14
    .long (1 << (16-1)), (1 << (16-1)), (1 << (16-1)), (1 << (16-1))   ;# Vround, used with shift 16