1 ;*****************************************************************************
2 ;* x86-optimized AC-3 downmixing
3 ;* Copyright (c) 2012 Justin Ruggles
5 ;* This file is part of Libav.
7 ;* Libav is free software; you can redistribute it and/or
8 ;* modify it under the terms of the GNU Lesser General Public
9 ;* License as published by the Free Software Foundation; either
10 ;* version 2.1 of the License, or (at your option) any later version.
12 ;* Libav is distributed in the hope that it will be useful,
13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;* Lesser General Public License for more details.
17 ;* You should have received a copy of the GNU Lesser General Public
18 ;* License along with Libav; if not, write to the Free Software
19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 ;******************************************************************************
22 ;******************************************************************************
23 ;* This is based on the channel mixing asm in libavresample, but it is
24 ;* simplified for only float coefficients and only 3 to 6 channels.
25 ;******************************************************************************
27 %include "libavutil/x86/x86util.asm"
31 ;-----------------------------------------------------------------------------
32 ; functions to downmix from 3 to 6 channels to mono or stereo
33 ; void ff_ac3_downmix_*(float **samples, float **matrix, int len);
34 ;-----------------------------------------------------------------------------
%macro AC3_DOWNMIX 2 ; %1 = in channels, %2 = out channels
; Expands to one downmix function, ac3_downmix_%1_to_%2, declared via cglobal.
; The instruction set (and therefore mmsize / num_mmregs) is whatever the
; invoking code selected before expanding this macro.
;
; For every block of mmsize bytes the generated loop computes
;     out0 = sum over i of in[i] * matrix[0][i]          (always)
;     out1 = sum over i of in[i] * matrix[1][i]          (stereo only)
; and stores the result in place over input channel 0 (and channel 1 for
; stereo).
;
; Vector register roles:
;     m0          accumulator for output channel 0
;     m1          accumulator for output channel 1 (stereo), scratch (mono)
;     m2          current input samples, also used as scratch
;     m3          scratch for the stereo multiply-add (stereo only)
;     m4.. / m3.. splatted matrix coefficients; whatever does not fit in the
;                 remaining registers is spilled to the stack
;
; NOTE(review): all sample accesses use mova and the loop is a do/while, so
; this presumably requires mmsize-aligned channel buffers and a non-zero len
; that is a multiple of mmsize/4 -- confirm against the callers.

; define some names to make the code clearer
%assign  in_channels %1
%assign out_channels %2
%assign stereo out_channels - 1             ; 0 for mono, 1 for stereo

; determine how many matrix elements must go on the stack vs. mmregs
%assign matrix_elements in_channels * out_channels
%if stereo
    %assign needed_mmregs 4                 ; m0-m3 are working registers
%else
    %assign needed_mmregs 3                 ; m0-m2 are working registers
%endif
%assign matrix_elements_mm num_mmregs - needed_mmregs
%if matrix_elements < matrix_elements_mm
    %assign matrix_elements_mm matrix_elements
%endif
%assign total_mmregs needed_mmregs+matrix_elements_mm
%if matrix_elements_mm < matrix_elements
    %assign matrix_elements_stack matrix_elements - matrix_elements_mm
%else
    %assign matrix_elements_stack 0
%endif

; 3 arguments, one GPR per input channel plus len, and one mmsize stack slot
; per spilled coefficient. The stack size is passed negated, which per the
; x86inc cglobal convention avoids reserving an extra GPR for the original
; stack pointer.
cglobal ac3_downmix_%1_to_%2, 3,in_channels+1,total_mmregs,0-matrix_elements_stack*mmsize, src0, src1, len, src2, src3, src4, src5

; load matrix pointers
; The second argument (r1) is the matrix row-pointer array. r1 and r3 are
; reused as src1q/src2q once the coefficients have been loaded.
%define matrix0q r1q
%define matrix1q r3q
%if stereo
    mov      matrix1q, [matrix0q+gprsize]
%endif
    mov      matrix0q, [matrix0q]

; define matrix coeff names
; mx_<row>_<i> names the operand (register or stack slot) holding the splatted
; coefficient; mx_stack_<row>_<i> is 1 when that operand lives on the stack.
%assign %%i 0
%assign %%j needed_mmregs
%rep in_channels
    %if %%i >= matrix_elements_mm
        CAT_XDEFINE mx_stack_0_, %%i, 1
        CAT_XDEFINE mx_0_, %%i, [rsp+(%%i-matrix_elements_mm)*mmsize]
    %else
        CAT_XDEFINE mx_stack_0_, %%i, 0
        CAT_XDEFINE mx_0_, %%i, m %+ %%j
        %assign %%j %%j+1
    %endif
    %assign %%i %%i+1
%endrep
%if stereo
%assign %%i 0
%rep in_channels
    ; row 1 follows row 0, hence the in_channels offset in the element index
    %if in_channels + %%i >= matrix_elements_mm
        CAT_XDEFINE mx_stack_1_, %%i, 1
        CAT_XDEFINE mx_1_, %%i, [rsp+(in_channels+%%i-matrix_elements_mm)*mmsize]
    %else
        CAT_XDEFINE mx_stack_1_, %%i, 0
        CAT_XDEFINE mx_1_, %%i, m %+ %%j
        %assign %%j %%j+1
    %endif
    %assign %%i %%i+1
%endrep
%endif

; load/splat matrix coeffs
; Stack-resident coefficients are broadcast through m0 and then stored.
%assign %%i 0
%rep in_channels
    %if mx_stack_0_ %+ %%i
        VBROADCASTSS m0, [matrix0q+4*%%i]
        mova  mx_0_ %+ %%i, m0
    %else
        VBROADCASTSS mx_0_ %+ %%i, [matrix0q+4*%%i]
    %endif
    %if stereo
    %if mx_stack_1_ %+ %%i
        VBROADCASTSS m0, [matrix1q+4*%%i]
        mova  mx_1_ %+ %%i, m0
    %else
        VBROADCASTSS mx_1_ %+ %%i, [matrix1q+4*%%i]
    %endif
    %endif
    %assign %%i %%i+1
%endrep

    lea          lenq, [4*r2d]              ; lenq = len * sizeof(float)
    ; load channel pointers to registers
    ; Each pointer is advanced to the end of its buffer so that the loop can
    ; index with a negative offset that counts up towards zero.
%assign %%i 1
%rep (in_channels - 1)
    mov         src %+ %%i %+ q, [src0q+%%i*gprsize]
    add         src %+ %%i %+ q, lenq
    %assign %%i %%i+1
%endrep
    mov         src0q, [src0q]              ; last: src0q held the pointer array
    add         src0q, lenq
    neg          lenq
.loop:
    ; channel 0 initialises the accumulator(s)
    %if stereo || mx_stack_0_0
    mova           m0, [src0q+lenq]
    %endif
    %if stereo
    mulps          m1, m0, mx_1_0
    %endif
    %if stereo || mx_stack_0_0
    mulps          m0, m0, mx_0_0
    %else
    mulps          m0, mx_0_0, [src0q+lenq]
    %endif
%assign %%i 1
%rep (in_channels - 1)
    %define src_ptr src %+ %%i %+ q
    ; avoid extra load for mono if matrix is in a mm register
    %if stereo || mx_stack_0_ %+ %%i
    mova           m2, [src_ptr+lenq]
    %endif
    %if stereo
    FMULADD_PS     m1, m2, mx_1_ %+ %%i, m1, m3
    %endif
    %if stereo || mx_stack_0_ %+ %%i
    FMULADD_PS     m0, m2, mx_0_ %+ %%i, m0, m2
    %else
    FMULADD_PS     m0, mx_0_ %+ %%i, [src_ptr+lenq], m0, m1
    %endif
    %assign %%i %%i+1
%endrep
    ; write the mixed output back over the input channel(s)
    mova [src0q+lenq], m0
    %if stereo
    mova [src1q+lenq], m1
    %endif

    add          lenq, mmsize
    jl .loop                                ; until the negative offset reaches 0
    RET
%endmacro
169 %macro AC3_DOWNMIX_FUNCS
0
178 %if HAVE_FMA3_EXTERNAL