3 libdemac - A Monkey's Audio decoder
7 Copyright (C) Dave Chapman 2007
9 ARMv6 vector math copyright (C) 2008 Jens Arnold
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
27 /* This version fetches data as 32 bit words, and *requires* v1 to be
28 * 32 bit aligned, otherwise it will result either in a data abort, or
29 * incorrect results (if ARM aligncheck is disabled). */
/* vector_add: adds the 16 bit elements of v2 to the elements of v1 in place.
 * v1 is read four words at a time with ldmia, summed with the ARMv6 dual
 * halfword add (sadd16, two 16 bit lanes per register) and written back
 * with stmia; v2 is only read.
 * NOTE(review): the number of elements processed is not visible in this
 * excerpt; presumably it follows from the .rept factor and the %[cnt]
 * loop counter - confirm against the full file. */
30 static inline void vector_add(int16_t* v1
, int16_t* v2
)
/* Repeat factor that is pasted into the assembler's .rept directive below.
 * Two values are defined back to back; presumably each one sits under a
 * preprocessor conditional that is not visible in this excerpt - TODO
 * confirm. */
37 #define ADD_SUB_BLOCKS "4"
39 #define ADD_SUB_BLOCKS "2"
/* First code path: bit 1 of v2 is cleared before any load, then two words
 * are preloaded into r4-r5. Every v2 operand used below is stitched
 * together from halves of two neighbouring loaded words.
 * NOTE(review): presumably this path is taken when v2 is only 16 bit
 * aligned; the test/branch selecting it is not visible here. */
47 "bic %[v2], %[v2], #2 \n"
48 "ldmia %[v2]!, {r4-r5} \n"
/* Each repetition handles four words of v1 (r0-r3). */
50 ".rept " ADD_SUB_BLOCKS
"\n"
51 "ldmia %[v2]!, {r6-r7} \n"
52 "ldmia %[v1], {r0-r3} \n"
/* Swap the halves of r5; pkhtb then keeps the top half of the rotated r5
 * and puts the old top half of r4 (asr #16) into the bottom half. */
53 "mov r5, r5, ror #16 \n"
54 "pkhtb r4, r5, r4, asr #16 \n"
55 "sadd16 r0, r0, r4 \n"
/* pkhbt keeps the bottom half of the rotated r5 and takes the bottom half
 * of r6 (lsl #16) as the new top half. */
56 "pkhbt r5, r5, r6, lsl #16 \n"
57 "sadd16 r1, r1, r5 \n"
/* Reload r4-r5 with the next two v2 words; the new r4 is needed for the
 * last stitch of this repetition and both carry over to the next one. */
58 "ldmia %[v2]!, {r4-r5} \n"
/* Same stitching as above, now for r6/r7 and the freshly loaded r4. */
59 "mov r7, r7, ror #16 \n"
60 "pkhtb r6, r7, r6, asr #16 \n"
61 "sadd16 r2, r2, r6 \n"
62 "pkhbt r7, r7, r4, lsl #16 \n"
63 "sadd16 r3, r3, r7 \n"
/* Write the four result words back and advance v1. */
64 "stmia %[v1]!, {r0-r3} \n"
/* Decrement the loop counter and set the flags.
 * NOTE(review): the directive closing .rept and the branch consuming the
 * flags are not visible in this excerpt. */
67 "subs %[cnt], %[cnt], #1 \n"
/* Second code path: v2 words are loaded four at a time and added to v1
 * directly, without any halfword re-packing. */
74 ".rept " ADD_SUB_BLOCKS
"\n"
75 "ldmia %[v2]!, {r4-r7} \n"
76 "ldmia %[v1], {r0-r3} \n"
77 "sadd16 r0, r0, r4 \n"
78 "sadd16 r1, r1, r5 \n"
79 "sadd16 r2, r2, r6 \n"
80 "sadd16 r3, r3, r7 \n"
81 "stmia %[v1]!, {r0-r3} \n"
84 "subs %[cnt], %[cnt], #1 \n"
/* Clobber list: the eight scratch registers used above, plus "memory"
 * since the asm writes through v1 (stmia). */
97 "r0", "r1", "r2", "r3", "r4",
98 "r5", "r6", "r7", "memory"
102 /* This version fetches data as 32 bit words, and *requires* v1 to be
103 * 32 bit aligned, otherwise it will result either in a data abort, or
104 * incorrect results (if ARM aligncheck is disabled). */
/* vector_sub: subtracts the 16 bit elements of v2 from the elements of v1
 * in place. v1 is read four words at a time with ldmia, processed with the
 * ARMv6 dual halfword subtract (ssub16, two 16 bit lanes per register) and
 * written back with stmia; v2 is only read.
 * NOTE(review): the number of elements processed is not visible in this
 * excerpt; presumably it follows from the .rept factor (ADD_SUB_BLOCKS)
 * and the %[cnt] loop counter - confirm against the full file. */
105 static inline void vector_sub(int16_t* v1
, int16_t* v2
)
/* First code path: bit 1 of v2 is cleared before any load, then two words
 * are preloaded into r4-r5. Every v2 operand used below is stitched
 * together from halves of two neighbouring loaded words.
 * NOTE(review): presumably this path is taken when v2 is only 16 bit
 * aligned; the test/branch selecting it is not visible here. */
116 "bic %[v2], %[v2], #2 \n"
117 "ldmia %[v2]!, {r4-r5} \n"
/* Each repetition handles four words of v1 (r0-r3). */
119 ".rept " ADD_SUB_BLOCKS
"\n"
120 "ldmia %[v2]!, {r6-r7} \n"
121 "ldmia %[v1], {r0-r3} \n"
/* Swap the halves of r5; pkhtb then keeps the top half of the rotated r5
 * and puts the old top half of r4 (asr #16) into the bottom half. */
122 "mov r5, r5, ror #16 \n"
123 "pkhtb r4, r5, r4, asr #16 \n"
124 "ssub16 r0, r0, r4 \n"
/* pkhbt keeps the bottom half of the rotated r5 and takes the bottom half
 * of r6 (lsl #16) as the new top half. */
125 "pkhbt r5, r5, r6, lsl #16 \n"
126 "ssub16 r1, r1, r5 \n"
/* Reload r4-r5 with the next two v2 words; the new r4 is needed for the
 * last stitch of this repetition and both carry over to the next one. */
127 "ldmia %[v2]!, {r4-r5} \n"
/* Same stitching as above, now for r6/r7 and the freshly loaded r4. */
128 "mov r7, r7, ror #16 \n"
129 "pkhtb r6, r7, r6, asr #16 \n"
130 "ssub16 r2, r2, r6 \n"
131 "pkhbt r7, r7, r4, lsl #16 \n"
132 "ssub16 r3, r3, r7 \n"
/* Write the four result words back and advance v1. */
133 "stmia %[v1]!, {r0-r3} \n"
/* Decrement the loop counter and set the flags.
 * NOTE(review): the directive closing .rept and the branch consuming the
 * flags are not visible in this excerpt. */
136 "subs %[cnt], %[cnt], #1 \n"
/* Second code path: v2 words are loaded four at a time and subtracted
 * from v1 directly, without any halfword re-packing. */
143 ".rept " ADD_SUB_BLOCKS
"\n"
144 "ldmia %[v2]!, {r4-r7} \n"
145 "ldmia %[v1], {r0-r3} \n"
146 "ssub16 r0, r0, r4 \n"
147 "ssub16 r1, r1, r5 \n"
148 "ssub16 r2, r2, r6 \n"
149 "ssub16 r3, r3, r7 \n"
150 "stmia %[v1]!, {r0-r3} \n"
153 "subs %[cnt], %[cnt], #1 \n"
/* Clobber list: the eight scratch registers used above, plus "memory"
 * since the asm writes through v1 (stmia). */
166 "r0", "r1", "r2", "r3", "r4",
167 "r5", "r6", "r7", "memory"
171 /* This version fetches data as 32 bit words, and *requires* v1 to be
172 * 32 bit aligned, otherwise it will result either in a data abort, or
173 * incorrect results (if ARM aligncheck is disabled). */
/* scalarproduct: accumulates into %[res] the products of the 16 bit
 * elements of v1 with the 16 bit elements of v2, using the ARMv6 dual
 * multiply-accumulate instructions (two 16x16 products per instruction).
 * Both vectors are only read; both pointers are advanced by the loads.
 * NOTE(review): the function is declared to return int32_t, presumably the
 * value of %[res]; the return statement and the element count are not
 * visible in this excerpt - confirm against the full file. */
174 static inline int32_t scalarproduct(int16_t* v1
, int16_t* v2
)
/* Repeat factor that is pasted into the assembler's .rept directive below.
 * Two values are defined back to back; presumably each one sits under a
 * preprocessor conditional that is not visible in this excerpt - TODO
 * confirm. */
182 #define MLA_BLOCKS "3"
184 #define MLA_BLOCKS "1"
/* First code path: bit 1 of v2 is cleared before any load, then three v2
 * words and two v1 words are preloaded.
 * NOTE(review): presumably this path is taken when v2 is only 16 bit
 * aligned; the test/branch selecting it is not visible here. */
195 "bic %[v2], %[v2], #2 \n"
196 "ldmia %[v2]!, {r5-r7} \n"
197 "ldmia %[v1]!, {r0-r1} \n"
/* pkhbt without a shift combines the bottom half of its first source with
 * the top half of its second, so r8 holds the two halfwords that straddle
 * the r5/r6 boundary with their positions swapped. The X (exchange) forms
 * of the multiplies used in this path multiply crosswise, which
 * compensates for that swap. */
199 "pkhbt r8, r6, r5 \n"
200 "ldmia %[v2]!, {r4-r5} \n"
/* smladx adds both products to %[res]; smuadx overwrites %[res] with their
 * sum. NOTE(review): these two lines look like alternatives selected by a
 * conditional that is not visible in this excerpt. */
202 "smladx %[res], r0, r8, %[res] \n"
204 "smuadx %[res], r0, r8 \n"
/* Each repetition issues four dual multiply-accumulates, interleaved with
 * the loads for the following ones. */
206 ".rept " MLA_BLOCKS
"\n"
207 "pkhbt r8, r7, r6 \n"
208 "ldmia %[v1]!, {r2-r3} \n"
209 "smladx %[res], r1, r8, %[res] \n"
210 "pkhbt r8, r4, r7 \n"
211 "ldmia %[v2]!, {r6-r7} \n"
212 "smladx %[res], r2, r8, %[res] \n"
213 "pkhbt r8, r5, r4 \n"
214 "ldmia %[v1]!, {r0-r1} \n"
215 "smladx %[res], r3, r8, %[res] \n"
216 "pkhbt r8, r6, r5 \n"
217 "ldmia %[v2]!, {r4-r5} \n"
218 "smladx %[res], r0, r8, %[res] \n"
/* Trailing part after the repeated section (the directive closing .rept
 * is not visible in this excerpt). */
221 "pkhbt r8, r7, r6 \n"
222 "ldmia %[v1]!, {r2-r3} \n"
223 "smladx %[res], r1, r8, %[res] \n"
224 "pkhbt r8, r4, r7 \n"
/* Decrement the loop counter; the two ldmne loads below are skipped once
 * it has reached zero. */
226 "subs %[cnt], %[cnt], #1 \n"
227 "ldmneia %[v2]!, {r6-r7} \n"
228 "smladx %[res], r2, r8, %[res] \n"
229 "pkhbt r8, r5, r4 \n"
230 "ldmneia %[v1]!, {r0-r1} \n"
231 "smladx %[res], r3, r8, %[res] \n"
/* NOTE(review): the next three lines repeat the r2/r8 and r3 multiplies
 * without any counter handling; presumably they are the alternative tail
 * for the build without a loop - conditional not visible here. */
234 "pkhbt r5, r5, r4 \n"
235 "smladx %[res], r2, r8, %[res] \n"
236 "smladx %[res], r3, r5, %[res] \n"
/* Second code path: v2 words are used exactly as loaded, with the
 * non-exchanging smlad/smuad forms and no halfword re-packing. */
241 "ldmia %[v1]!, {r0-r1} \n"
242 "ldmia %[v2]!, {r5-r7} \n"
244 "ldmia %[v1]!, {r2-r3} \n"
/* smlad accumulates into %[res]; smuad overwrites it. NOTE(review): these
 * two lines look like alternatives selected by a conditional that is not
 * visible in this excerpt. */
246 "smlad %[res], r0, r5, %[res] \n"
248 "smuad %[res], r0, r5 \n"
/* Each repetition issues four dual multiply-accumulates, interleaved with
 * the loads for the following ones. */
250 ".rept " MLA_BLOCKS
"\n"
251 "ldmia %[v2]!, {r4-r5} \n"
252 "smlad %[res], r1, r6, %[res] \n"
253 "ldmia %[v1]!, {r0-r1} \n"
254 "smlad %[res], r2, r7, %[res] \n"
255 "ldmia %[v2]!, {r6-r7} \n"
256 "smlad %[res], r3, r4, %[res] \n"
257 "ldmia %[v1]!, {r2-r3} \n"
258 "smlad %[res], r0, r5, %[res] \n"
/* Trailing part after the repeated section (the directive closing .rept
 * is not visible in this excerpt). */
261 "ldmia %[v2]!, {r4-r5} \n"
262 "smlad %[res], r1, r6, %[res] \n"
/* Decrement the loop counter; the two ldmne loads below are skipped once
 * it has reached zero. */
264 "subs %[cnt], %[cnt], #1 \n"
265 "ldmneia %[v1]!, {r0-r1} \n"
266 "smlad %[res], r2, r7, %[res] \n"
267 "ldmneia %[v2]!, {r6-r7} \n"
268 "smlad %[res], r3, r4, %[res] \n"
/* NOTE(review): the next two lines repeat the r2/r7 and r3/r4 multiplies
 * without any counter handling; presumably they are the alternative tail
 * for the build without a loop - conditional not visible here. */
271 "smlad %[res], r2, r7, %[res] \n"
272 "smlad %[res], r3, r4, %[res] \n"
/* Clobber list: the nine scratch registers used above.
 * NOTE(review): unlike vector_add/vector_sub there is no "memory" entry,
 * although the asm loads through v1 and v2 - confirm that the compiler
 * cannot move pending stores to those buffers past this statement. */
285 "r0", "r1", "r2", "r3", "r4",
286 "r5", "r6", "r7", "r8"