FS#8961 - Anti-Aliased Fonts.
[kugel-rb/myfork.git] / apps / codecs / demac / libdemac / vector_math32_armv4.h
blob89b24f2b063c1d3772f1eaaf67bb25fc1bcd659d
1 /*
3 libdemac - A Monkey's Audio decoder
5 $Id$
7 Copyright (C) Dave Chapman 2007
9 ARMv4 vector math copyright (C) 2008 Jens Arnold
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
27 static inline void vector_add(int32_t* v1, int32_t* v2)
29 #if ORDER > 32
30 int cnt = ORDER>>5;
31 #endif
33 #if ORDER > 16
34 #define ADD_SUB_BLOCKS "8"
35 #else
36 #define ADD_SUB_BLOCKS "4"
37 #endif
39 asm volatile (
40 "1: \n"
41 ".rept " ADD_SUB_BLOCKS "\n"
42 "ldmia %[v1], {r0-r3} \n"
43 "ldmia %[v2]!, {r4-r7} \n"
44 "add r0, r0, r4 \n"
45 "add r1, r1, r5 \n"
46 "add r2, r2, r6 \n"
47 "add r3, r3, r7 \n"
48 "stmia %[v1]!, {r0-r3} \n"
49 ".endr \n"
50 #if ORDER > 32
51 "subs %[cnt], %[cnt], #1 \n"
52 "bne 1b \n"
53 #endif
54 : /* outputs */
55 #if ORDER > 32
56 [cnt]"+r"(cnt),
57 #endif
58 [v1] "+r"(v1),
59 [v2] "+r"(v2)
60 : /* inputs */
61 : /* clobbers */
62 "r0", "r1", "r2", "r3", "r4",
63 "r5", "r6", "r7", "memory"
67 static inline void vector_sub(int32_t* v1, int32_t* v2)
69 #if ORDER > 32
70 int cnt = ORDER>>5;
71 #endif
73 asm volatile (
74 "1: \n"
75 ".rept " ADD_SUB_BLOCKS "\n"
76 "ldmia %[v1], {r0-r3} \n"
77 "ldmia %[v2]!, {r4-r7} \n"
78 "sub r0, r0, r4 \n"
79 "sub r1, r1, r5 \n"
80 "sub r2, r2, r6 \n"
81 "sub r3, r3, r7 \n"
82 "stmia %[v1]!, {r0-r3} \n"
83 ".endr \n"
84 #if ORDER > 32
85 "subs %[cnt], %[cnt], #1 \n"
86 "bne 1b \n"
87 #endif
88 : /* outputs */
89 #if ORDER > 32
90 [cnt]"+r"(cnt),
91 #endif
92 [v1] "+r"(v1),
93 [v2] "+r"(v2)
94 : /* inputs */
95 : /* clobbers */
96 "r0", "r1", "r2", "r3", "r4",
97 "r5", "r6", "r7", "memory"
101 static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
103 int res;
104 #if ORDER > 32
105 int cnt = ORDER>>5;
106 #endif
108 asm volatile (
109 #if ORDER > 16
110 #if ORDER > 32
111 "mov %[res], #0 \n"
112 #endif
113 "ldmia %[v2]!, {r6-r7} \n"
114 "1: \n"
115 "ldmia %[v1]!, {r0,r1,r3-r5} \n"
116 #if ORDER > 32
117 "mla %[res], r6, r0, %[res] \n"
118 #else
119 "mul %[res], r6, r0 \n"
120 #endif
121 "mla %[res], r7, r1, %[res] \n"
122 "ldmia %[v2]!, {r0-r2,r6-r8} \n"
123 "mla %[res], r0, r3, %[res] \n"
124 "mla %[res], r1, r4, %[res] \n"
125 "mla %[res], r2, r5, %[res] \n"
126 "ldmia %[v1]!, {r0-r4} \n"
127 "mla %[res], r6, r0, %[res] \n"
128 "mla %[res], r7, r1, %[res] \n"
129 "mla %[res], r8, r2, %[res] \n"
130 "ldmia %[v2]!, {r0,r1,r6-r8} \n"
131 "mla %[res], r0, r3, %[res] \n"
132 "mla %[res], r1, r4, %[res] \n"
133 "ldmia %[v1]!, {r0-r5} \n"
134 "mla %[res], r6, r0, %[res] \n"
135 "mla %[res], r7, r1, %[res] \n"
136 "mla %[res], r8, r2, %[res] \n"
137 "ldmia %[v2]!, {r0-r2,r6,r7} \n"
138 "mla %[res], r0, r3, %[res] \n"
139 "mla %[res], r1, r4, %[res] \n"
140 "mla %[res], r2, r5, %[res] \n"
141 "ldmia %[v1]!, {r0,r1,r3-r5} \n"
142 "mla %[res], r6, r0, %[res] \n"
143 "mla %[res], r7, r1, %[res] \n"
144 "ldmia %[v2]!, {r0-r2,r6-r8} \n"
145 "mla %[res], r0, r3, %[res] \n"
146 "mla %[res], r1, r4, %[res] \n"
147 "mla %[res], r2, r5, %[res] \n"
148 "ldmia %[v1]!, {r0-r4} \n"
149 "mla %[res], r6, r0, %[res] \n"
150 "mla %[res], r7, r1, %[res] \n"
151 "mla %[res], r8, r2, %[res] \n"
152 "ldmia %[v2]!, {r0,r1,r6-r8} \n"
153 "mla %[res], r0, r3, %[res] \n"
154 "mla %[res], r1, r4, %[res] \n"
155 "ldmia %[v1]!, {r0-r5} \n"
156 "mla %[res], r6, r0, %[res] \n"
157 "mla %[res], r7, r1, %[res] \n"
158 "mla %[res], r8, r2, %[res] \n"
159 #if ORDER > 32
160 "ldmia %[v2]!, {r0-r2,r6,r7} \n"
161 #else
162 "ldmia %[v2]!, {r0-r2} \n"
163 #endif
164 "mla %[res], r0, r3, %[res] \n"
165 "mla %[res], r1, r4, %[res] \n"
166 "mla %[res], r2, r5, %[res] \n"
167 #if ORDER > 32
168 "subs %[cnt], %[cnt], #1 \n"
169 "bne 1b \n"
170 #endif
172 #else /* ORDER <= 16 */
173 "ldmia %[v1]!, {r0-r3} \n"
174 "ldmia %[v2]!, {r4-r7} \n"
175 "mul %[res], r4, r0 \n"
176 "mla %[res], r5, r1, %[res] \n"
177 "mla %[res], r6, r2, %[res] \n"
178 "mla %[res], r7, r3, %[res] \n"
180 ".rept 3 \n"
181 "ldmia %[v1]!, {r0-r3} \n"
182 "ldmia %[v2]!, {r4-r7} \n"
183 "mla %[res], r4, r0, %[res] \n"
184 "mla %[res], r5, r1, %[res] \n"
185 "mla %[res], r6, r2, %[res] \n"
186 "mla %[res], r7, r3, %[res] \n"
187 ".endr \n"
188 #endif /* ORDER <= 16 */
189 : /* outputs */
190 #if ORDER > 32
191 [cnt]"+r"(cnt),
192 #endif
193 [v1] "+r"(v1),
194 [v2] "+r"(v2),
195 [res]"=r"(res)
196 : /* inputs */
197 : /* clobbers */
198 "r0", "r1", "r2", "r3",
199 "r4", "r5", "r6", "r7"
200 #if ORDER > 16
201 ,"r8"
202 #endif
204 return res;