common: prevent buffer overflow
[supercollider.git] / include / common / SC_VFP11.h
blob f767e59206e94fc6cff0a03f1a5769c38ec42379
1 #ifndef _SC_VFP11_
2 #define _SC_VFP11_
4 #include <TargetConditionals.h>
6 #define IPHONE_VEC
/*
 * Put the VFP unit into short-vector mode with a vector length of 4.
 *
 * Reads FPSCR, ORs 3 into the field starting at bit 16 and writes the
 * value back. The function body is empty when TARGET_IPHONE_SIMULATOR is
 * non-zero.
 */
inline void initVFP()
{
#if !TARGET_IPHONE_SIMULATOR
    int tmp;
    __asm__ __volatile__(
        "fmrx %[tmp], fpscr\n\t"
        "orr %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */
        "fmxr fpscr, %[tmp]\n\t"
        : [tmp] "=&r" (tmp)
        : /* no inputs */
        : "cc", "memory");
#endif
}
/*
 * Return the VFP unit to a vector length of 1.
 *
 * Reads FPSCR, clears the three bits starting at bit 16 (the field that
 * initVFP() sets) and writes the value back. The function body is empty
 * when TARGET_IPHONE_SIMULATOR is non-zero.
 */
inline void releaseVFP()
{
#if !TARGET_IPHONE_SIMULATOR
    int tmp;
    __asm__ __volatile__(
        "fmrx %[tmp], fpscr\n\t"
        "bic %[tmp], %[tmp], #(7 << 16)\n\t" /* set vector size back to 1 */
        "fmxr fpscr, %[tmp]\n\t"
        : [tmp] "=&r" (tmp)
        : /* no inputs */
        : "cc", "memory");
#endif
}
/*
 * Fill `dest` with `val` using VFP short-vector stores.
 *
 * dest  destination buffer
 * val   value written to every element
 * len   number of floats to write; expected to be a positive multiple of 8
 *
 * Each loop pass stores 8 floats unconditionally and 8 more when at least
 * 16 were still outstanding, so a length that is not a multiple of 8 is
 * not honoured exactly: the routine may write past dest + len or stop
 * short of it. A non-positive length returns without touching `dest`.
 *
 * The routine sets the FPSCR vector length to 4 on entry and clears the
 * length field on exit, whatever it held before the call. The function
 * body is empty when TARGET_IPHONE_SIMULATOR is non-zero.
 */
inline void vfill(float *dest, float val, int len)
{
#if !TARGET_IPHONE_SIMULATOR
    /* Four copies of the fill value, loaded once into s8-s11. */
    float t[4] = {val, val, val, val};
    float *v = t;
    int tmp;

    /* The loop stores 8 floats before it looks at the result of the length
     * test, so an empty request must be rejected up front. */
    if (len <= 0) {
        return;
    }

    __asm__ __volatile__(
        "fmrx %[tmp], fpscr\n\t"
        "orr %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */
        "fmxr fpscr, %[tmp]\n\t"

        "fldmias %[src1], {s8-s11}\n\t"
        "1:\n\t"
        "subs %[len], %[len], #16\n\t"
        /* first 8 floats: always stored */
        "fstmias %[dst]!, {s8-s11}\n\t"
        "fstmias %[dst]!, {s8-s11}\n\t"
        /* next 8 floats: only when at least 16 were left before the subs */
        "fstmiasge %[dst]!, {s8-s11}\n\t"
        "fstmiasge %[dst]!, {s8-s11}\n\t"
        "bgt 1b\n\t"

        "bic %[tmp], %[tmp], #(7 << 16)\n\t" /* set vector size back to 1 */
        "fmxr fpscr, %[tmp]\n\t"
        : [dst] "+&r" (dest), [src1] "+&r" (v), [len] "+&r" (len), [tmp] "=&r" (tmp)
        : /* no inputs */
        : "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
          "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
          "cc", "memory");
#endif
}
/*
 * Element-wise multiply-add: dest[i] = a[i] + b[i] * c[i].
 *
 * dest  destination buffer
 * a     addend (loaded into the accumulator registers)
 * b, c  factors
 * len   number of floats; expected to be a positive multiple of 8
 *
 * The loop is software-pipelined. The prologue loads the first 8 floats
 * of every source and starts the first multiply-accumulate; each pass then
 * finishes and stores one group of 8, and, depending on the GE/GT flags
 * left by `subs`, processes a second group of 8 and prefetches the group
 * for the next pass. A length that is not a multiple of 8 is therefore not
 * honoured exactly. A non-positive length returns without touching any
 * buffer.
 *
 * The routine sets the FPSCR vector length to 4 on entry and clears the
 * length field on exit, whatever it held before the call. The function
 * body is empty when TARGET_IPHONE_SIMULATOR is non-zero.
 */
inline void vmuladd(float *dest, float *a, float *b, float *c, int len)
{
#if !TARGET_IPHONE_SIMULATOR
    int tmp;

    /* The prologue reads 8 floats from each source and the first pass
     * writes 8 floats before the length is ever tested. */
    if (len <= 0) {
        return;
    }

    __asm__ __volatile__(
        "fmrx %[tmp], fpscr\n\t"
        "orr %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */
        "fmxr fpscr, %[tmp]\n\t"

        /* prologue: first 8 elements of b, a and c */
        "fldmias %[src2]!, {s0-s3}\n\t"
        "fldmias %[src1]!, {s8-s11}\n\t"
        "fldmias %[src3]!, {s24-s27}\n\t"
        "fldmias %[src2]!, {s4-s7}\n\t"
        "fldmias %[src1]!, {s12-s15}\n\t"
        "fldmias %[src3]!, {s28-s31}\n\t"
        "fmacs s8, s0, s24\n\t"
        "1:\n\t"
        "subs %[len], %[len], #16\n\t"
        "fmacs s12, s4, s28\n\t"
        /* second group of 8: only when at least 16 were left */
        "fldmiasge %[src2]!, {s0-s3}\n\t"
        "fldmiasge %[src1]!, {s16-s19}\n\t"
        "fldmiasge %[src3]!, {s24-s27}\n\t"
        "fldmiasge %[src2]!, {s4-s7}\n\t"
        "fldmiasge %[src1]!, {s20-s23}\n\t"
        "fldmiasge %[src3]!, {s28-s31}\n\t"
        "fmacsge s16, s0, s24\n\t"
        /* store the first group */
        "fstmias %[dst]!, {s8-s11}\n\t"
        "fstmias %[dst]!, {s12-s15}\n\t"
        "fmacsge s20, s4, s28\n\t"
        /* prefetch for the next pass: only when more than 16 were left */
        "fldmiasgt %[src2]!, {s0-s3}\n\t"
        "fldmiasgt %[src1]!, {s8-s11}\n\t"
        "fldmiasgt %[src3]!, {s24-s27}\n\t"
        "fldmiasgt %[src2]!, {s4-s7}\n\t"
        "fldmiasgt %[src1]!, {s12-s15}\n\t"
        "fldmiasgt %[src3]!, {s28-s31}\n\t"
        "fmacsge s8, s0, s24\n\t"
        /* store the second group */
        "fstmiasge %[dst]!, {s16-s19}\n\t"
        "fstmiasge %[dst]!, {s20-s23}\n\t"
        "bgt 1b\n\t"

        "bic %[tmp], %[tmp], #(7 << 16)\n\t" /* set vector size back to 1 */
        "fmxr fpscr, %[tmp]\n\t"
        : [dst] "+&r" (dest), [src1] "+&r" (a), [src2] "+&r" (b), [src3] "+&r" (c), [len] "+&r" (len), [tmp] "=&r" (tmp)
        : /* no inputs */
        : "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
          "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
          "cc", "memory");
#endif
}
/*
 * Multiply a buffer by a scalar: dest[i] = b[i] * scalar.
 *
 * dest    destination buffer
 * scalar  factor applied to every element
 * b       source buffer
 * len     number of floats; expected to be a positive multiple of 8
 *
 * The loop is software-pipelined. The prologue loads the first 8 source
 * floats and starts the first multiply; each pass then finishes and stores
 * one group of 8, and, depending on the GE/GT flags left by `subs`,
 * processes a second group of 8 and prefetches the group for the next
 * pass. A length that is not a multiple of 8 is therefore not honoured
 * exactly. A non-positive length returns without touching any buffer.
 *
 * The routine sets the FPSCR vector length to 4 on entry and clears the
 * length field on exit, whatever it held before the call. The function
 * body is empty when TARGET_IPHONE_SIMULATOR is non-zero.
 */
inline void vscalarmul(float *dest, float scalar, float *b, int len)
{
#if !TARGET_IPHONE_SIMULATOR
    /* Four copies of the factor, loaded once into s0-s3.
     * NOTE(review): s0 presumably acts as a scalar operand in the multiplies
     * because it lives in register bank 0 - confirm against the VFP manual. */
    float t[4] = {scalar, scalar, scalar, scalar};
    float *s = t;
    int tmp;

    /* The prologue reads 8 floats and the first pass writes 8 floats
     * before the length is ever tested. */
    if (len <= 0) {
        return;
    }

    __asm__ __volatile__(
        "fmrx %[tmp], fpscr\n\t"
        "orr %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */
        "fmxr fpscr, %[tmp]\n\t"

        /* prologue: the factor and the first 8 source elements */
        "fldmias %[src1], {s0-s3}\n\t"
        "fldmias %[src2]!, {s8-s11}\n\t"
        "fldmias %[src2]!, {s12-s15}\n\t"
        "fmuls s8, s8, s0\n\t"
        "1:\n\t"
        "subs %[len], %[len], #16\n\t"
        "fmuls s12, s12, s0\n\t"
        /* second group of 8: only when at least 16 were left */
        "fldmiasge %[src2]!, {s24-s27}\n\t"
        "fldmiasge %[src2]!, {s28-s31}\n\t"
        "fmulsge s24, s24, s0\n\t"
        /* store the first group */
        "fstmias %[dst]!, {s8-s11}\n\t"
        "fstmias %[dst]!, {s12-s15}\n\t"
        "fmulsge s28, s28, s0\n\t"
        /* prefetch for the next pass: only when more than 16 were left */
        "fldmiasgt %[src2]!, {s8-s11}\n\t"
        "fldmiasgt %[src2]!, {s12-s15}\n\t"
        "fmulsge s8, s8, s0\n\t"
        /* store the second group */
        "fstmiasge %[dst]!, {s24-s27}\n\t"
        "fstmiasge %[dst]!, {s28-s31}\n\t"
        "bgt 1b\n\t"

        "bic %[tmp], %[tmp], #(7 << 16)\n\t" /* set vector size back to 1 */
        "fmxr fpscr, %[tmp]\n\t"
        : [dst] "+&r" (dest), [src1] "+&r" (s), [src2] "+&r" (b), [len] "+&r" (len), [tmp] "=&r" (tmp)
        : /* no inputs */
        : "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
          "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
          "cc", "memory");
#endif
}
/*
 * Element-wise product: dest[i] = a[i] * b[i].
 *
 * dest  destination buffer
 * a, b  source buffers
 * len   number of floats; expected to be a positive multiple of 8
 *
 * The loop is software-pipelined. The prologue loads the first 8 floats
 * of both sources and starts the first multiply; each pass then finishes
 * and stores one group of 8, and, depending on the GE/GT flags left by
 * `subs`, processes a second group of 8 and prefetches the group for the
 * next pass. A length that is not a multiple of 8 is therefore not
 * honoured exactly. A non-positive length returns without touching any
 * buffer.
 *
 * The routine sets the FPSCR vector length to 4 on entry and clears the
 * length field on exit, whatever it held before the call. The function
 * body is empty when TARGET_IPHONE_SIMULATOR is non-zero.
 */
inline void vmul(float *dest, float *a, const float *b, int len)
{
#if !TARGET_IPHONE_SIMULATOR
    int tmp;

    /* The prologue reads 8 floats from each source and the first pass
     * writes 8 floats before the length is ever tested. */
    if (len <= 0) {
        return;
    }

    __asm__ __volatile__(
        "fmrx %[tmp], fpscr\n\t"
        "orr %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */
        "fmxr fpscr, %[tmp]\n\t"

        /* prologue: first 8 elements of a and b */
        "fldmias %[src1]!, {s0-s3}\n\t"
        "fldmias %[src2]!, {s8-s11}\n\t"
        "fldmias %[src1]!, {s4-s7}\n\t"
        "fldmias %[src2]!, {s12-s15}\n\t"
        "fmuls s8, s0, s8\n\t"
        "1:\n\t"
        "subs %[len], %[len], #16\n\t"
        "fmuls s12, s4, s12\n\t"
        /* second group of 8: only when at least 16 were left */
        "fldmiasge %[src1]!, {s16-s19}\n\t"
        "fldmiasge %[src2]!, {s24-s27}\n\t"
        "fldmiasge %[src1]!, {s20-s23}\n\t"
        "fldmiasge %[src2]!, {s28-s31}\n\t"
        "fmulsge s24, s16, s24\n\t"
        /* store the first group */
        "fstmias %[dst]!, {s8-s11}\n\t"
        "fstmias %[dst]!, {s12-s15}\n\t"
        "fmulsge s28, s20, s28\n\t"
        /* prefetch for the next pass: only when more than 16 were left */
        "fldmiasgt %[src1]!, {s0-s3}\n\t"
        "fldmiasgt %[src2]!, {s8-s11}\n\t"
        "fldmiasgt %[src1]!, {s4-s7}\n\t"
        "fldmiasgt %[src2]!, {s12-s15}\n\t"
        "fmulsge s8, s0, s8\n\t"
        /* store the second group */
        "fstmiasge %[dst]!, {s24-s27}\n\t"
        "fstmiasge %[dst]!, {s28-s31}\n\t"
        "bgt 1b\n\t"

        "bic %[tmp], %[tmp], #(7 << 16)\n\t" /* set vector size back to 1 */
        "fmxr fpscr, %[tmp]\n\t"
        : [dst] "+&r" (dest), [src1] "+&r" (a), [src2] "+&r" (b), [len] "+&r" (len), [tmp] "=&r" (tmp)
        : /* no inputs */
        : "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
          "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
          "cc", "memory");
#endif
}
/*
 * Element-wise sum: dest[i] = a[i] + b[i].
 *
 * dest  destination buffer
 * a, b  source buffers
 * len   number of floats; expected to be a positive multiple of 8
 *
 * The loop is software-pipelined. The prologue loads the first 8 floats
 * of both sources and starts the first addition; each pass then finishes
 * and stores one group of 8, and, depending on the GE/GT flags left by
 * `subs`, processes a second group of 8 and prefetches the group for the
 * next pass. A length that is not a multiple of 8 is therefore not
 * honoured exactly. A non-positive length returns without touching any
 * buffer.
 *
 * The routine sets the FPSCR vector length to 4 on entry and clears the
 * length field on exit, whatever it held before the call. The function
 * body is empty when TARGET_IPHONE_SIMULATOR is non-zero.
 */
inline void vadd(float *dest, float *a, const float *b, int len)
{
#if !TARGET_IPHONE_SIMULATOR
    int tmp;

    /* The prologue reads 8 floats from each source and the first pass
     * writes 8 floats before the length is ever tested. */
    if (len <= 0) {
        return;
    }

    __asm__ __volatile__(
        "fmrx %[tmp], fpscr\n\t"
        "orr %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */
        "fmxr fpscr, %[tmp]\n\t"

        /* prologue: first 8 elements of a and b */
        "fldmias %[src1]!, {s0-s3}\n\t"
        "fldmias %[src2]!, {s8-s11}\n\t"
        "fldmias %[src1]!, {s4-s7}\n\t"
        "fldmias %[src2]!, {s12-s15}\n\t"
        "fadds s8, s0, s8\n\t"
        "1:\n\t"
        "subs %[len], %[len], #16\n\t"
        "fadds s12, s4, s12\n\t"
        /* second group of 8: only when at least 16 were left */
        "fldmiasge %[src1]!, {s16-s19}\n\t"
        "fldmiasge %[src2]!, {s24-s27}\n\t"
        "fldmiasge %[src1]!, {s20-s23}\n\t"
        "fldmiasge %[src2]!, {s28-s31}\n\t"
        "faddsge s24, s16, s24\n\t"
        /* store the first group */
        "fstmias %[dst]!, {s8-s11}\n\t"
        "fstmias %[dst]!, {s12-s15}\n\t"
        "faddsge s28, s20, s28\n\t"
        /* prefetch for the next pass: only when more than 16 were left */
        "fldmiasgt %[src1]!, {s0-s3}\n\t"
        "fldmiasgt %[src2]!, {s8-s11}\n\t"
        "fldmiasgt %[src1]!, {s4-s7}\n\t"
        "fldmiasgt %[src2]!, {s12-s15}\n\t"
        "faddsge s8, s0, s8\n\t"
        /* store the second group */
        "fstmiasge %[dst]!, {s24-s27}\n\t"
        "fstmiasge %[dst]!, {s28-s31}\n\t"
        "bgt 1b\n\t"

        "bic %[tmp], %[tmp], #(7 << 16)\n\t" /* set vector size back to 1 */
        "fmxr fpscr, %[tmp]\n\t"
        : [dst] "+&r" (dest), [src1] "+&r" (a), [src2] "+&r" (b), [len] "+&r" (len), [tmp] "=&r" (tmp)
        : /* no inputs */
        : "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
          "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
          "cc", "memory");
#endif
}
/*
 * Copy floats from `a` to `dest` through the VFP register file.
 *
 * dest  destination buffer
 * a     source buffer
 * len   number of floats; expected to be a positive multiple of 8
 *
 * The loop is software-pipelined. The prologue loads the first 8 floats;
 * each pass then stores one group of 8, and, depending on the GE/GT flags
 * left by `subs`, loads and stores a second group of 8 and prefetches the
 * group for the next pass. A length that is not a multiple of 8 is
 * therefore not honoured exactly. A non-positive length returns without
 * touching either buffer.
 *
 * The routine sets the FPSCR vector length to 4 on entry and clears the
 * length field on exit, whatever it held before the call. The function
 * body is empty when TARGET_IPHONE_SIMULATOR is non-zero.
 */
inline void vcopy(float *dest, float *a, int len)
{
#if !TARGET_IPHONE_SIMULATOR
    int tmp;

    /* The prologue reads 8 floats and the first pass writes 8 floats
     * before the length is ever tested. */
    if (len <= 0) {
        return;
    }

    __asm__ __volatile__(
        "fmrx %[tmp], fpscr\n\t"
        "orr %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */
        "fmxr fpscr, %[tmp]\n\t"

        /* prologue: first 8 source elements */
        "fldmias %[src1]!, {s8-s11}\n\t"
        "fldmias %[src1]!, {s12-s15}\n\t"
        "1:\n\t"
        "subs %[len], %[len], #16\n\t"
        /* second group of 8: only when at least 16 were left */
        "fldmiasge %[src1]!, {s24-s27}\n\t"
        "fldmiasge %[src1]!, {s28-s31}\n\t"
        /* store the first group */
        "fstmias %[dst]!, {s8-s11}\n\t"
        "fstmias %[dst]!, {s12-s15}\n\t"
        /* prefetch for the next pass: only when more than 16 were left */
        "fldmiasgt %[src1]!, {s8-s11}\n\t"
        "fldmiasgt %[src1]!, {s12-s15}\n\t"
        /* store the second group */
        "fstmiasge %[dst]!, {s24-s27}\n\t"
        "fstmiasge %[dst]!, {s28-s31}\n\t"
        "bgt 1b\n\t"

        "bic %[tmp], %[tmp], #(7 << 16)\n\t" /* set vector size back to 1 */
        "fmxr fpscr, %[tmp]\n\t"
        : [dst] "+&r" (dest), [src1] "+&r" (a), [len] "+&r" (len), [tmp] "=&r" (tmp)
        : /* no inputs */
        : "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
          "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
          "cc", "memory");
#endif
}
299 #endif