2 SuperCollider real time audio synthesis system
3 Copyright (c) 2002 James McCartney. All rights reserved.
4 http://www.audiosynth.com
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * Created by James McCartney on Sat Mar 15 2003.
26 * Copyright (c) 2003 __MyCompanyName__. All rights reserved.
34 // Return true if CPU has vector unit.
35 bool sc_HasVectorUnit();
37 // Return true if vector unit is present and should be used.
38 // Vectorisation can be turned off by exporting SC_NOVEC in the
39 // process environment.
40 bool sc_UseVectorUnit();
42 // Set denormal FTZ mode on CPUs that need/support it.
43 void sc_SetDenormalFlags();
45 bool sc_UseVectorUnit()
47 if (sc_HasVectorUnit()) {
48 const char* novec
= getenv("SC_NOVEC");
49 return !novec
|| (strcmp(novec
, "0") == 0);
54 #if defined(__APPLE__) && !defined(SC_IPHONE)
55 # include <Carbon/Carbon.h>
56 #include <TargetConditionals.h>
58 bool sc_HasVectorUnit()
61 Gestalt(gestaltPowerPCProcessorFeatures
, &response
);
62 //printf("HasAltivec %p %d\n", response, response & (1<<gestaltPowerPCHasVectorInstructions));
63 return response
& (1<<gestaltPowerPCHasVectorInstructions
);
67 #include <xmmintrin.h>
69 void sc_SetDenormalFlags()
72 _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON
);
73 _mm_setcsr(_mm_getcsr() | 0x40);
78 void sc_SetDenormalFlags()
84 #elif defined(__linux__) && defined(__ALTIVEC__)
91 // kernel independent altivec detection
92 // contributed by niklas werner
93 // borrowed from mplayer, who borrowed from libmpeg2
96 static sigjmp_buf sigIllJmpBuf
;
97 static volatile sig_atomic_t sigIllCanJump
= 0;
98 static sighandler_t sigIllOldHandler
= SIG_DFL
;
100 static void sigIllHandler(int sig
)
102 if (!sigIllCanJump
) {
103 signal(sig
, sigIllOldHandler
);
107 siglongjmp(sigIllJmpBuf
, 1);
110 bool sc_HasVectorUnit()
112 sigIllOldHandler
= signal(SIGILL
, sigIllHandler
);
114 if (sigsetjmp(sigIllJmpBuf
, 1)) {
115 signal(SIGILL
, sigIllOldHandler
);
118 asm volatile ("mtspr 256, %0\n\t"
119 "vand %%v0, %%v0, %%v0"
122 signal(SIGILL
, sigIllOldHandler
);
128 void sc_SetDenormalFlags()
131 #elif (defined(__linux__) || defined(__FreeBSD__)) && defined(__SSE__)
132 # include <xmmintrin.h>
134 // cpuid function that works with -fPIC from `minor' at http://red-bean.com
135 // http://svn.red-bean.com/repos/minor/trunk/gc/barriers-ia-32.c
136 // http://svn.red-bean.com/repos/minor/trunk/gc/barriers-amd64.c
139 # if defined(__i386__)
140 /* If the current processor supports the CPUID instruction, execute
141 one, with REQUEST in %eax, and set *EAX, *EBX, *ECX, and *EDX to
142 the values the 'cpuid' stored in those registers. Return true if
143 the current processor supports CPUID, false otherwise. */
146 uint32_t *eax
, uint32_t *ebx
, uint32_t *ecx
, uint32_t *edx
)
148 uint32_t pre_change
, post_change
;
149 const uint32_t id_flag
= 0x200000;
151 /* This is pretty much the standard way to detect whether the CPUID
152 instruction is supported, as recommended in the Intel
153 documentation: try to change the ID bit in the EFLAGS register.
154 If we can change it, then the CPUID instruction is
156 asm ("pushfl\n\t" /* Save %eflags to restore later. */
157 "pushfl\n\t" /* Push second copy, for manipulation. */
158 "popl %1\n\t" /* Pop it into post_change. */
159 "movl %1,%0\n\t" /* Save present value in pre_change. */
160 "xorl %2,%1\n\t" /* Tweak bit in post_change. */
161 "pushl %1\n\t" /* Push tweaked copy... */
162 "popfl\n\t" /* ... and pop it into %eflags. */
163 "pushfl\n\t" /* Did it change? Push new %eflags... */
164 "popl %1\n\t" /* ... and pop it into post_change. */
165 "popfl" /* Restore original value of %eflags. */
166 : "=&r" (pre_change
), "=&r" (post_change
)
169 /* If the bit changed, then we support the CPUID instruction. */
170 if ((pre_change
^ post_change
) & id_flag
)
172 /* The IA-32 ABI specifies that, in position-independent code,
173 %ebx holds the address of the global offset table. GCC seems
174 to be unable to handle asms in PIC with result operands that
175 live in %ebx. For example:
187 $ gcc -c -fPIC pic-ebx-asm.c
188 pic-ebx-asm.c: In function `foo':
189 pic-ebx-asm.c:6: error: can't find a register in class `BREG'
190 while reloading `asm'
193 But the CPUID instruction leaves results in %ebx, so we have
196 To work around this, we save %ebx in %esi, restore %ebx after
197 we've done the 'cpuid', and return %ebx's value in %esi.
198 Thus the interesting constraint / operand pair '"=S" (*ebx)'.
200 We include "memory" in the clobber list, because this is a
201 synchronizing instruction; the caller may be expecting other
202 processors' writes to become visible here. */
203 asm volatile ("mov %%ebx, %%esi\n\t" /* Save %ebx. */
205 "xchgl %%ebx, %%esi" /* Restore %ebx. */
206 : "=a" (*eax
), "=S" (*ebx
), "=c" (*ecx
), "=d" (*edx
)
215 # elif defined(__x86_64__)
216 /* Execute a CPUID instruction with REQUEST in %eax, and set *EAX,
217 *EBX, *ECX, and *EDX to the values the 'cpuid' stored in those
221 uint32_t *eax
, uint32_t *ebx
, uint32_t *ecx
, uint32_t *edx
)
223 /* We include "memory" in the clobber list, because this is a
224 synchronizing instruction; the caller may be expecting other
225 processors' writes to become visible here. */
226 asm volatile ("cpuid\n\t"
227 : "=a" (*eax
), "=b" (*ebx
), "=c" (*ecx
), "=d" (*edx
)
233 # error Unknown SSE CPU
236 bool sc_HasVectorUnit()
238 // SSE detection from steve harris
239 // http://www.ecs.soton.ac.uk/~swh/denormal-finder/
242 if (cpuid(1, &a
, &b
, &c
, &d
)) {
249 void sc_SetDenormalFlags()
251 // SSE denormal setup from steve harris
252 // http://www.ecs.soton.ac.uk/~swh/denormal-finder/
256 if (sc_HasVectorUnit()) {
257 // FTZ flag (flush to zero)
258 _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON
);
260 cpuid(0, &a
, &b
, &c
, &d
);
261 if (b
== 0x756e6547) { /* It's an Intel */
262 int stepping
, model
, family
, extfamily
;
264 family
= (a
>> 8) & 0xf;
265 extfamily
= (a
>> 20) & 0xff;
266 model
= (a
>> 4) & 0xf;
268 if (family
== 15 && extfamily
== 0 && model
== 0 && stepping
< 7) {
273 cpuid(1, &a
, &b
, &c
, &d
);
274 if (d
& 1<<26) { /* bit 26, SSE2 support */
275 // DAZ flag (disable denormals)
276 _mm_setcsr(_mm_getcsr() | 0x40);
281 bool sc_HasVectorUnit()
285 void sc_SetDenormalFlags()