1 #ifndef _WDL_DENORMAL_H_
2 #define _WDL_DENORMAL_H_
6 // note: the _aggressive versions filter out anything less than around 1.0e-16 or so (approximately) to 0.0, including -0.0 (becomes 0.0)
7 // note: new! the _aggressive versions also filter inf and NaN to 0.0
10 #define WDL_DENORMAL_INLINE inline
11 #elif defined(_MSC_VER)
12 #define WDL_DENORMAL_INLINE __inline
14 #ifdef WDL_STATICFUNC_UNUSED
15 #define WDL_DENORMAL_INLINE WDL_STATICFUNC_UNUSED
17 #define WDL_DENORMAL_INLINE
21 static WDL_DENORMAL_INLINE
unsigned int WDL_DENORMAL_FLOAT_W(const float *a
) { unsigned int v
; memcpy(&v
,a
,sizeof(v
)); return v
; }
22 static WDL_DENORMAL_INLINE
unsigned int WDL_DENORMAL_DOUBLE_HW(const double *a
) { WDL_UINT64 v
; memcpy(&v
,(char*)a
,sizeof(v
)); return (unsigned int) (v
>>32); }
24 #define WDL_DENORMAL_DOUBLE_AGGRESSIVE_CUTOFF 0x3cA00000 // 0x3B8000000 maybe instead? that's 10^-5 smaller or so
25 #define WDL_DENORMAL_FLOAT_AGGRESSIVE_CUTOFF 0x25000000
// Define WDL_DENORMAL_WANTS_SCOPED_FTZ and use a WDL_denormal_ftz_scope in addition to the denormal_*() calls;
// when FTZ is available it will be used instead of the explicit filtering.
31 #ifdef WDL_DENORMAL_WANTS_SCOPED_FTZ
33 #if defined(__SSE2__) || _M_IX86_FP >= 2 || defined(_M_X64)
34 #define WDL_DENORMAL_FTZMODE
35 #define WDL_DENORMAL_FTZSTATE_TYPE unsigned int
39 #include <xmmintrin.h>
41 #define wdl_denorm_mm_getcsr() _mm_getcsr()
42 #define wdl_denorm_mm_setcsr(x) _mm_setcsr(x)
44 #define wdl_denorm_mm_csr_mask (32768|4096|2048|1024|512|256|128|64) // FTZ, all exceptions, DAZ
46 #define wdl_denorm_mm_csr_mask (32768|4096|2048|1024|512|256|128) // FTZ and all exceptions (target SSE2)
48 #elif defined(__arm__) || defined(__aarch64__)
49 #define WDL_DENORMAL_FTZMODE
50 #define WDL_DENORMAL_FTZSTATE_TYPE unsigned long
51 static unsigned long __attribute__((unused
)) wdl_denorm_mm_getcsr()
55 asm volatile ( "mrs %0, fpcr" : "=r" (rv
));
57 asm volatile ( "fmrx %0, fpscr" : "=r" (rv
));
61 static void __attribute__((unused
)) wdl_denorm_mm_setcsr(unsigned long v
)
64 asm volatile ( "msr fpcr, %0" :: "r"(v
));
66 asm volatile ( "fmxr fpscr, %0" :: "r"(v
));
69 #define wdl_denorm_mm_csr_mask (1<<24)
// Scope object that makes sure every bit of wdl_denorm_mm_csr_mask is set in
// the floating-point control register while it is alive.
//
// Construction reads the current register value; if any mask bit is missing
// the register is rewritten with the mask OR'ed in and the previous value is
// remembered. Destruction writes the remembered value back, but only when the
// constructor actually changed something.
//
// When WDL_DENORMAL_FTZMODE is not defined the class is empty and does nothing.
class WDL_denormal_ftz_scope
{
public:
  WDL_denormal_ftz_scope()
  {
#ifdef WDL_DENORMAL_FTZMODE
    const WDL_DENORMAL_FTZSTATE_TYPE wanted = wdl_denorm_mm_csr_mask;
    old_state = wdl_denorm_mm_getcsr();
    // only touch the register (and schedule a restore) if a wanted bit is clear
    need_restore = (old_state & wanted) != wanted;
    if (need_restore)
      wdl_denorm_mm_setcsr(old_state | wanted);
#endif
  }

  ~WDL_denormal_ftz_scope()
  {
#ifdef WDL_DENORMAL_FTZMODE
    if (need_restore) wdl_denorm_mm_setcsr(old_state);
#endif
  }

#ifdef WDL_DENORMAL_FTZMODE
  WDL_DENORMAL_FTZSTATE_TYPE old_state; // register value seen at construction
  bool need_restore;                    // true when the constructor modified the register
#endif
};
102 #if !defined(WDL_DENORMAL_FTZMODE) && !defined(WDL_DENORMAL_DO_NOT_FILTER)
104 static double WDL_DENORMAL_INLINE
denormal_filter_double(double a
)
106 return (WDL_DENORMAL_DOUBLE_HW(&a
)&0x7ff00000) ? a
: 0.0;
109 static double WDL_DENORMAL_INLINE
denormal_filter_double2(double a
)
111 return ((WDL_DENORMAL_DOUBLE_HW(&a
)+0x100000)&0x7ff00000) > 0x100000 ? a
: 0.0;
114 static double WDL_DENORMAL_INLINE
denormal_filter_double_aggressive(double a
)
116 return ((WDL_DENORMAL_DOUBLE_HW(&a
)+0x100000)&0x7ff00000) >= WDL_DENORMAL_DOUBLE_AGGRESSIVE_CUTOFF
? a
: 0.0;
119 static float WDL_DENORMAL_INLINE
denormal_filter_float(float a
)
121 return (WDL_DENORMAL_FLOAT_W(&a
)&0x7f800000) ? a
: 0.0f
;
124 static float WDL_DENORMAL_INLINE
denormal_filter_float2(float a
)
126 return ((WDL_DENORMAL_FLOAT_W(&a
)+0x800000)&0x7f800000) > 0x800000 ? a
: 0.0f
;
130 static float WDL_DENORMAL_INLINE
denormal_filter_float_aggressive(float a
)
132 return ((WDL_DENORMAL_FLOAT_W(&a
)+0x800000)&0x7f800000) >= WDL_DENORMAL_FLOAT_AGGRESSIVE_CUTOFF
? a
: 0.0f
;
134 static void WDL_DENORMAL_INLINE
denormal_fix_double(double *a
)
136 if (!(WDL_DENORMAL_DOUBLE_HW(a
)&0x7ff00000)) *a
=0.0;
139 static void WDL_DENORMAL_INLINE
denormal_fix_double_aggressive(double *a
)
141 if (((WDL_DENORMAL_DOUBLE_HW(a
)+0x100000)&0x7ff00000) < WDL_DENORMAL_DOUBLE_AGGRESSIVE_CUTOFF
) *a
=0.0;
144 static void WDL_DENORMAL_INLINE
denormal_fix_float(float *a
)
146 if (!(WDL_DENORMAL_FLOAT_W(a
)&0x7f800000)) *a
=0.0f
;
148 static void WDL_DENORMAL_INLINE
denormal_fix_float_aggressive(float *a
)
150 if (((WDL_DENORMAL_FLOAT_W(a
)+0x800000)&0x7f800000) < WDL_DENORMAL_FLOAT_AGGRESSIVE_CUTOFF
) *a
=0.0f
;
155 #ifdef __cplusplus // automatic typed versions (though one should probably use the explicit versions...
158 static double WDL_DENORMAL_INLINE
denormal_filter(double a
)
160 return (WDL_DENORMAL_DOUBLE_HW(&a
)&0x7ff00000) ? a
: 0.0;
162 static double WDL_DENORMAL_INLINE
denormal_filter_aggressive(double a
)
164 return ((WDL_DENORMAL_DOUBLE_HW(&a
)+0x100000)&0x7ff00000) >= WDL_DENORMAL_DOUBLE_AGGRESSIVE_CUTOFF
? a
: 0.0;
167 static float WDL_DENORMAL_INLINE
denormal_filter(float a
)
169 return (WDL_DENORMAL_FLOAT_W(&a
)&0x7f800000) ? a
: 0.0f
;
172 static float WDL_DENORMAL_INLINE
denormal_filter_aggressive(float a
)
174 return ((WDL_DENORMAL_FLOAT_W(&a
)+0x800000)&0x7f800000) >= WDL_DENORMAL_FLOAT_AGGRESSIVE_CUTOFF
? a
: 0.0f
;
177 static void WDL_DENORMAL_INLINE
denormal_fix(double *a
)
179 if (!(WDL_DENORMAL_DOUBLE_HW(a
)&0x7ff00000)) *a
=0.0;
181 static void WDL_DENORMAL_INLINE
denormal_fix_aggressive(double *a
)
183 if (((WDL_DENORMAL_DOUBLE_HW(a
)+0x100000)&0x7ff00000) < WDL_DENORMAL_DOUBLE_AGGRESSIVE_CUTOFF
) *a
=0.0;
185 static void WDL_DENORMAL_INLINE
denormal_fix(float *a
)
187 if (!(WDL_DENORMAL_FLOAT_W(a
)&0x7f800000)) *a
=0.0f
;
189 static void WDL_DENORMAL_INLINE
denormal_fix_aggressive(float *a
)
191 if (((WDL_DENORMAL_FLOAT_W(a
)+0x800000)&0x7f800000) < WDL_DENORMAL_FLOAT_AGGRESSIVE_CUTOFF
) *a
=0.0f
;
196 #endif // cplusplus versions
198 #else // end of !WDL_DENORMAL_DO_NOT_FILTER (and other platform-specific checks)
200 #define denormal_filter(x) (x)
201 #define denormal_filter2(x) (x)
202 #define denormal_filter_double(x) (x)
203 #define denormal_filter_double2(x) (x)
204 #define denormal_filter_double_aggressive(x) (x)
205 #define denormal_filter_float(x) (x)
206 #define denormal_filter_float2(x) (x)
207 #define denormal_filter_float_aggressive(x) (x)
208 #define denormal_filter_aggressive(x) (x)
209 #define denormal_fix(x) do { } while(0)
210 #define denormal_fix_aggressive(x) do { } while(0)
211 #define denormal_fix_double(x) do { } while(0)
212 #define denormal_fix_double_aggressive(x) do { } while(0)
213 #define denormal_fix_float(x) do { } while(0)
214 #define denormal_fix_float_aggressive(x) do { } while(0)
// this isn't a denormal function, but it is similar, so we'll put it here as a bonus
222 static void WDL_DENORMAL_INLINE
GetDoubleMaxAbsValue(double *out
, const double *in
) // note: the value pointed to by "out" must be >=0.0, __NOT__ <= -0.0
225 memcpy(&i
,in
,sizeof(i
));
226 memcpy(&o
,out
,sizeof(o
));
227 i
&= WDL_UINT64_CONST(0x7fffffffffffffff);
228 if (i
> o
) memcpy(out
,&i
,sizeof(i
));
231 static void WDL_DENORMAL_INLINE
GetFloatMaxAbsValue(float *out
, const float *in
) // note: the value pointed to by "out" must be >=0.0, __NOT__ <= -0.0
234 memcpy(&i
, in
, sizeof(i
));
235 memcpy(&o
, out
, sizeof(o
));
237 if (i
> o
) memcpy(out
, &i
, sizeof(i
));
242 static void WDL_DENORMAL_INLINE
GetFloatMaxAbsValue(double *out
, const double *in
) // note: the value pointed to by "out" must be >=0.0, __NOT__ <= -0.0
244 GetDoubleMaxAbsValue(out
,in
);