lib/libm/arch/i387/fenv.c

   1 /* $NetBSD: fenv.c,v 1.6 2013/11/11 00:31:51 joerg Exp $ */
   2
   3 /*-
   4  * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  */
  28
  29 #include <sys/cdefs.h>
  30 __RCSID("$NetBSD: fenv.c,v 1.6 2013/11/11 00:31:51 joerg Exp $");
  31
  32 #include <sys/param.h>
  33 #include <sys/sysctl.h>
  34 #include <assert.h>
  35 #include <fenv.h>
  36 #include <stddef.h>
  37 #include <string.h>
  38
  /* fldcw: load the x87 control word from the lvalue `__word'. */
  #define __fldcw(__word)                                         \
          __asm__ __volatile__ ("fldcw %0" : : "m" (__word))

  /* fnstcw: store the x87 control word through `__dst' (no-wait form). */
  #define __fnstcw(__dst)                                         \
          __asm__ __volatile__ ("fnstcw %0" : "=m" (*(__dst)))

  /*
   * fnstsw: store the x87 status word through `__dst' (no-wait form).
   * The "=am" constraint lets the compiler pick either %ax or memory.
   */
  #define __fnstsw(__dst)                                         \
          __asm__ __volatile__ ("fnstsw %0" : "=am" (*(__dst)))

  /* fnclex: clear the x87 exception flags (no-wait form). */
  #define __fnclex()                                              \
          __asm__ __volatile__ ("fnclex")

  /* fldenv: load the x87 environment from the lvalue `__image'. */
  #define __fldenv(__image)                                       \
          __asm__ __volatile__ ("fldenv %0" : : "m" (__image))

  /* fnstenv: store the x87 environment through `__dst' (no-wait form). */
  #define __fnstenv(__dst)                                        \
          __asm__ __volatile__ ("fnstenv %0" : "=m" (*(__dst)))
  62
  /*
   * fwait: check for and handle pending unmasked x87 FPU exceptions.
   *
   * The macro takes no arguments; it is invoked as `__fwait()'.
   */
  #define __fwait()                                               \
          __asm__ __volatile__ ("fwait")
  66
  /* ldmxcsr: load the MXCSR register from the lvalue `__value'. */
  #define __ldmxcsr(__value)                                      \
          __asm__ __volatile__ ("ldmxcsr %0" : : "m" (__value))

  /* stmxcsr: store the MXCSR register through `__dst'. */
  #define __stmxcsr(__dst)                                        \
          __asm__ __volatile__ ("stmxcsr %0" : "=m" (*(__dst)))
  74
  75 /*
  76  * The following constant represents the default floating-point environment
  77  * (that is, the one installed at program startup) and has type pointer to
  78  * const-qualified fenv_t.
  79  *
  80  * It can be used as an argument to the functions within the <fenv.h> header
  81  * that manage the floating-point environment, namely fesetenv() and
  82  * feupdateenv().
  83  *
  84  * x87 fpu registers are 16bit wide. The upper bits, 31-16, are marked as
  85  * RESERVED. We provide a partial floating-point environment, where we
  86  * define only the lower bits. The reserved bits are extracted and set by the
  87  * consumers of FE_DFL_ENV, during runtime.
  88  */
  89 fenv_t __fe_dfl_env = {
  90         {
  91                 __NetBSD_NPXCW__,       /* Control word register */
  92                 0x0,                    /* Unused */
  93                 0x0000,                 /* Status word register */
  94                 0x0,                    /* Unused */
  95                 0x0000ffff,             /* Tag word register */
  96                 0x0,                    /* Unused */
  97                 {
  98                         0x0000, 0x0000,
  99                         0x0000, 0xffff
 100                 }
 101         },
 102         __INITIAL_MXCSR__               /* MXCSR register */
 103 };
 104
 105 /*
 106  * Test for SSE support on this processor.
 107  *
 108  * We need to use ldmxcsr/stmxcsr to get correct results if any part
 109  * of the program was compiled to use SSE floating-point, but we can't
 110  * use SSE on older processors.
 111  *
 112  * In order to do so, we need to query the processor capabilities via the CPUID
 113  * instruction. We can make it even simpler though, by querying the machdep.sse
 114  * sysctl.
 115  */
 116 static int __HAS_SSE = 0;
 117
 118 static void __init_libm(void) __attribute__ ((constructor, used));
 119
 120 static void __init_libm(void)
 121 {
 122 #if !defined(__minix)
 123         size_t oldlen = sizeof(__HAS_SSE);
 124         int rv;
 125         uint16_t control;
 126
 127         rv = sysctlbyname("machdep.sse", &__HAS_SSE, &oldlen, NULL, 0);
 128         if (rv == -1)
 129                 __HAS_SSE = 0;
 130 #else
 131         uint16_t control;
 132         __HAS_SSE = 0;
 133 #endif /* !defined(__minix) */
 134
 135         __fnstcw(&control);
 136         __fe_dfl_env.x87.control = control;
 137 }
 138
 139 /*
 140  * The feclearexcept() function clears the supported floating-point exceptions
 141  * represented by `excepts'.
 142  */
 143 int
 144 feclearexcept(int excepts)
 145 {
 146         fenv_t env;
 147         uint32_t mxcsr;
 148         int ex;
 149
 150         _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
 151
 152         ex = excepts & FE_ALL_EXCEPT;
 153
 154         /* It's ~3x faster to call fnclex, than store/load fp env */
 155         if (ex == FE_ALL_EXCEPT) {
 156                 __fnclex();
 157         } else {
 158                 __fnstenv(&env);
 159                 env.x87.status &= ~ex;
 160                 __fldenv(env);
 161         }
 162
 163         if (__HAS_SSE) {
 164                 __stmxcsr(&mxcsr);
 165                 mxcsr &= ~ex;
 166                 __ldmxcsr(mxcsr);
 167         }
 168
 169         /* Success */
 170         return (0);
 171 }
 172
 173 /*
 174  * The fegetexceptflag() function stores an implementation-defined
 175  * representation of the states of the floating-point status flags indicated by
 176  * the argument excepts in the object pointed to by the argument flagp.
 177  */
 178 int
 179 fegetexceptflag(fexcept_t *flagp, int excepts)
 180 {
 181         uint32_t mxcsr;
 182         uint16_t status;
 183         int ex;
 184
 185         _DIAGASSERT(flagp != NULL);
 186         _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
 187
 188         ex = excepts & FE_ALL_EXCEPT;
 189
 190         __fnstsw(&status);
 191         if (__HAS_SSE)
 192                 __stmxcsr(&mxcsr);
 193         else
 194                 mxcsr = 0;
 195
 196         *flagp = (mxcsr | status) & ex;
 197
 198         /* Success */
 199         return (0);
 200 }
 201
 /*
  * feraiseexcept() raises the supported floating-point exceptions named by
  * `excepts'.
  *
  * Rather than executing an instruction that produces each exception as a
  * side effect (which the standard would also permit), the flags are written
  * straight into the status registers via fesetexceptflag(), which also
  * takes care of validating the input.  The trailing fwait then checks for
  * any exception that is now pending and unmasked.  Always returns 0.
  */
 int
 feraiseexcept(int excepts)
 {
         fexcept_t raised = excepts & FE_ALL_EXCEPT;

         _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);

         fesetexceptflag(&raised, excepts);
         __fwait();

         /* Success */
         return (0);
 }
 226
 227 /*
 228  * This function sets the floating-point status flags indicated by the argument
 229  * `excepts' to the states stored in the object pointed to by `flagp'. It does
 230  * NOT raise any floating-point exceptions, but only sets the state of the flags.
 231  */
 232 int
 233 fesetexceptflag(const fexcept_t *flagp, int excepts)
 234 {
 235         fenv_t env;
 236         uint32_t mxcsr;
 237         int ex;
 238
 239         _DIAGASSERT(flagp != NULL);
 240         _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
 241
 242         ex = excepts & FE_ALL_EXCEPT;
 243
 244         __fnstenv(&env);
 245         env.x87.status &= ~ex;
 246         env.x87.status |= *flagp & ex;
 247         __fldenv(env);
 248
 249         if (__HAS_SSE) {
 250                 __stmxcsr(&mxcsr);
 251                 mxcsr &= ~ex;
 252                 mxcsr |= *flagp & ex;
 253                 __ldmxcsr(mxcsr);
 254         }
 255
 256         /* Success */
 257         return (0);
 258 }
 259
 260 /*
 261  * The fetestexcept() function determines which of a specified subset of the
 262  * floating-point exception flags are currently set. The `excepts' argument
 263  * specifies the floating-point status flags to be queried.
 264  */
 265 int
 266 fetestexcept(int excepts)
 267 {
 268         uint32_t mxcsr;
 269         uint16_t status;
 270         int ex;
 271
 272         _DIAGASSERT((excepts & ~FE_ALL_EXCEPT) == 0);
 273
 274         ex = excepts & FE_ALL_EXCEPT;
 275
 276         __fnstsw(&status);
 277         if (__HAS_SSE)
 278                 __stmxcsr(&mxcsr);
 279         else
 280                 mxcsr = 0;
 281
 282         return ((status | mxcsr) & ex);
 283 }
 284
 285 int
 286 fegetround(void)
 287 {
 288         uint16_t control;
 289
 290         /*
 291          * We assume that the x87 and the SSE unit agree on the
 292          * rounding mode.  Reading the control word on the x87 turns
 293          * out to be about 5 times faster than reading it on the SSE
 294          * unit on an Opteron 244.
 295          */
 296         __fnstcw(&control);
 297
 298         return (control & __X87_ROUND_MASK);
 299 }
 300
 301 /*
 302  * The fesetround() function shall establish the rounding direction represented
 303  * by its argument round. If the argument is not equal to the value of a
 304  * rounding direction macro, the rounding direction is not changed.
 305  */
 306 int
 307 fesetround(int round)
 308 {
 309         uint32_t mxcsr;
 310         uint16_t control;
 311
 312         if (round & ~__X87_ROUND_MASK) {
 313                 /* Failure */
 314                 return (-1);
 315         }
 316
 317         __fnstcw(&control);
 318         control &= ~__X87_ROUND_MASK;
 319         control |= round;
 320         __fldcw(control);
 321
 322         if (__HAS_SSE) {
 323                 __stmxcsr(&mxcsr);
 324                 mxcsr &= ~(__X87_ROUND_MASK << __SSE_ROUND_SHIFT);
 325                 mxcsr |= round << __SSE_ROUND_SHIFT;
 326                 __ldmxcsr(mxcsr);
 327         }
 328
 329         /* Success */
 330         return (0);
 331 }
 332
 333 /*
 334  * The fegetenv() function attempts to store the current floating-point
 335  * environment in the object pointed to by envp.
 336  */
 337 int
 338 fegetenv(fenv_t *envp)
 339 {
 340         uint32_t mxcsr;
 341
 342         _DIAGASSERT(flagp != NULL);
 343
 344         /*
 345          * fnstenv masks all exceptions, so we need to restore the old control
 346          * word to avoid this side effect.
 347          */
 348         __fnstenv(envp);
 349         __fldcw(envp->x87.control);
 350         if (__HAS_SSE) {
 351                 __stmxcsr(&mxcsr);
 352                 envp->mxcsr = mxcsr;
 353         }
 354
 355         /* Success */
 356         return (0);
 357 }
 358
 359 /*
 360  * The feholdexcept() function saves the current floating-point environment in
 361  * the object pointed to by envp, clears the floating-point status flags, and
 362  * then installs a non-stop (continue on floating-point exceptions) mode, if
 363  * available, for all floating-point exceptions.
 364  */
 365 int
 366 feholdexcept(fenv_t *envp)
 367 {
 368         uint32_t mxcsr;
 369
 370         _DIAGASSERT(envp != NULL);
 371
 372         __fnstenv(envp);
 373         __fnclex();
 374         if (__HAS_SSE) {
 375                 __stmxcsr(&mxcsr);
 376                 envp->mxcsr = mxcsr;
 377                 mxcsr &= ~FE_ALL_EXCEPT;
 378                 mxcsr |= FE_ALL_EXCEPT << __SSE_EMASK_SHIFT;
 379                 __ldmxcsr(mxcsr);
 380         }
 381
 382         /* Success */
 383         return (0);
 384 }
 385
 386 /*
 387  * The fesetenv() function attempts to establish the floating-point environment
 388  * represented by the object pointed to by envp. The argument `envp' points
 389  * to an object set by a call to fegetenv() or feholdexcept(), or equal a
 390  * floating-point environment macro. The fesetenv() function does not raise
 391  * floating-point exceptions, but only installs the state of the floating-point
 392  * status flags represented through its argument.
 393  */
 394 int
 395 fesetenv(const fenv_t *envp)
 396 {
 397         fenv_t env;
 398
 399         _DIAGASSERT(envp != NULL);
 400
 401         /* Store the x87 floating-point environment */
 402         memset(&env, 0, sizeof(env));
 403         __fnstenv(&env);
 404
 405         __fe_dfl_env.x87.unused1 = env.x87.unused1;
 406         __fe_dfl_env.x87.unused2 = env.x87.unused2;
 407         __fe_dfl_env.x87.unused3 = env.x87.unused3;
 408         memcpy(__fe_dfl_env.x87.others,
 409                env.x87.others,
 410                sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
 411
 412         __fldenv(envp->x87);
 413         if (__HAS_SSE)
 414                 __ldmxcsr(envp->mxcsr);
 415
 416         /* Success */
 417         return (0);
 418 }
 419
 420 /*
 421  * The feupdateenv() function saves the currently raised floating-point
 422  * exceptions in its automatic storage, installs the floating-point environment
 423  * represented by the object pointed to by `envp', and then raises the saved
 424  * floating-point exceptions. The argument `envp' shall point to an object set
 425  * by a call to feholdexcept() or fegetenv(), or equal a floating-point
 426  * environment macro.
 427  */
 428 int
 429 feupdateenv(const fenv_t *envp)
 430 {
 431         fenv_t env;
 432         uint32_t mxcsr;
 433         uint16_t status;
 434
 435         _DIAGASSERT(envp != NULL);
 436
 437         /* Store the x87 floating-point environment */
 438         memset(&env, 0, sizeof(env));
 439         __fnstenv(&env);
 440
 441         __fe_dfl_env.x87.unused1 = env.x87.unused1;
 442         __fe_dfl_env.x87.unused2 = env.x87.unused2;
 443         __fe_dfl_env.x87.unused3 = env.x87.unused3;
 444         memcpy(__fe_dfl_env.x87.others,
 445                env.x87.others,
 446                sizeof(__fe_dfl_env.x87.others) / sizeof(uint32_t));
 447
 448         __fnstsw(&status);
 449         if (__HAS_SSE)
 450                 __stmxcsr(&mxcsr);
 451         else
 452                 mxcsr = 0;
 453         fesetenv(envp);
 454         feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
 455
 456         /* Success */
 457         return (0);
 458 }
 459
 460 /*
 461  * The following functions are extensions to the standard
 462  */
 463 int
 464 feenableexcept(int mask)
 465 {
 466         uint32_t mxcsr, omask;
 467         uint16_t control;
 468
 469         mask &= FE_ALL_EXCEPT;
 470         __fnstcw(&control);
 471         if (__HAS_SSE)
 472                 __stmxcsr(&mxcsr);
 473         else
 474                 mxcsr = 0;
 475
 476         omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
 477         control &= ~mask;
 478         __fldcw(control);
 479         if (__HAS_SSE) {
 480                 mxcsr &= ~(mask << __SSE_EMASK_SHIFT);
 481                 __ldmxcsr(mxcsr);
 482         }
 483
 484         return (FE_ALL_EXCEPT & ~omask);
 485 }
 486
 487 int
 488 fedisableexcept(int mask)
 489 {
 490         uint32_t mxcsr, omask;
 491         uint16_t control;
 492
 493         mask &= FE_ALL_EXCEPT;
 494         __fnstcw(&control);
 495         if (__HAS_SSE)
 496                 __stmxcsr(&mxcsr);
 497         else
 498                 mxcsr = 0;
 499
 500         omask = (control | mxcsr >> __SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
 501         control |= mask;
 502         __fldcw(control);
 503         if (__HAS_SSE) {
 504                 mxcsr |= mask << __SSE_EMASK_SHIFT;
 505                 __ldmxcsr(mxcsr);
 506         }
 507
 508         return (FE_ALL_EXCEPT & ~omask);
 509 }
 510
 /*
  * fegetexcept() returns the set of currently enabled (unmasked) exceptions,
  * derived from the mask bits of the x87 control word.
  */
 int
 fegetexcept(void)
 {
         uint16_t x87_cw;

         /*
          * Only the x87 is consulted; the masks of the x87 and the SSE unit
          * are assumed to be the same.
          */
         __fnstcw(&x87_cw);

         /* Enabled exceptions are the ones whose mask bit is clear. */
         return (~x87_cw & FE_ALL_EXCEPT);
 }