exp-bbv/tests/amd64-linux/fldcw_check.S

   1
   2 .globl _start
   3         # Entry point; control falls straight through every labelled section below.
   4 _start:
   5         # This code tests for the fldcw "load floating point control word"
   6         #   instruction.  On most x86 processors the retired_instruction
   7         #   performance counter counts this as one instruction.  However,
   8         #   on Pentium 4 systems it counts as two.  Therefore this can
   9         #   affect BBV results on such a system.
  10         # fldcw is most often used to set the rounding mode when doing
  11         #   floating point to integer conversions
  12
  13         # It is encoded as "d9 /5" which means
  14         #   1101 1001 xx10 1yyy
  15         # Where xx is the "mod" field, which will be 00, 01, or 10 to select
  16         #   no, 8-bit, or 32-bit offset, and yyy is the "r/m" (base register) field
  17
  18         # these instructions encode like fldcw (d9 opcode, reg bits 101) but with mod=11;
  19         # they can cause false positives if the fldcw check is not explicit enough
  20 similar:
  21         fld1                            # d9 e8  (push +1.0)
  22         fldl2t                          # d9 e9  (push log2(10))
  23         fldl2e                          # d9 ea  (push log2(e))
  24         fldpi                           # d9 eb  (push pi)
  25         fldlg2                          # d9 ec  (push log10(2))
  26         fldln2                          # d9 ed  (push ln(2))
  27         fldz                            # d9 ee  (push +0.0)
  28
  29         # check some varied ways of calling fldcw
  30         # (the control word is stored first, so the reload leaves it unchanged)
  31         # offset on stack
  32 stack:
  33         sub     $8,%rsp                 # allocate space on stack
  34         fnstcw  2(%rsp)                 # store current control word at rsp+2
  35         fldcw   2(%rsp)                 # reload the same value from rsp+2
  36         add     $8,%rsp                 # restore stack
  37
  38         # 64-bit register used as the base address, once per register
  39 sixtyfour_reg:
  40         fnstcw  cw                      # save current control word in cw
  41         mov     $cw,%rax                # rax = address of cw
  42         fldcw   0(%rax)                 # rax
  43         mov     $cw,%rbx                # rbx = address of cw
  44         fldcw   0(%rbx)                 # rbx
  45         mov     $cw,%rcx                # rcx = address of cw
  46         fldcw   0(%rcx)                 # rcx
  47         mov     $cw,%rdx                # rdx = address of cw
  48         fldcw   0(%rdx)                 # rdx
  49
  50         # 32-bit register
  51
  52         # Note!  The assembler that comes with SuSE 9.1
  53         #        cannot assemble 32-bit fldcw on 64-bit systems
  54         #        Hence the need to hand-code them
  55         # Byte layout: 0x67 address-size prefix, 0xd9 opcode, then the ModRM
  56         # byte (mod=00, reg=101, r/m selecting the 32-bit base register)
  57 thirtytwo_reg:
  58         fnstcw  cw                      # save current control word in cw
  59         mov     $cw,%eax                # eax = address of cw (must fit in 32 bits)
  60
  61 #       fldcw   0(%eax)                 # eax
  62         .byte   0x67,0xd9,0x28          # ModRM 0x28: r/m=000 (eax)
  63
  64         mov     $cw,%ebx                # ebx = address of cw
  65
  66 #       fldcw   0(%ebx)                 # ebx
  67         .byte   0x67,0xd9,0x2b          # ModRM 0x2b: r/m=011 (ebx)
  68
  69         mov     $cw,%ecx                # ecx = address of cw
  70
  71 #       fldcw   0(%ecx)                 # ecx
  72         .byte   0x67,0xd9,0x29          # ModRM 0x29: r/m=001 (ecx)
  73
  74         mov     $cw,%edx                # edx = address of cw
  75
  76 #       fldcw   0(%edx)                 # edx
  77         .byte   0x67,0xd9,0x2a          # ModRM 0x2a: r/m=010 (edx)
  78
  79         # register + 8-bit offset
  80 eight_bit:
  81         mov     $cw,%eax                # eax = address of cw
  82         sub     $32,%eax                # bias it so that 32(%eax) is cw
  83         # ModRM has mod=01, so one displacement byte (0x20 = 32) follows it
  84 #       fldcw   32(%eax)                # eax + 8 bit offset
  85         .byte 0x67,0xd9,0x68,0x20
  86
  87         mov     %eax,%ebx               # copy the biased address
  88 #       fldcw   32(%ebx)                # ebx + 8 bit offset
  89         .byte   0x67,0xd9,0x6b,0x20
  90
  91         mov     %eax,%ecx               # copy the biased address
  92
  93 #       fldcw   32(%ecx)                # ecx + 8 bit offset
  94         .byte   0x67,0xd9,0x69,0x20
  95
  96         mov     %eax,%edx               # copy the biased address
  97
  98 #       fldcw   32(%edx)                # edx + 8 bit offset
  99         .byte   0x67,0xd9,0x6a,0x20
 100
 101
 102         # register + 32-bit offset
 103 thirtytwo_bit:
 104         mov     $cw,%eax                # eax = address of cw
 105         sub     $30000,%eax             # bias it so that 30000(%eax) is cw
 106         # displacement bytes 0x30,0x75,0x00,0x00 are 30000 (0x7530), little-endian
 107 #       fldcw   30000(%eax)             # eax + 32 bit offset
 108         .byte   0x67,0xd9,0xa8,0x30,0x75,0x00,0x00
 109
 110         mov     %eax,%ebx               # copy the biased address
 111
 112 #       fldcw   30000(%ebx)             # ebx + 32 bit offset
 113         .byte   0x67,0xd9,0xab,0x30,0x75,0x00,0x00
 114
 115         mov     %eax,%ecx               # copy the biased address
 116
 117 #       fldcw   30000(%ecx)             # ecx + 32 bit offset
 118         .byte   0x67,0xd9,0xa9,0x30,0x75,0x00,0x00
 119
 120         mov     %eax,%edx               # copy the biased address
 121
 122 #       fldcw   30000(%edx)             # edx + 32 bit offset
 123         .byte   0x67,0xd9,0xaa,0x30,0x75,0x00,0x00
 124
 125         # check an fp/integer conversion
 126         # in a loop to give a bigger count
 127
 128         mov     $1024,%rcx              # loop counter: 1024 iterations
 129 big_loop:
 130         # each iteration executes fldcw twice: set round-to-zero, then restore
 131         fldl    three                   # load value onto fp stack
 132         fnstcw  saved_cw                # store control word to mem
 133         movzwl  saved_cw, %eax          # load cw from mem, zero extending
 134         movb    $12, %ah                # set cw for "round to zero" (high byte = 0x0c)
 135         movw    %ax, cw                 # store back to memory
 136         fldcw   cw                      # load new rounding mode
 137         fistpl  result                  # save stack value as integer to mem (pops it)
 138         fldcw   saved_cw                # restore old cw
 139
 140         loop    big_loop                # loop to make the count more obvious
 141
 142         movl    result, %ebx            # sanity check to see if the
 143         cmp     $3,%rbx                 # result is the expected one
 144         je      exit                    # result is 3: skip the error message
 145
 146 print_error:
 147         mov     $1,%rax                 # write syscall
 148         mov     $1,%rdi                 # stdout
 149         mov     $error,%rsi             # string
 150         mov     $22,%rdx                # length of string (terminating NUL not written)
 151         syscall
 152         # NOTE(review): falls through into exit, so the status is 0 even after an error
 153 exit:
 154         xor     %rdi, %rdi              # return 0
 155         mov     $60, %rax               # SYSCALL_EXIT
 156         syscall                         # exit(0); execution ends here
 157
 158
 159
 160 .data
 161 saved_cw:       .long 0                 # control word saved by fnstcw in big_loop
 162 cw:     .long   0                       # control word operand used by the fldcw tests
 163 result: .long   0                       # 32-bit integer written by fistpl
 164 three:  .long   0                       # a floating point 3.0 (low 32 bits of the double)
 165         .long   1074266112              # high 32 bits of the double: 0x40080000
 166 error:  .asciz  "Error!  Wrong result!\n"       # 22 bytes plus terminating NUL