From ea6e34db64c7da7cb885197316c6b5e7d048bdb9 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 30 Apr 2002 20:51:32 +0000 Subject: [PATCH] NASM 0.91 --- Licence | 75 +++ Makefile | 93 ++++ Makefile.bor | 76 ++++ Makefile.dos | 72 +++ Readme | 54 +++ assemble.c | 945 ++++++++++++++++++++++++++++++++++++++ assemble.h | 17 + disasm.c | 667 +++++++++++++++++++++++++++ disasm.h | 18 + float.c | 389 ++++++++++++++++ float.h | 16 + insns.dat | 984 ++++++++++++++++++++++++++++++++++++++++ insns.h | 66 +++ insns.pl | 160 +++++++ internal.doc | 268 +++++++++++ labels.c | 292 ++++++++++++ labels.h | 17 + lcc/Readme | 57 +++ lcc/bind.c | 23 + lcc/lin-aout.c | 44 ++ lcc/lin-elf.c | 45 ++ lcc/x86nasm.md | 703 ++++++++++++++++++++++++++++ misc/magic | 6 + misc/nasm.sl | 305 +++++++++++++ names.c | 79 ++++ nasm.c | 648 ++++++++++++++++++++++++++ nasm.doc | 996 ++++++++++++++++++++++++++++++++++++++++ nasm.h | 443 ++++++++++++++++++ nasmlib.c | 488 ++++++++++++++++++++ nasmlib.h | 115 +++++ ndisasm.c | 270 +++++++++++ ndisasm.doc | 199 ++++++++ outaout.c | 466 +++++++++++++++++++ outas86.c | 548 ++++++++++++++++++++++ outbin.c | 303 +++++++++++++ outcoff.c | 611 +++++++++++++++++++++++++ outdbg.c | 138 ++++++ outelf.c | 620 +++++++++++++++++++++++++ outform.c | 42 ++ outform.h | 167 +++++++ outobj.c | 1229 +++++++++++++++++++++++++++++++++++++++++++++++++ outrdf.c | 467 +++++++++++++++++++ parser.c | 1306 +++++++++++++++++++++++++++++++++++++++++++++++++++++ parser.h | 18 + rdoff/Makefile | 43 ++ rdoff/collectn.c | 40 ++ rdoff/collectn.h | 22 + rdoff/ldrdf.c | 540 ++++++++++++++++++++++ rdoff/rdf.doc | 99 ++++ rdoff/rdfdump.c | 156 +++++++ rdoff/rdfload.c | 173 +++++++ rdoff/rdfload.h | 29 ++ rdoff/rdoff.c | 367 +++++++++++++++ rdoff/rdoff.h | 112 +++++ rdoff/rdx.c | 61 +++ rdoff/symtab.c | 80 ++++ rdoff/symtab.h | 22 + sync.c | 84 ++++ sync.h | 16 + test/Makefile | 2 + test/aouttest.asm | 83 ++++ test/aouttest.c | 35 ++ test/bintest.asm | 56 +++ test/cofftest.asm | 82 
++++ test/cofftest.c | 34 ++ test/elftest.asm | 83 ++++ test/elftest.c | 35 ++ test/inc1.asm | 4 + test/inc2.asm | 8 + test/inctest.asm | 15 + test/objlink.c | 30 ++ test/objtest.asm | 82 ++++ 72 files changed, 16938 insertions(+) create mode 100644 Licence create mode 100644 Makefile create mode 100644 Makefile.bor create mode 100644 Makefile.dos create mode 100644 Readme create mode 100644 assemble.c create mode 100644 assemble.h create mode 100644 disasm.c create mode 100644 disasm.h create mode 100644 float.c create mode 100644 float.h create mode 100644 insns.dat create mode 100644 insns.h create mode 100644 insns.pl create mode 100644 internal.doc create mode 100644 labels.c create mode 100644 labels.h create mode 100644 lcc/Readme create mode 100644 lcc/bind.c create mode 100644 lcc/lin-aout.c create mode 100644 lcc/lin-elf.c create mode 100644 lcc/x86nasm.md create mode 100644 misc/magic create mode 100644 misc/nasm.sl create mode 100644 names.c create mode 100644 nasm.c create mode 100644 nasm.doc create mode 100644 nasm.h create mode 100644 nasmlib.c create mode 100644 nasmlib.h create mode 100644 ndisasm.c create mode 100644 ndisasm.doc create mode 100644 outaout.c create mode 100644 outas86.c create mode 100644 outbin.c create mode 100644 outcoff.c create mode 100644 outdbg.c create mode 100644 outelf.c create mode 100644 outform.c create mode 100644 outform.h create mode 100644 outobj.c create mode 100644 outrdf.c create mode 100644 parser.c create mode 100644 parser.h create mode 100644 rdoff/Makefile create mode 100644 rdoff/collectn.c create mode 100644 rdoff/collectn.h create mode 100644 rdoff/ldrdf.c create mode 100644 rdoff/rdf.doc create mode 100644 rdoff/rdfdump.c create mode 100644 rdoff/rdfload.c create mode 100644 rdoff/rdfload.h create mode 100644 rdoff/rdoff.c create mode 100644 rdoff/rdoff.h create mode 100644 rdoff/rdx.c create mode 100644 rdoff/symtab.c create mode 100644 rdoff/symtab.h create mode 100644 sync.c create mode 100644 
sync.h create mode 100644 test/Makefile create mode 100644 test/aouttest.asm create mode 100644 test/aouttest.c create mode 100644 test/bintest.asm create mode 100644 test/cofftest.asm create mode 100644 test/cofftest.c create mode 100644 test/elftest.asm create mode 100644 test/elftest.c create mode 100644 test/inc1.asm create mode 100644 test/inc2.asm create mode 100644 test/inctest.asm create mode 100644 test/objlink.c create mode 100644 test/objtest.asm diff --git a/Licence b/Licence new file mode 100644 index 00000000..8cd2f6d9 --- /dev/null +++ b/Licence @@ -0,0 +1,75 @@ +Terms and Conditions for the use of the Netwide Assembler +========================================================= + +Can I have the gist without reading the legalese? +------------------------------------------------- + +Basically, NASM is free. You can't charge for it. You can copy it as +much as you like. You can incorporate it, or bits of it, into other +free programs if you want. (But we want to know about it if you do, +and we want to be mentioned in the credits.) We may well allow you +to incorporate it into commercial software too, but we'll probably +demand some money for it, and we'll certainly demand to be given +credit. And in extreme cases (although I can't immediately think of +a reason we might actually want to do this) we may refuse to let you +do it at all. + +NASM LICENCE AGREEMENT +====================== + +By "the Software" this licence refers to the complete contents of +the NASM archive, excluding this licence document itself, and +excluding the contents of the `test' directory. The Netwide +Disassembler, NDISASM, is specifically included under this licence. + +I. The Software is freely redistributable; anyone may copy the +Software, or parts of the Software, and give away as many copies as +they like to anyone, as long as this licence document is kept with +the Software. 
Charging a fee for the Software is prohibited, +although a fee may be charged for the act of transferring a copy, +and you can offer warranty protection and charge a fee for that. + +II. The Software, or parts thereof, may be incorporated into other +freely redistributable software (by which we mean software that may +be obtained free of charge) without requiring permission from the +authors, as long as due credit is given to the authors of the +Software in the resulting work, as long as the authors are informed +of this action, and as long as those parts of the Software that are +used remain under this licence. + +III. The Software, or parts thereof, may be incorporated into other +software which is not freely redistributable (i.e. software for +which a fee is charged), as long as permission is granted from the +authors of the Software. The authors reserve the right to grant this +permission only for a fee, which may at our option take the form of +royalty payments. The authors also reserve the right to refuse to +grant permission if they deem it necessary. + +IV. You may not copy, modify or distribute the Software except under +the terms given in this licence document. You may not sublicense the +Software or in any way place it under any other licence than this +one. Since you have not signed this licence, you are not of course +required to accept it; however, no other licence applies to the +Software, and nothing else grants you any permission to copy, +modify, sublicense or distribute the Software in any way. These +actions are therefore prohibited if you do not accept this licence. + +V. There is no warranty for the Software, to the extent permitted by +applicable law. The authors provide the Software "as is" without +warranty of any kind, either expressed or implied, including but not +limited to the implied warranties of merchantability and fitness for +a particular purpose. The entire risk as to the quality and +performance of the Software is with you. 
Should the Software prove +defective, you assume the cost of all necessary servicing, repair or +correction. + +VI. In no event, unless required by applicable law or agreed to in +writing, will any of the authors be liable to you for damages, +including any general, special, incidental or consequential damages, +arising out of the use or the inability to use the Software, +including but not limited to loss of data or data being rendered +inaccurate or a failure of the Software to operate with any other +programs, even if you have been advised of the possibility of such +damages. + +END OF LICENCE AGREEMENT diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..605ab419 --- /dev/null +++ b/Makefile @@ -0,0 +1,93 @@ +# Makefile for the Netwide Assembler +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. +# +# This Makefile is designed for use under Unix (probably fairly +# portably). It can also be used without change to build NASM using +# DJGPP. The makefile "Makefile.dos" can be used to build NASM using +# a 16-bit DOS C compiler such as Microsoft C. +# +# The `make dist' section at the end of the Makefile is not +# guaranteed to work anywhere except Linux. Come to think of it, +# I'm not sure I want to guarantee it to work anywhere except on +# _my_ computer. 
:-) + +CC = gcc +CCFLAGS = -c -g -O -Wall -ansi -pedantic +LINK = gcc +LINKFLAGS = -o nasm +DLINKFLAGS = -o ndisasm +LIBRARIES = +STRIP = strip +EXE =# +OBJ = o# + +.c.$(OBJ): + $(CC) $(CCFLAGS) $*.c + +NASMOBJS = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) \ + assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) \ + outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) \ + outobj.$(OBJ) outas86.$(OBJ) outrdf.$(OBJ) outdbg.$(OBJ) + +NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \ + insnsd.$(OBJ) + +all : nasm$(EXE) ndisasm$(EXE) + +nasm$(EXE): $(NASMOBJS) + $(LINK) $(LINKFLAGS) $(NASMOBJS) $(LIBRARIES) + +ndisasm$(EXE): $(NDISASMOBJS) + $(LINK) $(DLINKFLAGS) $(NDISASMOBJS) $(LIBRARIES) + +assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h +disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c +float.$(OBJ): float.c nasm.h +insnsa.$(OBJ): insnsa.c nasm.h insns.h +insnsd.$(OBJ): insnsd.c nasm.h insns.h +labels.$(OBJ): labels.c nasm.h nasmlib.h +nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h +nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h +ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h +outas86.$(OBJ): outas86.c nasm.h nasmlib.h +outaout.$(OBJ): outaout.c nasm.h nasmlib.h +outbin.$(OBJ): outbin.c nasm.h nasmlib.h +outcoff.$(OBJ): outcoff.c nasm.h nasmlib.h +outelf.$(OBJ): outelf.c nasm.h nasmlib.h +outobj.$(OBJ): outobj.c nasm.h nasmlib.h +outform.$(OBJ): outform.c outform.h nasm.h +parser.$(OBJ): parser.c nasm.h nasmlib.h parser.h float.h names.c +sync.$(OBJ): sync.c sync.h + +# These two source files are automagically generated from a single +# instruction-table file by a Perl script. They're distributed, +# though, so it isn't necessary to have Perl just to recompile NASM +# from the distribution. 
+ +AUTOSRCS = insnsa.c insnsd.c +$(AUTOSRCS): insns.dat insns.pl + perl insns.pl + +clean : + rm -f $(NASMOBJS) $(NDISASMOBJS) nasm$(EXE) ndisasm$(EXE) + make -C rdoff clean + make -C test clean + +# Here the `make dist' section begins. Nothing is guaranteed hereafter +# unless you're using the Makefile under Linux, running bash, with +# gzip, GNU tar and a sensible version of zip readily available. + +DOSEXES = nasm.exe ndisasm.exe +MANPAGES = nasm.man ndisasm.man + +.SUFFIXES: .man .1 + +.1.man: + -man ./$< | ul > $@ + +dist: $(AUTOSRCS) $(MANPAGES) $(DOSEXES) clean + makedist.sh diff --git a/Makefile.bor b/Makefile.bor new file mode 100644 index 00000000..75aed4f5 --- /dev/null +++ b/Makefile.bor @@ -0,0 +1,76 @@ +# Makefile for the Netwide Assembler under 16-bit DOS +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. +# +# This Makefile is designed to build NASM using a 16-bit DOS C +# compiler such as Borland C, and has been tested with Borland C 2.3 +# and Borland Make. + +# CC = cl +# CCFLAGS = /c /O /AL +# LINK = cl +CC = bcc +CCFLAGS = -c -O -ml -A +LINK = tlink /c /Lc:\bc\lib +LINKFLAGS = +LIBRARIES = +EXE = .exe# +OBJ = obj# + +.c.$(OBJ): + $(CC) $(CCFLAGS) $*.c + +NASMOBJS1 = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) +NASMOBJS2 = assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) +NASMOBJS3 = outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) +NASMOBJS4 = outobj.$(OBJ) outas86.$(OBJ) outdbg.$(OBJ) outrdf.$(OBJ) + +NASMOBJS = $(NASMOBJS1) $(NASMOBJS2) $(NASMOBJS3) $(NASMOBJS4) + +NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \ + insnsd.$(OBJ) + +all : nasm$(EXE) ndisasm$(EXE) + +# We have to have a horrible kludge here to get round the 128 character +# limit, as usual... 
+nasm$(EXE): $(NASMOBJS) +# $(LINK) /Fenasm.exe a*.obj f*.obj insnsa.obj l*.obj na*.obj o*.obj p*.obj + echo c0l.obj $(NASMOBJS1) +> nasmobjs.tmp + echo $(NASMOBJS2) +>> nasmobjs.tmp + echo $(NASMOBJS3) +>> nasmobjs.tmp + echo $(NASMOBJS4),nasm.exe,,cl.lib, >> nasmobjs.tmp + $(LINK) /Tde @nasmobjs.tmp + +ndisasm$(EXE): $(NDISASMOBJS) +# $(LINK) /Fendisasm.exe $(NDISASMOBJS) + $(LINK) /Tde $(NDISASMOBJS),ndisasm.exe,,cl.lib, + +assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h +disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c +float.$(OBJ): float.c nasm.h +insnsa.$(OBJ): insnsa.c nasm.h insns.h +insnsd.$(OBJ): insnsd.c nasm.h insns.h +labels.$(OBJ): labels.c nasm.h nasmlib.h +nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h +nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h +ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h +outas86.$(OBJ): outas86.c nasm.h nasmlib.h +outaout.$(OBJ): outaout.c nasm.h nasmlib.h +outbin.$(OBJ): outbin.c nasm.h nasmlib.h +outcoff.$(OBJ): outcoff.c nasm.h nasmlib.h +outdbg.$(OBJ): outdbg.c nasm.h nasmlib.h +outelf.$(OBJ): outelf.c nasm.h nasmlib.h +outobj.$(OBJ): outobj.c nasm.h nasmlib.h +outrdf.$(OBJ): outrdf.c nasm.h nasmlib.h +outform.$(OBJ): outform.c outform.h nasm.h +parser.$(OBJ): parser.c nasm.h nasmlib.h parser.h float.h names.c +sync.$(OBJ): sync.c sync.h + +clean : + del *.obj + del nasm$(EXE) + del ndisasm$(EXE) diff --git a/Makefile.dos b/Makefile.dos new file mode 100644 index 00000000..cb757087 --- /dev/null +++ b/Makefile.dos @@ -0,0 +1,72 @@ +# Makefile for the Netwide Assembler under 16-bit DOS +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. +# +# This Makefile is designed to build NASM using a 16-bit DOS C +# compiler such as Microsoft C, provided you have a compatible MAKE. 
+# It's been tested with Microsoft C 5.x plus Borland Make. (Yes, I +# know it's silly, but...) + +CC = cl +CCFLAGS = /c /O /AL +LINK = cl +LINKFLAGS = +LIBRARIES = +EXE = .exe# +OBJ = obj# + +.c.$(OBJ): + $(CC) $(CCFLAGS) $*.c + +NASMOBJS = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) \ + assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) \ + outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) \ + outobj.$(OBJ) outas86.$(OBJ) outrdf.$(OBJ) outdbg.$(OBJ) + +NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \ + insnsd.$(OBJ) + +all : nasm$(EXE) ndisasm$(EXE) + +# We have to have a horrible kludge here to get round the 128 character +# limit, as usual... +nasm$(EXE): $(NASMOBJS) + cl /Fenasm.exe a*.obj f*.obj insnsa.obj l*.obj na*.obj o*.obj p*.obj + +ndisasm$(EXE): $(NDISASMOBJS) + cl /Fendisasm.exe $(NDISASMOBJS) + +assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h +disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c +float.$(OBJ): float.c nasm.h +labels.$(OBJ): labels.c nasm.h nasmlib.h +nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h +nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h +ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h +outas86.$(OBJ): outas86.c nasm.h nasmlib.h +outaout.$(OBJ): outaout.c nasm.h nasmlib.h +outbin.$(OBJ): outbin.c nasm.h nasmlib.h +outcoff.$(OBJ): outcoff.c nasm.h nasmlib.h +outdbg.$(OBJ): outdbg.c nasm.h nasmlib.h +outelf.$(OBJ): outelf.c nasm.h nasmlib.h +outobj.$(OBJ): outobj.c nasm.h nasmlib.h +outrdf.$(OBJ): outrdf.c nasm.h nasmlib.h +outform.$(OBJ): outform.c outform.h nasm.h +parser.$(OBJ): parser.c nasm.h nasmlib.h parser.h float.h names.c +sync.$(OBJ): sync.c sync.h + +# Another grotty hack: QC is less likely to run out of memory than +# CL proper; and we don't need any optimisation in these modules +# since they're just data. 
+insnsa.$(OBJ): insnsa.c nasm.h insns.h + qcl /c /AL insnsa.c +insnsd.$(OBJ): insnsd.c nasm.h insns.h + qcl /c /AL insnsd.c + +clean : + del *.obj + del nasm$(EXE) + del ndisasm$(EXE) diff --git a/Readme b/Readme new file mode 100644 index 00000000..5a036feb --- /dev/null +++ b/Readme @@ -0,0 +1,54 @@ +This is a distribution of NASM, the Netwide Assembler. NASM is a +prototype general-purpose x86 assembler. It will currently output +flat-form binary files, a.out, COFF and ELF Unix object files, +Microsoft 16-bit DOS and Win32 object files, the as86 object format, +and a home-grown format called RDF. + +Also included is NDISASM, a prototype x86 binary-file disassembler +which uses the same instruction table as NASM. + +To install NASM, you will need GCC. Type `make', and then when it +has finished copy the file `nasm' (and maybe `ndisasm') to a +directory on your search path (I use /usr/local/bin on my linux +machine at home, and ~/bin on other machines where I don't have root +access). You may also want to copy the man page `nasm.1' (and maybe +`ndisasm.1') to somewhere sensible. + +If you want to build a restricted version of NASM containing only +some of the object file formats, you can achieve this by adding +#defines to `outform.h' (see the file itself for documentation), or +equivalently by adding compiler command line options in the +Makefile. + +There is a machine description file for the `LCC' retargetable C +compiler, in the directory `lcc', along with instructions for its +use. This means that NASM can now be used as the code-generator back +end for a useful C compiler. + +Michael `Wuschel' Tippach has ported his DOS extender `WDOSX' to +enable it to work with the 32-bit binary files NASM can output: the +original extender and his port `WDOSX/N' are available from his web +page, http://www.geocities.com/SiliconValley/Park/4493. 
+ +The `misc' directory contains `nasm.sl', a NASM editing mode for the +JED programmers' editor (see http://space.mit.edu/~davis/jed.html +for details about JED). The comment at the start of the file gives +instructions on how to install the mode. This directory also +contains a file (`magic') containing lines to add to /etc/magic on +Unix systems to allow the `file' command to recognise RDF files. + +The `rdoff' directory contains sources for a linker and loader for +the RDF object file format, to run under Linux, and also +documentation on the internal structure of RDF files. + +For information about how you can distribute and use NASM, see the +file Licence. We were tempted to put NASM under the GPL, but decided +that in many ways it was too restrictive for developers. + +For information about how to use NASM, see `nasm.doc'. For +information about how to use NDISASM, see `ndisasm.doc'. For +information about the internal structure of NASM, see +`internal.doc'. + +Bug reports (and patches if you can) should be sent to +jules@dcs.warwick.ac.uk or anakin@pobox.com. diff --git a/assemble.c b/assemble.c new file mode 100644 index 00000000..bab6f292 --- /dev/null +++ b/assemble.c @@ -0,0 +1,945 @@ +/* assemble.c code generation for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * the actual codes (C syntax, i.e. octal): + * \0 - terminates the code. (Unless it's a literal of course.)
+ * \1, \2, \3 - that many literal bytes follow in the code stream + * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS + * (POP is never used for CS) depending on operand 0 + * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending + * on operand 0 + * \10, \11, \12 - a literal byte follows in the code stream, to be added + * to the register value of operand 0, 1 or 2 + * \17 - encodes the literal byte 0. (Some compilers don't take + * kindly to a zero byte in the _middle_ of a compile time + * string constant, so I had to put this hack in.) + * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2 + * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2 + * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2 + * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2 + * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit + * assembly mode or the address-size override on the operand + * \37 - a word constant, from the _segment_ part of operand 0 + * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2 + * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2 + * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2 + * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit + * assembly mode or the address-size override on the operand + * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2 + * \1ab - a ModRM, calculated on EA in operand a, with the spare + * field the register value of operand b. + * \2ab - a ModRM, calculated on EA in operand a, with the spare + * field equal to digit b. + * \30x - might be an 0x67 byte, depending on the address size of + * the memory reference in operand x. + * \310 - indicates fixed 16-bit address size, i.e. optional 0x67. + * \311 - indicates fixed 32-bit address size, i.e. optional 0x67. + * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66. 
+ * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66. + * \322 - indicates that this instruction is only valid when the + * operand size is the default (instruction to disassembler, + * generates no code in the assembler) + * \330 - a literal byte follows in the code stream, to be added + * to the condition code value of the instruction. + * \340 - reserve bytes of uninitialised storage. + * Operand 0 had better be a segmentless constant. + */ + +#include <stdio.h> +#include <string.h> + +#include "nasm.h" +#include "assemble.h" +#include "insns.h" + +extern struct itemplate *nasm_instructions[]; + +typedef struct { + int sib_present; /* is a SIB byte necessary? */ + int bytes; /* # of bytes of offset needed */ + int size; /* lazy - this is sib+bytes+1 */ + unsigned char modrm, sib; /* the bytes themselves */ +} ea; + +static efunc errfunc; +static struct ofmt *outfmt; + +static long calcsize (long, long, int, insn *, char *); +static void gencode (long, long, int, insn *, char *, long); +static int regval (operand *o); +static int matches (struct itemplate *, insn *); +static ea *process_ea (operand *, ea *, int, int); +static int chsize (operand *, int); + +long assemble (long segment, long offset, int bits, + insn *instruction, struct ofmt *output, efunc error) { + int j, itimes, size_prob; + long insn_end; + long start = offset; + struct itemplate *temp; + + errfunc = error; /* to pass to other functions */ + outfmt = output; /* likewise */ + + if (instruction->opcode == -1) + return 0; + + if (instruction->opcode == I_DB || + instruction->opcode == I_DW || + instruction->opcode == I_DD || + instruction->opcode == I_DQ || + instruction->opcode == I_DT) { + extop *e; + long osize, wsize = 0; /* placate gcc */ + int t = instruction->times; + + switch (instruction->opcode) { + case I_DB: wsize = 1; break; + case I_DW: wsize = 2; break; + case I_DD: wsize = 4; break; + case I_DQ: wsize = 8; break; + case I_DT: wsize = 10; break; + } + + while (t--) { + for (e =
instruction->eops; e; e = e->next) { + osize = 0; + if (e->type == EOT_DB_NUMBER) { + if (wsize == 1) { + if (e->segment != NO_SEG) + errfunc (ERR_NONFATAL, + "one-byte relocation attempted"); + else { + unsigned char c = e->offset; + outfmt->output (segment, &c, OUT_RAWDATA+1, + NO_SEG, NO_SEG); + } + } else if (wsize > 5) { + errfunc (ERR_NONFATAL, "integer supplied to a D%c" + " instruction", wsize==8 ? 'Q' : 'T'); + } else + outfmt->output (segment, &e->offset, + OUT_ADDRESS+wsize, e->segment, + e->wrt); + offset += wsize; + } else if (e->type == EOT_DB_STRING) { + int align; + + align = (-e->stringlen) % wsize; + if (align < 0) + align += wsize; + outfmt->output (segment, e->stringval, + OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG); + if (align) + outfmt->output (segment, "\0\0\0\0", + OUT_RAWDATA+align, NO_SEG, NO_SEG); + offset += e->stringlen + align; + } + } + } + return offset - start; + } + + size_prob = FALSE; + temp = nasm_instructions[instruction->opcode]; + while (temp->opcode != -1) { + int m = matches (temp, instruction); + if (m == 100) { /* matches! 
*/ + char *codes = temp->code; + long insn_size = calcsize(segment, offset, bits, + instruction, codes); + itimes = instruction->times; + if (insn_size < 0) /* shouldn't be, on pass two */ + error (ERR_PANIC, "errors made it through from pass one"); + else while (itimes--) { + insn_end = offset + insn_size; + for (j=0; j<instruction->nprefix; j++) { + unsigned char c; + switch (instruction->prefixes[j]) { + case P_LOCK: + c = 0xF0; break; + case P_REPNE: case P_REPNZ: + c = 0xF2; break; + case P_REPE: case P_REPZ: case P_REP: + c = 0xF3; break; + case R_CS: c = 0x2E; break; + case R_DS: c = 0x3E; break; + case R_ES: c = 0x26; break; + case R_FS: c = 0x64; break; + case R_GS: c = 0x65; break; + case R_SS: c = 0x36; break; + case P_A16: + if (bits == 16) + c = 0; /* no prefix */ + else + c = 0x67; + break; + case P_A32: + if (bits == 32) + c = 0; /* no prefix */ + else + c = 0x67; + break; + case P_O16: + if (bits == 16) + c = 0; /* no prefix */ + else + c = 0x66; + break; + case P_O32: + if (bits == 32) + c = 0; /* no prefix */ + else + c = 0x66; + break; + default: + error (ERR_PANIC, + "invalid instruction prefix"); + } + if (c != 0) + outfmt->output (segment, &c, OUT_RAWDATA+1, + NO_SEG, NO_SEG); + offset++; + } + gencode (segment, offset, bits, instruction, codes, insn_end); + offset += insn_size; + } + return offset - start; + } else if (m > 0) { + size_prob = m; + } + temp++; + } + if (temp->opcode == -1) { /* didn't match any instruction */ + if (size_prob == 1) /* would have matched, but for size */ + error (ERR_NONFATAL, "operation size not specified"); + else if (size_prob == 2) + error (ERR_NONFATAL, "mismatch in operand sizes"); + else + error (ERR_NONFATAL, + "invalid combination of opcode and operands"); + } + return 0; +} + +long insn_size (long segment, long offset, int bits, + insn *instruction, efunc error) { + struct itemplate *temp; + + errfunc = error; /* to pass to other functions */ + + if (instruction->opcode == -1) + return 0; + + if (instruction->opcode
== I_DB || + instruction->opcode == I_DW || + instruction->opcode == I_DD || + instruction->opcode == I_DQ || + instruction->opcode == I_DT) { + extop *e; + long isize, osize, wsize = 0; /* placate gcc */ + + isize = 0; + switch (instruction->opcode) { + case I_DB: wsize = 1; break; + case I_DW: wsize = 2; break; + case I_DD: wsize = 4; break; + case I_DQ: wsize = 8; break; + case I_DT: wsize = 10; break; + } + + for (e = instruction->eops; e; e = e->next) { + long align; + + osize = 0; + if (e->type == EOT_DB_NUMBER) + osize = 1; + else if (e->type == EOT_DB_STRING) + osize = e->stringlen; + + align = (-osize) % wsize; + if (align < 0) + align += wsize; + isize += osize + align; + } + return isize * instruction->times; + } + + temp = nasm_instructions[instruction->opcode]; + while (temp->opcode != -1) { + if (matches(temp, instruction) == 100) { + /* we've matched an instruction. */ + long isize; + char *codes = temp->code; + int j; + + isize = calcsize(segment, offset, bits, instruction, codes); + if (isize < 0) + return -1; + for (j = 0; j < instruction->nprefix; j++) { + if ((instruction->prefixes[j] != P_A16 && + instruction->prefixes[j] != P_O16 && bits==16) || + (instruction->prefixes[j] != P_A32 && + instruction->prefixes[j] != P_O32 && bits==32)) + isize++; + } + return isize * instruction->times; + } + temp++; + } + return -1; /* didn't match any instruction */ +} + +static long calcsize (long segment, long offset, int bits, + insn *ins, char *codes) { + long length = 0; + unsigned char c; + + while (*codes) switch (c = *codes++) { + case 01: case 02: case 03: + codes += c, length += c; break; + case 04: case 05: case 06: case 07: + length++; break; + case 010: case 011: case 012: + codes++, length++; break; + case 017: + length++; break; + case 014: case 015: case 016: + length++; break; + case 020: case 021: case 022: + length++; break; + case 024: case 025: case 026: + length++; break; + case 030: case 031: case 032: + length += 2; break; + case 034: 
case 035: case 036: + length += ((ins->oprs[c-034].addr_size ? + ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); break; + case 037: + length += 2; break; + case 040: case 041: case 042: + length += 4; break; + case 050: case 051: case 052: + length++; break; + case 060: case 061: case 062: + length += 2; break; + case 064: case 065: case 066: + length += ((ins->oprs[c-064].addr_size ? + ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); break; + case 070: case 071: case 072: + length += 4; break; + case 0300: case 0301: case 0302: + length += chsize (&ins->oprs[c-0300], bits); + break; + case 0310: + length += (bits==32); + break; + case 0311: + length += (bits==16); + break; + case 0312: + break; + case 0320: + length += (bits==32); + break; + case 0321: + length += (bits==16); + break; + case 0322: + break; + case 0330: + codes++, length++; break; + case 0340: case 0341: case 0342: + if (ins->oprs[0].segment != NO_SEG) + errfunc (ERR_NONFATAL, "attempt to reserve non-constant" + " quantity of BSS space"); + else + length += ins->oprs[0].offset << (c-0340); + break; + default: /* can't do it by 'case' statements */ + if (c>=0100 && c<=0277) { /* it's an EA */ + ea ea_data; + + if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, 0)) { + errfunc (ERR_NONFATAL, "invalid effective address"); + return -1; + } else + length += ea_data.size; + } else + errfunc (ERR_PANIC, "internal instruction table corrupt" + ": instruction code 0x%02X given", c); + } + return length; +} + +static void gencode (long segment, long offset, int bits, + insn *ins, char *codes, long insn_end) { + static char condval[] = { /* conditional opcodes */ + 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2, + 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5, + 0x0, 0xA, 0xA, 0xB, 0x8, 0x4 + }; + unsigned char c, bytes[4]; + long data, size; + + while (*codes) switch (c = *codes++) { + case 01: case 02: case 03: + outfmt->output (segment, codes, OUT_RAWDATA+c, NO_SEG, 
NO_SEG); + codes += c; + offset += c; + break; + case 04: case 06: + switch (ins->oprs[0].basereg) { + case R_CS: bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break; + case R_DS: bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break; + case R_ES: bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break; + case R_SS: bytes[0] = 0x16 + (c == 0x04 ? 1 : 0); break; + default: + errfunc (ERR_PANIC, "bizarre 8086 segment register received"); + } + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset++; + break; + case 05: case 07: + switch (ins->oprs[0].basereg) { + case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break; + case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break; + default: + errfunc (ERR_PANIC, "bizarre 386 segment register received"); + } + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset++; + break; + case 010: case 011: case 012: + bytes[0] = *codes++ + regval(&ins->oprs[c-010]); + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + break; + case 017: + bytes[0] = 0; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + break; + case 014: case 015: case 016: + if (ins->oprs[c-014].offset < -128 || ins->oprs[c-014].offset > 127) + errfunc (ERR_WARNING, "signed byte value exceeds bounds"); + bytes[0] = ins->oprs[c-014].offset; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + break; + case 020: case 021: case 022: + if (ins->oprs[c-020].offset < -128 || ins->oprs[c-020].offset > 255) + errfunc (ERR_WARNING, "byte value exceeds bounds"); + bytes[0] = ins->oprs[c-020].offset; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + break; + case 024: case 025: case 026: + if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255) + errfunc (ERR_WARNING, "unsigned byte value exceeds bounds"); + bytes[0] = ins->oprs[c-024].offset; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + 
break; + case 030: case 031: case 032: + if (ins->oprs[c-030].segment == NO_SEG && + ins->oprs[c-030].wrt == NO_SEG && + (ins->oprs[c-030].offset < -32768 || + ins->oprs[c-030].offset > 65535)) + errfunc (ERR_WARNING, "word value exceeds bounds"); + data = ins->oprs[c-030].offset; + outfmt->output (segment, &data, OUT_ADDRESS+2, + ins->oprs[c-030].segment, ins->oprs[c-030].wrt); + offset += 2; + break; + case 034: case 035: case 036: + data = ins->oprs[c-034].offset; + size = ((ins->oprs[c-034].addr_size ? + ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); + /* size is a byte count (2 or 4), not a bit width: the word-sized case is size==2 */ + if (size==2 && (data < -32768 || data > 65535)) + errfunc (ERR_WARNING, "word value exceeds bounds"); + outfmt->output (segment, &data, OUT_ADDRESS+size, + ins->oprs[c-034].segment, ins->oprs[c-034].wrt); + offset += size; + break; + case 037: + if (ins->oprs[0].segment == NO_SEG) + errfunc (ERR_NONFATAL, "value referenced by FAR is not" + " relocatable"); + data = 0L; + outfmt->output (segment, &data, OUT_ADDRESS+2, + outfmt->segbase(1+ins->oprs[0].segment), + ins->oprs[0].wrt); + offset += 2; + break; + case 040: case 041: case 042: + data = ins->oprs[c-040].offset; + outfmt->output (segment, &data, OUT_ADDRESS+4, + ins->oprs[c-040].segment, ins->oprs[c-040].wrt); + offset += 4; + break; + case 050: case 051: case 052: + if (ins->oprs[c-050].segment != segment) + errfunc (ERR_NONFATAL, "short relative jump outside segment"); + data = ins->oprs[c-050].offset - insn_end; + if (data > 127 || data < -128) + errfunc (ERR_NONFATAL, "short jump is out of range"); + bytes[0] = data; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + break; + case 060: case 061: case 062: + if (ins->oprs[c-060].segment != segment) { + data = ins->oprs[c-060].offset; + outfmt->output (segment, &data, OUT_REL2ADR+insn_end-offset, + ins->oprs[c-060].segment, ins->oprs[c-060].wrt); + } else { + data = ins->oprs[c-060].offset - insn_end; + outfmt->output (segment, &data, OUT_ADDRESS+2, NO_SEG, NO_SEG); +
} + offset += 2; + break; + case 064: case 065: case 066: + size = ((ins->oprs[c-064].addr_size ? + ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); + if (ins->oprs[c-064].segment != segment) { + data = ins->oprs[c-064].offset; + /* choose the relocation width from size rather than bits, so an addr_size override is honoured here exactly as in the same-segment branch below */ + outfmt->output (segment, &data, + (size == 2 ? OUT_REL2ADR : OUT_REL4ADR)+insn_end-offset, + ins->oprs[c-064].segment, ins->oprs[c-064].wrt); + } else { + data = ins->oprs[c-064].offset - insn_end; + outfmt->output (segment, &data, OUT_ADDRESS+size, NO_SEG, NO_SEG); + } + offset += size; + break; + case 070: case 071: case 072: + if (ins->oprs[c-070].segment != segment) { + data = ins->oprs[c-070].offset; + outfmt->output (segment, &data, OUT_REL4ADR+insn_end-offset, + ins->oprs[c-070].segment, ins->oprs[c-070].wrt); + } else { + data = ins->oprs[c-070].offset - insn_end; + outfmt->output (segment, &data, OUT_ADDRESS+4, NO_SEG, NO_SEG); + } + offset += 4; + break; + case 0300: case 0301: case 0302: + if (chsize (&ins->oprs[c-0300], bits)) { + *bytes = 0x67; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + } else + offset += 0; + break; + case 0310: + if (bits==32) { + *bytes = 0x67; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + } else + offset += 0; + break; + case 0311: + if (bits==16) { + *bytes = 0x67; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + } else + offset += 0; + break; + case 0312: + break; + case 0320: + if (bits==32) { + *bytes = 0x66; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + } else + offset += 0; + break; + case 0321: + if (bits==16) { + *bytes = 0x66; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; + } else + offset += 0; + break; + case 0322: + break; + case 0330: + *bytes = *codes++ + condval[ins->condition]; + outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + offset += 1; +
break; + case 0340: case 0341: case 0342: + if (ins->oprs[0].segment != NO_SEG) + errfunc (ERR_PANIC, "non-constant BSS size in pass two"); + else { + long size = ins->oprs[0].offset << (c-0340); + outfmt->output (segment, NULL, OUT_RESERVE+size, NO_SEG, NO_SEG); + offset += size; + } + break; + default: /* can't do it by 'case' statements */ + if (c>=0100 && c<=0277) { /* it's an EA */ + ea ea_data; + int rfield; + unsigned char *p; + long s; + + if (c<=0177) /* pick rfield from operand b */ + rfield = regval (&ins->oprs[c&7]); + else /* rfield is constant */ + rfield = c & 7; + if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield)) + errfunc (ERR_NONFATAL, "invalid effective address"); + + p = bytes; + *p++ = ea_data.modrm; + if (ea_data.sib_present) + *p++ = ea_data.sib; + /* + * the cast in the next line is to placate MS C... + */ + outfmt->output (segment, bytes, OUT_RAWDATA+(long)(p-bytes), + NO_SEG, NO_SEG); + s = p-bytes; + + switch (ea_data.bytes) { + case 0: + break; + case 1: + *bytes = ins->oprs[(c>>3)&7].offset; + outfmt->output (segment, bytes, OUT_RAWDATA+1, + NO_SEG, NO_SEG); + s++; + break; + case 2: + case 4: + data = ins->oprs[(c>>3)&7].offset; + outfmt->output (segment, &data, OUT_ADDRESS+ea_data.bytes, + ins->oprs[(c>>3)&7].segment, + ins->oprs[(c>>3)&7].wrt); + s += ea_data.bytes; + break; + } + offset += s; + } else + errfunc (ERR_PANIC, "internal instruction table corrupt" + ": instruction code 0x%02X given", c); + } +} + +static int regval (operand *o) { + switch (o->basereg) { + case R_EAX: case R_AX: case R_AL: case R_ES: case R_CR0: case R_DR0: + case R_ST0: case R_MM0: + return 0; + case R_ECX: case R_CX: case R_CL: case R_CS: case R_DR1: case R_ST1: + case R_MM1: + return 1; + case R_EDX: case R_DX: case R_DL: case R_SS: case R_CR2: case R_DR2: + case R_ST2: case R_MM2: + return 2; + case R_EBX: case R_BX: case R_BL: case R_DS: case R_CR3: case R_DR3: + case R_TR3: case R_ST3: case R_MM3: + return 3; + case R_ESP: case R_SP: 
case R_AH: case R_FS: case R_CR4: case R_TR4: + case R_ST4: case R_MM4: + return 4; + case R_EBP: case R_BP: case R_CH: case R_GS: case R_TR5: case R_ST5: + case R_MM5: + return 5; + case R_ESI: case R_SI: case R_DH: case R_DR6: case R_TR6: case R_ST6: + case R_MM6: + return 6; + case R_EDI: case R_DI: case R_BH: case R_DR7: case R_TR7: case R_ST7: + case R_MM7: + return 7; + default: /* panic */ + errfunc (ERR_PANIC, "invalid register operand given to regval()"); + return 0; + } +} + +static int matches (struct itemplate *itemp, insn *instruction) { + int i, size, oprs, ret; + + ret = 100; + + /* + * Check the opcode + */ + if (itemp->opcode != instruction->opcode) return 0; + + /* + * Count the operands + */ + if (itemp->operands != instruction->operands) return 0; + + /* + * Check that no spurious colons or TOs are present + */ + for (i=0; ioperands; i++) + if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO)) + return 0; + + /* + * Check that the operand flags all match up + */ + for (i=0; ioperands; i++) + if (itemp->opd[i] & ~instruction->oprs[i].type || + ((itemp->opd[i] & SIZE_MASK) && + ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) { + if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) || + (instruction->oprs[i].type & SIZE_MASK)) + return 0; + else + ret = 1; + } + + /* + * Check operand sizes + */ + if (itemp->flags & IF_SB) { + size = BITS8; + oprs = itemp->operands; + } else if (itemp->flags & IF_SD) { + size = BITS32; + oprs = itemp->operands; + } else if (itemp->flags & (IF_SM | IF_SM2)) { + oprs = (itemp->flags & IF_SM2 ? 
2 : itemp->operands); + size = 0; /* placate gcc */ + for (i=0; iopd[i] & SIZE_MASK) != 0) + break; + } else { + size = 0; + oprs = itemp->operands; + } + + for (i=0; ioperands; i++) + if (!(itemp->opd[i] & SIZE_MASK) && + (instruction->oprs[i].type & SIZE_MASK & ~size)) + ret = 2; + + return ret; +} + +static ea *process_ea (operand *input, ea *output, int addrbits, int rfield) { + if (!(REGISTER & ~input->type)) { /* it's a single register */ + static int regs[] = { + R_MM0, R_EAX, R_AX, R_AL, R_MM1, R_ECX, R_CX, R_CL, + R_MM2, R_EDX, R_DX, R_DL, R_MM3, R_EBX, R_BX, R_BL, + R_MM4, R_ESP, R_SP, R_AH, R_MM5, R_EBP, R_BP, R_CH, + R_MM6, R_ESI, R_SI, R_DH, R_MM7, R_EDI, R_DI, R_BH + }; + int i; + + for (i=0; ibasereg == regs[i]) break; + if (isib_present = FALSE;/* no SIB necessary */ + output->bytes = 0; /* no offset necessary either */ + output->modrm = 0xC0 | (rfield << 3) | (i/4); + } else + return NULL; + } else { /* it's a memory reference */ + if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) { + /* it's a pure offset */ + if (input->addr_size) + addrbits = input->addr_size; + output->sib_present = FALSE; + output->bytes = (addrbits==32 ? 4 : 2); + output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3); + } else { /* it's an indirection */ + int i=input->indexreg, b=input->basereg, s=input->scale; + long o=input->offset, seg=input->segment; + + if (s==0) i = -1; /* make this easy, at least */ + + if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX + || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI + || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX + || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) { + /* it must be a 32-bit memory reference. Firstly we have + * to check that all registers involved are type Exx. 
*/ + if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX + && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI) + return NULL; + if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX + && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI) + return NULL; + + /* While we're here, ensure the user didn't specify WORD. */ + if (input->addr_size == 16) + return NULL; + + /* now reorganise base/index */ + if (b==i) /* convert EAX+2*EAX to 3*EAX */ + b = -1, s++; + if (b==-1 && s==1) /* single register should be base */ + b = i, i = -1; + if (((s==2 && i!=R_ESP) || s==3 || s==5 || s==9) && b==-1) + b = i, s--; /* convert 3*EAX to EAX+2*EAX */ + if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1)) + return NULL; /* wrong, for various reasons */ + + if (i==-1 && b!=R_ESP) {/* no SIB needed */ + int mod, rm; + switch(b) { + case R_EAX: rm = 0; break; + case R_ECX: rm = 1; break; + case R_EDX: rm = 2; break; + case R_EBX: rm = 3; break; + case R_EBP: rm = 5; break; + case R_ESI: rm = 6; break; + case R_EDI: rm = 7; break; + case -1: rm = 5; break; + default: /* should never happen */ + return NULL; + } + if (b==-1 || (b!=R_EBP && o==0 && seg==NO_SEG)) + mod = 0; + else if (o>=-128 && o<=127 && seg==NO_SEG) + mod = 1; + else + mod = 2; + output->sib_present = FALSE; + output->bytes = (b==-1 || mod==2 ? 4 : mod); + output->modrm = (mod<<6) | (rfield<<3) | rm; + } else { /* we need a SIB */ + int mod, scale, index, base; + + switch (b) { + case R_EAX: base = 0; break; + case R_ECX: base = 1; break; + case R_EDX: base = 2; break; + case R_EBX: base = 3; break; + case R_ESP: base = 4; break; + case R_EBP: case -1: base = 5; break; + case R_ESI: base = 6; break; + case R_EDI: base = 7; break; + default: /* then what the smeg is it? 
*/ + return NULL; /* panic */ + } + + switch (i) { + case R_EAX: index = 0; break; + case R_ECX: index = 1; break; + case R_EDX: index = 2; break; + case R_EBX: index = 3; break; + case -1: index = 4; break; + case R_EBP: index = 5; break; + case R_ESI: index = 6; break; + case R_EDI: index = 7; break; + default: /* then what the smeg is it? */ + return NULL; /* panic */ + } + + if (i==-1) s = 1; + switch (s) { + case 1: scale = 0; break; + case 2: scale = 1; break; + case 4: scale = 2; break; + case 8: scale = 3; break; + default: /* then what the smeg is it? */ + return NULL; /* panic */ + } + + if (b==-1 || (b!=R_EBP && o==0 && seg==NO_SEG)) + mod = 0; + else if (o>=-128 && o<=127 && seg==NO_SEG) + mod = 1; + else + mod = 2; + + output->sib_present = TRUE; + output->bytes = (b==-1 || mod==2 ? 4 : mod); + output->modrm = (mod<<6) | (rfield<<3) | 4; + output->sib = (scale<<6) | (index<<3) | base; + } + } else { /* it's 16-bit */ + int mod, rm; + + /* check all registers are BX, BP, SI or DI */ + if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) || + (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI)) + return NULL; + + /* ensure the user didn't specify DWORD */ + if (input->addr_size == 32) + return NULL; + + if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */ + if (b==-1 && i!=-1) b ^= i ^= b ^= i; /* swap them round */ + if ((b==R_SI || b==R_DI) && i!=-1) + b ^= i ^= b ^= i; /* have BX/BP as base, SI/DI index */ + if (b==i) return NULL;/* shouldn't ever happen, in theory */ + if (i!=-1 && b!=-1 && + (i==R_BP || i==R_BX || b==R_SI || b==R_DI)) + return NULL; /* invalid combinations */ + if (b==-1) /* pure offset: handled above */ + return NULL; /* so if it gets to here, panic! 
*/ + + rm = -1; + if (i!=-1) + switch (i*256 + b) { + case R_SI*256+R_BX: rm=0; break; + case R_DI*256+R_BX: rm=1; break; + case R_SI*256+R_BP: rm=2; break; + case R_DI*256+R_BP: rm=3; break; + } + else + switch (b) { + case R_SI: rm=4; break; + case R_DI: rm=5; break; + case R_BP: rm=6; break; + case R_BX: rm=7; break; + } + if (rm==-1) /* can't happen, in theory */ + return NULL; /* so panic if it does */ + + if (o==0 && seg==NO_SEG && rm!=6) + mod = 0; + else if (o>=-128 && o<=127 && seg==NO_SEG) + mod = 1; + else + mod = 2; + + output->sib_present = FALSE; /* no SIB - it's 16-bit */ + output->bytes = mod; /* bytes of offset needed */ + output->modrm = (mod<<6) | (rfield<<3) | rm; + } + } + } + output->size = 1 + output->sib_present + output->bytes; + return output; +} + +static int chsize (operand *input, int addrbits) { + if (!(MEMORY & ~input->type)) { + int i=input->indexreg, b=input->basereg; + + if (input->scale==0) i = -1; + + if (i == -1 && b == -1) /* pure offset */ + return (input->addr_size != 0 && input->addr_size != addrbits); + + if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX + || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI + || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX + || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) + return (addrbits==16); + else + return (addrbits==32); + } else + return 0; +} diff --git a/assemble.h b/assemble.h new file mode 100644 index 00000000..cb93a2c5 --- /dev/null +++ b/assemble.h @@ -0,0 +1,17 @@ +/* assemble.h header file for assemble.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#ifndef NASM_ASSEMBLE_H +#define NASM_ASSEMBLE_H + +long insn_size (long segment, long offset, int bits, + insn *instruction, efunc error); +long assemble (long segment, long offset, int bits, + insn *instruction, struct ofmt *output, efunc error); + +#endif diff --git a/disasm.c b/disasm.c new file mode 100644 index 00000000..8ad263b2 --- /dev/null +++ b/disasm.c @@ -0,0 +1,667 @@ +/* disasm.c where all the _work_ gets done in the Netwide Disassembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * initial version 27/iii/95 by Simon Tatham + */ + +#include +#include + +#include "nasm.h" +#include "disasm.h" +#include "sync.h" +#include "insns.h" + +#include "names.c" + +extern struct itemplate **itable[]; + +/* + * Flags that go into the `segment' field of `insn' structures + * during disassembly. 
+ */ +#define SEG_RELATIVE 1 +#define SEG_32BIT 2 +#define SEG_RMREG 4 +#define SEG_DISP8 8 +#define SEG_DISP16 16 +#define SEG_DISP32 32 +#define SEG_NODISP 64 +#define SEG_SIGNED 128 + +static int whichreg(long regflags, int regval) { + static int reg32[] = { + R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI }; + static int reg16[] = { + R_AX, R_CX, R_DX, R_BX, R_SP, R_BP, R_SI, R_DI }; + static int reg8[] = { + R_AL, R_CL, R_DL, R_BL, R_AH, R_CH, R_DH, R_BH }; + static int sreg[] = { + R_ES, R_CS, R_SS, R_DS, R_FS, R_GS, 0, 0 }; + static int creg[] = { + R_CR0, 0, R_CR2, R_CR3, R_CR4, 0, 0, 0 }; + static int dreg[] = { + R_DR0, R_DR1, R_DR2, R_DR3, 0, 0, R_DR6, R_DR7 }; + static int treg[] = { + 0, 0, 0, R_TR3, R_TR4, R_TR5, R_TR6, R_TR7 }; + static int fpureg[] = { + R_ST0, R_ST1, R_ST2, R_ST3, R_ST4, R_ST5, R_ST6, R_ST7 }; + static int mmxreg[] = { + R_MM0, R_MM1, R_MM2, R_MM3, R_MM4, R_MM5, R_MM6, R_MM7 }; + + if (!(REG_AL & ~regflags)) + return R_AL; + if (!(REG_AX & ~regflags)) + return R_AX; + if (!(REG_EAX & ~regflags)) + return R_EAX; + if (!(REG_DX & ~regflags)) + return R_DX; + if (!(REG_CL & ~regflags)) + return R_CL; + if (!(REG_CX & ~regflags)) + return R_CX; + if (!(REG_ECX & ~regflags)) + return R_ECX; + if (!(REG_CR4 & ~regflags)) + return R_CR4; + if (!(FPU0 & ~regflags)) + return R_ST0; + if (!((REGMEM|BITS8) & ~regflags)) + return reg8[regval]; + if (!((REGMEM|BITS16) & ~regflags)) + return reg16[regval]; + if (!((REGMEM|BITS32) & ~regflags)) + return reg32[regval]; + if (!(REG_SREG & ~regflags)) + return sreg[regval]; + if (!(REG_CREG & ~regflags)) + return creg[regval]; + if (!(REG_DREG & ~regflags)) + return dreg[regval]; + if (!(REG_TREG & ~regflags)) + return treg[regval]; + if (!(FPUREG & ~regflags)) + return fpureg[regval]; + if (!(MMXREG & ~regflags)) + return mmxreg[regval]; + return 0; +} + +static char *whichcond(int condval) { + static int conds[] = { + C_O, C_NO, C_B, C_AE, C_E, C_NE, C_BE, C_A, + C_S, C_NS, C_PE, C_PO, 
C_L, C_GE, C_LE, C_G + }; + return conditions[conds[condval]]; +} + +/* + * Process an effective address (ModRM) specification. + */ +static unsigned char *do_ea (unsigned char *data, int modrm, int asize, + int segsize, operand *op) { + int mod, rm, scale, index, base; + + mod = (modrm >> 6) & 03; + rm = modrm & 07; + + if (mod == 3) { /* pure register version */ + op->basereg = rm; + op->segment |= SEG_RMREG; + return data; + } + + op->addr_size = 0; + + if (asize == 16) { + /* + * specifies the displacement size (none, byte or + * word), and specifies the register combination. + * Exception: mod=0,rm=6 does not specify [BP] as one might + * expect, but instead specifies [disp16]. + */ + op->indexreg = op->basereg = -1; + op->scale = 1; /* always, in 16 bits */ + switch (rm) { + case 0: op->basereg = R_BX; op->indexreg = R_SI; break; + case 1: op->basereg = R_BX; op->indexreg = R_DI; break; + case 2: op->basereg = R_BP; op->indexreg = R_SI; break; + case 3: op->basereg = R_BP; op->indexreg = R_DI; break; + case 4: op->basereg = R_SI; break; + case 5: op->basereg = R_DI; break; + case 6: op->basereg = R_BP; break; + case 7: op->basereg = R_BX; break; + } + if (rm == 6 && mod == 0) { /* special case */ + op->basereg = -1; + if (segsize != 16) + op->addr_size = 16; + mod = 2; /* fake disp16 */ + } + switch (mod) { + case 0: + op->segment |= SEG_NODISP; + break; + case 1: + op->segment |= SEG_DISP8; + op->offset = (signed char) *data++; + break; + case 2: + op->segment |= SEG_DISP16; + op->offset = *data++; + op->offset |= (*data++) << 8; + break; + } + return data; + } else { + /* + * Once again, specifies displacement size (this time + * none, byte or *dword*), while specifies the base + * register. Again, [EBP] is missing, replaced by a pure + * disp32 (this time that's mod=0,rm=*5*). However, rm=4 + * indicates not a single base register, but instead the + * presence of a SIB byte... 
+ */ + op->indexreg = -1; + switch (rm) { + case 0: op->basereg = R_EAX; break; + case 1: op->basereg = R_ECX; break; + case 2: op->basereg = R_EDX; break; + case 3: op->basereg = R_EBX; break; + case 5: op->basereg = R_EBP; break; + case 6: op->basereg = R_ESI; break; + case 7: op->basereg = R_EDI; break; + } + if (rm == 5 && mod == 0) { + op->basereg = -1; + if (segsize != 32) + op->addr_size = 32; + mod = 2; /* fake disp32 */ + } + if (rm == 4) { /* process SIB */ + scale = (*data >> 6) & 03; + index = (*data >> 3) & 07; + base = *data & 07; + data++; + + op->scale = 1 << scale; + switch (index) { + case 0: op->indexreg = R_EAX; break; + case 1: op->indexreg = R_ECX; break; + case 2: op->indexreg = R_EDX; break; + case 3: op->indexreg = R_EBX; break; + case 4: op->indexreg = -1; break; + case 5: op->indexreg = R_EBP; break; + case 6: op->indexreg = R_ESI; break; + case 7: op->indexreg = R_EDI; break; + } + + switch (base) { + case 0: op->basereg = R_EAX; break; + case 1: op->basereg = R_ECX; break; + case 2: op->basereg = R_EDX; break; + case 3: op->basereg = R_EBX; break; + case 4: op->basereg = R_ESP; break; + case 6: op->basereg = R_ESI; break; + case 7: op->basereg = R_EDI; break; + case 5: + if (mod == 0) { + mod = 2; + op->basereg = -1; + } else + op->basereg = R_EBP; + break; + } + } + switch (mod) { + case 0: + op->segment |= SEG_NODISP; + break; + case 1: + op->segment |= SEG_DISP8; + op->offset = (signed char) *data++; + break; + case 2: + op->segment |= SEG_DISP32; + op->offset = *data++; + op->offset |= (*data++) << 8; + op->offset |= ((long) *data++) << 16; + op->offset |= ((long) *data++) << 24; + break; + } + return data; + } +} + +/* + * Determine whether the code string in r corresponds to the data + * stream in data. Return the number of bytes matched if so. 
+ */ +static int matches (unsigned char *r, unsigned char *data, int asize, + int osize, int segsize, insn *ins) { + unsigned char *origdata = data; + int a_used = FALSE, o_used = FALSE; + + while (*r) { + int c = *r++; + if (c >= 01 && c <= 03) { + while (c--) + if (*r++ != *data++) + return FALSE; + } + if (c == 04) { + switch (*data++) { + case 0x07: ins->oprs[0].basereg = 0; break; + case 0x17: ins->oprs[0].basereg = 2; break; + case 0x1F: ins->oprs[0].basereg = 3; break; + default: return FALSE; + } + } + if (c == 05) { + switch (*data++) { + case 0xA1: ins->oprs[0].basereg = 4; break; + case 0xA9: ins->oprs[0].basereg = 5; break; + default: return FALSE; + } + } + if (c == 06) { + switch (*data++) { + case 0x06: ins->oprs[0].basereg = 0; break; + case 0x0E: ins->oprs[0].basereg = 1; break; + case 0x16: ins->oprs[0].basereg = 2; break; + case 0x1E: ins->oprs[0].basereg = 3; break; + default: return FALSE; + } + } + if (c == 07) { + switch (*data++) { + case 0xA0: ins->oprs[0].basereg = 4; break; + case 0xA8: ins->oprs[0].basereg = 5; break; + default: return FALSE; + } + } + if (c >= 010 && c <= 012) { + int t = *r++, d = *data++; + if (d < t || d > t+7) + return FALSE; + else { + ins->oprs[c-010].basereg = d-t; + ins->oprs[c-010].segment |= SEG_RMREG; + } + } + if (c == 017) + if (*data++) + return FALSE; + if (c >= 014 && c <= 016) { + ins->oprs[c-014].offset = (signed char) *data++; + ins->oprs[c-014].segment |= SEG_SIGNED; + } + if (c >= 020 && c <= 022) + ins->oprs[c-020].offset = *data++; + if (c >= 024 && c <= 026) + ins->oprs[c-024].offset = *data++; + if (c >= 030 && c <= 032) { + ins->oprs[c-030].offset = *data++; + ins->oprs[c-030].offset |= (*data++ << 8); + } + if (c >= 034 && c <= 036) { + ins->oprs[c-034].offset = *data++; + ins->oprs[c-034].offset |= (*data++ << 8); + if (asize == 32) { + ins->oprs[c-034].offset |= (((long) *data++) << 16); + ins->oprs[c-034].offset |= (((long) *data++) << 24); + } + if (segsize != asize) + 
ins->oprs[c-034].addr_size = asize; + } + if (c >= 040 && c <= 042) { + ins->oprs[c-040].offset = *data++; + ins->oprs[c-040].offset |= (*data++ << 8); + ins->oprs[c-040].offset |= (((long) *data++) << 16); + ins->oprs[c-040].offset |= (((long) *data++) << 24); + } + if (c >= 050 && c <= 052) { + ins->oprs[c-050].offset = (signed char) *data++; + ins->oprs[c-050].segment |= SEG_RELATIVE; + } + if (c >= 060 && c <= 062) { + ins->oprs[c-060].offset = *data++; + ins->oprs[c-060].offset |= (*data++ << 8); + ins->oprs[c-060].segment |= SEG_RELATIVE; + ins->oprs[c-060].segment &= ~SEG_32BIT; + } + if (c >= 064 && c <= 066) { + ins->oprs[c-064].offset = *data++; + ins->oprs[c-064].offset |= (*data++ << 8); + if (asize == 32) { + ins->oprs[c-064].offset |= (((long) *data++) << 16); + ins->oprs[c-064].offset |= (((long) *data++) << 24); + ins->oprs[c-064].segment |= SEG_32BIT; + } else + ins->oprs[c-064].segment &= ~SEG_32BIT; + ins->oprs[c-064].segment |= SEG_RELATIVE; + if (segsize != asize) + ins->oprs[c-064].addr_size = asize; + } + if (c >= 070 && c <= 072) { + ins->oprs[c-070].offset = *data++; + ins->oprs[c-070].offset |= (*data++ << 8); + ins->oprs[c-070].offset |= (((long) *data++) << 16); + ins->oprs[c-070].offset |= (((long) *data++) << 24); + ins->oprs[c-070].segment |= SEG_32BIT | SEG_RELATIVE; + } + if (c >= 0100 && c <= 0177) { + int modrm = *data++; + ins->oprs[c & 07].basereg = (modrm >> 3) & 07; + ins->oprs[c & 07].segment |= SEG_RMREG; + data = do_ea (data, modrm, asize, segsize, + &ins->oprs[(c >> 3) & 07]); + } + if (c >= 0200 && c <= 0277) { + int modrm = *data++; + if (((modrm >> 3) & 07) != (c & 07)) + return FALSE; /* spare field doesn't match up */ + data = do_ea (data, modrm, asize, segsize, + &ins->oprs[(c >> 3) & 07]); + } + if (c >= 0300 && c <= 0302) { + /* asize is a bit width (tested against 16 and 32 elsewhere in this function), so compare it explicitly; a bare truth test would always set SEG_32BIT */ + if (asize == 32) + ins->oprs[c-0300].segment |= SEG_32BIT; + else + ins->oprs[c-0300].segment &= ~SEG_32BIT; + a_used = TRUE; + } + if (c == 0310) { + if (asize == 32) + return FALSE; + else +
a_used = TRUE; + } + if (c == 0311) { + if (asize == 16) + return FALSE; + else + a_used = TRUE; + } + if (c == 0312) { + if (asize != segsize) + return FALSE; + else + a_used = TRUE; + } + if (c == 0320) { + if (osize == 32) + return FALSE; + else + o_used = TRUE; + } + if (c == 0321) { + if (osize == 16) + return FALSE; + else + o_used = TRUE; + } + if (c == 0322) { + if (osize != segsize) + return FALSE; + else + o_used = TRUE; + } + if (c == 0330) { + int t = *r++, d = *data++; + if (d < t || d > t+15) + return FALSE; + else + ins->condition = d - t; + } + } + + /* + * Check for unused a/o prefixes. + */ + ins->nprefix = 0; + if (!a_used && asize != segsize) + ins->prefixes[ins->nprefix++] = (asize == 16 ? P_A16 : P_A32); + if (!o_used && osize != segsize) + ins->prefixes[ins->nprefix++] = (osize == 16 ? P_O16 : P_O32); + + return data - origdata; +} + +long disasm (unsigned char *data, char *output, int segsize, long offset, + int autosync) { + struct itemplate **p; + int length = 0; + char *segover; + int rep, lock, asize, osize, i, slen, colon; + unsigned char *origdata; + int works; + insn ins; + + /* + * Scan for prefixes. 
+ */ + asize = osize = segsize; + segover = NULL; + rep = lock = 0; + origdata = data; + for (;;) { + if (*data == 0xF3 || *data == 0xF2) + rep = *data++; + else if (*data == 0xF0) + lock = *data++; + else if (*data == 0x2E || *data == 0x36 || *data == 0x3E || + *data == 0x26 || *data == 0x64 || *data == 0x65) { + switch (*data++) { + case 0x2E: segover = "cs"; break; + case 0x36: segover = "ss"; break; + case 0x3E: segover = "ds"; break; + case 0x26: segover = "es"; break; + case 0x64: segover = "fs"; break; + case 0x65: segover = "gs"; break; + } + } else if (*data == 0x66) + osize = 48 - segsize, data++; + else if (*data == 0x67) + asize = 48 - segsize, data++; + else + break; + } + + ins.oprs[0].segment = ins.oprs[1].segment = ins.oprs[2].segment = + ins.oprs[0].addr_size = ins.oprs[1].addr_size = ins.oprs[2].addr_size = + (segsize == 16 ? 0 : SEG_32BIT); + ins.condition = -1; + works = TRUE; + for (p = itable[*data]; *p; p++) + if ( (length = matches((unsigned char *)((*p)->code), data, + asize, osize, segsize, &ins)) ) { + works = TRUE; + /* + * Final check to make sure the types of r/m match up. + */ + for (i = 0; i < (*p)->operands; i++) + if (((ins.oprs[i].segment & SEG_RMREG) && + !(MEMORY & ~(*p)->opd[i])) || + (!(ins.oprs[i].segment & SEG_RMREG) && + !(REGNORM & ~(*p)->opd[i]) && + !((*p)->opd[i] & REG_SMASK))) + works = FALSE; + if (works) + break; + } + if (!length || !works) + return 0; /* no instruction was matched */ + + slen = 0; + + if (rep) { + slen += sprintf(output+slen, "rep%s ", + (rep == 0xF2 ? "ne" : + (*p)->opcode == I_CMPSB || + (*p)->opcode == I_CMPSW || + (*p)->opcode == I_CMPSD || + (*p)->opcode == I_SCASB || + (*p)->opcode == I_SCASW || + (*p)->opcode == I_SCASD ? 
"e" : "")); + } + if (lock) + slen += sprintf(output+slen, "lock "); + for (i = 0; i < ins.nprefix; i++) + switch (ins.prefixes[i]) { + case P_A16: slen += sprintf(output+slen, "a16 "); break; + case P_A32: slen += sprintf(output+slen, "a32 "); break; + case P_O16: slen += sprintf(output+slen, "o16 "); break; + case P_O32: slen += sprintf(output+slen, "o32 "); break; + } + + for (i = 0; i < elements(ico); i++) + if ((*p)->opcode == ico[i]) { + slen += sprintf(output+slen, "%s%s", icn[i], + whichcond(ins.condition)); + break; + } + if (i >= elements(ico)) + slen += sprintf(output+slen, "%s", insn_names[(*p)->opcode]); + colon = FALSE; + length += data - origdata; /* fix up for prefixes */ + for (i=0; i<(*p)->operands; i++) { + output[slen++] = (colon ? ':' : i==0 ? ' ' : ','); + + if (ins.oprs[i].segment & SEG_RELATIVE) { + ins.oprs[i].offset += offset + length; + /* + * sort out wraparound + */ + if (!(ins.oprs[i].segment & SEG_32BIT)) + ins.oprs[i].offset &= 0xFFFF; + /* + * add sync marker, if autosync is on + */ + if (autosync) + add_sync (ins.oprs[i].offset, 0L); + } + + if ((*p)->opd[i] & COLON) + colon = TRUE; + else + colon = FALSE; + + if (((*p)->opd[i] & (REGISTER | FPUREG)) || + (ins.oprs[i].segment & SEG_RMREG)) { + ins.oprs[i].basereg = whichreg ((*p)->opd[i], + ins.oprs[i].basereg); + slen += sprintf(output+slen, "%s", + reg_names[ins.oprs[i].basereg]); + } else if (!(UNITY & ~(*p)->opd[i])) { + output[slen++] = '1'; + } else if ( (*p)->opd[i] & IMMEDIATE ) { + if ( (*p)->opd[i] & BITS8 ) { + slen += sprintf(output+slen, "byte "); + if (ins.oprs[i].segment & SEG_SIGNED) { + if (ins.oprs[i].offset < 0) { + ins.oprs[i].offset *= -1; + output[slen++] = '-'; + } else + output[slen++] = '+'; + } + } else if ( (*p)->opd[i] & BITS16 ) { + slen += sprintf(output+slen, "word "); + } else if ( (*p)->opd[i] & BITS32 ) { + slen += sprintf(output+slen, "dword "); + } else if ( (*p)->opd[i] & NEAR ) { + slen += sprintf(output+slen, "near "); + } else if ( 
(*p)->opd[i] & SHORT ) { + slen += sprintf(output+slen, "short "); + } + slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset); + } else if ( !(MEM_OFFS & ~(*p)->opd[i]) ) { + slen += sprintf(output+slen, "[%s%s%s0x%lx]", + (segover ? segover : ""), + (segover ? ":" : ""), + (ins.oprs[i].addr_size == 32 ? "dword " : + ins.oprs[i].addr_size == 16 ? "word " : ""), + ins.oprs[i].offset); + segover = NULL; + } else if ( !(REGMEM & ~(*p)->opd[i]) ) { + int started = FALSE; + if ( (*p)->opd[i] & BITS8 ) + slen += sprintf(output+slen, "byte "); + if ( (*p)->opd[i] & BITS16 ) + slen += sprintf(output+slen, "word "); + if ( (*p)->opd[i] & BITS32 ) + slen += sprintf(output+slen, "dword "); + if ( (*p)->opd[i] & BITS64 ) + slen += sprintf(output+slen, "qword "); + if ( (*p)->opd[i] & BITS80 ) + slen += sprintf(output+slen, "tword "); + if ( (*p)->opd[i] & FAR ) + slen += sprintf(output+slen, "far "); + if ( (*p)->opd[i] & NEAR ) + slen += sprintf(output+slen, "near "); + output[slen++] = '['; + if (ins.oprs[i].addr_size) + slen += sprintf(output+slen, "%s", + (ins.oprs[i].addr_size == 32 ? "dword " : + ins.oprs[i].addr_size == 16 ? 
"word " : "")); + if (segover) { + slen += sprintf(output+slen, "%s:", segover); + segover = NULL; + } + if (ins.oprs[i].basereg != -1) { + slen += sprintf(output+slen, "%s", + reg_names[ins.oprs[i].basereg]); + started = TRUE; + } + if (ins.oprs[i].indexreg != -1) { + if (started) + output[slen++] = '+'; + slen += sprintf(output+slen, "%s", + reg_names[ins.oprs[i].indexreg]); + if (ins.oprs[i].scale > 1) + slen += sprintf(output+slen, "*%d", ins.oprs[i].scale); + started = TRUE; + } + if (ins.oprs[i].segment & SEG_DISP8) { + int sign = '+'; + if (ins.oprs[i].offset & 0x80) { + ins.oprs[i].offset = - (signed char) ins.oprs[i].offset; + sign = '-'; + } + slen += sprintf(output+slen, "%c0x%lx", sign, + ins.oprs[i].offset); + } else if (ins.oprs[i].segment & SEG_DISP16) { + if (started) + output[slen++] = '+'; + slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset); + } else if (ins.oprs[i].segment & SEG_DISP32) { + if (started) + output[slen++] = '+'; + slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset); + } + output[slen++] = ']'; + } else { + slen += sprintf(output+slen, "", i); + } + } + output[slen] = '\0'; + if (segover) { /* unused segment override */ + char *p = output; + int count = slen+1; + while (count--) + p[count+3] = p[count]; + strncpy (output, segover, 2); + output[2] = ' '; + } + return length; +} + +long eatbyte (unsigned char *data, char *output) { + sprintf(output, "db 0x%02X", *data); + return 1; +} diff --git a/disasm.h b/disasm.h new file mode 100644 index 00000000..845fd2e4 --- /dev/null +++ b/disasm.h @@ -0,0 +1,18 @@ +/* disasm.h header file for disasm.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#ifndef NASM_DISASM_H +#define NASM_DISASM_H + +#define INSN_MAX 32 /* one instruction can't be longer than this */ + +long disasm (unsigned char *data, char *output, int segsize, long offset, + int autosync); +long eatbyte (unsigned char *data, char *output); + +#endif diff --git a/float.c b/float.c new file mode 100644 index 00000000..e9b7f4a3 --- /dev/null +++ b/float.c @@ -0,0 +1,389 @@ +/* float.c floating-point constant support for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * initial version 13/ix/96 by Simon Tatham + */ + +#include +#include +#include + +#include "nasm.h" + +#define TRUE 1 +#define FALSE 0 + +#define MANT_WORDS 6 /* 64 bits + 32 for accuracy == 96 */ +#define MANT_DIGITS 28 /* 29 digits don't fit in 96 bits */ + +/* + * guaranteed top bit of from is set + * => we only have to worry about _one_ bit shift to the left + */ + +static int multiply(unsigned short *to, unsigned short *from) { + unsigned long temp[MANT_WORDS*2]; + int i, j; + + for (i=0; i> 16; + temp[i+j+1] += n & 0xFFFF; + } + + for (i=MANT_WORDS*2; --i ;) { + temp[i-1] += temp[i] >> 16; + temp[i] &= 0xFFFF; + } + if (temp[0] & 0x8000) { + for (i=0; i= '0' && *string <= '9') { + if (*string == '0' && !started) { + if (seendot) + tenpwr--; + } else { + started = TRUE; + if (p < digits+sizeof(digits)) + *p++ = *string - '0'; + if (!seendot) + tenpwr++; + } + } else { + fprintf(stderr, "`%c' is invalid char\n", *string); + return; + } + string++; + } + if (*string) { + string++; /* eat the E */ + tenpwr += atoi(string); + } + + /* + * At this point, the memory interval [digits,p) contains a + * series of decimal digits zzzzzzz such that our number X + * satisfies + * + * X = 0.zzzzzzz * 10^tenpwr + */ + + bit = 0x8000; + for (m=mant; m q && !p[-1]) + p--; + if (p <= q) 
+ break; + for (r = p; r-- > q ;) { + int i; + + i = 2 * *r + carry; + if (i >= 10) + carry = 1, i -= 10; + else + carry = 0; + *r = i; + } + if (carry) + *m |= bit, started = TRUE; + if (started) { + if (bit == 1) + bit = 0x8000, m++; + else + bit >>= 1; + } else + twopwr--; + } + twopwr += tenpwr; + + /* + * At this point the `mant' array contains the first six + * fractional places of a base-2^16 real number, which when + * multiplied by 2^twopwr and 5^tenpwr gives X. So now we + * really do multiply by 5^tenpwr. + */ + + if (tenpwr < 0) { + for (m=mult; m 0) { + mult[0] = 0xA000; + for (m=mult+1; m>= 1; + } + + /* + * Conversion is done. The elements of `mant' contain the first + * fractional places of a base-2^16 real number in [0.5,1) + * which we can multiply by 2^twopwr to get X. Or, of course, + * it contains zero. + */ + *exponent = twopwr; +} + +/* + * Shift a mantissa to the right by i (i < 16) bits. + */ +static void shr(unsigned short *mant, int i) { + unsigned short n = 0, m; + int j; + + for (j=0; j> i) | n; + n = m; + } +} + +/* + * Round a mantissa off after i words. + */ +static int round(unsigned short *mant, int i) { + if (mant[i] & 0x8000) { + do { + ++mant[--i]; + mant[i] &= 0xFFFF; + } while (i > 0 && !mant[i]); + return !i && !mant[i]; + } + return 0; +} + +#define put(a,b) ( (*(a)=(b)), ((a)[1]=(b)>>8) ) + +static int to_double(char *str, long sign, unsigned char *result, + efunc error) { + unsigned short mant[MANT_WORDS]; + long exponent; + + sign = (sign < 0 ? 0x8000L : 0L); + + flconvert (str, mant, &exponent); + if (mant[0] & 0x8000) { + /* + * Non-zero. + */ + exponent--; + if (exponent >= -1022 && exponent <= 1024) { + /* + * Normalised. + */ + exponent += 1023; + shr(mant, 11); + round(mant, 4); + if (mant[0] & 0x20) /* did we scale up by one? 
*/ + shr(mant, 1), exponent++; + mant[0] &= 0xF; /* remove leading one */ + put(result+6,(exponent << 4) | mant[0] | sign); + put(result+4,mant[1]); + put(result+2,mant[2]); + put(result+0,mant[3]); + } else if (exponent < -1022 && exponent >= -1074) { + /* + * Denormal. + */ + int shift = -(exponent+1011); + int sh = shift % 16, wds = shift / 16; + shr(mant, sh); + if (round(mant, 4-wds) || (sh>0 && (mant[0]&(0x8000>>(sh-1))))) { + shr(mant, 1); + if (sh==0) + mant[0] |= 0x8000; + exponent++; + } + put(result+6,(wds == 0 ? mant[0] : 0) | sign); + put(result+4,(wds <= 1 ? mant[1-wds] : 0)); + put(result+2,(wds <= 2 ? mant[2-wds] : 0)); + put(result+0,(wds <= 3 ? mant[3-wds] : 0)); + } else { + if (exponent > 0) { + error(ERR_NONFATAL, "overflow in floating-point constant"); + return 0; + } else + memset (result, 0, 8); + } + } else { + /* + * Zero. + */ + memset (result, 0, 8); + } + return 1; /* success */ +} + +static int to_float(char *str, long sign, unsigned char *result, + efunc error) { + unsigned short mant[MANT_WORDS]; + long exponent; + + sign = (sign < 0 ? 0x8000L : 0L); + + flconvert (str, mant, &exponent); + if (mant[0] & 0x8000) { + /* + * Non-zero. + */ + exponent--; + if (exponent >= -126 && exponent <= 128) { + /* + * Normalised. + */ + exponent += 127; + shr(mant, 8); + round(mant, 2); + if (mant[0] & 0x100) /* did we scale up by one? */ + shr(mant, 1), exponent++; + mant[0] &= 0x7F; /* remove leading one */ + put(result+2,(exponent << 7) | mant[0] | sign); + put(result+0,mant[1]); + } else if (exponent < -126 && exponent >= -149) { + /* + * Denormal. + */ + int shift = -(exponent+118); + int sh = shift % 16, wds = shift / 16; + shr(mant, sh); + if (round(mant, 2-wds) || (sh>0 && (mant[0]&(0x8000>>(sh-1))))) { + shr(mant, 1); + if (sh==0) + mant[0] |= 0x8000; + exponent++; + } + put(result+2,(wds == 0 ? mant[0] : 0) | sign); + put(result+0,(wds <= 1 ? 
mant[1-wds] : 0)); + } else { + if (exponent > 0) { + error(ERR_NONFATAL, "overflow in floating-point constant"); + return 0; + } else + memset (result, 0, 4); + } + } else { + memset (result, 0, 4); + } + return 1; +} + +static int to_ldoub(char *str, long sign, unsigned char *result, + efunc error) { + unsigned short mant[MANT_WORDS]; + long exponent; + + sign = (sign < 0 ? 0x8000L : 0L); + + flconvert (str, mant, &exponent); + if (mant[0] & 0x8000) { + /* + * Non-zero. + */ + exponent--; + if (exponent >= -16383 && exponent <= 16384) { + /* + * Normalised. + */ + exponent += 16383; + if (round(mant, 4)) /* did we scale up by one? */ + shr(mant, 1), mant[0] |= 0x8000, exponent++; + put(result+8,exponent | sign); + put(result+6,mant[0]); + put(result+4,mant[1]); + put(result+2,mant[2]); + put(result+0,mant[3]); + } else if (exponent < -16383 && exponent >= -16446) { + /* + * Denormal. + */ + int shift = -(exponent+16383); + int sh = shift % 16, wds = shift / 16; + shr(mant, sh); + if (round(mant, 4-wds) || (sh>0 && (mant[0]&(0x8000>>(sh-1))))) { + shr(mant, 1); + if (sh==0) + mant[0] |= 0x8000; + exponent++; + } + put(result+8,sign); + put(result+6,(wds == 0 ? mant[0] : 0)); + put(result+4,(wds <= 1 ? mant[1-wds] : 0)); + put(result+2,(wds <= 2 ? mant[2-wds] : 0)); + put(result+0,(wds <= 3 ? mant[3-wds] : 0)); + } else { + if (exponent > 0) { + error(ERR_NONFATAL, "overflow in floating-point constant"); + return 0; + } else + memset (result, 0, 10); + } + } else { + /* + * Zero. 
+ */ + memset (result, 0, 10); + } + return 1; +} + +int float_const (char *number, long sign, unsigned char *result, int bytes, + efunc error) { + if (bytes == 4) + return to_float (number, sign, result, error); + else if (bytes == 8) + return to_double (number, sign, result, error); + else if (bytes == 10) + return to_ldoub (number, sign, result, error); + else { + error(ERR_PANIC, "strange value %d passed to float_const", bytes); + return 0; + } +} diff --git a/float.h b/float.h new file mode 100644 index 00000000..cc01ec03 --- /dev/null +++ b/float.h @@ -0,0 +1,16 @@ +/* float.h header file for the floating-point constant module of + * the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#ifndef NASM_FLOAT_H +#define NASM_FLOAT_H + +int float_const (char *number, long sign, unsigned char *result, int bytes, + efunc error); + +#endif diff --git a/insns.dat b/insns.dat new file mode 100644 index 00000000..f410613b --- /dev/null +++ b/insns.dat @@ -0,0 +1,984 @@ +; insns.dat table of instructions for the Netwide Assembler +; +; The Netwide Assembler is copyright (C) 1996 Simon Tatham and +; Julian Hall. All rights reserved. The software is +; redistributable under the licence given in the file "Licence" +; distributed in the NASM archive. +; +; Format of file: all four fields must be present on every functional +; line. Hence `void' for no-operand instructions, and `\0' for such +; as EQU. If the last three fields are all `ignore', no action is +; taken except to register the opcode as being present. _ALL_ opcodes +; listed in the `enum' in nasm.h must be present in here, in the +; same order. This is to build the main instruction table for NASM. 
+ +AAA void \1\x37 8086 +AAD void \2\xD5\x0A 8086 +AAD imm \1\xD5\24 8086,UNDOC +AAM void \2\xD4\x0A 8086 +AAM imm \1\xD4\24 8086,UNDOC +AAS void \1\x3F 8086 +ADC mem,reg8 \300\1\x10\101 8086,SM +ADC reg8,reg8 \300\1\x10\101 8086 +ADC mem,reg16 \320\300\1\x11\101 8086,SM +ADC reg16,reg16 \320\300\1\x11\101 8086 +ADC mem,reg32 \321\300\1\x11\101 386,SM +ADC reg32,reg32 \321\300\1\x11\101 386 +ADC reg8,mem \301\1\x12\110 8086,SM +ADC reg8,reg8 \301\1\x12\110 8086 +ADC reg16,mem \320\301\1\x13\110 8086,SM +ADC reg16,reg16 \320\301\1\x13\110 8086 +ADC reg32,mem \321\301\1\x13\110 386,SM +ADC reg32,reg32 \321\301\1\x13\110 386 +ADC rm16,imm8 \320\300\1\x83\202\15 8086 +ADC rm32,imm8 \321\300\1\x83\202\15 386 +ADC reg_al,imm \1\x14\21 8086,SM +ADC reg_ax,imm \320\1\x15\31 8086,SM +ADC reg_eax,imm \321\1\x15\41 386,SM +ADC rm8,imm \300\1\x80\202\21 8086,SM +ADC rm16,imm \320\300\1\x81\202\31 8086,SM +ADC rm32,imm \321\300\1\x81\202\41 386,SM +ADC mem,imm8 \300\1\x80\202\21 8086,SM +ADC mem,imm16 \320\300\1\x81\202\31 8086,SM +ADC mem,imm32 \321\300\1\x81\202\41 386,SM +ADD mem,reg8 \300\17\101 8086,SM +ADD reg8,reg8 \300\17\101 8086 +ADD mem,reg16 \320\300\1\x01\101 8086,SM +ADD reg16,reg16 \320\300\1\x01\101 8086 +ADD mem,reg32 \321\300\1\x01\101 386,SM +ADD reg32,reg32 \321\300\1\x01\101 386 +ADD reg8,mem \301\1\x02\110 8086,SM +ADD reg8,reg8 \301\1\x02\110 8086 +ADD reg16,mem \320\301\1\x03\110 8086,SM +ADD reg16,reg16 \320\301\1\x03\110 8086 +ADD reg32,mem \321\301\1\x03\110 386,SM +ADD reg32,reg32 \321\301\1\x03\110 386 +ADD rm16,imm8 \320\300\1\x83\200\15 8086 +ADD rm32,imm8 \321\300\1\x83\200\15 386 +ADD reg_al,imm \1\x04\21 8086,SM +ADD reg_ax,imm \320\1\x05\31 8086,SM +ADD reg_eax,imm \321\1\x05\41 386,SM +ADD rm8,imm \300\1\x80\200\21 8086,SM +ADD rm16,imm \320\300\1\x81\200\31 8086,SM +ADD rm32,imm \321\300\1\x81\200\41 386,SM +ADD mem,imm8 \300\1\x80\200\21 8086,SM +ADD mem,imm16 \320\300\1\x81\200\31 8086,SM +ADD mem,imm32 \321\300\1\x81\200\41 386,SM +AND 
mem,reg8 \300\1\x20\101 8086,SM +AND reg8,reg8 \300\1\x20\101 8086 +AND mem,reg16 \320\300\1\x21\101 8086,SM +AND reg16,reg16 \320\300\1\x21\101 8086 +AND mem,reg32 \321\300\1\x21\101 386,SM +AND reg32,reg32 \321\300\1\x21\101 386 +AND reg8,mem \301\1\x22\110 8086,SM +AND reg8,reg8 \301\1\x22\110 8086 +AND reg16,mem \320\301\1\x23\110 8086,SM +AND reg16,reg16 \320\301\1\x23\110 8086 +AND reg32,mem \321\301\1\x23\110 386,SM +AND reg32,reg32 \321\301\1\x23\110 386 +AND rm16,imm8 \320\300\1\x83\204\15 8086 +AND rm32,imm8 \321\300\1\x83\204\15 386 +AND reg_al,imm \1\x24\21 8086,SM +AND reg_ax,imm \320\1\x25\31 8086,SM +AND reg_eax,imm \321\1\x25\41 386,SM +AND rm8,imm \300\1\x80\204\21 8086,SM +AND rm16,imm \320\300\1\x81\204\31 8086,SM +AND rm32,imm \321\300\1\x81\204\41 386,SM +AND mem,imm8 \300\1\x80\204\21 8086,SM +AND mem,imm16 \320\300\1\x81\204\31 8086,SM +AND mem,imm32 \321\300\1\x81\204\41 386,SM +ARPL mem,reg16 \300\1\x63\101 286,PRIV,SM +ARPL reg16,reg16 \300\1\x63\101 286,PRIV +BOUND reg16,mem \320\301\1\x62\110 186 +BOUND reg32,mem \321\301\1\x62\110 386 +BSF reg16,mem \320\301\2\x0F\xBC\110 386,SM +BSF reg16,reg16 \320\301\2\x0F\xBC\110 386 +BSF reg32,mem \321\301\2\x0F\xBC\110 386,SM +BSF reg32,reg32 \321\301\2\x0F\xBC\110 386 +BSR reg16,mem \320\301\2\x0F\xBD\110 386,SM +BSR reg16,reg16 \320\301\2\x0F\xBD\110 386 +BSR reg32,mem \321\301\2\x0F\xBD\110 386,SM +BSR reg32,reg32 \321\301\2\x0F\xBD\110 386 +BSWAP reg32 \321\1\x0F\10\xC8 486 +BT mem,reg16 \320\300\2\x0F\xA3\101 386,SM +BT reg16,reg16 \320\300\2\x0F\xA3\101 386 +BT mem,reg32 \321\300\2\x0F\xA3\101 386,SM +BT reg32,reg32 \321\300\2\x0F\xA3\101 386 +BT rm16,imm \320\300\2\x0F\xBA\204\25 386 +BT rm32,imm \321\300\2\x0F\xBA\204\25 386 +BTC mem,reg16 \320\300\2\x0F\xBB\101 386,SM +BTC reg16,reg16 \320\300\2\x0F\xBB\101 386 +BTC mem,reg32 \321\300\2\x0F\xBB\101 386,SM +BTC reg32,reg32 \321\300\2\x0F\xBB\101 386 +BTC rm16,imm \320\300\2\x0F\xBA\207\25 386 +BTC rm32,imm \321\300\2\x0F\xBA\207\25 386 
+BTR mem,reg16 \320\300\2\x0F\xB3\101 386,SM +BTR reg16,reg16 \320\300\2\x0F\xB3\101 386 +BTR mem,reg32 \321\300\2\x0F\xB3\101 386,SM +BTR reg32,reg32 \321\300\2\x0F\xB3\101 386 +BTR rm16,imm \320\300\2\x0F\xBA\206\25 386 +BTR rm32,imm \321\300\2\x0F\xBA\206\25 386 +BTS mem,reg16 \320\300\2\x0F\xAB\101 386,SM +BTS reg16,reg16 \320\300\2\x0F\xAB\101 386 +BTS mem,reg32 \321\300\2\x0F\xAB\101 386,SM +BTS reg32,reg32 \321\300\2\x0F\xAB\101 386 +BTS rm16,imm \320\300\2\x0F\xBA\205\25 386 +BTS rm32,imm \321\300\2\x0F\xBA\205\25 386 +CALL imm \322\1\xE8\64 8086 +CALL imm|far \322\1\x9A\34\37 8086 +CALL imm:imm \322\1\x9A\35\30 8086 +CALL imm16:imm \320\1\x9A\31\30 8086 +CALL imm:imm16 \320\1\x9A\31\30 8086 +CALL imm32:imm \321\1\x9A\41\30 386 +CALL imm:imm32 \321\1\x9A\41\30 386 +CALL mem|far \322\300\1\xFF\203 8086 +CALL mem16|far \320\300\1\xFF\203 8086 +CALL mem32|far \321\300\1\xFF\203 386 +CALL mem|near \322\300\1\xFF\202 8086 +CALL mem16|near \320\300\1\xFF\202 8086 +CALL mem32|near \321\300\1\xFF\202 386 +CALL reg16 \320\300\1\xFF\202 8086 +CALL reg32 \321\300\1\xFF\202 386 +CALL mem \322\300\1\xFF\202 8086 +CALL mem16 \320\300\1\xFF\202 8086 +CALL mem32 \321\300\1\xFF\202 386 +CBW void \320\1\x98 8086 +CDQ void \321\1\x99 386 +CLC void \1\xF8 8086 +CLD void \1\xFC 8086 +CLI void \1\xFA 8086 +CLTS void \2\x0F\x06 286,PRIV +CMC void \1\xF5 8086 +CMP mem,reg8 \300\1\x38\101 8086,SM +CMP reg8,reg8 \300\1\x38\101 8086 +CMP mem,reg16 \320\300\1\x39\101 8086,SM +CMP reg16,reg16 \320\300\1\x39\101 8086 +CMP mem,reg32 \321\300\1\x39\101 386,SM +CMP reg32,reg32 \321\300\1\x39\101 386 +CMP reg8,mem \301\1\x3A\110 8086,SM +CMP reg8,reg8 \301\1\x3A\110 8086 +CMP reg16,mem \320\301\1\x3B\110 8086,SM +CMP reg16,reg16 \320\301\1\x3B\110 8086 +CMP reg32,mem \321\301\1\x3B\110 386,SM +CMP reg32,reg32 \321\301\1\x3B\110 386 +CMP rm16,imm8 \320\300\1\x83\207\15 8086 +CMP rm32,imm8 \321\300\1\x83\207\15 386 +CMP reg_al,imm \1\x3C\21 8086,SM +CMP reg_ax,imm \320\1\x3D\31 8086,SM +CMP 
reg_eax,imm \321\1\x3D\41 386,SM +CMP rm8,imm \300\1\x80\207\21 8086,SM +CMP rm16,imm \320\300\1\x81\207\31 8086,SM +CMP rm32,imm \321\300\1\x81\207\41 386,SM +CMP mem,imm8 \300\1\x80\207\21 8086,SM +CMP mem,imm16 \320\300\1\x81\207\31 8086,SM +CMP mem,imm32 \321\300\1\x81\207\41 386,SM +CMPSB void \1\xA6 8086 +CMPSD void \321\1\xA7 386 +CMPSW void \320\1\xA7 8086 +CMPXCHG mem,reg8 \300\2\x0F\xA6\101 486,SM +CMPXCHG reg8,reg8 \300\2\x0F\xA6\101 486 +CMPXCHG mem,reg16 \320\300\2\x0F\xA7\101 486,SM +CMPXCHG reg16,reg16 \320\300\2\x0F\xA7\101 486 +CMPXCHG mem,reg32 \321\300\2\x0F\xA7\101 486,SM +CMPXCHG reg32,reg32 \321\300\2\x0F\xA7\101 486 +CMPXCHG8B mem \300\2\x0F\xC7\201 PENT +CPUID void \2\x0F\xA2 PENT +CWD void \320\1\x99 8086 +CWDE void \321\1\x98 386 +DAA void \1\x27 8086 +DAS void \1\x2F 8086 +DB ignore ignore ignore +DD ignore ignore ignore +DEC reg16 \320\10\x48 8086 +DEC reg32 \321\10\x48 386 +DEC rm8 \300\1\xFE\201 8086 +DEC rm16 \320\300\1\xFF\201 8086 +DEC rm32 \321\300\1\xFF\201 386 +DIV rm8 \300\1\xF6\206 8086 +DIV rm16 \320\300\1\xF7\206 8086 +DIV rm32 \321\300\1\xF7\206 386 +DQ ignore ignore ignore +DT ignore ignore ignore +DW ignore ignore ignore +EMMS void \2\x0F\x77 PENT,MMX +ENTER imm,imm \1\xC8\30\25 186 +EQU imm \0 8086 +EQU imm:imm \0 8086 +F2XM1 void \2\xD9\xF0 8086,FPU +FABS void \2\xD9\xE1 8086,FPU +FADD mem32 \300\1\xD8\200 8086,FPU +FADD mem64 \300\1\xDC\200 8086,FPU +FADD fpureg|to \1\xDC\10\xC0 8086,FPU +FADD fpureg \1\xD8\10\xC0 8086,FPU +FADD fpureg,fpu0 \1\xDC\10\xC0 8086,FPU +FADD fpu0,fpureg \1\xD8\11\xC0 8086,FPU +FADDP fpureg \1\xDE\10\xC0 8086,FPU +FADDP fpureg,fpu0 \1\xDE\10\xC0 8086,FPU +FBLD mem80 \300\1\xDF\204 8086,FPU +FBSTP mem80 \300\1\xDF\206 8086,FPU +FCHS void \2\xD9\xE0 8086,FPU +FCLEX void \2\xDB\xE2 8086,FPU +FCMOVB fpureg \1\xDA\10\xC0 P6,FPU +FCMOVB fpu0,fpureg \1\xDA\11\xC0 P6,FPU +FCMOVBE fpureg \1\xDA\10\xD0 P6,FPU +FCMOVBE fpu0,fpureg \1\xDA\11\xD0 P6,FPU +FCMOVE fpureg \1\xDA\10\xC8 P6,FPU +FCMOVE 
fpu0,fpureg \1\xDA\11\xC8 P6,FPU +FCMOVNB fpureg \1\xDB\10\xC0 P6,FPU +FCMOVNB fpu0,fpureg \1\xDB\11\xC0 P6,FPU +FCMOVNBE fpureg \1\xDB\10\xD0 P6,FPU +FCMOVNBE fpu0,fpureg \1\xDB\11\xD0 P6,FPU +FCMOVNE fpureg \1\xDB\10\xC8 P6,FPU +FCMOVNE fpu0,fpureg \1\xDB\11\xC8 P6,FPU +FCMOVNU fpureg \1\xDB\10\xD8 P6,FPU +FCMOVNU fpu0,fpureg \1\xDB\11\xD8 P6,FPU +FCMOVU fpureg \1\xDA\10\xD8 P6,FPU +FCMOVU fpu0,fpureg \1\xDA\11\xD8 P6,FPU +FCOM mem32 \300\1\xD8\202 8086,FPU +FCOM mem64 \300\1\xDC\202 8086,FPU +FCOM fpureg \1\xD8\10\xD0 8086,FPU +FCOM fpu0,fpureg \1\xD8\11\xD0 8086,FPU +FCOMI fpureg \1\xDB\10\xF0 P6,FPU +FCOMI fpu0,fpureg \1\xDB\11\xF0 P6,FPU +FCOMIP fpureg \1\xDF\10\xF0 P6,FPU +FCOMIP fpu0,fpureg \1\xDF\11\xF0 P6,FPU +FCOMP mem32 \300\1\xD8\203 8086,FPU +FCOMP mem64 \300\1\xDC\203 8086,FPU +FCOMP fpureg \1\xD8\10\xD8 8086,FPU +FCOMP fpu0,fpureg \1\xD8\11\xD8 8086,FPU +FCOMPP void \2\xDE\xD9 8086,FPU +FCOS void \2\xD9\xFF 386,FPU +FDECSTP void \2\xD9\xF6 8086,FPU +FDISI void \2\xDB\xE1 8086,FPU +FDIV mem32 \300\1\xD8\206 8086,FPU +FDIV mem64 \300\1\xDC\206 8086,FPU +FDIV fpureg|to \1\xDC\10\xF0 8086,FPU +FDIV fpureg,fpu0 \1\xDC\10\xF0 8086,FPU +FDIV fpureg \1\xD8\10\xF0 8086,FPU +FDIV fpu0,fpureg \1\xD8\11\xF0 8086,FPU +FDIVP fpureg,fpu0 \1\xDE\10\xF0 8086,FPU +FDIVP fpureg \1\xDE\10\xF0 8086,FPU +FDIVR mem32 \300\1\xD8\207 8086,FPU +FDIVR mem64 \300\1\xDC\207 8086,FPU +FDIVR fpureg|to \1\xDC\10\xF8 8086,FPU +FDIVR fpureg,fpu0 \1\xDC\10\xF8 8086,FPU +FDIVR fpureg \1\xD8\10\xF8 8086,FPU +FDIVR fpu0,fpureg \1\xD8\11\xF8 8086,FPU +FDIVRP fpureg \1\xDE\10\xF8 8086,FPU +FDIVRP fpureg,fpu0 \1\xDE\10\xF8 8086,FPU +FENI void \2\xDB\xE0 8086,FPU +FFREE fpureg \1\xDD\10\xC0 8086,FPU +FIADD mem32 \300\1\xDA\200 8086,FPU +FIADD mem16 \300\1\xDE\200 8086,FPU +FICOM mem32 \300\1\xDA\202 8086,FPU +FICOM mem16 \300\1\xDE\202 8086,FPU +FICOMP mem32 \300\1\xDA\203 8086,FPU +FICOMP mem16 \300\1\xDE\203 8086,FPU +FIDIV mem32 \300\1\xDA\206 8086,FPU +FIDIV mem16 \300\1\xDE\206 
8086,FPU +FIDIVR mem32 \300\1\xDA\207 8086,FPU +FIDIVR mem16 \300\1\xDE\207 8086,FPU +FILD mem32 \300\1\xDB\200 8086,FPU +FILD mem16 \300\1\xDF\200 8086,FPU +FILD mem64 \300\1\xDF\205 8086,FPU +FIMUL mem32 \300\1\xDA\201 8086,FPU +FIMUL mem16 \300\1\xDE\201 8086,FPU +FINCSTP void \2\xD9\xF7 8086,FPU +FINIT void \2\xDB\xE3 8086,FPU +FIST mem32 \300\1\xDB\202 8086,FPU +FIST mem16 \300\1\xDF\202 8086,FPU +FISTP mem32 \300\1\xDB\203 8086,FPU +FISTP mem16 \300\1\xDF\203 8086,FPU +FISTP mem64 \300\1\xDF\207 8086,FPU +FISUB mem32 \300\1\xDA\204 8086,FPU +FISUB mem16 \300\1\xDE\204 8086,FPU +FISUBR mem32 \300\1\xDA\205 8086,FPU +FISUBR mem16 \300\1\xDE\205 8086,FPU +FLD mem32 \300\1\xD9\200 8086,FPU +FLD mem64 \300\1\xDD\200 8086,FPU +FLD mem80 \300\1\xDB\205 8086,FPU +FLD fpureg \1\xD9\10\xC0 8086,FPU +FLD1 void \2\xD9\xE8 8086,FPU +FLDCW mem \300\1\xD9\205 8086,FPU +FLDENV mem \300\1\xD9\204 8086,FPU +FLDL2E void \2\xD9\xEA 8086,FPU +FLDL2T void \2\xD9\xE9 8086,FPU +FLDLG2 void \2\xD9\xEC 8086,FPU +FLDLN2 void \2\xD9\xED 8086,FPU +FLDPI void \2\xD9\xEB 8086,FPU +FLDZ void \2\xD9\xEE 8086,FPU +FMUL mem32 \300\1\xD8\201 8086,FPU +FMUL mem64 \300\1\xDC\201 8086,FPU +FMUL fpureg|to \1\xDC\10\xC8 8086,FPU +FMUL fpureg,fpu0 \1\xDC\10\xC8 8086,FPU +FMUL fpureg \1\xD8\10\xC8 8086,FPU +FMUL fpu0,fpureg \1\xD8\11\xC8 8086,FPU +FMULP fpureg \1\xDE\10\xC8 8086,FPU +FMULP fpureg,fpu0 \1\xDE\10\xC8 8086,FPU +FNOP void \2\xD9\xD0 8086,FPU +FPATAN void \2\xD9\xF3 8086,FPU +FPREM void \2\xD9\xF8 8086,FPU +FPREM1 void \2\xD9\xF5 386,FPU +FPTAN void \2\xD9\xF2 8086,FPU +FRNDINT void \2\xD9\xFC 8086,FPU +FRSTOR mem \300\1\xDD\204 8086,FPU +FSAVE mem \300\1\xDD\206 8086,FPU +FSCALE void \2\xD9\xFD 8086,FPU +FSETPM void \2\xDB\xE4 286,FPU +FSIN void \2\xD9\xFE 386,FPU +FSINCOS void \2\xD9\xFB 386,FPU +FSQRT void \2\xD9\xFA 8086,FPU +FST mem32 \300\1\xD9\202 8086,FPU +FST mem64 \300\1\xDD\202 8086,FPU +FST fpureg \1\xDD\10\xD0 8086,FPU +FSTCW mem \300\1\xD9\207 8086,FPU +FSTENV mem 
\300\1\xD9\206 8086,FPU +FSTP mem32 \300\1\xD9\203 8086,FPU +FSTP mem64 \300\1\xDD\203 8086,FPU +FSTP mem80 \300\1\xDB\207 8086,FPU +FSTP fpureg \1\xDD\10\xD8 8086,FPU +FSTSW mem \300\1\xDD\207 8086,FPU +FSTSW reg_ax \2\xDF\xE0 286,FPU +FSUB mem32 \300\1\xD8\204 8086,FPU +FSUB mem64 \300\1\xDC\204 8086,FPU +FSUB fpureg|to \1\xDC\10\xE0 8086,FPU +FSUB fpureg,fpu0 \1\xDC\10\xE0 8086,FPU +FSUB fpureg \1\xD8\10\xE0 8086,FPU +FSUB fpu0,fpureg \1\xD8\11\xE0 8086,FPU +FSUBP fpureg \1\xDE\10\xE0 8086,FPU +FSUBP fpureg,fpu0 \1\xDE\10\xE0 8086,FPU +FSUBR mem32 \300\1\xD8\205 8086,FPU +FSUBR mem64 \300\1\xDC\205 8086,FPU +FSUBR fpureg|to \1\xDC\10\xE8 8086,FPU +FSUBR fpureg,fpu0 \1\xDC\10\xE8 8086,FPU +FSUBR fpureg \1\xD8\10\xE8 8086,FPU +FSUBR fpu0,fpureg \1\xD8\11\xE8 8086,FPU +FSUBRP fpureg \1\xDE\10\xE8 8086,FPU +FSUBRP fpureg,fpu0 \1\xDE\10\xE8 8086,FPU +FTST void \2\xD9\xE4 8086,FPU +FUCOM fpureg \1\xDD\10\xE0 386,FPU +FUCOMI fpureg \1\xDB\10\xE8 P6,FPU +FUCOMI fpu0,fpureg \1\xDB\11\xE8 P6,FPU +FUCOMIP fpureg \1\xDF\10\xE8 P6,FPU +FUCOMIP fpu0,fpureg \1\xDF\11\xE8 P6,FPU +FUCOMP fpureg \1\xDD\10\xE8 386,FPU +FUCOMPP void \2\xDA\xE9 386,FPU +FXAM void \2\xD9\xE5 8086,FPU +FXCH void \2\xD9\xC9 8086,FPU +FXCH fpureg \1\xD9\10\xC8 8086,FPU +FXCH fpureg,fpu0 \1\xD9\10\xC8 8086,FPU +FXCH fpu0,fpureg \1\xD9\11\xC8 8086,FPU +FXTRACT void \2\xD9\xF4 8086,FPU +FYL2X void \2\xD9\xF1 8086,FPU +FYL2XP1 void \2\xD9\xF9 8086,FPU +HLT void \1\xF4 8086 +ICEBP void \1\xF1 286,UNDOC +IDIV rm8 \300\1\xF6\207 8086 +IDIV rm16 \320\300\1\xF7\207 8086 +IDIV rm32 \321\300\1\xF7\207 386 +IMUL rm8 \300\1\xF6\205 8086 +IMUL rm16 \320\300\1\xF7\205 8086 +IMUL rm32 \321\300\1\xF7\205 386 +IMUL reg16,mem \320\301\2\x0F\xAF\110 386,SM +IMUL reg16,reg16 \320\301\2\x0F\xAF\110 386 +IMUL reg32,mem \321\301\2\x0F\xAF\110 386,SM +IMUL reg32,reg32 \321\301\2\x0F\xAF\110 386 +IMUL reg16,mem,imm8 \320\301\1\x6B\110\16 286,SM +IMUL reg16,reg16,imm8 \320\301\1\x6B\110\16 286 +IMUL reg16,mem,imm 
\320\301\1\x69\110\32 286,SM +IMUL reg16,reg16,imm \320\301\1\x69\110\32 286 +IMUL reg32,mem,imm8 \321\301\1\x6B\110\16 386,SM +IMUL reg32,reg32,imm8 \321\301\1\x6B\110\16 386 +IMUL reg32,mem,imm \321\301\1\x69\110\42 386,SM +IMUL reg32,reg32,imm \321\301\1\x69\110\42 386,SM +IMUL reg16,imm8 \320\1\x6B\100\15 286 +IMUL reg16,imm \320\1\x69\100\31 286,SM +IMUL reg32,imm8 \321\1\x6B\100\15 386 +IMUL reg32,imm \321\1\x69\100\41 386,SM +IN reg_al,imm \1\xE4\25 8086 +IN reg_ax,imm \320\1\xE5\25 8086 +IN reg_eax,imm \321\1\xE5\25 386 +IN reg_al,reg_dx \1\xEC 8086 +IN reg_ax,reg_dx \320\1\xED 8086 +IN reg_eax,reg_dx \321\1\xED 386 +INC reg16 \320\10\x40 8086 +INC reg32 \321\10\x40 386 +INC rm8 \300\1\xFE\200 8086 +INC rm16 \320\300\1\xFF\200 8086 +INC rm32 \321\300\1\xFF\200 386 +INSB void \1\x6C 186 +INSD void \321\1\x6D 386 +INSW void \320\1\x6D 186 +INT imm \1\xCD\24 8086 +INT01 void \1\xF1 286,UNDOC +INT1 void \1\xF1 286,UNDOC +INT3 void \1\xCC 8086 +INTO void \1\xCE 8086 +INVD void \2\x0F\x08 486 +INVLPG mem \300\2\x0F\x01\207 486 +IRET void \1\xCF 8086 +IRETD void \321\1\xCF 386 +IRETW void \320\1\xCF 8086 +JCXZ imm \320\1\xE3\50 8086 +JECXZ imm \321\1\xE3\50 386 +JMP imm|short \1\xEB\50 8086 +JMP imm \322\1\xE9\64 8086 +JMP imm|far \322\1\xEA\34\37 8086 +JMP imm:imm \322\1\xEA\35\30 8086 +JMP imm16:imm \320\1\xEA\31\30 8086 +JMP imm:imm16 \320\1\xEA\31\30 8086 +JMP imm32:imm \321\1\xEA\41\30 386 +JMP imm:imm32 \321\1\xEA\41\30 386 +JMP mem|far \322\300\1\xFF\205 8086 +JMP mem16|far \320\300\1\xFF\205 8086 +JMP mem32|far \321\300\1\xFF\205 386 +JMP mem|near \322\300\1\xFF\204 8086 +JMP mem16|near \320\300\1\xFF\204 8086 +JMP mem32|near \321\300\1\xFF\204 386 +JMP reg16 \320\300\1\xFF\204 8086 +JMP reg32 \321\300\1\xFF\204 386 +JMP mem \322\300\1\xFF\204 8086 +JMP mem16 \320\300\1\xFF\204 8086 +JMP mem32 \321\300\1\xFF\204 386 +LAHF void \1\x9F 8086 +LAR reg16,mem \320\301\2\x0F\x02\110 286,PRIV,SM +LAR reg16,reg16 \320\301\2\x0F\x02\110 286,PRIV +LAR reg32,mem 
\321\301\2\x0F\x02\110 286,PRIV,SM +LAR reg32,reg32 \321\301\2\x0F\x02\110 286,PRIV +LDS reg16,mem \320\301\1\xC5\110 8086 +LDS reg32,mem \321\301\1\xC5\110 8086 +LEA reg16,mem \320\301\1\x8D\110 8086 +LEA reg32,mem \321\301\1\x8D\110 8086 +LEAVE void \1\xC9 186 +LES reg16,mem \320\301\1\xC4\110 8086 +LES reg32,mem \321\301\1\xC4\110 8086 +LFS reg16,mem \320\301\2\x0F\xB4\110 386 +LFS reg32,mem \321\301\2\x0F\xB4\110 386 +LGDT mem \300\2\x0F\x01\202 286,PRIV +LGS reg16,mem \320\301\2\x0F\xB5\110 386 +LGS reg32,mem \321\301\2\x0F\xB5\110 386 +LIDT mem \300\2\x0F\x01\203 286,PRIV +LLDT mem \300\1\x0F\17\202 286,PRIV +LLDT mem16 \300\1\x0F\17\202 286,PRIV +LLDT reg16 \300\1\x0F\17\202 286,PRIV +LMSW mem \300\2\x0F\x01\206 286,PRIV +LMSW mem16 \300\2\x0F\x01\206 286,PRIV +LMSW reg16 \300\2\x0F\x01\206 286,PRIV +LOADALL void \2\x0F\x07 386,UNDOC +LODSB void \1\xAC 8086 +LODSD void \321\1\xAD 386 +LODSW void \320\1\xAD 8086 +LOOP imm \312\1\xE2\50 8086 +LOOP imm,reg_cx \310\1\xE2\50 8086 +LOOP imm,reg_ecx \311\1\xE2\50 386 +LOOPE imm \312\1\xE1\50 8086 +LOOPE imm,reg_cx \310\1\xE1\50 8086 +LOOPE imm,reg_ecx \311\1\xE1\50 386 +LOOPNE imm \312\1\xE0\50 8086 +LOOPNE imm,reg_cx \310\1\xE0\50 8086 +LOOPNE imm,reg_ecx \311\1\xE0\50 386 +LOOPNZ imm \312\1\xE0\50 8086 +LOOPNZ imm,reg_cx \310\1\xE0\50 8086 +LOOPNZ imm,reg_ecx \311\1\xE0\50 386 +LOOPZ imm \312\1\xE1\50 8086 +LOOPZ imm,reg_cx \310\1\xE1\50 8086 +LOOPZ imm,reg_ecx \311\1\xE1\50 386 +LSL reg16,mem \320\301\2\x0F\x03\110 286,PRIV,SM +LSL reg16,reg16 \320\301\2\x0F\x03\110 286,PRIV +LSL reg32,mem \321\301\2\x0F\x03\110 286,PRIV,SM +LSL reg32,reg32 \321\301\2\x0F\x03\110 286,PRIV +LSS reg16,mem \320\301\2\x0F\xB2\110 386 +LSS reg32,mem \321\301\2\x0F\xB2\110 386 +LTR mem \300\1\x0F\17\203 286,PRIV +LTR mem16 \300\1\x0F\17\203 286,PRIV +LTR reg16 \300\1\x0F\17\203 286,PRIV +MOV mem,reg_cs \300\1\x8C\101 8086,SM +MOV mem,reg_dess \300\1\x8C\101 8086,SM +MOV mem,reg_fsgs \300\1\x8C\101 386,SM +MOV reg16,reg_cs 
\300\1\x8C\101 8086 +MOV reg16,reg_dess \300\1\x8C\101 8086 +MOV reg16,reg_fsgs \300\1\x8C\101 386 +MOV reg_dess,mem \301\1\x8E\110 8086,SM +MOV reg_dess,reg16 \301\1\x8E\110 8086 +MOV reg_fsgs,mem \301\1\x8E\110 386,SM +MOV reg_fsgs,reg16 \301\1\x8E\110 386 +MOV reg_al,mem_offs \301\1\xA0\35 8086,SM +MOV reg_ax,mem_offs \301\320\1\xA1\35 8086,SM +MOV reg_eax,mem_offs \301\321\1\xA1\35 386,SM +MOV mem_offs,reg_al \300\1\xA2\34 8086,SM +MOV mem_offs,reg_ax \300\320\1\xA3\34 8086,SM +MOV mem_offs,reg_eax \300\321\1\xA3\34 386,SM +MOV reg32,reg_cr4 \2\x0F\x20\204 PENT +MOV reg32,reg_creg \2\x0F\x20\101 386 +MOV reg32,reg_dreg \2\x0F\x21\101 386 +MOV reg32,reg_treg \2\x0F\x24\101 386 +MOV reg_cr4,reg32 \2\x0F\x22\214 PENT +MOV reg_creg,reg32 \2\x0F\x22\110 386 +MOV reg_dreg,reg32 \2\x0F\x23\110 386 +MOV reg_treg,reg32 \2\x0F\x26\110 386 +MOV mem,reg8 \300\1\x88\101 8086,SM +MOV reg8,reg8 \300\1\x88\101 8086 +MOV mem,reg16 \320\300\1\x89\101 8086,SM +MOV reg16,reg16 \320\300\1\x89\101 8086 +MOV mem,reg32 \321\300\1\x89\101 386,SM +MOV reg32,reg32 \321\300\1\x89\101 386 +MOV reg8,mem \301\1\x8A\110 8086,SM +MOV reg8,reg8 \301\1\x8A\110 8086 +MOV reg16,mem \320\301\1\x8B\110 8086,SM +MOV reg16,reg16 \320\301\1\x8B\110 8086 +MOV reg32,mem \321\301\1\x8B\110 386,SM +MOV reg32,reg32 \321\301\1\x8B\110 386 +MOV reg8,imm \10\xB0\21 8086,SM +MOV reg16,imm \320\10\xB8\31 8086,SM +MOV reg32,imm \321\10\xB8\41 386,SM +MOV rm8,imm \300\1\xC6\200\21 8086,SM +MOV rm16,imm \320\300\1\xC7\200\31 8086,SM +MOV rm32,imm \321\300\1\xC7\200\41 386,SM +MOV mem,imm8 \300\1\xC6\200\21 8086,SM +MOV mem,imm16 \320\300\1\xC7\200\31 8086,SM +MOV mem,imm32 \321\300\1\xC7\200\41 386,SM +MOVD mmxreg,mem \301\2\x0F\x6E\110 PENT,MMX,SD +MOVD mmxreg,reg32 \2\x0F\x6E\110 PENT,MMX +MOVD mem,mmxreg \300\2\x0F\x7E\101 PENT,MMX,SD +MOVD reg32,mmxreg \2\x0F\x7E\101 PENT,MMX +MOVQ mmxreg,mem \301\2\x0F\x6F\110 PENT,MMX,SM +MOVQ mmxreg,mmxreg \2\x0F\x6F\110 PENT,MMX +MOVQ mem,mmxreg \300\2\x0F\x7F\101 
PENT,MMX,SM +MOVQ mmxreg,mmxreg \2\x0F\x7F\101 PENT,MMX +MOVSB void \1\xA4 8086 +MOVSD void \321\1\xA5 386 +MOVSW void \320\1\xA5 8086 +MOVSX reg16,mem \320\301\2\x0F\xBE\110 386,SB +MOVSX reg16,reg8 \320\301\2\x0F\xBE\110 386 +MOVSX reg32,rm8 \321\301\2\x0F\xBE\110 386 +MOVSX reg32,rm16 \321\301\2\x0F\xBF\110 386 +MOVZX reg16,mem \320\301\2\x0F\xB6\110 386,SB +MOVZX reg16,reg8 \320\301\2\x0F\xB6\110 386 +MOVZX reg32,rm8 \321\301\2\x0F\xB6\110 386 +MOVZX reg32,rm16 \321\301\2\x0F\xB7\110 386 +MUL rm8 \300\1\xF6\204 8086 +MUL rm16 \320\300\1\xF7\204 8086 +MUL rm32 \321\300\1\xF7\204 386 +NEG rm8 \300\1\xF6\203 8086 +NEG rm16 \320\300\1\xF7\203 8086 +NEG rm32 \321\300\1\xF7\203 386 +NOP void \1\x90 8086 +NOT rm8 \300\1\xF6\202 8086 +NOT rm16 \320\300\1\xF7\202 8086 +NOT rm32 \321\300\1\xF7\202 386 +OR mem,reg8 \300\1\x08\101 8086,SM +OR reg8,reg8 \300\1\x08\101 8086 +OR mem,reg16 \320\300\1\x09\101 8086,SM +OR reg16,reg16 \320\300\1\x09\101 8086 +OR mem,reg32 \321\300\1\x09\101 386,SM +OR reg32,reg32 \321\300\1\x09\101 386 +OR reg8,mem \301\1\x0A\110 8086,SM +OR reg8,reg8 \301\1\x0A\110 8086 +OR reg16,mem \320\301\1\x0B\110 8086,SM +OR reg16,reg16 \320\301\1\x0B\110 8086 +OR reg32,mem \321\301\1\x0B\110 386,SM +OR reg32,reg32 \321\301\1\x0B\110 386 +OR rm16,imm8 \320\300\1\x83\201\15 8086 +OR rm32,imm8 \321\300\1\x83\201\15 386 +OR reg_al,imm \1\x0C\21 8086,SM +OR reg_ax,imm \320\1\x0D\31 8086,SM +OR reg_eax,imm \321\1\x0D\41 386,SM +OR rm8,imm \300\1\x80\201\21 8086,SM +OR rm16,imm \320\300\1\x81\201\31 8086,SM +OR rm32,imm \321\300\1\x81\201\41 386,SM +OR mem,imm8 \300\1\x80\201\21 8086,SM +OR mem,imm16 \320\300\1\x81\201\31 8086,SM +OR mem,imm32 \321\300\1\x81\201\41 386,SM +OUT imm,reg_al \1\xE6\24 8086 +OUT imm,reg_ax \320\1\xE7\24 8086 +OUT imm,reg_eax \321\1\xE7\24 386 +OUT reg_dx,reg_al \1\xEE 8086 +OUT reg_dx,reg_ax \320\1\xEF 8086 +OUT reg_dx,reg_eax \321\1\xEF 386 +OUTSB void \1\x6E 186 +OUTSD void \321\1\x6F 386 +OUTSW void \320\1\x6F 186 +PACKSSDW 
mmxreg,mem \301\2\x0F\x6B\110 PENT,MMX,SM +PACKSSDW mmxreg,mmxreg \2\x0F\x6B\110 PENT,MMX +PACKSSWB mmxreg,mem \301\2\x0F\x63\110 PENT,MMX,SM +PACKSSWB mmxreg,mmxreg \2\x0F\x63\110 PENT,MMX +PACKUSWB mmxreg,mem \301\2\x0F\x67\110 PENT,MMX,SM +PACKUSWB mmxreg,mmxreg \2\x0F\x67\110 PENT,MMX +PADDB mmxreg,mem \301\2\x0F\xFC\110 PENT,MMX,SM +PADDB mmxreg,mmxreg \2\x0F\xFC\110 PENT,MMX +PADDD mmxreg,mem \301\2\x0F\xFE\110 PENT,MMX,SM +PADDD mmxreg,mmxreg \2\x0F\xFE\110 PENT,MMX +PADDSB mmxreg,mem \301\2\x0F\xEC\110 PENT,MMX,SM +PADDSB mmxreg,mmxreg \2\x0F\xEC\110 PENT,MMX +PADDSW mmxreg,mem \301\2\x0F\xED\110 PENT,MMX,SM +PADDSW mmxreg,mmxreg \2\x0F\xED\110 PENT,MMX +PADDUSB mmxreg,mem \301\2\x0F\xDC\110 PENT,MMX,SM +PADDUSB mmxreg,mmxreg \2\x0F\xDC\110 PENT,MMX +PADDUSW mmxreg,mem \301\2\x0F\xDD\110 PENT,MMX,SM +PADDUSW mmxreg,mmxreg \2\x0F\xDD\110 PENT,MMX +PADDW mmxreg,mem \301\2\x0F\xFD\110 PENT,MMX,SM +PADDW mmxreg,mmxreg \2\x0F\xFD\110 PENT,MMX +PAND mmxreg,mem \301\2\x0F\xDB\110 PENT,MMX,SM +PAND mmxreg,mmxreg \2\x0F\xDB\110 PENT,MMX +PANDN mmxreg,mem \301\2\x0F\xDF\110 PENT,MMX,SM +PANDN mmxreg,mmxreg \2\x0F\xDF\110 PENT,MMX +PCMPEQB mmxreg,mem \301\2\x0F\x74\110 PENT,MMX,SM +PCMPEQB mmxreg,mmxreg \2\x0F\x74\110 PENT,MMX +PCMPEQD mmxreg,mem \301\2\x0F\x76\110 PENT,MMX,SM +PCMPEQD mmxreg,mmxreg \2\x0F\x76\110 PENT,MMX +PCMPEQW mmxreg,mem \301\2\x0F\x75\110 PENT,MMX,SM +PCMPEQW mmxreg,mmxreg \2\x0F\x75\110 PENT,MMX +PCMPGTB mmxreg,mem \301\2\x0F\x64\110 PENT,MMX,SM +PCMPGTB mmxreg,mmxreg \2\x0F\x64\110 PENT,MMX +PCMPGTD mmxreg,mem \301\2\x0F\x66\110 PENT,MMX,SM +PCMPGTD mmxreg,mmxreg \2\x0F\x66\110 PENT,MMX +PCMPGTW mmxreg,mem \301\2\x0F\x65\110 PENT,MMX,SM +PCMPGTW mmxreg,mmxreg \2\x0F\x65\110 PENT,MMX +PMADDWD mmxreg,mem \301\2\x0F\xF5\110 PENT,MMX,SM +PMADDWD mmxreg,mmxreg \2\x0F\xF5\110 PENT,MMX +PMULHW mmxreg,mem \301\2\x0F\xE5\110 PENT,MMX,SM +PMULHW mmxreg,mmxreg \2\x0F\xE5\110 PENT,MMX +PMULLW mmxreg,mem \301\2\x0F\xD5\110 PENT,MMX,SM +PMULLW mmxreg,mmxreg 
\2\x0F\xD5\110 PENT,MMX +POP mem16 \320\300\1\x8F\200 8086 +POP mem32 \321\300\1\x8F\200 386 +POP reg_dess \4 8086 +POP reg_fsgs \1\x0F\5 386 +POP reg16 \320\10\x58 8086 +POP reg32 \321\10\x58 386 +POPA void \1\x61 186 +POPAD void \321\1\x61 386 +POPAW void \320\1\x61 186 +POPF void \1\x9D 186 +POPFD void \321\1\x9D 386 +POPFW void \320\1\x9D 186 +POR mmxreg,mem \301\2\x0F\xEB\110 PENT,MMX,SM +POR mmxreg,mmxreg \2\x0F\xEB\110 PENT,MMX +PSLLD mmxreg,mem \301\2\x0F\xF2\110 PENT,MMX,SM +PSLLD mmxreg,mmxreg \2\x0F\xF2\110 PENT,MMX +PSLLD mmxreg,imm \2\x0F\x72\206\25 PENT,MMX +PSLLQ mmxreg,mem \301\2\x0F\xF3\110 PENT,MMX,SM +PSLLQ mmxreg,mmxreg \2\x0F\xF3\110 PENT,MMX +PSLLQ mmxreg,imm \2\x0F\x73\206\25 PENT,MMX +PSLLW mmxreg,mem \301\2\x0F\xF1\110 PENT,MMX,SM +PSLLW mmxreg,mmxreg \2\x0F\xF1\110 PENT,MMX +PSLLW mmxreg,imm \2\x0F\x71\206\25 PENT,MMX +PSRAD mmxreg,mem \301\2\x0F\xE2\110 PENT,MMX,SM +PSRAD mmxreg,mmxreg \2\x0F\xE2\110 PENT,MMX +PSRAD mmxreg,imm \2\x0F\x72\204\25 PENT,MMX +PSRAW mmxreg,mem \301\2\x0F\xE1\110 PENT,MMX,SM +PSRAW mmxreg,mmxreg \2\x0F\xE1\110 PENT,MMX +PSRAW mmxreg,imm \2\x0F\x71\204\25 PENT,MMX +PSRLD mmxreg,mem \301\2\x0F\xD2\110 PENT,MMX,SM +PSRLD mmxreg,mmxreg \2\x0F\xD2\110 PENT,MMX +PSRLD mmxreg,imm \2\x0F\x72\202\25 PENT,MMX +PSRLQ mmxreg,mem \301\2\x0F\xD3\110 PENT,MMX,SM +PSRLQ mmxreg,mmxreg \2\x0F\xD3\110 PENT,MMX +PSRLQ mmxreg,imm \2\x0F\x73\202\25 PENT,MMX +PSRLW mmxreg,mem \301\2\x0F\xD1\110 PENT,MMX,SM +PSRLW mmxreg,mmxreg \2\x0F\xD1\110 PENT,MMX +PSRLW mmxreg,imm \2\x0F\x71\202\25 PENT,MMX +PSUBB mmxreg,mem \301\2\x0F\xF8\110 PENT,MMX,SM +PSUBB mmxreg,mmxreg \2\x0F\xF8\110 PENT,MMX +PSUBD mmxreg,mem \301\2\x0F\xFA\110 PENT,MMX,SM +PSUBD mmxreg,mmxreg \2\x0F\xFA\110 PENT,MMX +PSUBSB mmxreg,mem \301\2\x0F\xE8\110 PENT,MMX,SM +PSUBSB mmxreg,mmxreg \2\x0F\xE8\110 PENT,MMX +PSUBSW mmxreg,mem \301\2\x0F\xE9\110 PENT,MMX,SM +PSUBSW mmxreg,mmxreg \2\x0F\xE9\110 PENT,MMX +PSUBUSB mmxreg,mem \301\2\x0F\xD8\110 PENT,MMX,SM +PSUBUSB 
mmxreg,mmxreg \2\x0F\xD8\110 PENT,MMX +PSUBUSW mmxreg,mem \301\2\x0F\xD9\110 PENT,MMX,SM +PSUBUSW mmxreg,mmxreg \2\x0F\xD9\110 PENT,MMX +PSUBW mmxreg,mem \301\2\x0F\xF9\110 PENT,MMX,SM +PSUBW mmxreg,mmxreg \2\x0F\xF9\110 PENT,MMX +PUNPCKHBW mmxreg,mem \301\2\x0F\x68\110 PENT,MMX,SM +PUNPCKHBW mmxreg,mmxreg \2\x0F\x68\110 PENT,MMX +PUNPCKHDQ mmxreg,mem \301\2\x0F\x6A\110 PENT,MMX,SM +PUNPCKHDQ mmxreg,mmxreg \2\x0F\x6A\110 PENT,MMX +PUNPCKHWD mmxreg,mem \301\2\x0F\x69\110 PENT,MMX,SM +PUNPCKHWD mmxreg,mmxreg \2\x0F\x69\110 PENT,MMX +PUNPCKLBW mmxreg,mem \301\2\x0F\x60\110 PENT,MMX,SM +PUNPCKLBW mmxreg,mmxreg \2\x0F\x60\110 PENT,MMX +PUNPCKLDQ mmxreg,mem \301\2\x0F\x62\110 PENT,MMX,SM +PUNPCKLDQ mmxreg,mmxreg \2\x0F\x62\110 PENT,MMX +PUNPCKLWD mmxreg,mem \301\2\x0F\x61\110 PENT,MMX,SM +PUNPCKLWD mmxreg,mmxreg \2\x0F\x61\110 PENT,MMX +PUSH mem16 \320\300\1\xFF\206 8086 +PUSH mem32 \321\300\1\xFF\206 386 +PUSH reg_fsgs \1\x0F\7 386 +PUSH reg_sreg \6 8086 +PUSH reg16 \320\10\x50 8086 +PUSH reg32 \321\10\x50 386 +PUSH imm8 \1\x6A\14 286 +PUSH imm16 \320\1\x68\30 286 +PUSH imm32 \321\1\x68\40 386 +PUSHA void \1\x60 186 +PUSHAD void \321\1\x60 386 +PUSHAW void \320\1\x60 186 +PUSHF void \1\x9C 186 +PUSHFD void \321\1\x9C 386 +PUSHFW void \320\1\x9C 186 +PXOR mmxreg,mem \301\2\x0F\xEF\110 PENT,MMX,SM +PXOR mmxreg,mmxreg \2\x0F\xEF\110 PENT,MMX +RCL rm8,unity \300\1\xD0\202 8086 +RCL rm8,reg_cl \300\1\xD2\202 8086 +RCL rm8,imm \300\1\xC0\202\25 286 +RCL rm16,unity \320\300\1\xD1\202 8086 +RCL rm16,reg_cl \320\300\1\xD3\202 8086 +RCL rm16,imm \320\300\1\xC1\202\25 286 +RCL rm32,unity \321\300\1\xD1\202 386 +RCL rm32,reg_cl \321\300\1\xD3\202 386 +RCL rm32,imm \321\300\1\xC1\202\25 386 +RCR rm8,unity \300\1\xD0\203 8086 +RCR rm8,reg_cl \300\1\xD2\203 8086 +RCR rm8,imm \300\1\xC0\203\25 286 +RCR rm16,unity \320\300\1\xD1\203 8086 +RCR rm16,reg_cl \320\300\1\xD3\203 8086 +RCR rm16,imm \320\300\1\xC1\203\25 286 +RCR rm32,unity \321\300\1\xD1\203 386 +RCR rm32,reg_cl 
\321\300\1\xD3\203 386 +RCR rm32,imm \321\300\1\xC1\203\25 386 +RDMSR void \2\x0F\x32 PENT +RDPMC void \2\x0F\x33 P6 +RDTSC void \2\x0F\x31 PENT +RESB imm \340 8086 +RESD ignore ignore ignore +RESQ ignore ignore ignore +REST ignore ignore ignore +RESW ignore ignore ignore +RET void \1\xC3 8086 +RET imm \1\xC2\30 8086 +RETF void \1\xCB 8086 +RETF imm \1\xCA\30 8086 +RETN void \1\xC3 8086 +RETN imm \1\xC2\30 8086 +ROL rm8,unity \300\1\xD0\200 8086 +ROL rm8,reg_cl \300\1\xD2\200 8086 +ROL rm8,imm \300\1\xC0\200\25 286 +ROL rm16,unity \320\300\1\xD1\200 8086 +ROL rm16,reg_cl \320\300\1\xD3\200 8086 +ROL rm16,imm \320\300\1\xC1\200\25 286 +ROL rm32,unity \321\300\1\xD1\200 386 +ROL rm32,reg_cl \321\300\1\xD3\200 386 +ROL rm32,imm \321\300\1\xC1\200\25 386 +ROR rm8,unity \300\1\xD0\201 8086 +ROR rm8,reg_cl \300\1\xD2\201 8086 +ROR rm8,imm \300\1\xC0\201\25 286 +ROR rm16,unity \320\300\1\xD1\201 8086 +ROR rm16,reg_cl \320\300\1\xD3\201 8086 +ROR rm16,imm \320\300\1\xC1\201\25 286 +ROR rm32,unity \321\300\1\xD1\201 386 +ROR rm32,reg_cl \321\300\1\xD3\201 386 +ROR rm32,imm \321\300\1\xC1\201\25 386 +RSM void \2\x0F\xAA PENT +SAHF void \1\x9E 8086 +SAL rm8,unity \300\1\xD0\204 8086,ND +SAL rm8,reg_cl \300\1\xD2\204 8086,ND +SAL rm8,imm \300\1\xC0\204\25 286,ND +SAL rm16,unity \320\300\1\xD1\204 8086,ND +SAL rm16,reg_cl \320\300\1\xD3\204 8086,ND +SAL rm16,imm \320\300\1\xC1\204\25 286,ND +SAL rm32,unity \321\300\1\xD1\204 386,ND +SAL rm32,reg_cl \321\300\1\xD3\204 386,ND +SAL rm32,imm \321\300\1\xC1\204\25 386,ND +SALC void \1\xD6 8086,UNDOC +SAR rm8,unity \300\1\xD0\207 8086 +SAR rm8,reg_cl \300\1\xD2\207 8086 +SAR rm8,imm \300\1\xC0\207\25 286 +SAR rm16,unity \320\300\1\xD1\207 8086 +SAR rm16,reg_cl \320\300\1\xD3\207 8086 +SAR rm16,imm \320\300\1\xC1\207\25 286 +SAR rm32,unity \321\300\1\xD1\207 386 +SAR rm32,reg_cl \321\300\1\xD3\207 386 +SAR rm32,imm \321\300\1\xC1\207\25 386 +SBB mem,reg8 \300\1\x18\101 8086,SM +SBB reg8,reg8 \300\1\x18\101 8086 +SBB mem,reg16 
\320\300\1\x19\101 8086,SM +SBB reg16,reg16 \320\300\1\x19\101 8086 +SBB mem,reg32 \321\300\1\x19\101 386,SM +SBB reg32,reg32 \321\300\1\x19\101 386 +SBB reg8,mem \301\1\x1A\110 8086,SM +SBB reg8,reg8 \301\1\x1A\110 8086 +SBB reg16,mem \320\301\1\x1B\110 8086,SM +SBB reg16,reg16 \320\301\1\x1B\110 8086 +SBB reg32,mem \321\301\1\x1B\110 386,SM +SBB reg32,reg32 \321\301\1\x1B\110 386 +SBB rm16,imm8 \320\300\1\x83\203\15 8086 +SBB rm32,imm8 \321\300\1\x83\203\15 386 +SBB reg_al,imm \1\x1C\21 8086,SM +SBB reg_ax,imm \320\1\x1D\31 8086,SM +SBB reg_eax,imm \321\1\x1D\41 386,SM +SBB rm8,imm \300\1\x80\203\21 8086,SM +SBB rm16,imm \320\300\1\x81\203\31 8086,SM +SBB rm32,imm \321\300\1\x81\203\41 386,SM +SBB mem,imm8 \300\1\x80\203\21 8086,SM +SBB mem,imm16 \320\300\1\x81\203\31 8086,SM +SBB mem,imm32 \321\300\1\x81\203\41 386,SM +SCASB void \1\xAE 8086 +SCASD void \321\1\xAF 386 +SCASW void \320\1\xAF 8086 +SGDT mem \300\2\x0F\x01\200 286,PRIV +SHL rm8,unity \300\1\xD0\204 8086 +SHL rm8,reg_cl \300\1\xD2\204 8086 +SHL rm8,imm \300\1\xC0\204\25 286 +SHL rm16,unity \320\300\1\xD1\204 8086 +SHL rm16,reg_cl \320\300\1\xD3\204 8086 +SHL rm16,imm \320\300\1\xC1\204\25 286 +SHL rm32,unity \321\300\1\xD1\204 386 +SHL rm32,reg_cl \321\300\1\xD3\204 386 +SHL rm32,imm \321\300\1\xC1\204\25 386 +SHLD mem,reg16,imm \300\320\2\x0F\xA4\101\26 386,SM2 +SHLD reg16,reg16,imm \300\320\2\x0F\xA4\101\26 386,SM2 +SHLD mem,reg32,imm \300\321\2\x0F\xA4\101\26 386,SM2 +SHLD reg32,reg32,imm \300\321\2\x0F\xA4\101\26 386,SM2 +SHLD mem,reg16,reg_cl \300\320\2\x0F\xA5\101 386,SM +SHLD reg16,reg16,reg_cl \300\320\2\x0F\xA5\101 386 +SHLD mem,reg32,reg_cl \300\321\2\x0F\xA5\101 386,SM +SHLD reg32,reg32,reg_cl \300\321\2\x0F\xA5\101 386 +SHR rm8,unity \300\1\xD0\205 8086 +SHR rm8,reg_cl \300\1\xD2\205 8086 +SHR rm8,imm \300\1\xC0\205\25 286 +SHR rm16,unity \320\300\1\xD1\205 8086 +SHR rm16,reg_cl \320\300\1\xD3\205 8086 +SHR rm16,imm \320\300\1\xC1\205\25 286 +SHR rm32,unity \321\300\1\xD1\205 386 +SHR 
rm32,reg_cl \321\300\1\xD3\205 386 +SHR rm32,imm \321\300\1\xC1\205\25 386 +SHRD mem,reg16,imm \300\320\2\x0F\xAC\101\26 386,SM2 +SHRD reg16,reg16,imm \300\320\2\x0F\xAC\101\26 386,SM2 +SHRD mem,reg32,imm \300\321\2\x0F\xAC\101\26 386,SM2 +SHRD reg32,reg32,imm \300\321\2\x0F\xAC\101\26 386,SM2 +SHRD mem,reg16,reg_cl \300\320\2\x0F\xAD\101 386,SM +SHRD reg16,reg16,reg_cl \300\320\2\x0F\xAD\101 386 +SHRD mem,reg32,reg_cl \300\321\2\x0F\xAD\101 386,SM +SHRD reg32,reg32,reg_cl \300\321\2\x0F\xAD\101 386 +SIDT mem \300\2\x0F\x01\201 286,PRIV +SLDT mem \300\1\x0F\17\200 286,PRIV +SLDT mem16 \300\1\x0F\17\200 286,PRIV +SLDT reg16 \300\1\x0F\17\200 286,PRIV +SMSW mem \300\2\x0F\x01\204 286,PRIV +SMSW reg16 \300\2\x0F\x01\204 286,PRIV +STC void \1\xF9 8086 +STD void \1\xFD 8086 +STI void \1\xFB 8086 +STOSB void \1\xAA 8086 +STOSD void \321\1\xAB 386 +STOSW void \320\1\xAB 8086 +STR mem \300\1\x0F\17\201 286,PRIV +STR mem16 \300\1\x0F\17\201 286,PRIV +STR reg16 \300\1\x0F\17\201 286,PRIV +SUB mem,reg8 \300\1\x28\101 8086,SM +SUB reg8,reg8 \300\1\x28\101 8086 +SUB mem,reg16 \320\300\1\x29\101 8086,SM +SUB reg16,reg16 \320\300\1\x29\101 8086 +SUB mem,reg32 \321\300\1\x29\101 386,SM +SUB reg32,reg32 \321\300\1\x29\101 386 +SUB reg8,mem \301\1\x2A\110 8086,SM +SUB reg8,reg8 \301\1\x2A\110 8086 +SUB reg16,mem \320\301\1\x2B\110 8086,SM +SUB reg16,reg16 \320\301\1\x2B\110 8086 +SUB reg32,mem \321\301\1\x2B\110 386,SM +SUB reg32,reg32 \321\301\1\x2B\110 386 +SUB rm16,imm8 \320\300\1\x83\205\15 8086 +SUB rm32,imm8 \321\300\1\x83\205\15 386 +SUB reg_al,imm \1\x2C\21 8086,SM +SUB reg_ax,imm \320\1\x2D\31 8086,SM +SUB reg_eax,imm \321\1\x2D\41 386,SM +SUB rm8,imm \300\1\x80\205\21 8086,SM +SUB rm16,imm \320\300\1\x81\205\31 8086,SM +SUB rm32,imm \321\300\1\x81\205\41 386,SM +SUB mem,imm8 \300\1\x80\205\21 8086,SM +SUB mem,imm16 \320\300\1\x81\205\31 8086,SM +SUB mem,imm32 \321\300\1\x81\205\41 386,SM +TEST mem,reg8 \300\1\x84\101 8086,SM +TEST reg8,reg8 \300\1\x84\101 8086 +TEST 
mem,reg16 \320\300\1\x85\101 8086,SM +TEST reg16,reg16 \320\300\1\x85\101 8086 +TEST mem,reg32 \321\300\1\x85\101 386,SM +TEST reg32,reg32 \321\300\1\x85\101 386 +TEST reg_al,imm \1\xA8\21 8086,SM +TEST reg_ax,imm \320\1\xA9\31 8086,SM +TEST reg_eax,imm \321\1\xA9\41 386,SM +TEST rm8,imm \300\1\xF6\200\21 8086,SM +TEST rm16,imm \320\300\1\xF7\200\31 8086,SM +TEST rm32,imm \321\300\1\xF7\200\41 386,SM +TEST mem,imm8 \300\1\xF6\200\21 8086,SM +TEST mem,imm16 \320\300\1\xF7\200\31 8086,SM +TEST mem,imm32 \321\300\1\xF7\200\41 386,SM +UMOV mem,reg8 \300\2\x0F\x10\101 386,UNDOC,SM +UMOV reg8,reg8 \300\2\x0F\x10\101 386,UNDOC +UMOV mem,reg16 \320\300\2\x0F\x11\101 386,UNDOC,SM +UMOV reg16,reg16 \320\300\2\x0F\x11\101 386,UNDOC +UMOV mem,reg32 \321\300\2\x0F\x11\101 386,UNDOC,SM +UMOV reg32,reg32 \321\300\2\x0F\x11\101 386,UNDOC +UMOV reg8,mem \301\2\x0F\x12\110 386,UNDOC,SM +UMOV reg8,reg8 \301\2\x0F\x12\110 386,UNDOC +UMOV reg16,mem \320\301\2\x0F\x13\110 386,UNDOC,SM +UMOV reg16,reg16 \320\301\2\x0F\x13\110 386,UNDOC +UMOV reg32,mem \321\301\2\x0F\x13\110 386,UNDOC,SM +UMOV reg32,reg32 \321\301\2\x0F\x13\110 386,UNDOC +VERR mem \300\1\x0F\17\204 286,PRIV +VERR mem16 \300\1\x0F\17\204 286,PRIV +VERR reg16 \300\1\x0F\17\204 286,PRIV +VERW mem \300\1\x0F\17\205 286,PRIV +VERW mem16 \300\1\x0F\17\205 286,PRIV +VERW reg16 \300\1\x0F\17\205 286,PRIV +WAIT void \1\x9B 8086 +WBINVD void \2\x0F\x09 486 +WRMSR void \2\x0F\x30 PENT +XADD mem,reg8 \300\2\x0F\xC0\101 486,SM +XADD reg8,reg8 \300\2\x0F\xC0\101 486 +XADD mem,reg16 \320\300\2\x0F\xC1\101 486,SM +XADD reg16,reg16 \320\300\2\x0F\xC1\101 486 +XADD mem,reg32 \321\300\2\x0F\xC1\101 486,SM +XADD reg32,reg32 \321\300\2\x0F\xC1\101 486 +XCHG reg_ax,reg16 \320\11\x90 8086 +XCHG reg_eax,reg32 \321\11\x90 386 +XCHG reg16,reg_ax \320\10\x90 8086 +XCHG reg32,reg_eax \321\10\x90 386 +XCHG reg8,mem \301\1\x86\110 8086,SM +XCHG reg8,reg8 \301\1\x86\110 8086 +XCHG reg16,mem \320\301\1\x87\110 8086,SM +XCHG reg16,reg16 
\320\301\1\x87\110 8086 +XCHG reg32,mem \321\301\1\x87\110 386,SM +XCHG reg32,reg32 \321\301\1\x87\110 386 +XCHG mem,reg8 \300\1\x86\101 8086,SM +XCHG reg8,reg8 \300\1\x86\101 8086 +XCHG mem,reg16 \320\300\1\x87\101 8086,SM +XCHG reg16,reg16 \320\300\1\x87\101 8086 +XCHG mem,reg32 \321\300\1\x87\101 386,SM +XCHG reg32,reg32 \321\300\1\x87\101 386 +XLATB void \1\xD7 8086 +XOR mem,reg8 \300\1\x30\101 8086,SM +XOR reg8,reg8 \300\1\x30\101 8086 +XOR mem,reg16 \320\300\1\x31\101 8086,SM +XOR reg16,reg16 \320\300\1\x31\101 8086 +XOR mem,reg32 \321\300\1\x31\101 386,SM +XOR reg32,reg32 \321\300\1\x31\101 386 +XOR reg8,mem \301\1\x32\110 8086,SM +XOR reg8,reg8 \301\1\x32\110 8086 +XOR reg16,mem \320\301\1\x33\110 8086,SM +XOR reg16,reg16 \320\301\1\x33\110 8086 +XOR reg32,mem \321\301\1\x33\110 386,SM +XOR reg32,reg32 \321\301\1\x33\110 386 +XOR rm16,imm8 \320\300\1\x83\206\15 8086 +XOR rm32,imm8 \321\300\1\x83\206\15 386 +XOR reg_al,imm \1\x34\21 8086,SM +XOR reg_ax,imm \320\1\x35\31 8086,SM +XOR reg_eax,imm \321\1\x35\41 386,SM +XOR rm8,imm \300\1\x80\206\21 8086,SM +XOR rm16,imm \320\300\1\x81\206\31 8086,SM +XOR rm32,imm \321\300\1\x81\206\41 386,SM +XOR mem,imm8 \300\1\x80\206\21 8086,SM +XOR mem,imm16 \320\300\1\x81\206\31 8086,SM +XOR mem,imm32 \321\300\1\x81\206\41 386,SM +CMOVcc reg16,mem \320\301\1\x0F\330\x40\110 P6,SM +CMOVcc reg16,reg16 \320\301\1\x0F\330\x40\110 P6 +CMOVcc reg32,mem \321\301\1\x0F\330\x40\110 P6,SM +CMOVcc reg32,reg32 \321\301\1\x0F\330\x40\110 P6 +Jcc imm|near \322\1\x0F\330\x80\64 386 +Jcc imm \330\x70\50 8086 +Jcc imm|short \330\x70\50 8086 +SETcc mem \300\1\x0F\330\x90\200 386,SB +SETcc reg8 \300\1\x0F\330\x90\200 386 diff --git a/insns.h b/insns.h new file mode 100644 index 00000000..c42790da --- /dev/null +++ b/insns.h @@ -0,0 +1,66 @@ +/* insns.h header file for insns.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. 
The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#ifndef NASM_INSNS_H +#define NASM_INSNS_H + +struct itemplate { + int opcode; /* the token, passed from "parser.c" */ + int operands; /* number of operands */ + long opd[3]; /* bit flags for operand types */ + char *code; /* the code it assembles to */ + int flags; /* some flags */ +}; + +/* + * Instruction template flags. These specify which processor + * targets the instruction is eligible for, whether it is + * privileged or undocumented, and also specify extra error + * checking on the matching of the instruction. + * + * IF_SM stands for Size Match: any operand whose size is not + * explicitly specified by the template is `really' intended to be + * the same size as the first size-specified operand. + * Non-specification is tolerated in the input instruction, but + * _wrong_ specification is not. + * + * IF_SM2 invokes Size Match on only the first _two_ operands, for + * three-operand instructions such as SHLD: it implies that the + * first two operands must match in size, but that the third is + * required to be _unspecified_. + * + * IF_SB invokes Size Byte: operands with unspecified size in the + * template are really bytes, and so no non-byte specification in + * the input instruction will be tolerated. + * + * IF_SD similarly invokes Size Doubleword. + * + * (The default state if neither IF_SM nor IF_SM2 is specified is + * that any operand with unspecified size in the template is + * required to have unspecified size in the instruction too...) 
+ */ + +#define IF_SM 0x0001 /* size match */ +#define IF_SM2 0x0002 /* size match first two operands */ +#define IF_SB 0x0004 /* unsized operands can't be non-byte */ +#define IF_SD 0x0008 /* unsized operands can't be nondword */ +#define IF_8086 0x0000 /* 8086 instruction */ +#define IF_186 0x0010 /* 186+ instruction */ +#define IF_286 0x0020 /* 286+ instruction */ +#define IF_386 0x0030 /* 386+ instruction */ +#define IF_486 0x0040 /* 486+ instruction */ +#define IF_PENT 0x0050 /* Pentium instruction */ +#define IF_P6 0x0060 /* P6 instruction */ +#define IF_PMASK 0x00F0 /* the mask for processor types */ +#define IF_PRIV 0x0100 /* it's a privileged instruction */ +#define IF_UNDOC 0x0200 /* it's an undocumented instruction */ +#define IF_FPU 0x0400 /* it's an FPU instruction */ +#define IF_MMX 0x0800 /* it's an MMX instruction */ +#define IF_ND 0x1000 /* ignore this in the disassembler */ + +#endif diff --git a/insns.pl b/insns.pl new file mode 100644 index 00000000..275a66bc --- /dev/null +++ b/insns.pl @@ -0,0 +1,160 @@ +#!/usr/bin/perl +# +# insns.pl produce insnsa.c and insnsd.c from insns.dat +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. 
+ +print STDERR "Reading insns.dat...\n"; + +open (F, "insns.dat") || die "unable to open insns.dat"; + +# Every line of insns.dat that is neither a comment nor blank must hold +# four whitespace-separated fields: opcode, operands, code string, flags. +$line = 0; +$opcodes = 0; +$insns = 0; +while (<F>) { + $line++; + next if /^\s*;/; # comments + chomp; + @_ = split; # explicit: newer Perls no longer split into @_ implicitly + next if $#_ == -1; # blank lines + (warn "line $line does not contain four fields\n"), next if $#_ != 3; + $formatted = &format(@_); + if ($formatted) { + $insns++; + $aname = "aa_$_[0]"; + push @$aname, $formatted; + } + $opcodes[$opcodes++] = $_[0], $done{$_[0]} = 1 if !$done{$_[0]}; + if ($formatted && $formatted !~ /IF_ND/) { + push @big, $formatted; + foreach $i (&startbyte($_[2])) { + $aname = sprintf "dd_%02X",$i; + push @$aname, $#big; + } + } +} + +close F; + +print STDERR "Writing insnsa.c...\n"; + +open (A, ">insnsa.c") || die "unable to open insnsa.c"; + +print A "/* This file auto-generated from insns.dat by insns.pl" . + " - don't edit it */\n\n"; +print A "#include <stdio.h>\n"; +print A "#include \"nasm.h\"\n"; +print A "#include \"insns.h\"\n"; +print A "\n"; + +foreach $i (@opcodes) { + print A "static struct itemplate instrux_${i}[] = {\n"; + $aname = "aa_$i"; + foreach $j (@$aname) { + print A " $j\n"; + } + print A " {-1}\n};\n\n"; +} +print A "struct itemplate *nasm_instructions[] = {\n"; +foreach $i (@opcodes) { + print A " instrux_${i},\n"; +} +print A "};\n"; + +close A; + +print STDERR "Writing insnsd.c...\n"; + +open (D, ">insnsd.c") || die "unable to open insnsd.c"; + +print D "/* This file auto-generated from insns.dat by insns.pl" . 
+ " - don't edit it */\n\n"; +print D "#include <stdio.h>\n"; +print D "#include \"nasm.h\"\n"; +print D "#include \"insns.h\"\n"; +print D "\n"; + +print D "static struct itemplate instrux[] = {\n"; +foreach $j (@big) { + print D " $j\n"; +} +print D " {-1}\n};\n\n"; + +for ($c=0; $c<256; $c++) { + $h = sprintf "%02X", $c; + print D "static struct itemplate *itable_${h}[] = {\n"; + $aname = "dd_$h"; + foreach $j (@$aname) { + print D " instrux + $j,\n"; + } + print D " NULL\n};\n\n"; +} + +print D "struct itemplate **itable[] = {\n"; +for ($c=0; $c<256; $c++) { + printf D " itable_%02X,\n", $c; +} +print D "};\n"; + +close D; + +printf STDERR "Done: %d instructions\n", $insns; + +# Turn the four fields of one insns.dat line into the text of a C +# initialiser for a `struct itemplate'. Returns undef when the operand +# field is "ignore"; missing operands are padded with 0 up to three. +sub format { + local ($opcode, $operands, $codes, $flags) = @_; + local $num; + + return undef if $operands eq "ignore"; + + # format the operands + $operands =~ s/:/|colon,/g; + $operands =~ s/mem(\d+)/mem|bits$1/g; + $operands =~ s/mem/memory/g; + $operands =~ s/memory_offs/mem_offs/g; + $operands =~ s/imm(\d+)/imm|bits$1/g; + $operands =~ s/imm/immediate/g; + $operands =~ s/rm(\d+)/regmem|bits$1/g; + $num = 3; + $operands = '0,0,0', $num = 0 if $operands eq 'void'; + $operands .= ',0', $num-- while $operands !~ /,.*,/; + $operands =~ tr/a-z/A-Z/; + + # format the flags + $flags =~ s/,/|IF_/g; + $flags = "IF_" . $flags; + + "{I_$opcode, $num, {$operands}, \"$codes\", $flags},"; +} + +# Here we determine the range of possible starting bytes for a given +# instruction. 
We need only consider the codes: +# \1 \2 \3 mean literal bytes, of course +# \4 \5 \6 \7 mean PUSH/POP of segment registers: special case +# \10 \11 \12 mean byte plus register value +# \17 means byte zero +# \330 means byte plus condition code +# \0 or \340 mean give up and return empty set +# Takes the code string of one instruction and returns the list of +# byte values it can start with (empty if that cannot be determined). +sub startbyte { # FIXME we cheat, for now :-) + local ($codes) = @_; + local ($word, $start, $r, @range); # parenthesised so all four are localised + + while (1) { + die "couldn't get code in '$codes'" if $codes !~ /^(\\[^\\]+)(\\.*)?$/; + $word = $1, $codes = $2; + return (hex $1) if $word =~ /^\\[123]$/ && $codes =~ /^\\x(..)/; + return (0x07, 0x17, 0x1F) if $word eq "\\4"; + return (0xA1, 0xA9) if $word eq "\\5"; + return (0x06, 0x0E, 0x16, 0x1E) if $word eq "\\6"; + return (0xA0, 0xA8) if $word eq "\\7"; + $start=hex $1, $r=8, last if $word =~ /^\\1[012]$/ && $codes =~/^\\x(..)/; + return (0) if $word eq "\\17"; + $start=hex $1, $r=16, last if $word =~ /^\\330$/ && $codes =~ /^\\x(..)/; + return () if $word eq "\\0" || $word eq "\\340"; + } + # $r consecutive byte values beginning at $start + @range = (); + push @range, $start++ while ($r-- > 0); + @range; +} diff --git a/internal.doc b/internal.doc new file mode 100644 index 00000000..f04152a3 --- /dev/null +++ b/internal.doc @@ -0,0 +1,268 @@ +Internals of the Netwide Assembler +================================== + +The Netwide Assembler is intended to be a modular, re-usable x86 +assembler, which can be embedded in other programs, for example as +the back end to a compiler. + +The assembler is composed of modules. The interfaces between them +look like: + + +---- parser.c ----+ + | | | + | float.c | + | | + +--- assemble.c ---+ + | | | + nasm.c ---+ insnsa.c +--- nasmlib.c + | | + +---- labels.c ----+ + | | + +--- outform.c ----+ + | | + +----- *out.c -----+ + +In other words, each of `parser.c', `assemble.c', `labels.c', +`outform.c' and each of the output format modules `*out.c' are +independent modules, which do not inter-communicate except through +the main program. 
+ +The Netwide *Disassembler* is not intended to be particularly +portable or reusable or anything, however. So I won't bother +documenting it here. :-) + +nasmlib.c +--------- + +This is a library module; it contains simple library routines which +may be referenced by all other modules. Among these are a set of +wrappers around the standard `malloc' routines, which will report a +fatal error if they run out of memory, rather than returning NULL. + +parser.c +-------- + +This contains a source-line parser. It parses `canonical' assembly +source lines, containing some combination of the `label', `opcode', +`operand' and `comment' fields: it does not process directives or +macros. It exports two functions: `parse_line' and `cleanup_insn'. + +`parse_line' is the main parser function: you pass it a source line +in ASCII text form, and it returns you an `insn' structure +containing all the details of the instruction on that line. The +parameters it requires are: + +- The location (segment, offset) where the instruction on this line + will eventually be placed. This is necessary in order to evaluate + expressions containing the Here token, `$'. + +- A function which can be called to retrieve the value of any + symbols the source line references. + +- Which pass the assembler is on: an undefined symbol only causes an + error condition on pass two. + +- The source line to be parsed. + +- A structure to fill with the results of the parse. + +- A function which can be called to report errors. + +Some instructions (DB, DW, DD for example) can require an arbitrary +amount of storage, and so some of the members of the resulting +`insn' structure will be dynamically allocated. The other function +exported by `parser.c' is `cleanup_insn', which can be called to +deallocate any dynamic storage associated with the results of a +parse. 
+ +names.c +------- + +This doesn't count as a module - it defines a few arrays which are +shared between NASM and NDISASM, so it's a separate file which is +#included by both parser.c and disasm.c. + +float.c +------- + +This is essentially a library module: it exports one function, +`float_const', which converts an ASCII representation of a +floating-point number into an x86-compatible binary representation, +without using any built-in floating-point arithmetic (so it will run +on any platform, portably). It calls nothing, and is called only by +`parser.c'. Note that the function `float_const' must be passed an +error reporting routine. + +assemble.c +---------- + +This module contains the code generator: it translates `insn' +structures as returned from the parser module into actual generated +code which can be placed in an output file. It exports two +functions, `assemble' and `insn_size'. + +`insn_size' is designed to be called on pass one of assembly: it +takes an `insn' structure as input, and returns the amount of space +that would be taken up if the instruction described in the structure +were to be converted to real machine code. `insn_size' also requires +to be told the location (as a segment/offset pair) where the +instruction would be assembled, the mode of assembly (16/32 bit +default), and a function it can call to report errors. + +`assemble' is designed to be called on pass two: it takes all the +parameters that `insn_size' does, but has an extra parameter which +is an output driver. `assemble' actually converts the input +instruction into machine code, and outputs the machine code by means +of calling the `output' function of the driver. + +insnsa.c +-------- + +This is another library module: it exports one very big array of +instruction translations. It has to be a separate module so that DOS +compilers, with less memory to spare than typical Unix ones, can +cope with it. + +labels.c +-------- + +This module contains a label manager. 
It exports six functions: + +`init_labels' should be called before any other function in the +module. `cleanup_labels' may be called after all other use of the +module has finished, to deallocate storage. + +`define_label' is called to define new labels: you pass it the name +of the label to be defined, and the (segment,offset) pair giving the +value of the label. It is also passed an error-reporting function, +and an output driver structure (so that it can call the output +driver's label-definition function). `define_label' mentally +prepends the name of the most recently defined non-local label to +any label beginning with a period. + +`define_label_stub' is designed to be called in pass two, once all +the labels have already been defined: it does nothing except to +update the "most-recently-defined-non-local-label" status, so that +references to local labels in pass two will work correctly. + +`declare_as_global' is used to declare that a label should be +global. It must be called _before_ the label in question is defined. + +Finally, `lookup_label' attempts to translate a label name into a +(segment,offset) pair. It returns non-zero on success. + +The label manager module is (theoretically :) restartable: after +calling `cleanup_labels', you can call `init_labels' again, and +start a new assembly with a new set of symbols. + +outform.c +--------- + +This small module contains a set of routines to manage a list of +output formats, and select one given a keyword. It contains three +small routines: `ofmt_register' which registers an output driver as +part of the managed list, `ofmt_list' which lists the available +drivers on stdout, and `ofmt_find' which tries to find the driver +corresponding to a given name. 
+ +The output modules +------------------ + +Each of the output modules, `binout.o', `elfout.o' and so on, +exports only one symbol, which is an output driver data structure +containing pointers to all the functions needed to produce output +files of the appropriate type. + +The exception to this is `coffout.o', which exports _two_ output +driver structures, since COFF and Win32 object file formats are very +similar and most of the code is shared between them. + +nasm.c +------ + +This is the main program: it calls all the functions in the above +modules, and puts them together to form a working assembler. We +hope. :-) + +Segment Mechanism +----------------- + +In NASM, the term `segment' is used to separate the different +sections/segments/groups of which an object file is composed. +Essentially, every address NASM is capable of understanding is +expressed as an offset from the beginning of some segment. + +The defining property of a segment is that if two symbols are +declared in the same segment, then the distance between them is +fixed at assembly time. Hence every externally-declared variable +must be declared in its own segment, since none of the locations of +these are known, and so no distances may be computed at assembly +time. + +The special segment value NO_SEG (-1) is used to denote an absolute +value, e.g. a constant whose value does not depend on relocation, +such as the _size_ of a data object. + +Apart from NO_SEG, segment indices all have their least significant +bit clear, if they refer to actual in-memory segments. For each +segment of this type, there is an auxiliary segment value, defined +to be the same number but with the LSB set, which denotes the +segment-base value of that segment, for object formats which support +it (Microsoft .OBJ, for example). + +Hence, if `textsym' is declared in a code segment with index 2, then +referencing `SEG textsym' would return zero offset from +segment-index 3. 
Or, in object formats which don't understand such +references, it would return an error instead. + +The next twist is SEG_ABS. Some symbols may be declared with a +segment value of SEG_ABS plus a 16-bit constant: this indicates that +they are far-absolute symbols, such as the BIOS keyboard buffer +under MS-DOS, which always resides at 0040h:001Eh. Far-absolutes are +handled with care in the parser, since they are supposed to evaluate +simply to their offset part within expressions, but applying SEG to +one should yield its segment part. A far-absolute should never find +its way _out_ of the parser, unless it is enclosed in a WRT clause, +in which case Microsoft 16-bit object formats will want to know +about it. + +Porting Issues +-------------- + +We have tried to write NASM in portable ANSI C: we do not assume +little-endianness or any hardware characteristics (in order that +NASM should work as a cross-assembler for x86 platforms, even when +run on other, stranger machines). + +Assumptions we _have_ made are: + +- We assume that `short' is at least 16 bits, and `long' at least + 32. This really _shouldn't_ be a problem, since Kernighan and + Ritchie tell us we are entitled to do so. + +- We rely on having more than 6 characters of significance on + externally linked symbols in the NASM sources. This may get fixed + at some point. We haven't yet come across a linker brain-dead + enough to get it wrong anyway. + +- We assume that `fopen' using the mode "wb" can be used to write + binary data files. This may be wrong on systems like VMS, with a + strange file system. Though why you'd want to run NASM on VMS is + beyond me anyway. + +That's it. Subject to those caveats, NASM should be completely +portable. If not, we _really_ want to know about it. + +Porting Non-Issues +------------------ + +The following is _not_ a portability problem, although it looks like +one. 
+ +- When compiling with some versions of DJGPP, you may get errors + such as `warning: ANSI C forbids braced-groups within + expressions'. This isn't NASM's fault - the problem seems to be + that DJGPP's definitions of the <ctype.h> macros include a + GNU-specific C extension. So when compiling using -ansi and + -pedantic, DJGPP complains about its own header files. It isn't a + problem anyway, since it still generates correct code. diff --git a/labels.c b/labels.c new file mode 100644 index 00000000..ff1d571a --- /dev/null +++ b/labels.c @@ -0,0 +1,292 @@ +/* labels.c label handling for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "nasm.h" +#include "nasmlib.h" + +/* + * A local label is one that begins with exactly one period. Things + * that begin with _two_ periods are NASM-specific things. + */ +#define islocal(l) ((l)[0] == '.' && (l)[1] != '.') + +#define LABEL_BLOCK 320 /* no. of labels/block */ +#define LBLK_SIZE (LABEL_BLOCK*sizeof(union label)) +#define LABEL_HASHES 32 /* no. of hash table entries */ + +#define END_LIST -3 /* don't clash with NO_SEG! */ +#define END_BLOCK -2 +#define BOGUS_VALUE -4 + +#define PERMTS_SIZE 4096 /* size of text blocks */ + +/* values for label.defn.is_global */ +#define NOT_DEFINED_YET 0 +#define LOCAL_SYMBOL 1 +#define GLOBAL_SYMBOL 2 +#define GLOBAL_PLACEHOLDER 3 + +union label { /* actual label structures */ + struct { + long segment, offset; + char *label; + int is_global; + } defn; + struct { + long movingon, dummy; + union label *next; + } admin; +}; + +struct permts { /* permanent text storage */ + struct permts *next; /* for the linked list */ + int size, usage; /* size and used space in ... */ + char data[PERMTS_SIZE]; /* ... 
the data block itself */ +}; + +static union label *ltab[LABEL_HASHES];/* using a hash table */ +static union label *lfree[LABEL_HASHES];/* pointer into the above */ +static struct permts *perm_head; /* start of perm. text storage */ +static struct permts *perm_tail; /* end of perm. text storage */ + +static void init_block (union label *blk); +static char *perm_copy (char *string1, char *string2); + +static char *prevlabel; + +/* + * Internal routine: finds the `union label' corresponding to the + * given label name. Creates a new one, if it isn't found, and if + * `create' is TRUE. + */ +static union label *find_label (char *label, int create) { + int hash = 0; + char *p, *prev; + int prevlen; + union label *lptr; + + if (islocal(label)) + prev = prevlabel; + else + prev = ""; + prevlen = strlen(prev); + p = prev; + while (*p) hash += *p++; + p = label; + while (*p) hash += *p++; + hash %= LABEL_HASHES; + lptr = ltab[hash]; + while (lptr->admin.movingon != END_LIST) { + if (lptr->admin.movingon == END_BLOCK) { + lptr = lptr->admin.next; + } + if (!strncmp(lptr->defn.label, prev, prevlen) && + !strcmp(lptr->defn.label+prevlen, label)) + return lptr; + lptr++; + } + if (create) { + if (lfree[hash]->admin.movingon == END_BLOCK) { + /* + * must allocate a new block + */ + lfree[hash]->admin.next = (union label *) nasm_malloc (LBLK_SIZE); + lfree[hash] = lfree[hash]->admin.next; + init_block(lfree[hash]); + } + + lfree[hash]->admin.movingon = BOGUS_VALUE; + lfree[hash]->defn.label = perm_copy (prev, label); + lfree[hash]->defn.is_global = NOT_DEFINED_YET; + return lfree[hash]++; + } else + return NULL; +} + +int lookup_label (char *label, long *segment, long *offset) { + union label *lptr; + + lptr = find_label (label, 0); + if (lptr && (lptr->defn.is_global == LOCAL_SYMBOL || + lptr->defn.is_global == GLOBAL_SYMBOL)) { + *segment = lptr->defn.segment; + *offset = lptr->defn.offset; + return 1; + } else + return 0; +} + +void define_label_stub (char *label, efunc 
error) { + union label *lptr; + + if (!islocal(label)) { + lptr = find_label (label, 1); + if (!lptr) + error (ERR_PANIC, "can't find label `%s' on pass two", label); + prevlabel = lptr->defn.label; + } +} + +void define_label (char *label, long segment, long offset, + struct ofmt *ofmt, efunc error) { + union label *lptr; + + lptr = find_label (label, 1); + switch (lptr->defn.is_global) { + case NOT_DEFINED_YET: + lptr->defn.is_global = LOCAL_SYMBOL; + break; + case GLOBAL_PLACEHOLDER: + lptr->defn.is_global = GLOBAL_SYMBOL; + break; + default: + error(ERR_NONFATAL, "symbol `%s' redefined", label); + return; + } + + if (label[0] != '.') /* not local, but not special either */ + prevlabel = lptr->defn.label; + else if (!*prevlabel) + error(ERR_NONFATAL, "attempt to define a local label before any" + " non-local labels"); + + lptr->defn.segment = segment; + lptr->defn.offset = offset; + + ofmt->symdef (lptr->defn.label, segment, offset, + lptr->defn.is_global == GLOBAL_SYMBOL); +} + +void define_common (char *label, long segment, long size, + struct ofmt *ofmt, efunc error) { + union label *lptr; + + lptr = find_label (label, 1); + switch (lptr->defn.is_global) { + case NOT_DEFINED_YET: + lptr->defn.is_global = LOCAL_SYMBOL; + break; + case GLOBAL_PLACEHOLDER: + lptr->defn.is_global = GLOBAL_SYMBOL; + break; + default: + error(ERR_NONFATAL, "symbol `%s' redefined", label); + return; + } + + if (label[0] != '.') /* not local, but not special either */ + prevlabel = lptr->defn.label; + else + error(ERR_NONFATAL, "attempt to define a local label as a " + "common variable"); + + lptr->defn.segment = segment; + lptr->defn.offset = 0; + + ofmt->symdef (lptr->defn.label, segment, size, 2); +} + +void declare_as_global (char *label, efunc error) { + union label *lptr; + + if (islocal(label)) { + error(ERR_NONFATAL, "attempt to declare local symbol `%s' as" + " global", label); + return; + } + lptr = find_label (label, 1); + switch (lptr->defn.is_global) { + case 
NOT_DEFINED_YET: + lptr->defn.is_global = GLOBAL_PLACEHOLDER; + break; + case GLOBAL_PLACEHOLDER: /* already done: silently ignore */ + case GLOBAL_SYMBOL: + break; + case LOCAL_SYMBOL: + error(ERR_NONFATAL, "symbol `%s': [GLOBAL] directive must" + " appear before symbol definition", label); + break; + } +} + +int init_labels (void) { + int i; + + for (i=0; inext = NULL; + perm_head->size = PERMTS_SIZE; + perm_head->usage = 0; + + prevlabel = ""; + + return 0; +} + +void cleanup_labels (void) { + int i; + + for (i=0; iadmin.movingon != END_BLOCK) lptr++; + lptr = lptr->admin.next; + nasm_free (lhold); + lhold = lptr; + } + } + + while (perm_head) { + perm_tail = perm_head; + perm_head = perm_head->next; + nasm_free (perm_tail); + } +} + +static void init_block (union label *blk) { + int j; + + for (j=0; jsize - perm_tail->usage < len) { + perm_tail->next = (struct permts *)nasm_malloc(sizeof(struct permts)); + perm_tail = perm_tail->next; + perm_tail->size = PERMTS_SIZE; + perm_tail->usage = 0; + } + p = q = perm_tail->data + perm_tail->usage; + while ( (*q = *string1++) ) q++; + while ( (*q++ = *string2++) ); + perm_tail->usage = q - perm_tail->data; + + return p; +} diff --git a/labels.h b/labels.h new file mode 100644 index 00000000..fb466ca1 --- /dev/null +++ b/labels.h @@ -0,0 +1,17 @@ +/* labels.h header file for labels.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +int lookup_label (char *label, long *segment, long *offset); +void define_label (char *label, long segment, long offset, + struct ofmt *ofmt, efunc error); +void define_common (char *label, long segment, long size, + struct ofmt *ofmt, efunc error); +void define_label_stub (char *label, efunc error); +void declare_as_global (char *label, efunc error); +int init_labels (void); +void cleanup_labels (void); diff --git a/lcc/Readme b/lcc/Readme new file mode 100644 index 00000000..d37f812b --- /dev/null +++ b/lcc/Readme @@ -0,0 +1,57 @@ +This directory contains the necessary files to port the C compiler +``LCC'' (available by FTP from sunsite.doc.ic.ac.uk in the directory +/computing/programming/languages/c/lcc) to compile for Linux (a.out +or ELF) by using NASM as a back-end code generator. + +This patch has been tested on lcc version 3.6. + +To install: + +- Copy `x86nasm.md' into the `src' directory of the lcc tree. + +- Copy either `lin-elf.c' or `lin-aout.c' into the `etc' directory. + +- If you're installing for a.out, edit `x86nasm.md' and change the + conditional after the comment reading "CHANGE THIS FOR a.out" in + the `defsymbol' function from `#if 0' to `#if 1'. + +- Make the following changes to `bind.c' in the `src' directory: + + - Near the top of the file, add a line that reads + extern Interface x86nasmIR; + + - In the `bindings' array, add the lines + "x86-nasm", &x86nasmIR, + "x86/nasm", &x86nasmIR, + (in sensible looking places...) + + A sample `bind.c' has been provided to show what the result of + this might look like. You might be able to get away with using it + directly... + +- Modify the lcc makefile to include rules for x86nasm.o: this will + have to be done in about three places. Just copy any line with + `x86' on it and modify it to read `x86nasm' everywhere. (Except + that in the list of object files that rcc is made up from, do + remember to ensure that every line but the last has a trailing + backslash...) 
+ +- You may have to modify the contents of `lin-elf.c' or `lin-aout.c' + to reflect the true locations of files such as crt0.o, crt1.o, + ld-linux.so and so forth. If you don't know where to find these, + compile a short C program with `gcc -v' and see what command line + gcc feeds to `ld'. + +- You should now be able to build lcc, using `lin-elf.c' or + `lin-aout.c' as the system-dependent part of the `lcc' wrapper + program. + +- Symlink x86nasm.c into the `src' directory before attempting the + triple test, or the compile will fail. + +- Now it should pass the triple test, on either ELF or a.out. Voila! + +Known potential problems: + +- The machine description may occasionally generate `db' lines that + are longer than NASM's 1024-character maximum. diff --git a/lcc/bind.c b/lcc/bind.c new file mode 100644 index 00000000..b0c1f51f --- /dev/null +++ b/lcc/bind.c @@ -0,0 +1,23 @@ +#include "c.h" +extern Interface nullIR, symbolicIR; +extern Interface mipsebIR, mipselIR; +extern Interface sparcIR, solarisIR; +extern Interface x86IR, x86nasmIR; +Binding bindings[] = { + "symbolic", &symbolicIR, + "mips-irix", &mipsebIR, + "mips-ultrix", &mipselIR, + "sparc-sun", &sparcIR, + "sparc-solaris", &solarisIR, + "x86-dos", &x86IR, + "x86-nasm", &x86nasmIR, + "symbolic/irix", &symbolicIR, /* omit */ + "mips/irix", &mipsebIR, /* omit */ + "mips/ultrix", &mipselIR, /* omit */ + "sparc/sun", &sparcIR, /* omit */ + "sparc/solaris", &solarisIR, /* omit */ + "x86/dos", &x86IR, /* omit */ + "x86/nasm", &x86nasmIR, /* omit */ + "null", &nullIR, + NULL, NULL +}; diff --git a/lcc/lin-aout.c b/lcc/lin-aout.c new file mode 100644 index 00000000..f1ac88ad --- /dev/null +++ b/lcc/lin-aout.c @@ -0,0 +1,44 @@ +/* x86 running linux and using nasm as a.out */ + +#include + +#ifndef LCCDIR +#define LCCDIR "/usr/local/lib/lcc/" +#endif + +#define NASMPATH "/usr/local/bin/nasm" + +char *cpp[] = { LCCDIR "cpp", "-D__STDC__=1", + "-Di386", "-D__i386", "-D__i386__", + "-Dlinux", "-D__linux", 
"-D__linux__", + "-Dunix", "-D__unix", "-D__unix__", + "$1", "$2", "$3", 0 }; +char *include[] = { "-I" LCCDIR "include", "-I/usr/local/include", + "-I/usr/include", 0 }; +char *com[] = { LCCDIR "rcc", "-target=x86/nasm", + "$1", "$2", "$3", 0 }; +char *as[] = { NASMPATH, "-faout", "-o", "$3", "$1", "$2", 0 }; +char *ld[] = { "/usr/bin/ld", "-m", "i386linux", + "-L/usr/i486-linuxaout/lib", + "-o", "$3", "$1", + "/usr/i486-linuxaout/lib/crt0.o", + "$2", "", "-lc", 0 }; +static char *bbexit = LCCDIR "bbexit.o"; + +extern char *concat(char *, char *); +extern int access(const char *, int); + +int option(char *arg) { + if (strncmp(arg, "-lccdir=", 8) == 0) { + cpp[0] = concat(&arg[8], "/cpp"); + include[0] = concat("-I", concat(&arg[8], "/include")); + com[0] = concat(&arg[8], "/rcc"); + bbexit = concat(&arg[8], "/bbexit.o"); + } else if (strcmp(arg, "-g") == 0) + ; + else if (strcmp(arg, "-b") == 0 && access(bbexit, 4) == 0) + ld[9] = bbexit; + else + return 0; + return 1; +} diff --git a/lcc/lin-elf.c b/lcc/lin-elf.c new file mode 100644 index 00000000..15df9e57 --- /dev/null +++ b/lcc/lin-elf.c @@ -0,0 +1,45 @@ +/* x86 running linux and using nasm as ELF */ + +#include + +#ifndef LCCDIR +#define LCCDIR "/usr/local/lib/lcc/" +#endif + +#define NASMPATH "/usr/local/bin/nasm" + +char *cpp[] = { LCCDIR "cpp", "-D__STDC__=1", + "-D__ELF__", "-Di386", "-D__i386", "-D__i386__", + "-Dlinux", "-D__linux", "-D__linux__", + "$1", "$2", "$3", 0 }; +char *include[] = { "-I" LCCDIR "include", "-I/usr/local/include", + "-I/usr/include", 0 }; +char *com[] = { LCCDIR "rcc", "-target=x86/nasm", + "$1", "$2", "$3", 0 }; +char *as[] = { NASMPATH, "-felf", "-o", "$3", "$1", "$2", 0 }; +char *ld[] = { "/usr/bin/ld", "-m", "elf_i386", + "-dynamic-linker", "/lib/ld-linux.so.1", + "-L/usr/i486-linux/lib", + "-o", "$3", "$1", + "/usr/lib/crt1.o", "/usr/lib/crti.o", "/usr/lib/crtbegin.o", + "$2", "", + "-lc", "", "/usr/lib/crtend.o", "/usr/lib/crtn.o", 0 }; +static char *bbexit = LCCDIR 
"bbexit.o"; + +extern char *concat(char *, char *); +extern int access(const char *, int); + +int option(char *arg) { + if (strncmp(arg, "-lccdir=", 8) == 0) { + cpp[0] = concat(&arg[8], "/cpp"); + include[0] = concat("-I", concat(&arg[8], "/include")); + com[0] = concat(&arg[8], "/rcc"); + bbexit = concat(&arg[8], "/bbexit.o"); + } else if (strcmp(arg, "-g") == 0) + ; + else if (strcmp(arg, "-b") == 0 && access(bbexit, 4) == 0) + ld[13] = bbexit; + else + return 0; + return 1; +} diff --git a/lcc/x86nasm.md b/lcc/x86nasm.md new file mode 100644 index 00000000..d7091225 --- /dev/null +++ b/lcc/x86nasm.md @@ -0,0 +1,703 @@ +%{ +enum { EAX=0, ECX=1, EDX=2, EBX=3, ESI=6, EDI=7 }; +#include "c.h" +#define NODEPTR_TYPE Node +#define OP_LABEL(p) ((p)->op) +#define LEFT_CHILD(p) ((p)->kids[0]) +#define RIGHT_CHILD(p) ((p)->kids[1]) +#define STATE_LABEL(p) ((p)->x.state) +static void address ARGS((Symbol, Symbol, int)); +static void blkfetch ARGS((int, int, int, int)); +static void blkloop ARGS((int, int, int, int, int, int[])); +static void blkstore ARGS((int, int, int, int)); +static void defaddress ARGS((Symbol)); +static void defconst ARGS((int, Value)); +static void defstring ARGS((int, char *)); +static void defsymbol ARGS((Symbol)); +static void doarg ARGS((Node)); +static void emit2 ARGS((Node)); +static void export ARGS((Symbol)); +static void clobber ARGS((Node)); +static void function ARGS((Symbol, Symbol [], Symbol [], int)); +static void global ARGS((Symbol)); +static void import ARGS((Symbol)); +static void local ARGS((Symbol)); +static void progbeg ARGS((int, char **)); +static void progend ARGS((void)); +static void segment ARGS((int)); +static void space ARGS((int)); +static void target ARGS((Node)); +static int ckstack ARGS((Node, int)); +static int memop ARGS((Node)); +static int sametree ARGS((Node, Node)); +static Symbol charreg[32], shortreg[32], intreg[32]; +static Symbol fltreg[32]; + +static int cseg; + +static Symbol quo, rem; + +%} +%start stmt 
+%term ADDD=306 ADDF=305 ADDI=309 ADDP=311 ADDU=310 +%term ADDRFP=279 +%term ADDRGP=263 +%term ADDRLP=295 +%term ARGB=41 ARGD=34 ARGF=33 ARGI=37 ARGP=39 +%term ASGNB=57 ASGNC=51 ASGND=50 ASGNF=49 ASGNI=53 ASGNP=55 ASGNS=52 +%term BANDU=390 +%term BCOMU=406 +%term BORU=422 +%term BXORU=438 +%term CALLB=217 CALLD=210 CALLF=209 CALLI=213 CALLV=216 +%term CNSTC=19 CNSTD=18 CNSTF=17 CNSTI=21 CNSTP=23 CNSTS=20 CNSTU=22 +%term CVCI=85 CVCU=86 +%term CVDF=97 CVDI=101 +%term CVFD=114 +%term CVIC=131 CVID=130 CVIS=132 CVIU=134 +%term CVPU=150 +%term CVSI=165 CVSU=166 +%term CVUC=179 CVUI=181 CVUP=183 CVUS=180 +%term DIVD=450 DIVF=449 DIVI=453 DIVU=454 +%term EQD=482 EQF=481 EQI=485 +%term GED=498 GEF=497 GEI=501 GEU=502 +%term GTD=514 GTF=513 GTI=517 GTU=518 +%term INDIRB=73 INDIRC=67 INDIRD=66 INDIRF=65 INDIRI=69 INDIRP=71 INDIRS=68 +%term JUMPV=584 +%term LABELV=600 +%term LED=530 LEF=529 LEI=533 LEU=534 +%term LOADB=233 LOADC=227 LOADD=226 LOADF=225 LOADI=229 LOADP=231 LOADS=228 LOADU=230 +%term LSHI=341 LSHU=342 +%term LTD=546 LTF=545 LTI=549 LTU=550 +%term MODI=357 MODU=358 +%term MULD=466 MULF=465 MULI=469 MULU=470 +%term NED=562 NEF=561 NEI=565 +%term NEGD=194 NEGF=193 NEGI=197 +%term RETD=242 RETF=241 RETI=245 +%term RSHI=373 RSHU=374 +%term SUBD=322 SUBF=321 SUBI=325 SUBP=327 SUBU=326 +%term VREGP=615 +%% +reg: INDIRC(VREGP) "# read register\n" +reg: INDIRD(VREGP) "# read register\n" +reg: INDIRF(VREGP) "# read register\n" +reg: INDIRI(VREGP) "# read register\n" +reg: INDIRP(VREGP) "# read register\n" +reg: INDIRS(VREGP) "# read register\n" +stmt: ASGNC(VREGP,reg) "# write register\n" +stmt: ASGND(VREGP,reg) "# write register\n" +stmt: ASGNF(VREGP,reg) "# write register\n" +stmt: ASGNI(VREGP,reg) "# write register\n" +stmt: ASGNP(VREGP,reg) "# write register\n" +stmt: ASGNS(VREGP,reg) "# write register\n" +con: CNSTC "%a" +con: CNSTI "%a" +con: CNSTP "%a" +con: CNSTS "%a" +con: CNSTU "%a" +stmt: reg "" +reg: CVIU(reg) "%0" notarget(a) +reg: CVPU(reg) "%0" 
notarget(a) +reg: CVUI(reg) "%0" notarget(a) +reg: CVUP(reg) "%0" notarget(a) +acon: ADDRGP "%a" +acon: con "%0" +base: ADDRGP "%a" +base: reg "%0" +base: ADDI(reg,acon) "%0 + (%1)" +base: ADDP(reg,acon) "%0 + (%1)" +base: ADDU(reg,acon) "%0 + (%1)" +base: ADDRFP "ebp + %a" +base: ADDRLP "ebp + %a" +index: reg "%0" +index: LSHI(reg,con1) "%0*2" +index: LSHI(reg,con2) "%0*4" +index: LSHI(reg,con3) "%0*8" + +con1: CNSTI "1" range(a, 1, 1) +con1: CNSTU "1" range(a, 1, 1) +con2: CNSTI "2" range(a, 2, 2) +con2: CNSTU "2" range(a, 2, 2) +con3: CNSTI "3" range(a, 3, 3) +con3: CNSTU "3" range(a, 3, 3) +index: LSHU(reg,con1) "%0*2" +index: LSHU(reg,con2) "%0*4" +index: LSHU(reg,con3) "%0*8" +addr: base "[%0]" +addr: ADDI(index,base) "[%1 + %0]" +addr: ADDP(index,base) "[%1 + %0]" +addr: ADDU(index,base) "[%1 + %0]" +addr: index "[%0]" +mem: INDIRC(addr) "byte %0" +mem: INDIRI(addr) "dword %0" +mem: INDIRP(addr) "dword %0" +mem: INDIRS(addr) "word %0" +rc: reg "%0" +rc: con "%0" + +mr: reg "%0" +mr: mem "%0" + +mrc0: mem "%0" +mrc0: rc "%0" +mrc1: mem "%0" 1 +mrc1: rc "%0" + +mrc3: mem "%0" 3 +mrc3: rc "%0" +reg: addr "lea %c,%0\n" 1 +reg: mrc0 "mov %c,%0\n" 1 +reg: LOADC(reg) "mov %c,%0\n" move(a) +reg: LOADI(reg) "mov %c,%0\n" move(a) +reg: LOADP(reg) "mov %c,%0\n" move(a) +reg: LOADS(reg) "mov %c,%0\n" move(a) +reg: LOADU(reg) "mov %c,%0\n" move(a) +reg: ADDI(reg,mrc1) "?mov %c,%0\nadd %c,%1\n" 1 +reg: ADDP(reg,mrc1) "?mov %c,%0\nadd %c,%1\n" 1 +reg: ADDU(reg,mrc1) "?mov %c,%0\nadd %c,%1\n" 1 +reg: SUBI(reg,mrc1) "?mov %c,%0\nsub %c,%1\n" 1 +reg: SUBP(reg,mrc1) "?mov %c,%0\nsub %c,%1\n" 1 +reg: SUBU(reg,mrc1) "?mov %c,%0\nsub %c,%1\n" 1 +reg: BANDU(reg,mrc1) "?mov %c,%0\nand %c,%1\n" 1 +reg: BORU(reg,mrc1) "?mov %c,%0\nor %c,%1\n" 1 +reg: BXORU(reg,mrc1) "?mov %c,%0\nxor %c,%1\n" 1 +stmt: ASGNI(addr,ADDI(mem,con1)) "inc %1\n" memop(a) +stmt: ASGNI(addr,ADDU(mem,con1)) "inc %1\n" memop(a) +stmt: ASGNP(addr,ADDP(mem,con1)) "inc %1\n" memop(a) +stmt: 
ASGNI(addr,SUBI(mem,con1)) "dec %1\n" memop(a) +stmt: ASGNI(addr,SUBU(mem,con1)) "dec %1\n" memop(a) +stmt: ASGNP(addr,SUBP(mem,con1)) "dec %1\n" memop(a) +stmt: ASGNI(addr,ADDI(mem,rc)) "add %1,%2\n" memop(a) +stmt: ASGNI(addr,ADDU(mem,rc)) "add %1,%2\n" memop(a) +stmt: ASGNI(addr,SUBI(mem,rc)) "sub %1,%2\n" memop(a) +stmt: ASGNI(addr,SUBU(mem,rc)) "sub %1,%2\n" memop(a) + +stmt: ASGNI(addr,BANDU(mem,rc)) "and %1,%2\n" memop(a) +stmt: ASGNI(addr,BORU(mem,rc)) "or %1,%2\n" memop(a) +stmt: ASGNI(addr,BXORU(mem,rc)) "xor %1,%2\n" memop(a) +reg: BCOMU(reg) "?mov %c,%0\nnot %c\n" 2 +reg: NEGI(reg) "?mov %c,%0\nneg %c\n" 2 + +stmt: ASGNI(addr,BCOMU(mem)) "not %1\n" memop(a) +stmt: ASGNI(addr,NEGI(mem)) "neg %1\n" memop(a) +reg: LSHI(reg,rc5) "?mov %c,%0\nsal %c,%1\n" 2 +reg: LSHU(reg,rc5) "?mov %c,%0\nshl %c,%1\n" 2 +reg: RSHI(reg,rc5) "?mov %c,%0\nsar %c,%1\n" 2 +reg: RSHU(reg,rc5) "?mov %c,%0\nshr %c,%1\n" 2 + +stmt: ASGNI(addr,LSHI(mem,rc5)) "sal %1,%2\n" memop(a) +stmt: ASGNI(addr,LSHU(mem,rc5)) "shl %1,%2\n" memop(a) +stmt: ASGNI(addr,RSHI(mem,rc5)) "sar %1,%2\n" memop(a) +stmt: ASGNI(addr,RSHU(mem,rc5)) "shr %1,%2\n" memop(a) + +rc5: CNSTI "%a" range(a, 0, 31) +rc5: reg "cl" +reg: MULI(reg,mrc3) "?mov %c,%0\nimul %c,%1\n" 14 +reg: MULI(con,mr) "imul %c,%1,%0\n" 13 +reg: MULU(reg,mr) "mul %1\n" 13 +reg: DIVU(reg,reg) "xor edx,edx\ndiv %1\n" +reg: MODU(reg,reg) "xor edx,edx\ndiv %1\n" +reg: DIVI(reg,reg) "cdq\nidiv %1\n" +reg: MODI(reg,reg) "cdq\nidiv %1\n" +reg: CVIU(reg) "mov %c,%0\n" move(a) +reg: CVPU(reg) "mov %c,%0\n" move(a) +reg: CVUI(reg) "mov %c,%0\n" move(a) +reg: CVUP(reg) "mov %c,%0\n" move(a) +reg: CVCI(INDIRC(addr)) "movsx %c,byte %0\n" 3 +reg: CVCU(INDIRC(addr)) "movzx %c,byte %0\n" 3 +reg: CVSI(INDIRS(addr)) "movsx %c,word %0\n" 3 +reg: CVSU(INDIRS(addr)) "movzx %c,word %0\n" 3 +reg: CVCI(reg) "# extend\n" 3 +reg: CVCU(reg) "# extend\n" 3 +reg: CVSI(reg) "# extend\n" 3 +reg: CVSU(reg) "# extend\n" 3 + +reg: CVIC(reg) "# truncate\n" 1 +reg: CVIS(reg) 
"# truncate\n" 1 +reg: CVUC(reg) "# truncate\n" 1 +reg: CVUS(reg) "# truncate\n" 1 +stmt: ASGNC(addr,rc) "mov byte %0,%1\n" 1 +stmt: ASGNI(addr,rc) "mov dword %0,%1\n" 1 +stmt: ASGNP(addr,rc) "mov dword %0,%1\n" 1 +stmt: ASGNS(addr,rc) "mov word %0,%1\n" 1 +stmt: ARGI(mrc3) "push dword %0\n" 1 +stmt: ARGP(mrc3) "push dword %0\n" 1 +stmt: ASGNB(reg,INDIRB(reg)) "mov ecx,%a\nrep movsb\n" +stmt: ARGB(INDIRB(reg)) "sub esp,%a\nmov edi,esp\nmov ecx,%a\nrep movsb\n" + +memf: INDIRD(addr) "qword %0" +memf: INDIRF(addr) "dword %0" +memf: CVFD(INDIRF(addr)) "dword %0" +reg: memf "fld %0\n" 3 +stmt: ASGND(addr,reg) "fstp qword %0\n" 7 +stmt: ASGNF(addr,reg) "fstp dword %0\n" 7 +stmt: ASGNF(addr,CVDF(reg)) "fstp dword %0\n" 7 +stmt: ARGD(reg) "sub esp,8\nfstp qword [esp]\n" +stmt: ARGF(reg) "sub esp,4\nfstp dword [esp]\n" +reg: NEGD(reg) "fchs\n" +reg: NEGF(reg) "fchs\n" +reg: ADDD(reg,memf) "fadd %1\n" +reg: ADDD(reg,reg) "faddp st1\n" +reg: ADDF(reg,memf) "fadd %1\n" +reg: ADDF(reg,reg) "faddp st1\n" +reg: DIVD(reg,memf) "fdiv %1\n" +reg: DIVD(reg,reg) "fdivrp st1\n" +reg: DIVF(reg,memf) "fdiv %1\n" +reg: DIVF(reg,reg) "fdivrp st1\n" +reg: MULD(reg,memf) "fmul %1\n" +reg: MULD(reg,reg) "fmulp st1\n" +reg: MULF(reg,memf) "fmul %1\n" +reg: MULF(reg,reg) "fmulp st1\n" +reg: SUBD(reg,memf) "fsub %1\n" +reg: SUBD(reg,reg) "fsubrp st1\n" +reg: SUBF(reg,memf) "fsub %1\n" +reg: SUBF(reg,reg) "fsubrp st1\n" +reg: CVFD(reg) "# CVFD\n" +reg: CVDF(reg) "sub esp,4\nfstp dword [esp]\nfld dword [esp]\nadd esp,4\n" 12 + +stmt: ASGNI(addr,CVDI(reg)) "fistp dword %0\n" 29 +reg: CVDI(reg) "sub esp,4\nfistp dword [esp]\npop %c\n" 31 + +reg: CVID(INDIRI(addr)) "fild dword %0\n" 10 +reg: CVID(reg) "push %0\nfild dword [esp]\nadd esp,4\n" 12 + +addrj: ADDRGP "%a" +addrj: reg "%0" 2 +addrj: mem "%0" 2 + +stmt: JUMPV(addrj) "jmp %0\n" 3 +stmt: LABELV "%a:\n" +stmt: EQI(mem,rc) "cmp %0,%1\nje near %a\n" 5 +stmt: GEI(mem,rc) "cmp %0,%1\njge near %a\n" 5 +stmt: GTI(mem,rc) "cmp %0,%1\njg near %a\n" 5 
+stmt: LEI(mem,rc) "cmp %0,%1\njle near %a\n" 5 +stmt: LTI(mem,rc) "cmp %0,%1\njl near %a\n" 5 +stmt: NEI(mem,rc) "cmp %0,%1\njne near %a\n" 5 +stmt: GEU(mem,rc) "cmp %0,%1\njae near %a\n" 5 +stmt: GTU(mem,rc) "cmp %0,%1\nja near %a\n" 5 +stmt: LEU(mem,rc) "cmp %0,%1\njbe near %a\n" 5 +stmt: LTU(mem,rc) "cmp %0,%1\njb near %a\n" 5 +stmt: EQI(reg,mrc1) "cmp %0,%1\nje near %a\n" 4 +stmt: GEI(reg,mrc1) "cmp %0,%1\njge near %a\n" 4 +stmt: GTI(reg,mrc1) "cmp %0,%1\njg near %a\n" 4 +stmt: LEI(reg,mrc1) "cmp %0,%1\njle near %a\n" 4 +stmt: LTI(reg,mrc1) "cmp %0,%1\njl near %a\n" 4 +stmt: NEI(reg,mrc1) "cmp %0,%1\njne near %a\n" 4 + +stmt: GEU(reg,mrc1) "cmp %0,%1\njae near %a\n" 4 +stmt: GTU(reg,mrc1) "cmp %0,%1\nja near %a\n" 4 +stmt: LEU(reg,mrc1) "cmp %0,%1\njbe near %a\n" 4 +stmt: LTU(reg,mrc1) "cmp %0,%1\njb near %a\n" 4 +cmpf: memf " %0" +cmpf: reg "p" +stmt: EQD(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\nje near %a\n" +stmt: GED(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njbe near %a\n" +stmt: GTD(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njb near %a\n" +stmt: LED(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njae near %a\n" +stmt: LTD(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\nja near %a\n" +stmt: NED(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njne near %a\n" + +stmt: EQF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\nje near %a\n" +stmt: GEF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njbe near %a\n" +stmt: GTF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njb near %a\n" +stmt: LEF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njae near %a\n" +stmt: LTF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\nja near %a\n" +stmt: NEF(cmpf,reg) "fcomp%0\nfstsw ax\nsahf\njne near %a\n" +reg: CALLI(addrj) "call %0\nadd esp,%a\n" +stmt: CALLV(addrj) "call %0\nadd esp,%a\n" +reg: CALLF(addrj) "call %0\nadd esp,%a\n" +reg: CALLD(addrj) "call %0\nadd esp,%a\n" + +stmt: RETI(reg) "# ret\n" +stmt: RETF(reg) "# ret\n" +stmt: RETD(reg) "# ret\n" +%% +static void progbeg(argc, argv) int argc; char *argv[]; { + int i; + + { + union { + char c; + int i; + } u; + u.i = 0; + u.c = 1; + 
swap = (u.i == 1) != IR->little_endian; + } + parseflags(argc, argv); + intreg[EAX] = mkreg("eax", EAX, 1, IREG); + intreg[EDX] = mkreg("edx", EDX, 1, IREG); + intreg[ECX] = mkreg("ecx", ECX, 1, IREG); + intreg[EBX] = mkreg("ebx", EBX, 1, IREG); + intreg[ESI] = mkreg("esi", ESI, 1, IREG); + intreg[EDI] = mkreg("edi", EDI, 1, IREG); + shortreg[EAX] = mkreg("ax", EAX, 1, IREG); + shortreg[ECX] = mkreg("cx", ECX, 1, IREG); + shortreg[EDX] = mkreg("dx", EDX, 1, IREG); + shortreg[EBX] = mkreg("bx", EBX, 1, IREG); + shortreg[ESI] = mkreg("si", ESI, 1, IREG); + shortreg[EDI] = mkreg("di", EDI, 1, IREG); + + charreg[EAX] = mkreg("al", EAX, 1, IREG); + charreg[ECX] = mkreg("cl", ECX, 1, IREG); + charreg[EDX] = mkreg("dl", EDX, 1, IREG); + charreg[EBX] = mkreg("bl", EBX, 1, IREG); + for (i = 0; i < 8; i++) + fltreg[i] = mkreg("%d", i, 0, FREG); + rmap[C] = mkwildcard(charreg); + rmap[S] = mkwildcard(shortreg); + rmap[P] = rmap[B] = rmap[U] = rmap[I] = mkwildcard(intreg); + rmap[F] = rmap[D] = mkwildcard(fltreg); + tmask[IREG] = (1<x.regnode->mask |= 1<x.regnode->mask |= 1<op) { + case RSHI: case RSHU: case LSHI: case LSHU: + if (generic(p->kids[1]->op) != CNST + && !( generic(p->kids[1]->op) == INDIR + && p->kids[1]->kids[0]->op == VREG+P + && p->kids[1]->syms[RX]->u.t.cse + && generic(p->kids[1]->syms[RX]->u.t.cse->op) == CNST +)) { + rtarget(p, 1, intreg[ECX]); + setreg(p, intreg[EAX]); + } + break; + case MULU: + setreg(p, quo); + rtarget(p, 0, intreg[EAX]); + break; + case DIVI: case DIVU: + setreg(p, quo); + rtarget(p, 0, intreg[EAX]); + rtarget(p, 1, intreg[ECX]); + break; + case MODI: case MODU: + setreg(p, rem); + rtarget(p, 0, intreg[EAX]); + rtarget(p, 1, intreg[ECX]); + break; + case ASGNB: + rtarget(p, 0, intreg[EDI]); + rtarget(p->kids[1], 0, intreg[ESI]); + break; + case ARGB: + rtarget(p->kids[0], 0, intreg[ESI]); + break; + case CALLI: case CALLV: + setreg(p, intreg[EAX]); + break; + case RETI: + rtarget(p, 0, intreg[EAX]); + break; + } +} + +static void 
clobber(p) Node p; { + static int nstack = 0; + + assert(p); + nstack = ckstack(p, nstack); + assert(p->count > 0 || nstack == 0); + switch (p->op) { + case ASGNB: case ARGB: + spill(1<op)==F || optype((p)->op)==D) + +static int ckstack(p, n) Node p; int n; { + int i; + + for (i = 0; i < NELEMS(p->x.kids) && p->x.kids[i]; i++) + if (isfp(p->x.kids[i])) + n--; + if (isfp(p) && p->count > 0) + n++; + if (n > 8) + error("expression too complicated\n"); + debug(fprint(2, "(ckstack(%x)=%d)\n", p, n)); + assert(n >= 0); + return n; +} +static int memop(p) Node p; { + assert(p); + assert(generic(p->op) == ASGN); + assert(p->kids[0]); + assert(p->kids[1]); + if (generic(p->kids[1]->kids[0]->op) == INDIR + && sametree(p->kids[0], p->kids[1]->kids[0]->kids[0])) + return 3; + else + return LBURG_MAX; +} +static int sametree(p, q) Node p, q; { + return p == NULL && q == NULL + || p && q && p->op == q->op && p->syms[0] == q->syms[0] + && sametree(p->kids[0], q->kids[0]) + && sametree(p->kids[1], q->kids[1]); +} +static void emit2(p) Node p; { +#define preg(f) ((f)[getregnum(p->x.kids[0])]->x.name) + + if (p->op == CVCI) + print("movsx %s,%s\n", p->syms[RX]->x.name +, preg(charreg)); + else if (p->op == CVCU) + print("movzx %s,%s\n", p->syms[RX]->x.name +, preg(charreg)); + else if (p->op == CVSI) + print("movsx %s,%s\n", p->syms[RX]->x.name +, preg(shortreg)); + else if (p->op == CVSU) + print("movzx %s,%s\n", p->syms[RX]->x.name +, preg(shortreg)); + else if (p->op == CVIC || p->op == CVIS + || p->op == CVUC || p->op == CVUS) { + char *dst = shortreg[getregnum(p)]->x.name; + char *src = preg(shortreg); + if (dst != src) + print("mov %s,%s\n", dst, src); + } +} + +static void doarg(p) Node p; { + assert(p && p->syms[0]); + mkactual(4, p->syms[0]->u.c.v.i); +} +static void blkfetch(k, off, reg, tmp) +int k, off, reg, tmp; {} +static void blkstore(k, off, reg, tmp) +int k, off, reg, tmp; {} +static void blkloop(dreg, doff, sreg, soff, size, tmps) +int dreg, doff, sreg, soff, 
size, tmps[]; {} +static void local(p) Symbol p; { + if (isfloat(p->type)) + p->sclass = AUTO; + if (askregvar(p, rmap[ttob(p->type)]) == 0) + mkauto(p); +} +static void function(f, caller, callee, n) +Symbol f, callee[], caller[]; int n; { + int i; + + print("%s:\n", f->x.name); + print("push ebx\n"); + print("push esi\n"); + print("push edi\n"); + print("push ebp\n"); + print("mov ebp,esp\n"); +usedmask[0] = usedmask[1] = 0; +freemask[0] = freemask[1] = ~(unsigned)0; + offset = 16 + 4; + for (i = 0; callee[i]; i++) { + Symbol p = callee[i]; + Symbol q = caller[i]; + assert(q); + p->x.offset = q->x.offset = offset; + p->x.name = q->x.name = stringf("%d", p->x.offset); + p->sclass = q->sclass = AUTO; + offset += roundup(q->type->size, 4); + } + assert(caller[i] == 0); + offset = maxoffset = 0; + gencode(caller, callee); + framesize = roundup(maxoffset, 4); + if (framesize > 0) + print("sub esp,%d\n", framesize); + emitcode(); + print("mov esp,ebp\n"); + print("pop ebp\n"); + print("pop edi\n"); + print("pop esi\n"); + print("pop ebx\n"); + print("ret\n"); +} +static void defsymbol(p) Symbol p; { + if (p->scope >= LOCAL && p->sclass == STATIC) + p->x.name = stringf("L%d", genlabel(1)); + else if (p->generated) + p->x.name = stringf("$L%s", p->name); + else if (p->scope == GLOBAL || p->sclass == EXTERN) + /* CHANGE THIS FOR a.out */ +#if 0 + p->x.name = stringf("$_%s", p->name); +#else + p->x.name = stringf("$%s", p->name); +#endif + else if (p->scope == CONSTANTS + && (isint(p->type) || isptr(p->type)) + && p->name[0] == '0' && p->name[1] == 'x') + p->x.name = stringf("0%sH", &p->name[2]); + else + p->x.name = p->name; +} +static void address(q, p, n) Symbol q, p; int n; { + if (p->scope == GLOBAL + || p->sclass == STATIC || p->sclass == EXTERN) + q->x.name = stringf("%s%s%d", + p->x.name, n >= 0 ? 
"+" : "", n); + else { + q->x.offset = p->x.offset + n; + q->x.name = stringd(q->x.offset); + } +} +static void defconst(ty, v) int ty; Value v; { + switch (ty) { + case C: print("db %d\n", v.uc); return; + case S: print("dw %d\n", v.ss); return; + case I: print("dd %d\n", v.i ); return; + case U: print("dd 0%xH\n", v.u ); return; + case P: print("dd 0%xH\n", v.p ); return; + case F: + print("dd 0%xH\n", *(unsigned *)&v.f); + return; + case D: { + unsigned *p = (unsigned *)&v.d; + print("dd 0%xH,0%xH\n", p[swap], p[1 - swap]); + return; + } + } + assert(0); +} +static void defaddress(p) Symbol p; { + print("dd %s\n", p->x.name); +} +static void defstring(n, str) int n; char *str; { + char *s; + int inquote = 1; + + print("db '"); + + for (s = str; s < str + n; s++) + { + if ((*s & 0x7F) == *s && *s >= ' ' && *s != '\'') { + if (!inquote){ + print(", '"); + inquote = 1; + } + print("%c",*s); + } + else + { + if (inquote){ + print("', "); + inquote = 0; + } + else + print(", "); + print("%d",*s); + } + } + if (inquote) print("'"); + print("\n"); +} +static void export(p) Symbol p; { + print("[global %s]\n", p->x.name); +} +static void import(p) Symbol p; { + if (p->ref > 0) { + print("[extern %s]\n", p->x.name); + } +} +static void global(p) Symbol p; { + int i; + + if (p->u.seg == BSS) + print("resb ($-$$) & %d\n", + p->type->align > 4 ? 3 : p->type->align-1); + else + print("times ($-$$) & %d nop\n", + p->type->align > 4 ? 
3 : p->type->align-1); + print("%s:\n", p->x.name); + if (p->u.seg == BSS) + print("resb %d\n", p->type->size); +} +static void space(n) int n; { + int i; + + if (cseg != BSS) + print("times %d db 0\n", n); +} +Interface x86nasmIR = { + 1, 1, 0, /* char */ + 2, 2, 0, /* short */ + 4, 4, 0, /* int */ + 4, 4, 1, /* float */ + 8, 4, 1, /* double */ + 4, 4, 0, /* T * */ + 0, 4, 0, /* struct; so that ARGB keeps stack aligned */ + 1, /* little_endian */ + 0, /* mulops_calls */ + 0, /* wants_callb */ + 1, /* wants_argb */ + 0, /* left_to_right */ + 0, /* wants_dag */ + address, + blockbeg, + blockend, + defaddress, + defconst, + defstring, + defsymbol, + emit, + export, + function, + gen, + global, + import, + local, + progbeg, + progend, + segment, + space, + 0, 0, 0, 0, 0, 0, 0, + {1, blkfetch, blkstore, blkloop, + _label, + _rule, + _nts, + _kids, + _opname, + _arity, + _string, + _templates, + _isinstruction, + _ntname, + emit2, + doarg, + target, + clobber, +} +}; diff --git a/misc/magic b/misc/magic new file mode 100644 index 00000000..0172f4a0 --- /dev/null +++ b/misc/magic @@ -0,0 +1,6 @@ +# Put the following lines in your /etc/magic file to get 'file' to recognise +# RDOFF Object Files + +0 string RDOFF RDOFF Object File +>5 byte >32 version %c (little endian) +>5 byte <32 version %d (big endian) diff --git a/misc/nasm.sl b/misc/nasm.sl new file mode 100644 index 00000000..be4d30b5 --- /dev/null +++ b/misc/nasm.sl @@ -0,0 +1,305 @@ +% This file defines a NASM editor mode for the JED editor. +% JED's home page is http://space.mit.edu/~davis/jed.html. +% +% To install, copy this file into your JED_LIBRARY directory +% (/usr/local/jed/lib or C:\JED\LIB or whatever), then add the +% following lines to your .jedrc or jed.rc file: +% autoload("nasm_mode", "nasm"); +% add_mode_for_extension("nasm", "asm"); +% (you can of course replace "asm" with whatever file extension +% you like to use for your NASM source files). 
+ +variable Nasm_Instruction_Indent = 10; +variable Nasm_Comment_Column = 33; +variable Nasm_Comment_Space = 1; + +variable nasm_kw_2 = strcat("ahalaxbhblbpbtbxchclcscxdbdddhdidldqdsdtdwdxes", + "fsgsinjajbjcjejgjljojpjsjzorsispssto"); +variable nasm_kw_3 = strncat("a16a32aaaaadaamaasadcaddandbsfbsrbtcbtrbtscbw", + "cdqclccldclicmccmpcr0cr2cr3cr4cwddaadasdecdiv", + "dr0dr1dr2dr3dr6dr7eaxebpebxecxediedxequesiesp", + "farfldfsthltincintjaejbejgejlejmpjnajnbjncjne", + "jngjnljnojnpjnsjnzjpejpolarldslealeslfslgslsl", + "lssltrmm0mm1mm2mm3mm4mm5mm6mm7movmulnegnopnot", + "o16o32outpopporrclrcrrepretrolrorrsmsalsarsbb", + "segshlshrst0st1st2st3st4st5st6st7stcstdstistr", + "subtr3tr4tr5tr6tr7wrtxor", 9); +variable nasm_kw_4 = strncat("arplbytecallcltscwdeemmsfabsfaddfbldfchsfcom", + "fcosfdivfenifildfistfld1fldzfmulfnopfsinfstp", + "fsubftstfxamfxchidivimulinsbinsdinswint3into", + "invdiretjcxzjnaejnbejngejnlelahflgdtlidtlldt", + "lmswlocklongloopmovdmovqnearpandpopapopfpush", + "pxorreperepzresbresdreswretfretnsahfsetasetb", + "setcsetesetgsetlsetosetpsetssetzsgdtshldshrd", + "sidtsldtsmswtestverrverwwaitwordxaddxchg", 8); +variable nasm_kw_5 = strncat("boundbswapcmpsbcmpsdcmpswcpuiddwordenterf2xm1", + "faddpfbstpfclexfcompfdisifdivpfdivrffreefiadd", + "ficomfidivfimulfinitfistpfisubfldcwfldpifmulp", + "fpremfptanfsavefsqrtfstcwfstswfsubpfsubrfucom", + "fyl2xiretdiretwjecxzleavelodsblodsdlodswloope", + "loopzmovsbmovsdmovswmovsxmovzxoutsboutsdoutsw", + "paddbpadddpaddwpandnpopadpopawpopfdpopfwpslld", + "psllqpsllwpsradpsrawpsrldpsrlqpsrlwpsubbpsubd", + "psubwpushapushfqwordrdmsrrdtscrepnerepnzscasb", + "scasdscaswsetaesetbesetgesetlesetnasetnbsetnc", + "setnesetngsetnlsetnosetnpsetnssetnzsetpesetpo", + "shortstosbstosdstoswtimestwordwrmsrxlatb", 12); +variable nasm_kw_6 = strncat("fcomppfdivrpficompfidivrfisubrfldenvfldl2e", + "fldl2tfldlg2fldln2fpatanfprem1frstorfscale", + "fsetpmfstenvfsubrpfucompinvlpgloopneloopnz", + "paddsbpaddswpmulhwpmullwpsubsbpsubswpushad", + 
"pushawpushfdpushfwsetnaesetnbesetngesetnle", + "wbinvd", 6); +variable nasm_kw_7 = strncat("cmpxchgfdecstpfincstpfrndintfsincosfucompp", + "fxtractfyl2xp1paddusbpadduswpcmpeqbpcmpeqd", + "pcmpeqwpcmpgtbpcmpgtdpcmpgtwpmaddwdpsubusb", + "psubusw", 4); +variable nasm_kw_8 = "packssdwpacksswbpackuswb"; +variable nasm_kw_9 = strcat("cmpxchg8bpunpckhbwpunpckhdqpunpckhwdpunpcklbw", + "punpckldqpunpcklwd"); + +define nasm_is_kw { + variable word; + variable len; + variable list, min, max, pos, cmp; + + word = strlow(()); + len = strlen(word); + + switch (len) + { case 0: return 1; } + { case 2: list = nasm_kw_2; } + { case 3: list = nasm_kw_3; } + { case 4: list = nasm_kw_4; } + { case 5: list = nasm_kw_5; } + { case 6: list = nasm_kw_6; } + { case 7: list = nasm_kw_7; } + { case 8: list = nasm_kw_8; } + { case 9: list = nasm_kw_9; } + { pop(); return 0; } + + min = -1; + max = strlen(list) / len; + while (max - min >= 2) { + pos = (max + min) / 2; + cmp = strcmp(word, substr(list, pos * len + 1, len)); + if (cmp == 0) + return 1; % it's a keyword + else if (cmp < 0) + max = pos; % bottom half + else if (cmp > 0) + min = pos; % top half + } + return 0; +} + +define nasm_indent_line() { + variable word, len, e; + + e = eolp(); + + push_spot(); + EXIT_BLOCK { + pop_spot(); + if (what_column() <= Nasm_Instruction_Indent) + skip_white(); + } + + bol_skip_white(); + + if (orelse + {looking_at_char(';')} + {looking_at_char('#')} + {looking_at_char('[')}) { + bol_trim(); + pop_spot(); + EXIT_BLOCK { + } + return; + } + + push_mark(); + skip_chars("0-9a-zA-Z_."); + word = bufsubstr(); + + if (nasm_is_kw(word)) { + bol_trim(); + whitespace(Nasm_Instruction_Indent); + } else { + push_spot(); + bol_trim(); + pop_spot(); + len = strlen(word); + if (looking_at_char(':')) { + go_right_1(); + len++; + } + trim(); + if (e or not(eolp())) { + if (len >= Nasm_Instruction_Indent) { + pop(); + whitespace(1); + } else + whitespace(Nasm_Instruction_Indent - len); + if (e) { + pop_spot(); + 
eol(); + push_spot(); + } + } + } +} + +define nasm_newline_indent { + push_spot(); + bol_skip_white(); + if (eolp()) + trim(); + pop_spot(); + newline(); + nasm_indent_line(); +} + +define nasm_bol_self_ins { + push_spot(); + bskip_white(); + bolp(); + pop_spot(); + + call("self_insert_cmd"); + + % Grotty: force immediate update of the syntax highlighting. + insert_char('.'); + deln(left(1)); + + if (()) + nasm_indent_line(); +} + +define nasm_self_ins_ind { + call("self_insert_cmd"); + + % Grotty: force immediate update of the syntax highlighting. + insert_char('.'); + deln(left(1)); + + nasm_indent_line(); +} + +define nasm_insert_comment { + variable spc; + + bol_skip_white(); + if (looking_at_char(';')) { + bol_trim(); + go_right(1); + skip_white(); + return; + } else if (eolp()) { + bol_trim(); + insert("; "); + return; + } + + forever { + skip_chars("^;\n'\""); + if (looking_at_char('\'')) { + go_right_1(); + skip_chars("^'\n"); + !if (eolp()) + go_right_1(); + } else if (looking_at_char('\"')) { + go_right_1(); + skip_chars("^\"\n"); + !if (eolp()) + go_right_1(); + } else if (looking_at_char(';')) { + !if (bolp()) { + go_left_1(); + trim(); + !if (looking_at_char(';')) + go_right_1(); + } + break; + } else { + break; + } + } + spc = Nasm_Comment_Column - what_column(); + if (spc < Nasm_Comment_Space) + spc = Nasm_Comment_Space; + whitespace(spc); + if (eolp()) { + insert("; "); + } else { + go_right_1(); + skip_white(); + } +} + +$1 = "NASM"; +create_syntax_table($1); + +define_syntax (";", "", '%', $1); +define_syntax ("([", ")]", '(', $1); +define_syntax ('"', '"', $1); +define_syntax ('\'', '\'', $1); +define_syntax ("0-9a-zA-Z_.@#", 'w', $1); +define_syntax ("-+0-9a-fA-F.xXL", '0', $1); +define_syntax (",:", ',', $1); +define_syntax ('#', '#', $1); +define_syntax ("|^&<>+-*/%~", '+', $1); + +set_syntax_flags($1,1); + +#ifdef HAS_DFA_SYNTAX + +enable_highlight_cache("nasm.dfa", $1); +define_highlight_rule(";.*$", "comment", $1); 
+define_highlight_rule("[A-Za-z_\\.\\?][A-Za-z0-9_\\.\\?\\$#@~]*", + "Knormal", $1); +define_highlight_rule("$([A-Za-z_\\.\\?][A-Za-z0-9_\\.\\?\\$#@~]*)?", + "normal", $1); +define_highlight_rule("[0-9]+(\\.[0-9]*)?([Ee][\\+\\-]?[0-9]*)?", + "number", $1); +define_highlight_rule("[0-9]+[QqBb]", "number", $1); +define_highlight_rule("(0x|\\$[0-9A-Fa-f])[0-9A-Fa-f]*", "number", $1); +define_highlight_rule("[0-9A-Fa-f]+[Hh]", "number", $1); +define_highlight_rule("\"[^\"]*\"", "string", $1); +define_highlight_rule("\"[^\"]*$", "string", $1); +define_highlight_rule("'[^']*'", "string", $1); +define_highlight_rule("'[^']*$", "string", $1); +define_highlight_rule("[\\(\\)\\[\\],:]*", "delimiter", $1); +define_highlight_rule("[\\|\\^&<>\\+\\-\\*/%~]*", "operator", $1); +define_highlight_rule("^[ \t]*#", "PQpreprocess", $1); +define_highlight_rule("@[0-9A-Za-z_\\.]*", "keyword1", $1); +define_highlight_rule("[ \t]*", "normal", $1); +define_highlight_rule(".", "normal", $1); +build_highlight_table($1); + +#endif + +define_keywords_n($1, nasm_kw_2, 2, 0); +define_keywords_n($1, nasm_kw_3, 3, 0); +define_keywords_n($1, nasm_kw_4, 4, 0); +define_keywords_n($1, nasm_kw_5, 5, 0); +define_keywords_n($1, nasm_kw_6, 6, 0); +define_keywords_n($1, nasm_kw_7, 7, 0); +define_keywords_n($1, nasm_kw_8, 8, 0); +define_keywords_n($1, nasm_kw_9, 9, 0); + +!if (keymap_p ($1)) make_keymap ($1); +definekey("nasm_bol_self_ins", ";", $1); +definekey("nasm_bol_self_ins", "#", $1); +definekey("nasm_bol_self_ins", "[", $1); +definekey("nasm_self_ins_ind", ":", $1); +definekey("nasm_insert_comment", "^[;", $1); + +define nasm_mode { + set_mode("NASM", 4); + use_keymap ("NASM"); + use_syntax_table ("NASM"); + set_buffer_hook ("indent_hook", "nasm_indent_line"); + set_buffer_hook ("newline_indent_hook", "nasm_newline_indent"); + runhooks("nasm_mode_hook"); +} diff --git a/names.c b/names.c new file mode 100644 index 00000000..5b9ae3c0 --- /dev/null +++ b/names.c @@ -0,0 +1,79 @@ +/* names.c included 
source file defining instruction and register + * names for the Netwide [Dis]Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +static char *reg_names[] = { /* register names, as strings */ + "\0", "ah", "al", "ax", "bh", "bl", "bp", "bx", "ch", "cl", + "cr0", "cr2", "cr3", "cr4", "cs", "cx", "dh", "di", "dl", "dr0", + "dr1", "dr2", "dr3", "dr6", "dr7", "ds", "dx", "eax", "ebp", + "ebx", "ecx", "edi", "edx", "es", "esi", "esp", "fs", "gs", + "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "si", + "sp", "ss", "st0", "st1", "st2", "st3", "st4", "st5", "st6", + "st7", "tr3", "tr4", "tr5", "tr6", "tr7" +}; + +static char *insn_names[] = { /* instruction names, as strings */ + "aaa", "aad", "aam", "aas", "adc", "add", "and", "arpl", + "bound", "bsf", "bsr", "bswap", "bt", "btc", "btr", "bts", + "call", "cbw", "cdq", "clc", "cld", "cli", "clts", "cmc", "cmp", + "cmpsb", "cmpsd", "cmpsw", "cmpxchg", "cmpxchg8b", "cpuid", + "cwd", "cwde", "daa", "das", "db", "dd", "dec", "div", "dq", + "dt", "dw", "emms", "enter", "equ", "f2xm1", "fabs", "fadd", + "faddp", "fbld", "fbstp", "fchs", "fclex", "fcmovb", "fcmovbe", + "fcmove", "fcmovnb", "fcmovnbe", "fcmovne", "fcmovnu", "fcmovu", + "fcom", "fcomi", "fcomip", "fcomp", "fcompp", "fcos", "fdecstp", + "fdisi", "fdiv", "fdivp", "fdivr", "fdivrp", "feni", "ffree", + "fiadd", "ficom", "ficomp", "fidiv", "fidivr", "fild", "fimul", + "fincstp", "finit", "fist", "fistp", "fisub", "fisubr", "fld", + "fld1", "fldcw", "fldenv", "fldl2e", "fldl2t", "fldlg2", + "fldln2", "fldpi", "fldz", "fmul", "fmulp", "fnop", "fpatan", + "fprem", "fprem1", "fptan", "frndint", "frstor", "fsave", + "fscale", "fsetpm", "fsin", "fsincos", "fsqrt", "fst", "fstcw", + "fstenv", "fstp", "fstsw", "fsub", "fsubp", "fsubr", "fsubrp", + "ftst", "fucom", "fucomi", 
"fucomip", "fucomp", "fucompp", + "fxam", "fxch", "fxtract", "fyl2x", "fyl2xp1", "hlt", "icebp", + "idiv", "imul", "in", "inc", "insb", "insd", "insw", "int", + "int1", "int01", "int3", "into", "invd", "invlpg", "iret", + "iretd", "iretw", "jcxz", "jecxz", "jmp", "lahf", "lar", "lds", + "lea", "leave", "les", "lfs", "lgdt", "lgs", "lidt", "lldt", + "lmsw", "loadall", "lodsb", "lodsd", "lodsw", "loop", "loope", + "loopne", "loopnz", "loopz", "lsl", "lss", "ltr", "mov", "movd", + "movq", "movsb", "movsd", "movsw", "movsx", "movzx", "mul", + "neg", "nop", "not", "or", "out", "outsb", "outsd", "outsw", + "packssdw", "packsswb", "packuswb", "paddb", "paddd", "paddsb", + "paddsw", "paddusb", "paddusw", "paddw", "pand", "pandn", + "pcmpeqb", "pcmpeqd", "pcmpeqw", "pcmpgtb", "pcmpgtd", + "pcmpgtw", "pmaddwd", "pmulhw", "pmullw", "pop", "popa", + "popad", "popaw", "popf", "popfd", "popfw", "por", "pslld", + "psllq", "psllw", "psrad", "psraw", "psrld", "psrlq", "psrlw", + "psubb", "psubd", "psubsb", "psubsw", "psubusb", "psubusw", + "psubw", "punpckhbw", "punpckhdq", "punpckhwd", "punpcklbw", + "punpckldq", "punpcklwd", "push", "pusha", "pushad", "pushaw", + "pushf", "pushfd", "pushfw", "pxor", "rcl", "rcr", "rdmsr", + "rdpmc", "rdtsc", "resb", "resd", "resq", "rest", "resw", "ret", + "retf", "retn", "rol", "ror", "rsm", "sahf", "sal", "salc", + "sar", "sbb", "scasb", "scasd", "scasw", "sgdt", "shl", "shld", + "shr", "shrd", "sidt", "sldt", "smsw", "stc", "std", "sti", + "stosb", "stosd", "stosw", "str", "sub", "test", "umov", "verr", + "verw", "wait", "wbinvd", "wrmsr", "xadd", "xchg", "xlatb", + "xor" +}; + +static char *icn[] = { /* conditional instructions */ + "cmov", "j", "set" +}; + +static int ico[] = { /* and the corresponding opcodes */ + I_CMOVcc, I_Jcc, I_SETcc +}; + +static char *conditions[] = { /* condition code names */ + "a", "ae", "b", "be", "c", "e", "g", "ge", "l", "le", "na", "nae", + "nb", "nbe", "nc", "ne", "ng", "nge", "nl", "nle", "no", "np", + "ns", 
"nz", "o", "p", "pe", "po", "s", "z" +}; diff --git a/nasm.c b/nasm.c new file mode 100644 index 00000000..f4c75c43 --- /dev/null +++ b/nasm.c @@ -0,0 +1,648 @@ +/* The Netwide Assembler main program module + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "nasm.h" +#include "nasmlib.h" +#include "parser.h" +#include "assemble.h" +#include "labels.h" +#include "outform.h" + +static void report_error (int, char *, ...); +static void parse_cmdline (int, char **); +static void assemble_file (char *); +static int getkw (char *buf, char **value); +static void register_output_formats(void); +static void usage(void); + +static char *obuf; +static char inname[FILENAME_MAX]; +static char outname[FILENAME_MAX]; +static char realout[FILENAME_MAX]; +static int lineno; /* for error reporting */ +static int pass; +static struct ofmt *ofmt = NULL; + +static FILE *ofile = NULL; +static int sb = 16; /* by default */ + +static long current_seg; +static struct RAA *offsets; +static long abs_offset; +#define OFFSET_DELTA 256 + +/* + * get/set current offset...
+ */ +#define get_curr_ofs (current_seg==NO_SEG?abs_offset:\ + raa_read(offsets,current_seg)) +#define set_curr_ofs(x) (current_seg==NO_SEG?(void)(abs_offset=(x)):\ + (void)(offsets=raa_write(offsets,current_seg,(x)))) + +static int want_usage; +static int terminate_after_phase; + +int main(int argc, char **argv) { + want_usage = terminate_after_phase = FALSE; + + nasm_set_malloc_error (report_error); + offsets = raa_init(); + + seg_init(); + + register_output_formats(); + + parse_cmdline(argc, argv); + + if (terminate_after_phase) { + if (want_usage) + usage(); + return 1; + } + + if (!*outname) { + ofmt->filename (inname, realout, report_error); + strcpy(outname, realout); + } + + ofile = fopen(outname, "wb"); + if (!ofile) { + report_error (ERR_FATAL | ERR_NOFILE, + "unable to open output file `%s'", outname); + } + ofmt->init (ofile, report_error, define_label); + assemble_file (inname); + if (!terminate_after_phase) { + ofmt->cleanup (); + cleanup_labels (); + } + fclose (ofile); + if (terminate_after_phase) + remove(outname); + + if (want_usage) + usage(); + + return 0; +} + +static void parse_cmdline(int argc, char **argv) { + char *param; + + *inname = *outname = '\0'; + while (--argc) { + char *p = *++argv; + if (p[0]=='-') { + switch (p[1]) { + case 'o': /* these parameters take values */ + case 'f': + if (p[2]) /* the parameter's in the option */ + param = p+2; + else if (!argv[1]) { + report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, + "option `-%c' requires an argument", + p[1]); + break; + } else + --argc, param = *++argv; + if (p[1]=='o') { /* output file */ + strcpy (outname, param); + } else if (p[1]=='f') { /* output format */ + ofmt = ofmt_find(param); + if (!ofmt) { + report_error (ERR_FATAL | ERR_NOFILE | ERR_USAGE, + "unrecognised output format `%s'", + param); + } + } + break; + case 'h': + fprintf(stderr, + "usage: nasm [-o outfile] [-f format] filename\n"); + fprintf(stderr, + " or nasm -r for version info\n\n"); + fprintf(stderr, + 
"valid output formats for -f are" + " (`*' denotes default):\n"); + ofmt_list(ofmt); + exit (0); /* never need usage message here */ + break; + case 'r': + fprintf(stderr, "NASM version %s\n", NASM_VER); + exit (0); /* never need usage message here */ + break; + default: + report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, + "unrecognised option `-%c'", + p[1]); + break; + } + } else { + if (*inname) { + report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, + "more than one input file specified"); + } else + strcpy(inname, p); + } + } + if (!*inname) + report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, + "no input file specified"); +} + +/* used by error function to report location */ +static char currentfile[FILENAME_MAX]; + +static void assemble_file (char *fname) { + FILE *fp = fopen (fname, "r"); + FILE *oldfile = NULL; /* jrh - used when processing include files */ + int oldfileline = 0; + char *value, *p, buffer[1024+2]; /* maximum line length defined here */ + insn output_ins; + int i, seg, rn_error; + + if (!fp) { /* couldn't open file */ + report_error (ERR_FATAL | ERR_NOFILE, + "unable to open input file `%s'", fname); + return; + } + + init_labels (); + strcpy(currentfile,fname); + + /* pass one */ + pass = 1; + current_seg = ofmt->section(NULL, pass, &sb); + lineno = 0; + while (1) { + if (! fgets(buffer, sizeof(buffer), fp)) { /* EOF on current file */ + if (oldfile) { + fclose(fp); + fp = oldfile; + lineno = oldfileline; + strcpy(currentfile,fname); + oldfile = NULL; + continue; + } + else + break; + } + lineno++; + if (buffer[strlen(buffer)-1] == '\n') { + buffer[strlen(buffer)-1] = '\0'; + } else { + /* + * We have a line that's too long. Throw an error, read + * to EOL, and ignore the line for assembly purposes. 
+ */ + report_error (ERR_NONFATAL, "line is longer than %d characters", + sizeof(buffer)-2); + while (fgets(buffer, sizeof(buffer), fp) && + buffer[strlen(buffer)-1] != '\n'); + continue; /* read another line */ + } + + /* here we parse our directives; this is not handled by the 'real' + * parser. */ + + if ( (i = getkw (buffer, &value)) ) { + switch (i) { + case 1: /* [SEGMENT n] */ + seg = ofmt->section (value, pass, &sb); + if (seg == NO_SEG) { + report_error (ERR_NONFATAL, + "segment name `%s' not recognised", + value); + } else { + current_seg = seg; + } + break; + case 2: /* [EXTERN label] */ + if (*value == '$') + value++; /* skip initial $ if present */ + declare_as_global (value, report_error); + define_label (value, seg_alloc(), 0L, ofmt, report_error); + break; + case 3: /* [BITS bits] */ + switch (atoi(value)) { + case 16: + case 32: + sb = atoi(value); + break; + default: + report_error(ERR_NONFATAL, + "`%s' is not a valid argument to [BITS]", + value); + break; + } + break; + case 4: /* [INC file] */ + oldfile = fp; + oldfileline = lineno; + lineno = 0; + strcpy(currentfile,value); + fp = fopen(value,"r"); + if (!fp) { + lineno = oldfileline; + fp = oldfile; + strcpy(currentfile,fname); + report_error (ERR_FATAL, + "unable to open include file `%s'\n", + value); + } + break; + case 5: /* [GLOBAL symbol] */ + if (*value == '$') + value++; /* skip initial $ if present */ + declare_as_global (value, report_error); + break; + case 6: /* [COMMON symbol size] */ + p = value; + while (*p && !isspace(*p)) + p++; + if (*p) { + long size; + + while (*p && isspace(*p)) + *p++ = '\0'; + size = readnum (p, &rn_error); + if (rn_error) + report_error (ERR_NONFATAL, "invalid size specified" + " in COMMON declaration"); + else + define_common (value, seg_alloc(), size, + ofmt, report_error); + } else + report_error (ERR_NONFATAL, "no size specified in" + " COMMON declaration"); + break; + case 7: /* [ABSOLUTE address] */ + current_seg = NO_SEG; + abs_offset = 
readnum(value, &rn_error); + if (rn_error) { + report_error (ERR_NONFATAL, "invalid address specified" + " for ABSOLUTE directive"); + abs_offset = 0x100;/* don't go near zero in case of / */ + } + break; + default: + if (!ofmt->directive (buffer+1, value, 1)) + report_error (ERR_NONFATAL, "unrecognised directive [%s]", + buffer+1); + break; + } + } else { + long offs = get_curr_ofs; + parse_line (current_seg, offs, lookup_label, + 1, buffer, &output_ins, ofmt, report_error); + if (output_ins.opcode == I_EQU) { + /* + * Special `..' EQUs get processed in pass two. + */ + if (!output_ins.label) + report_error (ERR_NONFATAL, + "EQU not preceded by label"); + else if (output_ins.label[0] != '.' || + output_ins.label[1] != '.') { + if (output_ins.operands == 1 && + (output_ins.oprs[0].type & IMMEDIATE)) { + define_label (output_ins.label, + output_ins.oprs[0].segment, + output_ins.oprs[0].offset, + ofmt, report_error); + } else if (output_ins.operands == 2 && + (output_ins.oprs[0].type & IMMEDIATE) && + (output_ins.oprs[0].type & COLON) && + output_ins.oprs[0].segment == NO_SEG && + (output_ins.oprs[1].type & IMMEDIATE) && + output_ins.oprs[1].segment == NO_SEG) { + define_label (output_ins.label, + output_ins.oprs[0].offset | SEG_ABS, + output_ins.oprs[1].offset, + ofmt, report_error); + } else + report_error(ERR_NONFATAL, "bad syntax for EQU"); + } + } else { + if (output_ins.label) + define_label (output_ins.label, + current_seg, offs, + ofmt, report_error); + offs += insn_size (current_seg, offs, sb, + &output_ins, report_error); + set_curr_ofs (offs); + } + cleanup_insn (&output_ins); + } + } + + if (terminate_after_phase) { + fclose(ofile); + remove(outname); + if (want_usage) + usage(); + exit (1); + } + + /* pass two */ + pass = 2; + rewind (fp); + current_seg = ofmt->section(NULL, pass, &sb); + raa_free (offsets); + offsets = raa_init(); + lineno = 0; + while (1) { + if (!fgets(buffer, sizeof(buffer), fp)) { + if (oldfile) { + fclose(fp); + fp = oldfile; + 
lineno = oldfileline; + strcpy(currentfile,fname); + oldfile = NULL; + continue; + } else + break; + } + lineno++; + if (buffer[strlen(buffer)-1] == '\n') + buffer[strlen(buffer)-1] = '\0'; + else + report_error (ERR_PANIC, + "too-long line got through from pass one"); + + /* here we parse our directives; this is not handled by + * the 'real' parser. */ + + if ( (i = getkw (buffer, &value)) ) { + switch (i) { + case 1: /* [SEGMENT n] */ + seg = ofmt->section (value, pass, &sb); + if (seg == NO_SEG) { + report_error (ERR_PANIC, + "invalid segment name on pass two"); + } else + current_seg = seg; + break; + case 2: /* [EXTERN label] */ + break; + case 3: /* [BITS bits] */ + switch (atoi(value)) { + case 16: + case 32: + sb = atoi(value); + break; + default: + report_error(ERR_PANIC, + "invalid [BITS] value on pass two", + value); + break; + } + break; + case 4: + oldfile = fp; + oldfileline = lineno; + lineno = 0; + strcpy(currentfile,value); + fp = fopen(value,"r"); + if (!fp) { + lineno = oldfileline; + fp = oldfile; + strcpy(currentfile,fname); + /* + * We don't report this error in the PANIC + * class, even though we might expect to have + * already picked it up during pass one, + * because of the tiny chance that some other + * process may have removed the include file + * between the passes. 
+ */ + report_error (ERR_FATAL, + "unable to open include file `%s'\n", + value); + } + break; + case 5: /* [GLOBAL symbol] */ + break; + case 6: /* [COMMON symbol size] */ + break; + case 7: /* [ABSOLUTE addr] */ + current_seg = NO_SEG; + abs_offset = readnum(value, &rn_error); + if (rn_error) + report_error (ERR_PANIC, "invalid ABSOLUTE address " + "in pass two"); + break; + default: + if (!ofmt->directive (buffer+1, value, 2)) + report_error (ERR_PANIC, "invalid directive on pass two"); + break; + } + } else { + long offs = get_curr_ofs; + parse_line (current_seg, offs, lookup_label, 2, + buffer, &output_ins, ofmt, report_error); + obuf = buffer; + if (output_ins.label) + define_label_stub (output_ins.label, report_error); + if (output_ins.opcode == I_EQU) { + /* + * Special `..' EQUs get processed here. + */ + if (output_ins.label[0] == '.' && + output_ins.label[1] == '.') { + if (output_ins.operands == 1 && + (output_ins.oprs[0].type & IMMEDIATE)) { + define_label (output_ins.label, + output_ins.oprs[0].segment, + output_ins.oprs[0].offset, + ofmt, report_error); + } else if (output_ins.operands == 2 && + (output_ins.oprs[0].type & IMMEDIATE) && + (output_ins.oprs[0].type & COLON) && + output_ins.oprs[0].segment == NO_SEG && + (output_ins.oprs[1].type & IMMEDIATE) && + output_ins.oprs[1].segment == NO_SEG) { + define_label (output_ins.label, + output_ins.oprs[0].offset | SEG_ABS, + output_ins.oprs[1].offset, + ofmt, report_error); + } else + report_error(ERR_NONFATAL, "bad syntax for EQU"); + } + } + offs += assemble (current_seg, offs, sb, + &output_ins, ofmt, report_error); + cleanup_insn (&output_ins); + set_curr_ofs (offs); + } + } +} + +static int getkw (char *buf, char **value) { + char *p, *q; + + if (*buf!='[') + return 0; + p = buf; + while (*p && *p != ']') p++; + if (!*p) + return 0; + q = p++; + while (*p && *p != ';') { + if (!isspace(*p)) + return 0; + p++; + } + q[1] = '\0'; + + p = buf+1; + while (*buf && *buf!=' ' && *buf!=']' && *buf!='\t') + 
buf++; + if (*buf==']') { + *buf = '\0'; + *value = buf; + } else { + *buf++ = '\0'; + *value = buf; + while (*buf!=']') buf++; + *buf++ = '\0'; + } + for (q=p; *q; q++) + *q = tolower(*q); + if (!strcmp(p, "segment") || !strcmp(p, "section")) + return 1; + if (!strcmp(p, "extern")) + return 2; + if (!strcmp(p, "bits")) + return 3; + if (!strcmp(p, "inc") || !strcmp(p, "include")) + return 4; + if (!strcmp(p, "global")) + return 5; + if (!strcmp(p, "common")) + return 6; + if (!strcmp(p, "absolute")) + return 7; + return -1; +} + +static void report_error (int severity, char *fmt, ...) { + va_list ap; + + if (severity & ERR_NOFILE) + fputs ("nasm: ", stderr); + else + fprintf (stderr, "%s:%d: ", currentfile, lineno); + + if ( (severity & ERR_MASK) == ERR_WARNING) + fputs ("warning: ", stderr); + else if ( (severity & ERR_MASK) == ERR_PANIC) + fputs ("panic: ", stderr); + + va_start (ap, fmt); + vfprintf (stderr, fmt, ap); + fputc ('\n', stderr); + + if (severity & ERR_USAGE) + want_usage = TRUE; + + switch (severity & ERR_MASK) { + case ERR_WARNING: + /* no further action, by definition */ + break; + case ERR_NONFATAL: + terminate_after_phase = TRUE; + break; + case ERR_FATAL: + fclose(ofile); + remove(outname); + if (want_usage) + usage(); + exit(1); /* instantly die */ + break; /* placate silly compilers */ + case ERR_PANIC: + abort(); /* panic and dump core */ + break; + } +} + +static void usage(void) { + fputs("type `nasm -h' for help\n", stderr); +} + +static void register_output_formats(void) { + /* Flat-form binary format */ +#ifdef OF_BIN + extern struct ofmt of_bin; +#endif + /* Unix formats: a.out, COFF, ELF */ +#ifdef OF_AOUT + extern struct ofmt of_aout; +#endif +#ifdef OF_COFF + extern struct ofmt of_coff; +#endif +#ifdef OF_ELF + extern struct ofmt of_elf; +#endif + /* Linux strange format: as86 */ +#ifdef OF_AS86 + extern struct ofmt of_as86; +#endif + /* DOS formats: OBJ, Win32 */ +#ifdef OF_OBJ + extern struct ofmt of_obj; +#endif +#ifdef OF_WIN32 
+ extern struct ofmt of_win32; +#endif +#ifdef OF_RDF + extern struct ofmt of_rdf; +#endif +#ifdef OF_DBG /* debug format must be included specifically */ + extern struct ofmt of_dbg; +#endif + +#ifdef OF_BIN + ofmt_register (&of_bin); +#endif +#ifdef OF_AOUT + ofmt_register (&of_aout); +#endif +#ifdef OF_COFF + ofmt_register (&of_coff); +#endif +#ifdef OF_ELF + ofmt_register (&of_elf); +#endif +#ifdef OF_AS86 + ofmt_register (&of_as86); +#endif +#ifdef OF_OBJ + ofmt_register (&of_obj); +#endif +#ifdef OF_WIN32 + ofmt_register (&of_win32); +#endif +#ifdef OF_RDF + ofmt_register (&of_rdf); +#endif +#ifdef OF_DBG + ofmt_register (&of_dbg); +#endif + /* + * set the default format + */ + ofmt = &OF_DEFAULT; +} diff --git a/nasm.doc b/nasm.doc new file mode 100644 index 00000000..dd2073bb --- /dev/null +++ b/nasm.doc @@ -0,0 +1,996 @@ + The Netwide Assembler, NASM + =========================== + +Introduction +============ + +The Netwide Assembler grew out of an idea on comp.lang.asm.x86 (or +possibly alt.lang.asm, I forget which), which was essentially that +there didn't seem to be a good free x86-series assembler around, and +that maybe someone ought to write one. + +- A86 is good, but not free, and in particular you don't get any + 32-bit capability until you pay. It's DOS only, too. + +- GAS is free, and ports over DOS/Unix, but it's not very good, + since it's designed to be a back end to gcc, which always feeds it + correct code. So its error checking is minimal. Also its syntax is + horrible, from the point of view of anyone trying to actually + _write_ anything in it. Plus you can't write 16-bit code in it. + +- AS86 is Linux specific, and (my version at least) doesn't seem to + have much (or any) documentation. + +- MASM isn't very good. And it's expensive. And it runs only under + DOS. + +- TASM is better, but still strives for MASM compatibility, which + means millions of directives and tons of red tape. 
And its syntax + is essentially MASM's, with the contradictions and quirks that + entails (although it sorts out some of those by means of Ideal + mode). It's expensive too. And it's DOS only. + +So here, for your coding pleasure, is NASM. At present it's still in +prototype stage - we don't promise that it can outperform any of +these assemblers. But please, _please_ send us bug reports and fixes +and anything else you can get your hands on, and we'll improve it +out of all recognition. Again. + +Please see the file `Licence' for the legalese. + +Getting Started: Installation +============================= + +NASM is distributed in source form, in what we hope is totally +ANSI-compliant C. It uses no non-portable code at all, that we know +of. It ought to compile without change on any system you care to try +it on. We also supply a pre-compiled 16-bit DOS binary. + +To install it, edit the Makefile to describe your C compiler, and +type `make'. Then copy the binary to somewhere on your path. That's +all - NASM relies on no files other than its own executable. +Although if you're on a Unix system, you may also want to install +the NASM manpage (`nasm.1'). You may also want to install the binary +and manpage for the Netwide Disassembler, NDISASM (also see +`ndisasm.doc'). + +Running NASM +============ + +To assemble a file, you issue a command of the form + + nasm -f <format> <filename> [-o <output>] + +For example, + + nasm -f elf myfile.asm + +will assemble `myfile.asm' into an ELF object file `myfile.o'. And + + nasm -f bin myfile.asm -o myfile.com + +will assemble `myfile.asm' into a raw binary program `myfile.com'. + +To get usage instructions from NASM, try typing `nasm -h'. This will +also list the available output file formats, and what they are. + +If you use Linux but aren't sure whether your system is a.out or +ELF, type `file /usr/bin/nasm' or wherever you put the NASM binary.
+If it says something like + +/usr/bin/nasm: ELF 32-bit LSB executable i386 (386 and up) Version 1 + +then your system is ELF, and you should use `-f elf' when you want +NASM to produce Linux object files. If it says + +/usr/bin/nasm: Linux/i386 demand-paged executable (QMAGIC) + +or something similar, your system is a.out, and you should use `-f +aout' instead. + +Like Unix compilers and assemblers, NASM is silent unless it goes +wrong: you won't see any output at all, unless it gives error +messages. + +Writing Programs with NASM +========================== + +Each line of a NASM source file should contain some combination of +the four fields + +LABEL: INSTRUCTION OPERANDS ; COMMENT + +`LABEL' defines a label pointing to that point in the source. There +are no restrictions on white space: labels may have white space +before them, or not, as you please. The colon after the label is +also optional. + +Valid characters in labels are letters, numbers, `_', `$', `#', `@', +`~', `?', and `.'. The only characters which may be used as the +_first_ character of an identifier are letters, `_' and `?', and +(with special meaning: see `Local Labels') `.'. An identifier may +also be prefixed with a $ sign to indicate that it is intended to be +read as an identifier and not a reserved word; thus, if some other +module you are linking with defines a symbol `eax', you can refer to +`$eax' in NASM code to distinguish it from the register name. + +`INSTRUCTION' can be any machine opcode (Pentium and P6 opcodes, FPU +opcodes, MMX opcodes and even undocumented opcodes are all +supported). The instruction may be prefixed by LOCK, REP, REPE/REPZ +or REPNE/REPNZ, in the usual way. Explicit address-size and operand- +size prefixes A16, A32, O16 and O32 are provided - one example of +their use is given in the `Unusual Instruction Sizes' section below. +You can also use a segment register as a prefix: coding `es mov +[bx],ax' is equivalent to coding `mov [es:bx],ax'. 
We recommend the +latter syntax, since it is consistent with other syntactic features +of the language, but for instructions such as `lodsb' there isn't +anywhere to put a segment override except as a prefix. This is why +we support it. + +The `INSTRUCTION' field may also contain some pseudo-opcodes: see +the section on pseudo-opcodes for details. + +`OPERANDS' can be nonexistent, or huge, depending on the +instruction, of course. When operands are registers, they are given +simply as register names: `eax', `ss', `di' for example. NASM does +_not_ use the GAS syntax, in which register names are prefixed by a +`%' sign. Operands may also be effective addresses, or they may be +constants or expressions. See the separate sections on these for +details. + +`COMMENT' is anything after the first semicolon on the line, +excluding semicolons inside quoted strings. + +Of course, all these fields are optional: the presence or absence of +the OPERANDS field is required by the nature of the INSTRUCTION +field, but any line may contain a LABEL or not, may contain an +INSTRUCTION or not, and may contain a COMMENT or not, independently +of each other. + +Lines may also contain nothing but a directive: see `Assembler +Directives' below for details. + +NASM can currently not handle any line longer than 1024 characters. +This may be fixed in a future release. + +Floating Point Instructions +=========================== + +NASM has support for assembling FPU opcodes. However, its syntax is +not necessarily the same as anyone else's. + +NASM uses the notation `st0', `st1', etc. to denote the FPU stack +registers. NASM also accepts a wide range of single-operand and +two-operand forms of the instructions. 
For people who wish to use +the single-operand form exclusively (this is in fact the `canonical' +form from NASM's point of view, in that it is the form produced by +the Netwide Disassembler), there is a TO keyword which makes +available the opcodes which cannot be so easily accessed by one +operand. Hence: + + fadd st1 ; this sets st0 := st0 + st1 + fadd st0,st1 ; so does this + fadd st1,st0 ; this sets st1 := st1 + st0 + fadd to st1 ; so does this + +It's also worth noting that the FPU instructions that reference +memory must use the prefixes DWORD, QWORD or TWORD to indicate what +size of memory operand they refer to. + +NASM, in keeping with our policy of not trying to second-guess the +programmer, will _never_ automatically insert WAIT instructions into +your code stream. You must code WAIT yourself before _any_ +instruction that needs it. (Of course, on 286 processors or above, +it isn't needed anyway...) + +NASM supports specification of floating point constants by means of +`dd' (single precision), `dq' (double precision) and `dt' (extended +precision). Floating-point _arithmetic_ is not done, due to +portability constraints (not all platforms on which NASM can be run +support the same floating point types), but simple constants can be +specified. For example: + +gamma dq 0.5772156649 ; Euler's constant + +Pseudo-Opcodes +============== + +Pseudo-opcodes are not real x86 machine opcodes, but are used in the +instruction field anyway because that's the most convenient place to +put them. The current pseudo-opcodes are DB, DW and DD, their +uninitialised counterparts RESB, RESW and RESD, the EQU command, and +the TIMES prefix. + +DB, DW and DD work as you would expect: they can each take an +arbitrary number of operands, and when assembled, they generate +nothing but those operands. All three of them can take string +constants as operands, which no other instruction can currently do. +See the `Constants' section for details about string constants. 
+ +RESB, RESW and RESD are designed to be used in the BSS section of a +module: they declare _uninitialised_ storage space. Each takes a +single operand, which is the number of bytes, words or doublewords +to reserve. We do not support the MASM/TASM syntax of reserving +uninitialised space by writing `DW ?' or similar: this is what we do +instead. (But see `Critical Expressions' for a caveat on the nature +of the operand.) + +(An aside: if you want to be able to write `DW ?' and have something +vaguely useful happen, you can always code `? EQU 0'...) + +EQU defines a symbol to a specified value: when EQU is used, the +LABEL field must be present. The action of EQU is to define the +given label name to the value of its (only) operand. This definition +is absolute, and cannot change later. So, for example, + +message db 'hello, world' +msglen equ $-message + +defines `msglen' to be the constant 12. `msglen' may not then be +redefined later. This is not a preprocessor definition either: the +value of `msglen' is evaluated _once_, using the value of `$' (see +the section `Expressions' for details of `$') at the point of +definition, rather than being evaluated wherever it is referenced +and using the value of `$' at the point of reference. Note that the +caveat in `Critical Expressions' applies to EQU too, at the moment. + +Finally, the TIMES prefix causes the instruction to be assembled +multiple times. This is partly NASM's equivalent of the DUP syntax +supported by MASM-compatible assemblers, in that one can do + +zerobuf: times 64 db 0 + +or similar, but TIMES is more versatile than that. TIMES takes not +just a numeric constant, but a numeric _expression_, so one can do +things like + +buffer: db 'hello, world' + times 64-$+buffer db ' ' + +which will store exactly enough spaces to make the total length of +`buffer' up to 64. (See the section `Critical Expressions' for a +caveat on the use of TIMES.) 
Finally, TIMES can be applied to +ordinary opcodes, so you can code trivial unrolled loops in it: + + times 100 movsb + +Note that there is no effective difference between `times 100 resb +1' and `resb 100'. + +Effective Addresses +=================== + +NASM's addressing scheme is very simple, although it can involve +more typing than other assemblers. Where other assemblers +distinguish between a _variable_ (label declared without a colon) +and a _label_ (declared with a colon), and use different means of +addressing the two, NASM is totally consistent. + +To refer to the contents of a memory location, square brackets are +required. This applies to simple variables, computed offsets, +segment overrides, effective addresses - _everything_. E.g.: + +wordvar dw 123 + mov ax,[wordvar] + mov ax,[wordvar+1] + mov ax,[es:wordvar+bx] + +NASM does _not_ support the various strange syntaxes used by MASM +and others, such as + + mov ax,wordvar ; this is legal, but means something else + mov ax,es:wordvar[bx] ; not even slightly legal + es mov ax,wordvar[1] ; the prefix is OK, but not the rest + +If no square brackets are used, NASM interprets label references to +mean the address of the label. Hence there is no need for MASM's +OFFSET keyword, but + + mov ax,wordvar + +loads AX with the _address_ of the variable `wordvar'. + +More complicated effective addresses are handled by enclosing them +within square brackets as before: + + mov eax,[ebp+2*edi+offset] + mov ax,[bx+di+8] + +NASM will cope with some fairly strange effective addresses, if you +try it: provided your effective address expression evaluates +_algebraically_ to something that the instruction set supports, it +will be able to assemble it. For example, + + mov eax,[ebx*5] ; actually assembles to [ebx+ebx*4] + mov ax,[bx-si+2*si] ; actually assembles to [bx+si] + +will both work. 
+ +There is an ambiguity in the instruction set, which allows two forms +of 32-bit effective address with equivalent meaning: + + mov eax,[2*eax+0] + mov eax,[eax+eax] + +These two expressions clearly refer to the same address. The +difference is that the first one, if assembled `as is', requires a +four-byte offset to be stored as part of the instruction, so it +takes up more space. NASM will generate the second (smaller) form +for both of the above instructions, in an effort to save space. +There is not, currently, any means for forcing NASM to generate the +larger form of the instruction. + +Mixing 16 and 32 Bit Code: Unusual Instruction Sizes +==================================================== + +A number of assemblers seem to have trouble assembling instructions +that use a different operand or address size from the one they are +expecting; as86 is a good example, even though the Linux kernel boot +process (which is assembled using as86) needs several such +instructions and as86 can't do them. + +Instructions such as `mov eax,2' in 16-bit mode are easy, of course, +and NASM can do them just as well as any other assembler. The +difficult instructions are things like far jumps. + +Suppose you are in a 16-bit segment, in protected mode, and you want +to execute a far jump to a point in a 32-bit segment. You need to +code a 32-bit far jump in a 16-bit segment; not many assemblers I +know of will easily support this. NASM can, by means of the `word' +and `dword' specifiers. So you can code + + call 1234h:5678h ; this uses the default segment size + call word 1234h:5678h ; this is guaranteed to be 16-bit + call dword 1234h:56789ABCh ; and this is guaranteed 32-bit + +and NASM will generate correct code for them. + +Similarly, if you are coding in a 16-bit code segment, but trying to +access memory in a 32-bit data segment, your effective addresses +will want to be 32-bit. 
Of course as soon as you specify an +effective address containing a 32-bit register, like `[eax]', the +addressing is forced to be 32-bit anyway. But if you try to specify +a simple offset, such as `[label]' or `[0x10000]', you will get the +default address size, which in this case will be wrong. However, +NASM allows you to code `[dword 0x10000]' to force a 32-bit address +size, or conversely `[word wlabel]' to force 16 bits. + +Be careful not to confuse `word' and `dword' _inside_ the square +brackets with _outside_: consider the instruction + + mov word [dword 0x123456],0x7890 + +which moves 16 bits of data to an address specified by a 32-bit +offset. There is no contradiction between the `word' and `dword' in +this instruction, since they modify different aspects of the +functionality. Or, even more confusingly, + + call dword far [fs:word 0x4321] + +which takes an address specified by a 16-bit offset, and extracts a +48-bit DWORD FAR pointer from it to call. + +Using this effective-address syntax, the `dword' or `word' override +may come before or after the segment override if any: NASM isn't +fussy. Hence: + + mov ax,[fs:dword 0x123456] + mov ax,[dword fs:0x123456] + +are equivalent forms, and generate the same code. + +The LOOP instruction comes in strange sizes, too: in a 16-bit +segment it uses CX as its count register by default, and in a 32-bit +segment it uses ECX. But it's possible to do either one in the other +segment, and NASM will cope by letting you specify the count +register as a second operand: + + loop label ; uses CX or ECX depending on mode + loop label,cx ; always uses CX + loop label,ecx ; always uses ECX + +Finally, the string instructions LODSB, STOSB, MOVSB, CMPSB, SCASB, +INSB, and OUTSB can all have strange address sizes: typically, in a +16-bit segment they read from [DS:SI] and write to [ES:DI], and in a +32-bit segment they read from [DS:ESI] and write to [ES:EDI]. 
+However, this can be changed by the use of the explicit address-size +prefixes `a16' and `a32'. These prefixes generate null code if used +in the same size segment as they specify, but generate an 0x67 +prefix otherwise. Hence `a16' generates no code in a 16-bit segment, +but 0x67 in a 32-bit one, and vice versa. So `a16 lodsb' will always +generate code to read a byte from [DS:SI], no matter what the size +of the segment. There are also explicit operand-size override +prefixes, `o16' and `o32', which will optionally generate 0x66 +bytes, but these are provided for completeness and should never have +to be used. + +Constants +========= + +NASM can accept three kinds of constant: _numeric_, _character_ and +_string_ constants. + +Numeric constants are simply numbers. NASM supports a variety of +syntaxes for expressing numbers in strange bases: you can do any of + + 100 ; this is decimal + 0x100 ; hex + 100h ; hex as well + $100 ; hex again + 100q ; octal + 100b ; binary + +NASM does not support A86's syntax of treating anything with a +leading zero as hex, nor does it support the C syntax of treating +anything with a leading zero as octal. Leading zeros make no +difference to NASM. (Except that, as usual, if you have a hex +constant beginning with a letter, and you want to use the trailing-H +syntax to represent it, you have to use a leading zero so that NASM +will recognise it as a number instead of a label.) + +The `x' in `0x100', and the trailing `h', `q' and `b', may all be +upper case if you want. + +Character constants consist of up to four characters enclosed in +single or double quotes. No escape character is defined for +including the quote character itself: if you want to declare a +character constant containing a double quote, enclose it in single +quotes, and vice versa. 
+ +Character constants' values are worked out in terms of a +little-endian computer: if you code + + mov eax,'abcd' + +then if you were to examine the binary output from NASM, it would +contain the visible string `abcd', which of course means that the +actual value loaded into EAX would be 0x64636261, not 0x61626364. + +String constants are like character constants, only more so: if a +character constant appearing as operand to a DB, DW or DD is longer +than the word size involved (1, 2 or 4 respectively), it will be +treated as a string constant instead, which is to say the +concatenation of separate character constants. + +For example, + + db 'hello, world' + +declares a twelve-character string constant. And + + dd 'dontpanic' + +(a string constant) is equivalent to writing + + dd 'dont','pani','c' + +(three character constants), so that what actually gets assembled is +equivalent to + + db 'dontpanic',0,0,0 + +(It's worth noting that one of the reasons for the reversal of +character constants is so that the instruction `dw "ab"' has the +same meaning whether "ab" is treated as a character constant or a +string constant. Hence there is less confusion.) + +Expressions +=========== + +Expressions in NASM can be formed of the following operators: `|' +(bitwise OR), `^' (bitwise XOR), `&' (bitwise AND), `<<' and `>>' +(logical bit shifts), `+', `-', `*' (ordinary addition, subtraction +and multiplication), `/', `%' (unsigned division and modulo), `//', +`%%' (signed division and modulo), `~' (bitwise NOT), and the +operators SEG and WRT (see `SEG and WRT' below). + +The order of precedence is: + +| lowest +^ +& +<< >> +binary + and - +* / % // %% +unary + and -, ~, SEG highest + +As usual, operators within a precedence level associate to the left +(i.e. `2-3-4' evaluates the same way as `(2-3)-4'). 
+ +A form of algebra is done by NASM when evaluating expressions: I +have already stated that an effective address expression such as +`[EAX*6-EAX]' will be recognised by NASM as algebraically equivalent +to `[EAX*4+EAX]', and assembled as such. In addition, algebra can be +done on labels as well: `label2*2-label1' is an acceptable way to +define an address as far beyond `label2' as `label1' is before it. +(In less algebraically capable assemblers, one might have to write +that as `label2 + (label2-label1)', where the value of every +sub-expression is either a valid address or a constant. NASM can of +course cope with that version as well.) + +Expressions may also contain the special token `$', known as a Here +token, which always evaluates to the address of the current assembly +point. (That is, the address of the assembly point _before_ the +current instruction gets assembled.) The special token `$$' +evaluates to the address of the beginning of the current section; +this can be used for alignment, as shown below: + + times ($$-$) & 3 nop ; pad with NOPs to 4-byte boundary + +SEG and WRT +=========== + +NASM contains the capability for its object file formats (currently, +only `obj' makes use of this) to permit programs to directly refer +to the segment-base values of their segments. This is achieved +either by the object format defining the segment names as symbols +(`obj' does this), or by the use of the SEG operator. + +SEG is a unary prefix operator which, when applied to a symbol +defined in a segment, will yield the segment base value of that +segment. (In `obj' format, symbols defined in segments which are +grouped are considered to be primarily a member of the _group_, not +the segment, and the return value of SEG reflects this.) + +SEG may be used for far pointers: it is guaranteed that for any +symbol `sym', using the offset `sym' from the segment base `SEG sym' +yields a correct pointer to the symbol. 
Hence you can code a far +call by means of + + CALL SEG routine:routine + +or store a far pointer in a data segment by + + DW routine, SEG routine + +For convenience, NASM supports the forms + + CALL FAR routine + JMP FAR routine + +as direct synonyms for the canonical syntax + + CALL SEG routine:routine + JMP SEG routine:routine + +No alternative syntax for + + DW routine, SEG routine + +is supported. + +Simply referring to `sym', for some symbol, will return the offset +of `sym' from its _preferred_ segment base (as returned from `SEG +sym'); sometimes, you may want to obtain the offset of `sym' from +some _other_ segment base. (E.g. the offset of `sym' from the base +of the segment it's in, where normally you'd get the offset from a +group base). This is accomplished using the WRT (With Reference To) +keyword: if `sym' is defined in segment `seg' but you want its +offset relative to the beginning of segment `seg2', you can do + + mov ax,sym WRT seg2 + +The right-hand operand to WRT must be a segment-base value. You can +also do `sym WRT SEG sym2' if you need to. + +Critical Expressions +==================== + +NASM is a two-pass assembler: it goes over the input once to +determine the location of all the symbols, then once more to +actually generate the output code. Most expressions are +non-critical, in that if they contain a forward reference and hence +their correct value is unknown during the first pass, it doesn't +matter. However, arguments to RESB, RESW and RESD, and the argument +to the TIMES prefix, can actually affect the _size_ of the generated +code, and so it is critical that the expression can be evaluated +correctly on the first pass. So in these situations, expressions may +not contain forward references. This prevents NASM from having to +sort out a mess such as + + times (label-$) db 0 +label: db 'where am I?' 
+ +in which the TIMES argument could equally legally evaluate to +_anything_, or perhaps even worse, + + times (label-$+1) db 0 +label: db 'NOW where am I?' + +in which any value for the TIMES argument is by definition invalid. + +Since NASM is a two-pass assembler, this criticality condition also +applies to the argument to EQU. Suppose, if this were not the case, +we were to have the setup + + mov ax,a +a equ b +b: + +On pass one, `a' cannot be defined properly, since `b' is not known +yet. On pass two, `b' is known, so line two can define `a' properly. +Unfortunately, line 1 needed `a' to be defined properly, so this +code will not assemble using only two passes. + +Local Labels +============ + +NASM takes its local label scheme mainly from the old Amiga +assembler Devpac: a local label is one that begins with a period. +The `localness' comes from the fact that local labels are associated +with the previous non-local label, so that you may declare the same +local label twice if a non-local one intervenes. Hence: + +label1 ; some code +.loop ; some more code + jne .loop + ret +label2 ; some code +.loop ; some more code + jne .loop + ret + +In the above code, each `jne' instruction jumps to the line of code +before it, since the `.loop' labels are distinct from each other. + +NASM, however, introduces an extra capability not present in Devpac, +which is that the local labels are actually _defined_ in terms of +their associated non-local label. So if you really have to, you can +write + +label3 ; some more code + ; and some more + jmp label1.loop + +So although local labels are _usually_ local, it is possible to +reference them from anywhere in your program, if you really have to. + +Assembler Directives +==================== + +Assembler directives appear on a line by themselves (apart from a +comment), and must be enclosed in square brackets. 
No white space +may appear before the opening square bracket, although white space +and a comment may come after the closing bracket. + +Some directives are universal: they may be used in any situation, +and do not change their syntax. The universal directives are listed +below. + +[BITS 16] or [BITS 32] switches NASM into 16-bit or 32-bit mode. +(This is equivalent to USE16 and USE32 segments, in TASM or MASM.) +In 32-bit mode, instructions are prefixed with 0x66 or 0x67 prefixes +when they use 16-bit data or addresses; in 16-bit mode, the reverse +happens. NASM's default depends on the object format; the defaults +are documented with the formats. (See `obj', in particular, for some +unusual behaviour.) + +[INCLUDE filename] or [INC filename] includes another source file +into the current one. At present, only one level of inclusion is +supported. + +[SECTION name] or [SEGMENT name] changes which section the code you +write will be assembled into. Acceptable section names vary between +output formats, but most formats (indeed, all formats at the moment) +support the names `.text', `.data' and `.bss'. Note that `.bss' is +an uninitialised data section, and so you will receive a warning +from NASM if you try to assemble any code or data in it. The only +thing you can do in `.bss' without triggering a warning is use RESB, +RESW and RESD. That's what they're for. + +[ABSOLUTE address] can be considered a different form of [SECTION], +in that it must be overridden using a SECTION directive once you +have finished using it. It is used to assemble notional code at an +absolute offset address; of course, you can't actually assemble +_code_ there, since no object file format is capable of putting the +code in place, but you can use RESB, RESW and RESD, and you can +define labels. 
Hence you could, for example, define a C-like data +structure by means of + + [ABSOLUTE 0] + stLong resd 1 + stWord resw 1 + stByte1 resb 1 + stByte2 resb 1 + st_size: + [SEGMENT .text] + +and then carry on coding. This defines `stLong' to be zero, `stWord' +to be 4, `stByte1' to be 6, `stByte2' to be 7 and `st_size' to be 8. +So this has defined a data structure. + +[EXTERN symbol] defines a symbol as being `external', in the C +sense: `EXTERN' states that the symbol is _not_ declared in this +module, but is declared elsewhere, and that you wish to _reference_ +it in this module. + +[GLOBAL symbol] defines a symbol as being global, in the sense that +it is exported from this module and other modules may reference it. +All symbols are local, unless declared as global. Note that the +`GLOBAL' directive must appear before the definition of the symbol +it refers to. + +[COMMON symbol size] defines a symbol as being common: it is +declared to have the given size, and it is merged at link time with +any declarations of the same symbol in other modules. This is not +_fully_ supported in the `obj' file format: see the section on `obj' +for details. + +Directives may also be specific to the output file format. At +present, the `bin' and `obj' formats define extra directives, which +are specified below. + +Output Formats +============== + +The current output formats supported are `bin', `aout', `elf', +`coff', `win32', `obj', `as86' and `rdf'. + +`bin': flat-form binary +----------------------- + +This is at present the only output format that generates instantly +runnable code: all the others produce object files that need linking +before they become executable. + +`bin' output files contain no red tape at all: they simply contain +the binary representation of the exact code you wrote. + +The `bin' format supports a format-specific directive, which is ORG. +[ORG addr] declares that your code should be assembled as if it were +to be loaded into memory at the address `addr'. 
So a DOS .COM file +should state [ORG 0x100], and a DOS .SYS file should state [ORG 0]. +There should be _one_ ORG directive, at most, in an assembly file: +NASM does not support the use of ORG to jump around inside an object +file, like MASM does (see the `Bugs' section for a use of the ORG +directive not supported by NASM). + +Like all formats, the `bin' format defines the section names +`.text', `.data' and `.bss'. The layout is that `.text' comes first +in the output file, followed by `.data', and notionally followed by +`.bss'. So if you declare a BSS section in a flat binary file, +references to the BSS section will refer to space past the end of +the actual file. The `.data' and `.bss' sections are considered to +be aligned on four-byte boundaries: this is achieved by inserting +padding zero bytes between the end of the text section and the start +of the data, if there is data present. Of course if no [SECTION] +directives are present, everything will go into `.text', and you +will get nothing in the output except the code you wrote. + +`bin' silently ignores GLOBAL directives, and will also not complain +at EXTERN ones. You only get an error if you actually _reference_ an +external symbol. + +Using the `bin' format, the default output filename is `filename' +for inputs of `filename.asm'. If there is no extension to be +removed, output will be placed in `nasm.out' and a warning will be +generated. + +`bin' defaults to 16-bit assembly mode. + +`aout' and `elf': Linux object files +------------------------------------ + +These two object formats are the ones used under Linux. They have no +format-specific directives, and their default output filename is +`filename.o'. + +ELF is a much more featureful object-file format than a.out: in +particular it has enough features to support the writing of position +independent code by means of a global offset table, and position +independent shared libraries by means of a procedure linkage table. 
+Unfortunately NASM, as yet, does not support these extensions, and +so NASM cannot be used to write shared library code under ELF. NASM +also does not support the capability, in ELF, for specifying precise +alignment constraints on common variables. + +Both `aout' and `elf' default to 32-bit assembly mode. + +`coff' and `win32': Common Object File Format +--------------------------------------------- + +The `coff' format generates standard Unix COFF object files, which +can be fed to (for example) the DJGPP linker. Its default output +filename, like the other Unix formats, is `filename.o'. + +The `win32' format generates Win32 (Windows 95 or Intel-platform +Windows NT) object files, which nominally use the COFF standard, but +in fact are not compatible. Its default output filename is +`filename.obj'. + +`coff' and `win32' are not quite compatible formats, due to the fact +that Microsoft's interpretation of the term `relative relocation' +does not seem to be the same as the interpretation used by anyone +else. It is therefore more correct to state that Win32 uses a +_variant_ of COFF. The object files will not therefore produce +correct output when fed to each other's linkers. + +In addition to this subtle incompatibility, Win32 also defines +extensions to basic COFF, such as a mechanism for importing symbols +from dynamic-link libraries at load time. NASM may eventually +support this extension in the form of a format-specific directive. +However, as yet, it does not. Neither the `coff' nor `win32' output +formats have any specific directives. + +The Microsoft linker also has a small blind spot: it cannot +correctly relocate a relative CALL or JMP to an absolute address. +Hence all PC-relative CALLs or JMPs, when using the `win32' format, +must have targets which are relative to sections, or to external +symbols. You can't do + call 0x123456 +_even_ if you happen to know that there is executable code at that +address. 
The linker simply won't get the reference right; so in the +interests of not generating incorrect code, NASM will not allow this +form of reference to be written to a Win32 object file. (Standard +COFF, or at least the DJGPP linker, seems to be able to cope with +this contingency. Although that may be due to the executable having +a zero load address.) + +Both `coff' and `win32' default to 32-bit assembly mode. + +`obj': Microsoft 16-bit Object Module Format +-------------------------------------------- + +The `obj' format generates 16-bit Microsoft object files, suitable +for feeding to 16-bit versions of Microsoft C, and probably +TLINK as well (although that hasn't been tested). The Use32 +extensions are supported. + +`obj' defines no special segment names: you can call segments what +you like. Unlike the other formats, too, segment names are actually +defined as symbols, so you can write + +[SEGMENT CODE] + mov ax,CODE + +and get the _segment_ address of the segment, suitable for loading +into a segment register. + +Segments can be declared with attributes: + +[SEGMENT CODE PRIVATE ALIGN=16 CLASS=CODE OVERLAY=OVL2 USE16] + +You can specify segments to be PRIVATE, PUBLIC, COMMON or STACK; +their alignment may be any power of two from 1 to 256 (although only +1, 2, 4, 16 and 256 are really supported, so anything else gets +rounded up to the next highest one of those); their class and +overlay names may be specified. You may also specify segments to be +USE16 or USE32. The defaults are PUBLIC ALIGN=1, no class, no +overlay, USE16. + +You can also specify that a segment is _absolute_ at a certain +segment address: + +[SEGMENT SCREEN ABSOLUTE=0xB800] + +This is an alternative to the ALIGN keyword. + +The format-specific directive GROUP allows segment grouping: [GROUP +DGROUP DATA BSS] defines the group DGROUP to contain segments DATA +and BSS. 
+ +Segments are defined as part of their group by default: if `var' is +declared in segment `data', which is part of group `dgroup', then +`SEG var' returns `dgroup', and `var' signifies the offset of `var' +relative to the beginning of `dgroup'. You must use `var WRT data' +to get the offset of `var' relative to the beginning of its +_segment_. + +NASM allows a segment to be in two groups, but will generate a +warning. References to the symbols in that segment will be resolved +relative to the _first_ group it is defined in. + +The directive [UPPERCASE] causes all symbol, segment and group names +output to the object file to be uppercased. The actual _assembly_ is +still case sensitive. + +Common variables in OBJ files can be `near' or `far': currently, +NASM has a horribly grotty way to support that, which is that if you +specify the common variable's size as negative, it will be near, and +otherwise it will be far. The support isn't perfect: if you declare +a far common variable both in a NASM assembly module and in a C +program, you may well find the linker reports "mismatch in +array-size" or some such. The reason for this is that far common +variables are defined by means of _two_ size constants, which are +multiplied to give the real size. Apparently the Microsoft linker +(at least) likes both constants, not merely their product, to match +up. This may be fixed in a future release. + +If the module you're writing is intended to contain the program +entry point, you can declare this by defining the special label +`..start' at the start point, either as a label or by EQU (although +of course the normal caveats about EQU dependency still apply). + +`obj' has an unusual handling of assembly modes: instead of having a +global default for the whole file, there is a separate default for +each segment. 
Thus, each [SEGMENT] directive carries an implicit +[BITS] directive with it, which switches to 16-bit or 32-bit mode +depending on whether the segment is a Use16 or Use32 segment. If you +want to place 32-bit code in a Use16 segment, you can use an +explicit [BITS 32] override, but if you switch temporarily away from +that segment, you will have to repeat the override after coming back +to it. + +`as86': Linux as86 (bin86-0.3) +------------------------------ + +This output format replicates the format used to pass data between +the Linux x86 assembler and linker, as86 and ld86. Its default file +name, yet again, is `filename.o'. Its default segment-size attribute +is 16 bits. + +`rdf': Relocatable Dynamic Object File Format +--------------------------------------------- + +RDOFF was designed initially to test the object-file production +interface to NASM. It soon became apparent that it could be enhanced +for use in serious applications due to its simplicity; code to load +and execute an RDOFF object module is very simple. It also contains +enhancements to allow it to be linked with a dynamic link library at +either run- or load- time, depending on how complex you wish to make +your loader. + +The `rdoff' directory in the NASM distribution archive contains +source for an RDF linker and loader to run under Linux. + +`rdf' has a default segment-size attribute of 32 bits. + +Debugging format: `dbg' +----------------------- + +This output format is not built into NASM by default: it's for +debugging purposes. It produces a debug dump of everything that the +NASM assembly module feeds to the output driver, for the benefit of +people trying to write their own output drivers. + +Bugs +==== + +Apart from the missing features (correct OBJ COMMON support, ELF +alignment, ELF PIC support, etc.), there are no _known_ bugs. +However, any you find, with patches if possible, should be sent to + or , and we'll try to +fix them. 
+ +Beware of Pentium-specific instructions: Intel have provided a macro +file for MASM, to implement the eight or nine new Pentium opcodes as +MASM macros. NASM does not generate the same code for the CMPXCHG8B +instruction as these macros do: this is due to a bug in the _macro_, +not in NASM. The macro works by generating an SIDT instruction (if I +remember rightly), which has almost exactly the right form, then +using ORG to back up a bit and do a DB over the top of one of the +opcode bytes. The trouble is that Intel overlooked (or were unable +to allow for) the possibility that the SIDT instruction may contain +an 0x66 or 0x67 operand or address size prefix. If this happens, the +ORG will back up by the wrong amount, and the macro will generate +incorrect code. NASM gets it right. This, also, is not a bug in +NASM, so please don't report it as one. (Also please note that the +ORG directive in NASM doesn't work this way, and so you can't do +equivalent tricks with it...) + +That's All Folks! +================= + +Enjoy using NASM! Please feel free to send me comments, or +constructive criticism, or bug fixes, or requests, or general chat. + +Contributions are also welcome: if anyone knows anything about any +other object file formats I should support, please feel free to send +me documentation and some short example files (in my experience, +documentation is useless without at _least_ one example), or even to +write me an output module. OS/2 object files, in particular, spring +to mind. I don't have OS/2, though. + +Please keep flames to a minimum: I have had some very angry e-mails +in the past, condemning me for writing a useless assembler, that +output in no useful format (at the time, that was true), generated +incorrect code (several typos in the instruction table, since fixed) +and took up too much memory and disk space (the price you pay for +total portability, it seems). 
All these were criticisms I was happy +to hear, but I didn't appreciate the flames that went with them. +NASM _is_ still a prototype, and you use it at your own risk. I +_think_ it works, and if it doesn't then I want to know about it, +but I don't guarantee anything. So don't flame me, please. Blame, +but don't flame. + +- Simon Tatham , 21-Nov-96 diff --git a/nasm.h b/nasm.h new file mode 100644 index 00000000..9609667f --- /dev/null +++ b/nasm.h @@ -0,0 +1,443 @@ +/* nasm.h main header file for the Netwide Assembler: inter-module interface + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * initial version: 27/iii/95 by Simon Tatham + */ + +#ifndef NASM_H +#define NASM_H + +#define NASM_MAJOR_VER 0 +#define NASM_MINOR_VER 91 +#define NASM_VER "0.91" + +#ifndef NULL +#define NULL 0 +#endif + +#ifndef FALSE +#define FALSE 0 /* comes in handy */ +#endif +#ifndef TRUE +#define TRUE 1 +#endif + +#define NO_SEG -1L /* null segment value */ +#define SEG_ABS 0x40000000L /* mask for far-absolute segments */ + +#ifndef FILENAME_MAX +#define FILENAME_MAX 256 +#endif + +/* + * We must declare the existence of this structure type up here, + * since we have to reference it before we define it... + */ +struct ofmt; + +/* + * ------------------------- + * Error reporting functions + * ------------------------- + */ + +/* + * An error reporting function should look like this. + */ +typedef void (*efunc) (int severity, char *fmt, ...); + +/* + * These are the error severity codes which get passed as the first + * argument to an efunc. 
+ */ + +#define ERR_WARNING 0 /* warn only: no further action */ +#define ERR_NONFATAL 1 /* terminate assembly after phase */ +#define ERR_FATAL 2 /* instantly fatal: exit with error */ +#define ERR_PANIC 3 /* internal error: panic instantly + * and dump core for reference */ +#define ERR_MASK 0x0F /* mask off the above codes */ +#define ERR_NOFILE 0x10 /* don't give source file name/line */ +#define ERR_USAGE 0x20 /* print a usage message */ + +/* + * ----------------------- + * Other function typedefs + * ----------------------- + */ + +/* + * A label-lookup function should look like this. + */ +typedef int (*lfunc) (char *label, long *segment, long *offset); + +/* + * And a label-definition function like this. + */ +typedef void (*ldfunc) (char *label, long segment, long offset, + struct ofmt *ofmt, efunc error); + +/* + * ----------------------------------------------------------- + * Format of the `insn' structure returned from `parser.c' and + * passed into `assemble.c' + * ----------------------------------------------------------- + */ + +/* + * Here we define the operand types. These are implemented as bit + * masks, since some are subsets of others; e.g. AX in a MOV + * instruction is a special operand type, whereas AX in other + * contexts is just another 16-bit register. (Also, consider CL in + * shift instructions, DX in OUT, etc.) 
+ */ + +/* size, and other attributes, of the operand */ +#define BITS8 0x00000001L +#define BITS16 0x00000002L +#define BITS32 0x00000004L +#define BITS64 0x00000008L /* FPU only */ +#define BITS80 0x00000010L /* FPU only */ +#define FAR 0x00000020L /* grotty: this means 16:16 or */ + /* 16:32, like in CALL/JMP */ +#define NEAR 0x00000040L +#define SHORT 0x00000080L /* and this means what it says :) */ + +#define SIZE_MASK 0x000000FFL /* all the size attributes */ +#define NON_SIZE (~SIZE_MASK) + +#define TO 0x00000100L /* reverse effect in FADD, FSUB &c */ +#define COLON 0x00000200L /* operand is followed by a colon */ + +/* type of operand: memory reference, register, etc. */ +#define MEMORY 0x00204000L +#define REGISTER 0x00001000L /* register number in 'basereg' */ +#define IMMEDIATE 0x00002000L + +#define REGMEM 0x00200000L /* for r/m, ie EA, operands */ +#define REGNORM 0x00201000L /* 'normal' reg, qualifies as EA */ +#define REG8 0x00201001L +#define REG16 0x00201002L +#define REG32 0x00201004L +#define FPUREG 0x01000000L /* floating point stack registers */ +#define FPU0 0x01000800L /* FPU stack register zero */ +#define MMXREG 0x00001008L /* MMX registers */ + +/* special register operands: these may be treated differently */ +#define REG_SMASK 0x00070000L /* a mask for the following */ +#define REG_ACCUM 0x00211000L /* accumulator: AL, AX or EAX */ +#define REG_AL 0x00211001L /* REG_ACCUM | BITSxx */ +#define REG_AX 0x00211002L /* ditto */ +#define REG_EAX 0x00211004L /* and again */ +#define REG_COUNT 0x00221000L /* counter: CL, CX or ECX */ +#define REG_CL 0x00221001L /* REG_COUNT | BITSxx */ +#define REG_CX 0x00221002L /* ditto */ +#define REG_ECX 0x00221004L /* another one */ +#define REG_DX 0x00241002L +#define REG_SREG 0x00081002L /* any segment register */ +#define REG_CS 0x01081002L /* CS */ +#define REG_DESS 0x02081002L /* DS, ES, SS (non-CS 86 registers) */ +#define REG_FSGS 0x04081002L /* FS, GS (386 extended registers) */ +#define REG_CDT 
0x00101004L /* CRn, DRn and TRn */ +#define REG_CREG 0x08101004L /* CRn */ +#define REG_CR4 0x08101404L /* CR4 (Pentium only) */ +#define REG_DREG 0x10101004L /* DRn */ +#define REG_TREG 0x20101004L /* TRn */ + +/* special type of EA */ +#define MEM_OFFS 0x00604000L /* simple [address] offset */ + +/* special type of immediate operand */ +#define UNITY 0x00802000L /* for shift/rotate instructions */ + +/* + * Next, the codes returned from the parser, for registers and + * instructions. + */ + +enum { /* register names */ + R_AH = 1, R_AL, R_AX, R_BH, R_BL, R_BP, R_BX, R_CH, R_CL, R_CR0, + R_CR2, R_CR3, R_CR4, R_CS, R_CX, R_DH, R_DI, R_DL, R_DR0, R_DR1, + R_DR2, R_DR3, R_DR6, R_DR7, R_DS, R_DX, R_EAX, R_EBP, R_EBX, + R_ECX, R_EDI, R_EDX, R_ES, R_ESI, R_ESP, R_FS, R_GS, R_MM0, + R_MM1, R_MM2, R_MM3, R_MM4, R_MM5, R_MM6, R_MM7, R_SI, R_SP, + R_SS, R_ST0, R_ST1, R_ST2, R_ST3, R_ST4, R_ST5, R_ST6, R_ST7, + R_TR3, R_TR4, R_TR5, R_TR6, R_TR7, REG_ENUM_LIMIT +}; + +enum { /* instruction names */ + I_AAA, I_AAD, I_AAM, I_AAS, I_ADC, I_ADD, I_AND, I_ARPL, + I_BOUND, I_BSF, I_BSR, I_BSWAP, I_BT, I_BTC, I_BTR, I_BTS, + I_CALL, I_CBW, I_CDQ, I_CLC, I_CLD, I_CLI, I_CLTS, I_CMC, I_CMP, + I_CMPSB, I_CMPSD, I_CMPSW, I_CMPXCHG, I_CMPXCHG8B, I_CPUID, + I_CWD, I_CWDE, I_DAA, I_DAS, I_DB, I_DD, I_DEC, I_DIV, I_DQ, + I_DT, I_DW, I_EMMS, I_ENTER, I_EQU, I_F2XM1, I_FABS, I_FADD, + I_FADDP, I_FBLD, I_FBSTP, I_FCHS, I_FCLEX, I_FCMOVB, I_FCMOVBE, + I_FCMOVE, I_FCMOVNB, I_FCMOVNBE, I_FCMOVNE, I_FCMOVNU, I_FCMOVU, + I_FCOM, I_FCOMI, I_FCOMIP, I_FCOMP, I_FCOMPP, I_FCOS, I_FDECSTP, + I_FDISI, I_FDIV, I_FDIVP, I_FDIVR, I_FDIVRP, I_FENI, I_FFREE, + I_FIADD, I_FICOM, I_FICOMP, I_FIDIV, I_FIDIVR, I_FILD, I_FIMUL, + I_FINCSTP, I_FINIT, I_FIST, I_FISTP, I_FISUB, I_FISUBR, I_FLD, + I_FLD1, I_FLDCW, I_FLDENV, I_FLDL2E, I_FLDL2T, I_FLDLG2, + I_FLDLN2, I_FLDPI, I_FLDZ, I_FMUL, I_FMULP, I_FNOP, I_FPATAN, + I_FPREM, I_FPREM1, I_FPTAN, I_FRNDINT, I_FRSTOR, I_FSAVE, + I_FSCALE, I_FSETPM, I_FSIN, I_FSINCOS, 
I_FSQRT, I_FST, I_FSTCW, + I_FSTENV, I_FSTP, I_FSTSW, I_FSUB, I_FSUBP, I_FSUBR, I_FSUBRP, + I_FTST, I_FUCOM, I_FUCOMI, I_FUCOMIP, I_FUCOMP, I_FUCOMPP, + I_FXAM, I_FXCH, I_FXTRACT, I_FYL2X, I_FYL2XP1, I_HLT, I_ICEBP, + I_IDIV, I_IMUL, I_IN, I_INC, I_INSB, I_INSD, I_INSW, I_INT, + I_INT1, I_INT01, I_INT3, I_INTO, I_INVD, I_INVLPG, I_IRET, + I_IRETD, I_IRETW, I_JCXZ, I_JECXZ, I_JMP, I_LAHF, I_LAR, I_LDS, + I_LEA, I_LEAVE, I_LES, I_LFS, I_LGDT, I_LGS, I_LIDT, I_LLDT, + I_LMSW, I_LOADALL, I_LODSB, I_LODSD, I_LODSW, I_LOOP, I_LOOPE, + I_LOOPNE, I_LOOPNZ, I_LOOPZ, I_LSL, I_LSS, I_LTR, I_MOV, I_MOVD, + I_MOVQ, I_MOVSB, I_MOVSD, I_MOVSW, I_MOVSX, I_MOVZX, I_MUL, + I_NEG, I_NOP, I_NOT, I_OR, I_OUT, I_OUTSB, I_OUTSD, I_OUTSW, + I_PACKSSDW, I_PACKSSWB, I_PACKUSWB, I_PADDB, I_PADDD, I_PADDSB, + I_PADDSW, I_PADDUSB, I_PADDUSW, I_PADDW, I_PAND, I_PANDN, + I_PCMPEQB, I_PCMPEQD, I_PCMPEQW, I_PCMPGTB, I_PCMPGTD, + I_PCMPGTW, I_PMADDWD, I_PMULHW, I_PMULLW, I_POP, I_POPA, + I_POPAD, I_POPAW, I_POPF, I_POPFD, I_POPFW, I_POR, I_PSLLD, + I_PSLLQ, I_PSLLW, I_PSRAD, I_PSRAW, I_PSRLD, I_PSRLQ, I_PSRLW, + I_PSUBB, I_PSUBD, I_PSUBSB, I_PSUBSW, I_PSUBUSB, I_PSUBUSW, + I_PSUBW, I_PUNPCKHBW, I_PUNPCKHDQ, I_PUNPCKHWD, I_PUNPCKLBW, + I_PUNPCKLDQ, I_PUNPCKLWD, I_PUSH, I_PUSHA, I_PUSHAD, I_PUSHAW, + I_PUSHF, I_PUSHFD, I_PUSHFW, I_PXOR, I_RCL, I_RCR, I_RDMSR, + I_RDPMC, I_RDTSC, I_RESB, I_RESD, I_RESQ, I_REST, I_RESW, I_RET, + I_RETF, I_RETN, I_ROL, I_ROR, I_RSM, I_SAHF, I_SAL, I_SALC, + I_SAR, I_SBB, I_SCASB, I_SCASD, I_SCASW, I_SGDT, I_SHL, I_SHLD, + I_SHR, I_SHRD, I_SIDT, I_SLDT, I_SMSW, I_STC, I_STD, I_STI, + I_STOSB, I_STOSD, I_STOSW, I_STR, I_SUB, I_TEST, I_UMOV, I_VERR, + I_VERW, I_WAIT, I_WBINVD, I_WRMSR, I_XADD, I_XCHG, I_XLATB, + I_XOR, I_CMOVcc, I_Jcc, I_SETcc +}; + +enum { /* condition code names */ + C_A, C_AE, C_B, C_BE, C_C, C_E, C_G, C_GE, C_L, C_LE, C_NA, C_NAE, + C_NB, C_NBE, C_NC, C_NE, C_NG, C_NGE, C_NL, C_NLE, C_NO, C_NP, + C_NS, C_NZ, C_O, C_P, C_PE, C_PO, C_S, C_Z +}; + +/* + * 
Note that because segment registers may be used as instruction + * prefixes, we must ensure the enumerations for prefixes and + * register names do not overlap. + */ +enum { /* instruction prefixes */ + PREFIX_ENUM_START = REG_ENUM_LIMIT, + P_A16 = PREFIX_ENUM_START, P_A32, P_LOCK, P_O16, P_O32, P_REP, P_REPE, + P_REPNE, P_REPNZ, P_REPZ, P_TIMES +}; + +enum { /* extended operand types */ + EOT_NOTHING, EOT_DB_STRING, EOT_DB_NUMBER +}; + +typedef struct { /* operand to an instruction */ + long type; /* type of operand */ + int addr_size; /* 0 means default; 16; 32 */ + int basereg, indexreg, scale; /* registers and scale involved */ + long segment; /* immediate segment, if needed */ + long offset; /* any immediate number */ + long wrt; /* segment base it's relative to */ +} operand; + +typedef struct extop { /* extended operand */ + struct extop *next; /* linked list */ + long type; /* defined above */ + char *stringval; /* if it's a string, then here it is */ + int stringlen; /* ... and here's how long it is */ + long segment; /* if it's a number/address, then... */ + long offset; /* ... it's given here ... */ + long wrt; /* ... and here */ +} extop; + +#define MAXPREFIX 4 + +typedef struct { /* an instruction itself */ + char *label; /* the label defined, or NULL */ + int prefixes[MAXPREFIX]; /* instruction prefixes, if any */ + int nprefix; /* number of entries in above */ + int opcode; /* the opcode - not just the string */ + int condition; /* the condition code, if Jcc/SETcc */ + int operands; /* how many operands? 0-3 */ + operand oprs[3]; /* the operands, defined as above */ + extop *eops; /* extended operands */ + int times; /* repeat count (TIMES prefix) */ +} insn; + +/* + * ------------------------------------------------------------ + * The data structure defining an output format driver, and the + * interfaces to the functions therein. 
+ * ------------------------------------------------------------ + */ + +struct ofmt { + /* + * This is a short (one-liner) description of the type of + * output generated by the driver. + */ + char *fullname; + + /* + * This is a single keyword used to select the driver. + */ + char *shortname; + + /* + * This procedure is called at the start of an output session. + * It tells the output format what file it will be writing to, + * what routine to report errors through, and how to interface + * to the label manager if necessary. It also gives it a chance + * to do other initialisation. + */ + void (*init) (FILE *fp, efunc error, ldfunc ldef); + + /* + * This procedure is called by assemble() to write actual + * generated code or data to the object file. Typically it + * doesn't have to actually _write_ it, just store it for + * later. + * + * The `type' argument specifies the type of output data, and + * usually the size as well: its contents are described below. + */ + void (*output) (long segto, void *data, unsigned long type, + long segment, long wrt); + + /* + * This procedure is called once for every symbol defined in + * the module being assembled. It gives the name and value of + * the symbol, in NASM's terms, and indicates whether it has + * been declared to be global. Note that the parameter "name", + * when passed, will point to a piece of static storage + * allocated inside the label manager - it's safe to keep using + * that pointer, because the label manager doesn't clean up + * until after the output driver has. + * + * Values of `is_global' are: 0 means the symbol is local; 1 + * means the symbol is global; 2 means the symbol is common (in + * which case `offset' holds the _size_ of the variable). + * Anything else is available for the output driver to use + * internally. + */ + void (*symdef) (char *name, long segment, long offset, int is_global); + + /* + * This procedure is called when the source code requests a + * segment change. 
It should return the corresponding segment + * _number_ for the name, or NO_SEG if the name is not a valid + * segment name. + * + * It may also be called with NULL, in which case it is to + * return the _default_ section number for starting assembly in. + * + * It is allowed to modify the string it is given a pointer to. + * + * It is also allowed to specify a default instruction size for + * the segment, by setting `*bits' to 16 or 32. Or, if it + * doesn't wish to define a default, it can leave `bits' alone. + */ + long (*section) (char *name, int pass, int *bits); + + /* + * This procedure is called to modify the segment base values + * returned from the SEG operator. It is given a segment base + * value (i.e. a segment value with the low bit set), and is + * required to produce in return a segment value which may be + * different. It can map segment bases to absolute numbers by + * means of returning SEG_ABS types. + */ + long (*segbase) (long segment); + + /* + * This procedure is called to allow the output driver to + * process its own specific directives. When called, it has the + * directive word in `directive' and the parameter string in + * `value'. It is called in both assembly passes, and `pass' + * will be either 1 or 2. + * + * This procedure should return zero if it does not _recognise_ + * the directive, so that the main program can report an error. + * If it recognises the directive but then has its own errors, + * it should report them itself and then return non-zero. It + * should also return non-zero if it correctly processes the + * directive. + */ + int (*directive) (char *directive, char *value, int pass); + + /* + * This procedure is called before anything else - even before + * the "init" routine - and is passed the name of the input + * file from which this output file is being generated. It + * should return its preferred name for the output file in + * `outname'. 
Since it is called before the driver is properly + * initialised, it has to be passed its error handler + * separately. + * + * This procedure may also take its own copy of the input file + * name for use in writing the output file: it is _guaranteed_ + * that it will be called before the "init" routine. + * + * The parameter `outname' points to an area of storage + * guaranteed to be at least FILENAME_MAX in size. + */ + void (*filename) (char *inname, char *outname, efunc error); + + /* + * This procedure is called after assembly finishes, to allow + * the output driver to clean itself up and free its memory. + * Typically, it will also be the point at which the object + * file actually gets _written_. + * + * One thing the cleanup routine should always do is to close + * the output file pointer. + */ + void (*cleanup) (void); +}; + +/* + * values for the `type' parameter to an output function. Each one + * must have the actual number of _bytes_ added to it. + * + * Exceptions are OUT_RELxADR, which denote an x-byte relocation + * which will be a relative jump. For this we need to know the + * distance in bytes from the start of the relocated record until + * the end of the containing instruction. _This_ is what is stored + * in the size part of the parameter, in this case. + * + * Also OUT_RESERVE denotes reservation of N bytes of BSS space, + * and the contents of the "data" parameter is irrelevant. + * + * The "data" parameter for the output function points to a "long", + * containing the address in question, unless the type is + * OUT_RAWDATA, in which case it points to an "unsigned char" + * array. 
+ */ +#define OUT_RAWDATA 0x00000000UL +#define OUT_ADDRESS 0x10000000UL +#define OUT_REL2ADR 0x20000000UL +#define OUT_REL4ADR 0x30000000UL +#define OUT_RESERVE 0x40000000UL +#define OUT_TYPMASK 0xF0000000UL +#define OUT_SIZMASK 0x0FFFFFFFUL + +/* + * ----- + * Other + * ----- + */ + +/* + * This is a useful #define which I keep meaning to use more often: + * the number of elements of a statically defined array. + */ + +#define elements(x) ( sizeof(x) / sizeof(*(x)) ) + +#endif diff --git a/nasmlib.c b/nasmlib.c new file mode 100644 index 00000000..c8710b0c --- /dev/null +++ b/nasmlib.c @@ -0,0 +1,488 @@ +/* nasmlib.c library routines for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "nasm.h" +#include "nasmlib.h" + +static efunc nasm_malloc_error; + +void nasm_set_malloc_error (efunc error) { + nasm_malloc_error = error; +} + +void *nasm_malloc (size_t size) { + void *p = malloc(size); + if (!p) + nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory"); + return p; +} + +void *nasm_realloc (void *q, size_t size) { + void *p = q ? 
realloc(q, size) : malloc(size); + if (!p) + nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory"); + return p; +} + +void nasm_free (void *q) { + if (q) + free (q); +} + +char *nasm_strdup (char *s) { + char *p; + + p = nasm_malloc(strlen(s)+1); + strcpy (p, s); + return p; +} + +int nasm_stricmp (char *s1, char *s2) { + while (*s1 && toupper(*s1) == toupper(*s2)) + s1++, s2++; + if (!*s1 && !*s2) + return 0; + else if (toupper(*s1) < toupper(*s2)) + return -1; + else + return 1; +} + +int nasm_strnicmp (char *s1, char *s2, int n) { + while (n > 0 && *s1 && toupper(*s1) == toupper(*s2)) + s1++, s2++, n--; + if ((!*s1 && !*s2) || n==0) + return 0; + else if (toupper(*s1) < toupper(*s2)) + return -1; + else + return 1; +} + +#define isnumchar(c) ( isalnum(c) || (c) == '$') +#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0') + +long readnum (char *str, int *error) { + char *r = str, *q; + long radix; + long result; + + *error = FALSE; + + while (isspace(*r)) r++; /* find start of number */ + q = r; + + while (isnumchar(*q)) q++; /* find end of number */ + + /* + * If it begins 0x, 0X or $, or ends in H, it's in hex. if it + * ends in Q, it's octal. if it ends in B, it's binary. + * Otherwise, it's ordinary decimal. 
+ */ + if (*r=='0' && (r[1]=='x' || r[1]=='X')) + radix = 16, r += 2; + else if (*r=='$') + radix = 16, r++; + else if (q[-1]=='H' || q[-1]=='h') + radix = 16 , q--; + else if (q[-1]=='Q' || q[-1]=='q') + radix = 8 , q--; + else if (q[-1]=='B' || q[-1]=='b') + radix = 2 , q--; + else + radix = 10; + + result = 0; + while (*r && r < q) { + if (*r<'0' || (*r>'9' && *r<'A') || numvalue(*r)>=radix) { + *error = TRUE; + return 0; + } + result = radix * result + numvalue(*r); + r++; + } + return result; +} + +static long next_seg; + +void seg_init(void) { + next_seg = 0; +} + +long seg_alloc(void) { + return (next_seg += 2) - 2; +} + +void fwriteshort (int data, FILE *fp) { + fputc (data & 255, fp); + fputc ((data >> 8) & 255, fp); +} + +void fwritelong (long data, FILE *fp) { + fputc (data & 255, fp); + fputc ((data >> 8) & 255, fp); + fputc ((data >> 16) & 255, fp); + fputc ((data >> 24) & 255, fp); +} + +void standard_extension (char *inname, char *outname, char *extension, + efunc error) { + char *p, *q; + + q = inname; + p = outname; + while (*q) *p++ = *q++; /* copy, and find end of string */ + *p = '\0'; /* terminate it */ + while (p > outname && *--p != '.');/* find final period (or whatever) */ + if (*p != '.') while (*p) p++; /* go back to end if none found */ + if (!strcmp(p, extension)) { /* is the extension already there? 
*/ + if (*extension) + error(ERR_WARNING | ERR_NOFILE, + "file name already ends in `%s': " + "output will be in `nasm.out'", + extension); + else + error(ERR_WARNING | ERR_NOFILE, + "file name already has no extension: " + "output will be in `nasm.out'"); + strcpy(outname, "nasm.out"); + } else + strcpy(p, extension); +} + +#define RAA_BLKSIZE 4096 /* this many longs allocated at once */ +#define RAA_LAYERSIZE 1024 /* this many _pointers_ allocated */ + +typedef struct RAA RAA; +typedef union RAA_UNION RAA_UNION; +typedef struct RAA_LEAF RAA_LEAF; +typedef struct RAA_BRANCH RAA_BRANCH; + +struct RAA { + int layers; + long stepsize; + union RAA_UNION { + struct RAA_LEAF { + long data[RAA_BLKSIZE]; + } l; + struct RAA_BRANCH { + struct RAA *data[RAA_LAYERSIZE]; + } b; + } u; +}; + +#define LEAFSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_LEAF)) +#define BRANCHSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_BRANCH)) + +#define LAYERSIZ(r) ( (r)->layers==0 ? RAA_BLKSIZE : RAA_LAYERSIZE ) + +static struct RAA *real_raa_init (int layers) { + struct RAA *r; + + if (layers == 0) { + r = nasm_malloc (LEAFSIZ); + memset (r->u.l.data, 0, sizeof(r->u.l.data)); + r->layers = 0; + r->stepsize = 1L; + } else { + r = nasm_malloc (BRANCHSIZ); + memset (r->u.b.data, 0, sizeof(r->u.b.data)); + r->layers = layers; + r->stepsize = 1L; + while (layers--) + r->stepsize *= RAA_LAYERSIZE; + } + return r; +} + +struct RAA *raa_init (void) { + return real_raa_init (0); +} + +void raa_free (struct RAA *r) { + if (r->layers == 0) + nasm_free (r); + else { + struct RAA **p; + for (p = r->u.b.data; p - r->u.b.data < RAA_LAYERSIZE; p++) + if (*p) + raa_free (*p); + } +} + +long raa_read (struct RAA *r, long posn) { + if (posn > r->stepsize * LAYERSIZ(r)) + return 0L; + while (r->layers > 0) { + ldiv_t l; + l = ldiv (posn, r->stepsize); + r = r->u.b.data[l.quot]; + posn = l.rem; + if (!r) /* better check this */ + return 0L; + } + return r->u.l.data[posn]; +} + +struct RAA *raa_write (struct RAA 
*r, long posn, long value) { + struct RAA *result; + + if (posn < 0) + nasm_malloc_error (ERR_PANIC, "negative position in raa_write"); + + while (r->stepsize * LAYERSIZ(r) < posn) { + /* + * Must go up a layer. + */ + struct RAA *s; + + s = nasm_malloc (BRANCHSIZ); + memset (s->u.b.data, 0, sizeof(r->u.b.data)); + s->layers = r->layers + 1; + s->stepsize = RAA_LAYERSIZE * r->stepsize; + s->u.b.data[0] = r; + r = s; + } + + result = r; + + while (r->layers > 0) { + ldiv_t l; + struct RAA **s; + l = ldiv (posn, r->stepsize); + s = &r->u.b.data[l.quot]; + if (!*s) + *s = real_raa_init (r->layers - 1); + r = *s; + posn = l.rem; + } + + r->u.l.data[posn] = value; + + return result; +} + +#define SAA_MAXLEN 8192 + +struct SAA { + /* + * members `end' and `elem_len' are only valid in first link in + * list; `rptr' and `rpos' are used for reading + */ + struct SAA *next, *end, *rptr; + long elem_len, length, posn, start, rpos; + char *data; +}; + +struct SAA *saa_init (long elem_len) { + struct SAA *s; + + if (elem_len > SAA_MAXLEN) + nasm_malloc_error (ERR_PANIC | ERR_NOFILE, "SAA with huge elements"); + + s = nasm_malloc (sizeof(struct SAA)); + s->posn = s->start = 0L; + s->elem_len = elem_len; + s->length = SAA_MAXLEN - (SAA_MAXLEN % elem_len); + s->data = nasm_malloc (s->length); + s->next = NULL; + s->end = s; + + return s; +} + +void saa_free (struct SAA *s) { + struct SAA *t; + + while (s) { + t = s->next; + nasm_free (s->data); + nasm_free (s); + s = t; + } +} + +void *saa_wstruct (struct SAA *s) { + void *p; + + if (s->end->length - s->end->posn < s->elem_len) { + s->end->next = nasm_malloc (sizeof(struct SAA)); + s->end->next->start = s->end->start + s->end->posn; + s->end = s->end->next; + s->end->length = s->length; + s->end->next = NULL; + s->end->posn = 0L; + s->end->data = nasm_malloc (s->length); + } + + p = s->end->data + s->end->posn; + s->end->posn += s->elem_len; + return p; +} + +void saa_wbytes (struct SAA *s, void *data, long len) { + char *d = 
data; + + while (len > 0) { + long l = s->end->length - s->end->posn; + if (l > len) + l = len; + if (l > 0) { + if (d) { + memcpy (s->end->data + s->end->posn, d, l); + d += l; + } else + memset (s->end->data + s->end->posn, 0, l); + s->end->posn += l; + len -= l; + } + if (len > 0) { + s->end->next = nasm_malloc (sizeof(struct SAA)); + s->end->next->start = s->end->start + s->end->posn; + s->end = s->end->next; + s->end->length = s->length; + s->end->next = NULL; + s->end->posn = 0L; + s->end->data = nasm_malloc (s->length); + } + } +} + +void saa_rewind (struct SAA *s) { + s->rptr = s; + s->rpos = 0L; +} + +void *saa_rstruct (struct SAA *s) { + void *p; + + if (!s->rptr) + return NULL; + + if (s->rptr->posn - s->rpos < s->elem_len) { + s->rptr = s->rptr->next; + if (!s->rptr) + return NULL; /* end of array */ + s->rpos = 0L; + } + + p = s->rptr->data + s->rpos; + s->rpos += s->elem_len; + return p; +} + +void *saa_rbytes (struct SAA *s, long *len) { + void *p; + + if (!s->rptr) + return NULL; + + p = s->rptr->data + s->rpos; + *len = s->rptr->posn - s->rpos; + s->rptr = s->rptr->next; + s->rpos = 0L; + return p; +} + +void saa_rnbytes (struct SAA *s, void *data, long len) { + char *d = data; + + while (len > 0) { + long l; + + if (!s->rptr) + return; + + l = s->rptr->posn - s->rpos; + if (l > len) + l = len; + if (l > 0) { + memcpy (d, s->rptr->data + s->rpos, l); + d += l; + s->rpos += l; + len -= l; + } + if (len > 0) { + s->rptr = s->rptr->next; + s->rpos = 0L; + } + } +} + +/* + * Random-access read: copy `len' bytes starting at absolute offset + * `posn' of the array into `data'. The copy may span several links; + * it stops early if the array ends first. + */ +void saa_fread (struct SAA *s, long posn, void *data, long len) { + struct SAA *p; + long pos; + char *cdata = data; + + /* + * `rptr' remembers the link located by the previous call. Start + * again from the head of the list if there is no such link, or + * if the target lies before it (links can only be walked + * forwards). + */ + if (!s->rptr || posn < s->rptr->start) + saa_rewind (s); + p = s->rptr; + while (posn >= p->start + p->posn) { + p = p->next; + if (!p) + return; /* what else can we do?! 
*/ + } + s->rptr = p; /* remember where we got to */ + + pos = posn - p->start; + while (len) { + long l = p->posn - pos; /* bytes available in _this_ link */ + if (l > len) + l = len; + memcpy (cdata, p->data+pos, l); + len -= l; + cdata += l; + p = p->next; + if (!p) + return; + pos = 0L; + } +} + +/* + * Random-access write: overwrite `len' bytes starting at absolute + * offset `posn' of the array with the contents of `data'. Only data + * already present is overwritten; the array is never extended. + */ +void saa_fwrite (struct SAA *s, long posn, void *data, long len) { + struct SAA *p; + long pos; + char *cdata = data; + + /* same link search as in saa_fread */ + if (!s->rptr || posn < s->rptr->start) + saa_rewind (s); + p = s->rptr; + while (posn >= p->start + p->posn) { + p = p->next; + if (!p) + return; /* what else can we do?! */ + } + s->rptr = p; /* remember where we got to */ + + pos = posn - p->start; + while (len) { + long l = p->posn - pos; /* bytes available in _this_ link */ + if (l > len) + l = len; + memcpy (p->data+pos, cdata, l); + len -= l; + cdata += l; + p = p->next; + if (!p) + return; + pos = 0L; + } +} + +void saa_fpwrite (struct SAA *s, FILE *fp) { + char *data; + long len; + + saa_rewind (s); + while ( (data = saa_rbytes (s, &len)) ) + fwrite (data, 1, len, fp); +} diff --git a/nasmlib.h b/nasmlib.h new file mode 100644 index 00000000..d8273719 --- /dev/null +++ b/nasmlib.h @@ -0,0 +1,115 @@ +/* nasmlib.h header file for nasmlib.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#ifndef NASM_NASMLIB_H +#define NASM_NASMLIB_H + +/* + * Wrappers around malloc, realloc and free. nasm_malloc will + * fatal-error and die rather than return NULL; nasm_realloc will + * do likewise, and will also guarantee to work right on being + * passed a NULL pointer; nasm_free will do nothing if it is passed + * a NULL pointer. + */ +void nasm_set_malloc_error (efunc); +void *nasm_malloc (size_t); +void *nasm_realloc (void *, size_t); +void nasm_free (void *); +char *nasm_strdup (char *); + +/* + * ANSI doesn't guarantee the presence of `stricmp' or + * `strcasecmp'. 
+ */
+int nasm_stricmp (char *, char *);
+int nasm_strnicmp (char *, char *, int);
+
+/*
+ * Convert a string into a number, using NASM number rules. Sets
+ * `*error' to TRUE if an error occurs, and FALSE otherwise.
+ */
+long readnum(char *str, int *error);
+
+/*
+ * seg_init: Initialise the segment-number allocator.
+ * seg_alloc: allocate a hitherto unused segment number.
+ */
+void seg_init(void);
+long seg_alloc(void);
+
+/*
+ * many output formats will be able to make use of this: a standard
+ * function to add an extension to the name of the input file
+ */
+void standard_extension (char *inname, char *outname, char *extension,
+                         efunc error);
+
+/*
+ * some handy macros that will probably be of use in more than one
+ * output format: convert integers into little-endian byte packed
+ * format in memory
+ *
+ * Both macros store through `p' and advance it past the bytes
+ * written (four for WRITELONG, two for WRITESHORT), so `p' must be
+ * a modifiable byte pointer. `v' is expanded once per byte stored,
+ * so it should not have side effects.
+ */
+
+#define WRITELONG(p,v) \
+  do { \
+    *(p)++ = (v) & 0xFF; \
+    *(p)++ = ((v) >> 8) & 0xFF; \
+    *(p)++ = ((v) >> 16) & 0xFF; \
+    *(p)++ = ((v) >> 24) & 0xFF; \
+  } while (0)
+
+#define WRITESHORT(p,v) \
+  do { \
+    *(p)++ = (v) & 0xFF; \
+    *(p)++ = ((v) >> 8) & 0xFF; \
+  } while (0)
+
+/*
+ * and routines to do the same thing to a file
+ */
+void fwriteshort (int data, FILE *fp);
+void fwritelong (long data, FILE *fp);
+
+/*
+ * Routines to manage a dynamic random access array of longs which
+ * may grow in size to be more than the largest single malloc'able
+ * chunk.
+ *
+ * raa_write returns the array pointer to use from then on; callers
+ * must store it back, as in `r = raa_write (r, posn, value)'.
+ */
+
+struct RAA;
+
+struct RAA *raa_init (void);
+void raa_free (struct RAA *);
+long raa_read (struct RAA *, long);
+struct RAA *raa_write (struct RAA *r, long posn, long value);
+
+/*
+ * Routines to manage a dynamic sequential-access array, under the
+ * same restriction on maximum mallocable block. This array may be
+ * written to in two ways: a contiguous chunk can be reserved of a
+ * given size, and a pointer returned, or single-byte data may be
+ * written. The array can also be read back in the same two ways:
+ * as a series of big byte-data blocks or as a list of structures
+ * of a given size.
+ */
+
+struct SAA;
+
+struct SAA *saa_init (long elem_len);  /* 1 == byte */
+void saa_free (struct SAA *);
+void *saa_wstruct (struct SAA *);      /* return a structure of elem_len */
+void saa_wbytes (struct SAA *, void *, long);   /* write arbitrary bytes */
+void saa_rewind (struct SAA *);        /* for reading from beginning */
+void *saa_rstruct (struct SAA *);      /* return NULL on EOA */
+void *saa_rbytes (struct SAA *, long *);   /* return 0 on EOA */
+void saa_rnbytes (struct SAA *, void *, long);   /* read a given no. of bytes */
+void saa_fread (struct SAA *s, long posn, void *p, long len);   /* fixup */
+void saa_fwrite (struct SAA *s, long posn, void *p, long len);   /* fixup */
+void saa_fpwrite (struct SAA *, FILE *);
+
+#endif
diff --git a/ndisasm.c b/ndisasm.c
new file mode 100644
index 00000000..8e7a4cd0
--- /dev/null
+++ b/ndisasm.c
@@ -0,0 +1,270 @@
+/* ndisasm.c   the Netwide Disassembler main module
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "sync.h"
+#include "disasm.h"
+
+#define BPL 8                          /* bytes per line of hex dump */
+
+static const char *help =
+"usage: ndisasm [-a] [-i] [-h] [-r] [-u] [-b bits] [-o origin] [-s sync...]\n"
+" [-e bytes] [-k start,bytes] file\n"
+" -a or -i activates auto (intelligent) sync\n"
+" -u sets USE32 (32-bit mode)\n"
+" -b 16 or -b 32 sets number of bits too\n"
+" -h displays this text\n"
+" -r displays the version number\n"
+" -e skips <bytes> bytes of header\n"
+" -k avoids disassembling <bytes> bytes from position <start>\n";
+
+static void output_ins (unsigned long, unsigned char *, int, char *);
+static void skip (unsigned long dist, FILE *fp);
+
+/*
+ * Parse the command line, open the input file and disassemble it to
+ * stdout. Returns 0 on success (and after `-h', `-r' or a missing
+ * filename, which just print a message) and 1 on any usage or
+ * file-open error.
+ */
+int main(int argc, char **argv) {
+    unsigned char buffer[INSN_MAX * 2], *p, *q;
+    char outbuf[256];
+    char *pname = *argv;
+    char *filename = NULL;
+    unsigned long nextsync, synclen, initskip = 0L;
+    int lenread, lendis;
+    int autosync = FALSE;
+    int bits = 16;
+    int rn_error;
+    long offset;
+    FILE *fp;
+
+    offset = 0;
+    init_sync();
+
+    while (--argc) {
+        char *v, *vv, *p = *++argv;
+        if (*p == '-') {
+            p++;
+            /* cast: tolower() is undefined for negative char values */
+            while (*p) switch (tolower((unsigned char)*p)) {
+              case 'a':                /* auto or intelligent sync */
+              case 'i':
+                autosync = TRUE;
+                p++;
+                break;
+              case 'h':
+                fprintf(stderr, "%s", help);
+                return 0;
+                break;
+              case 'r':
+                fprintf(stderr, "NDISASM version " NASM_VER "\n");
+                return 0;
+                break;
+              case 'u':                /* USE32 */
+                bits = 32;
+                p++;
+                break;
+              case 'b':                /* bits */
+                v = p[1] ? p+1 : --argc ? *++argv : NULL;
+                if (!v) {
+                    fprintf(stderr, "%s: `-b' requires an argument\n", pname);
+                    return 1;
+                }
+                if (!strcmp(v, "16"))
+                    bits = 16;
+                else if (!strcmp(v, "32"))
+                    bits = 32;
+                else {
+                    fprintf(stderr, "%s: argument to `-b' should"
+                            " be `16' or `32'\n", pname);
+                    return 1;          /* fail like the other bad options */
+                }
+                p = "";                /* force to next argument */
+                break;
+              case 'o':                /* origin */
+                v = p[1] ? p+1 : --argc ? *++argv : NULL;
+                if (!v) {
+                    fprintf(stderr, "%s: `-o' requires an argument\n", pname);
+                    return 1;
+                }
+                offset = readnum (v, &rn_error);
+                if (rn_error) {
+                    fprintf(stderr, "%s: `-o' requires a numeric argument\n",
+                            pname);
+                    return 1;
+                }
+                p = "";                /* force to next argument */
+                break;
+              case 's':                /* sync point */
+                v = p[1] ? p+1 : --argc ? *++argv : NULL;
+                if (!v) {
+                    fprintf(stderr, "%s: `-s' requires an argument\n", pname);
+                    return 1;
+                }
+                /* validate before registering the sync point */
+                nextsync = readnum (v, &rn_error);
+                if (rn_error) {
+                    fprintf(stderr, "%s: `-s' requires a numeric argument\n",
+                            pname);
+                    return 1;
+                }
+                add_sync (nextsync, 0L);
+                p = "";                /* force to next argument */
+                break;
+              case 'e':                /* skip a header */
+                v = p[1] ? p+1 : --argc ? *++argv : NULL;
+                if (!v) {
+                    fprintf(stderr, "%s: `-e' requires an argument\n", pname);
+                    return 1;
+                }
+                initskip = readnum (v, &rn_error);
+                if (rn_error) {
+                    fprintf(stderr, "%s: `-e' requires a numeric argument\n",
+                            pname);
+                    return 1;
+                }
+                p = "";                /* force to next argument */
+                break;
+              case 'k':                /* skip a region */
+                v = p[1] ? p+1 : --argc ? *++argv : NULL;
+                if (!v) {
+                    fprintf(stderr, "%s: `-k' requires an argument\n", pname);
+                    return 1;
+                }
+                vv = strchr(v, ',');
+                if (!vv) {
+                    fprintf(stderr, "%s: `-k' requires two numbers separated"
+                            " by a comma\n", pname);
+                    return 1;
+                }
+                *vv++ = '\0';
+                nextsync = readnum (v, &rn_error);
+                if (rn_error) {
+                    fprintf(stderr, "%s: `-k' requires numeric arguments\n",
+                            pname);
+                    return 1;
+                }
+                synclen = readnum (vv, &rn_error);
+                if (rn_error) {
+                    fprintf(stderr, "%s: `-k' requires numeric arguments\n",
+                            pname);
+                    return 1;
+                }
+                add_sync (nextsync, synclen);
+                p = "";                /* force to next argument */
+                break;
+              default:
+                /*
+                 * Without this case an unknown letter never advances
+                 * `p', and the enclosing while loop spins forever.
+                 */
+                fprintf(stderr, "%s: unrecognised option `-%c'\n",
+                        pname, *p);
+                return 1;
+            }
+        } else if (!filename) {
+            filename = p;
+        } else {
+            fprintf(stderr, "%s: more than one filename specified\n", pname);
+            return 1;
+        }
+    }
+
+    if (!filename) {
+        fprintf(stderr, "%s", help);
+        return 0;
+    }
+
+    fp = fopen(filename, "rb");
+    if (!fp) {
+        fprintf(stderr, "%s: unable to open `%s'\n", pname, filename);
+        return 1;
+    }
+    if (initskip > 0)
+        skip (initskip, fp);
+
+    /*
+     * This main loop is really horrible, and wants rewriting with
+     * an axe. It'll stay the way it is for a while though, until I
+     * find the energy...
+     */
+
+    p = q = buffer;
+    nextsync = next_sync (offset, &synclen);
+    do {
+        unsigned long to_read = buffer+sizeof(buffer)-p;
+        if (to_read > nextsync-offset-(p-q))
+            to_read = nextsync-offset-(p-q);
+        lenread = fread (p, 1, to_read, fp);
+        p += lenread;
+        if (offset == nextsync) {
+            if (synclen) {
+                printf("%08lX skipping 0x%lX bytes\n", offset, synclen);
+                offset += synclen;
+                skip (synclen, fp);
+            }
+            p = q = buffer;
+            nextsync = next_sync (offset, &synclen);
+        }
+        while (p > q && (p - q >= INSN_MAX || lenread == 0)) {
+            lendis = disasm (q, outbuf, bits, offset, autosync);
+            if (!lendis || lendis > (p - q) ||
+                lendis > nextsync-offset)
+                lendis = eatbyte (q, outbuf);
+            output_ins (offset, q, lendis, outbuf);
+            q += lendis;
+            offset += lendis;
+        }
+        if (q >= buffer+INSN_MAX) {
+            unsigned char *r = buffer, *s = q;
+            int count = p - q;
+            while (count--)
+                *r++ = *s++;
+            p -= (q - buffer);
+            q = buffer;
+        }
+    } while (lenread > 0 || !feof(fp));
+    fclose (fp);
+    return 0;
+}
+
+/*
+ * Print one disassembled instruction: the offset, up to BPL bytes
+ * of hex dump, then the instruction text. Any bytes beyond BPL are
+ * printed on continuation lines introduced by `-'.
+ */
+static void output_ins (unsigned long offset, unsigned char *data,
+                        int datalen, char *insn) {
+    int bytes;
+    printf("%08lX ", offset);
+
+    bytes = 0;
+    while (datalen > 0 && bytes < BPL) {
+        printf("%02X", *data++);
+        bytes++;
+        datalen--;
+    }
+
+    printf("%*s%s\n", (BPL+1-bytes)*2, "", insn);
+
+    while (datalen > 0) {
+        printf(" -");
+        bytes = 0;
+        while (datalen > 0 && bytes < BPL) {
+            printf("%02X", *data++);
+            bytes++;
+            datalen--;
+        }
+        printf("\n");
+    }
+}
+
+/*
+ * Skip a certain amount of data in a file, either by seeking if
+ * possible, or if that fails then by reading and discarding.
+ */
+static void skip (unsigned long dist, FILE *fp) {
+    char buffer[256];                  /* should fit on most stacks :-) */
+
+    /*
+     * Got to be careful with fseek: at least one fseek I've tried
+     * doesn't approve of SEEK_CUR. So I'll use SEEK_SET and
+     * ftell... horrible but apparently necessary.
+ */ + if (fseek (fp, dist+ftell(fp), SEEK_SET)) { + while (dist > 0) { + unsigned long len = (dist < sizeof(buffer) ? + dist : sizeof(buffer)); + if (fread (buffer, 1, len, fp) < len) { + perror("fread"); + exit(1); + } + dist -= len; + } + } +} diff --git a/ndisasm.doc b/ndisasm.doc new file mode 100644 index 00000000..5b5374af --- /dev/null +++ b/ndisasm.doc @@ -0,0 +1,199 @@ + The Netwide Disassembler, NDISASM + ================================= + +Introduction +============ + +The Netwide Disassembler is a small companion program to the Netwide +Assembler, NASM. It seemed a shame to have an x86 assembler, +complete with a full instruction table, and not make as much use of +it as possible, so here's a disassembler which shares the +instruction table (and some other bits of code) with NASM. + +The Netwide Disassembler does nothing except to produce +disassemblies of _binary_ source files. NDISASM does not have any +understanding of object file formats, like `objdump', and it will +not understand DOS .EXE files like `debug' will. It just +disassembles. + +Getting Started: Installation +============================= + +See `nasm.doc' for installation instructions. NDISASM, like NASM, +has a man page which you may want to put somewhere useful, if you +are on a Unix system. + +Running NDISASM +=============== + +To disassemble a file, you will typically use a command of the form + + ndisasm [-b16 | -b32] filename + +NDISASM can disassemble 16 bit code or 32 bit code equally easily, +provided of course that you remember to specify which it is to work +with. If no `-b' switch is present, NDISASM works in 16-bit mode by +default. The `-u' switch (for USE32) also invokes 32-bit mode. + +Two more command line options are `-r' which reports the version +number of NDISASM you are running, and `-h' which gives a short +summary of command line options. 
+ +COM Files: Specifying an Origin +=============================== + +To disassemble a DOS .COM file correctly, a disassembler must assume +that the first instruction in the file is loaded at address 0x100, +rather than at zero. NDISASM, which assumes by default that any file +you give it is loaded at zero, will therefore need to be informed of +this. + +The `-o' option allows you to declare a different origin for the +file you are disassembling. Its argument may be expressed in any of +the NASM numeric formats: decimal by default, if it begins with `$' +or `0x' or ends in `H' it's hex, if it ends in `Q' it's octal, and +if it ends in `B' it's binary. + +Hence, to disassemble a .COM file: + + ndisasm -o100h filename.com + +will do the trick. + +Code Following Data: Synchronisation +==================================== + +Suppose you are disassembling a file which contains some data which +isn't machine code, and _then_ contains some machine code. NDISASM +will faithfully plough through the data section, producing machine +instructions wherever it can (although most of them will look +bizarre, and some may have unusual prefixes, e.g. `fs or +ax,0x240a'), and generating `db' instructions every so often if it's +totally stumped. Then it will reach the code section. + +Supposing NDISASM has just finished generating a strange machine +instruction from part of the data section, and its file position is +now one byte _before_ the beginning of the code section. It's +entirely possible that another spurious instruction will get +generated, starting with the final byte of the data section, and +then the correct first instruction in the code section will not be +seen because the starting point skipped over it. This isn't really +ideal. + +To avoid this, you can specify a `synchronisation' point, or indeed +as many synchronisation points as you like (although NDISASM can +only handle 8192 sync points internally). 
The definition of a sync +point is this: NDISASM guarantees to hit sync points exactly during +disassembly. If it is thinking about generating an instruction which +would cause it to jump over a sync point, it will discard that +instruction and output a `db' instead. So it _will_ start +disassembly exactly from the sync point, and so you _will_ see all +the instructions in your code section. + +Sync points are specified using the `-s' option: they are measured +in terms of the program origin, not the file position. So if you +want to synchronise after 32 bytes of a .COM file, you would have to +do + + ndisasm -o100h -s120h file.com + +rather than + + ndisasm -o100h -s20h file.com + +As stated above, you can specify multiple sync markers if you need +to, just by repeating the `-s' option. + +Mixed Code and Data: Automatic (Intelligent) Synchronisation +============================================================ + +Suppose you are disassembling the boot sector of a DOS floppy (maybe +it has a virus, and you need to understand the virus so that you +know what kinds of damage it might have done you). Typically, this +will contain a JMP instruction, then some data, then the rest of the +code. So there is a very good chance of NDISASM being misaligned +when the data ends and the code begins. Hence a sync point is +needed. + +On the other hand, why should you have to specify the sync point +manually? What you'd do in order to find where the sync point would +be, surely, would be to read the JMP instruction, and then to use +its target address as a sync point. So can NDISASM do that for you? + +The answer, of course, is yes: using either of the synonymous +switches `-a' (for automatic sync) or `-i' (for intelligent sync) +will enable auto-sync mode. Auto-sync mode automatically generates a +sync point for any forward-referring PC-relative jump or call +instruction that NDISASM encounters. 
(Since NDISASM is one-pass, if +it encounters a PC-relative jump whose target has already been +processed, there isn't much it can do about it...) + +Only PC-relative jumps are processed, since an absolute jump is +either through a register (in which case NDISASM doesn't know what +the register contains) or involves a segment address (in which case +the target code isn't in the same segment that NDISASM is working +in, and so the sync point can't be placed anywhere useful). + +For some kinds of file, this mechanism will automatically put sync +points in all the right places, and save you from having to place +any sync points manually. However, it should be stressed that +auto-sync mode is _not_ guaranteed to catch all the sync points, and +you may still have to place some manually. + +Auto-sync mode doesn't prevent you from declaring manual sync +points: it just adds automatically generated ones to the ones you +provide. It's perfectly feasible to specify `-i' _and_ some `-s' +options. + +Another caveat with auto-sync mode is that if, by some unpleasant +fluke, something in your data section should disassemble to a +PC-relative call or jump instruction, NDISASM may obediently place a +sync point in a totally random place, for example in the middle of +one of the instructions in your code section. So you may end up with +a wrong disassembly even if you use auto-sync. Again, there isn't +much I can do about this. If you have problems, you'll have to use +manual sync points, or use the `-k' option (documented below) to +suppress disassembly of the data area. + +Other Options +============= + +The `-e' option skips a header on the file, by ignoring the first N +bytes. This means that the header is _not_ counted towards the +disassembly offset: if you give `-e10 -o10', disassembly will start +at byte 10 in the file, and this will be given offset 10, not 20. 
+ +The `-k' option is provided with two comma-separated numeric +arguments, the first of which is an assembly offset and the second +is a number of bytes to skip. This _will_ count the skipped bytes +towards the assembly offset: its use is to suppress disassembly of a +data section which wouldn't contain anything you wanted to see +anyway. + +Bugs and Improvements +===================== + +There are no known bugs. However, any you find, with patches if +possible, should be sent to or +, and we'll try to fix them. Feel free to send +contributions and new features as well. + +Future plans include awareness of which processors certain +instructions will run on, and marking of instructions that are too +advanced for some processor (or are FPU instructions, or are +undocumented opcodes, or are privileged protected-mode instructions, +or whatever). + +That's All Folks! +================= + +I hope NDISASM is of some use to somebody. Including me. :-) + +I don't recommend taking NDISASM apart to see how an efficient +disassembler works, because as far as I know, it isn't an efficient +one anyway. You have been warned. + +Please feel free to send comments, suggestions, or chat to +. As with NASM, no flames please. + +- Simon Tatham , 21-Nov-96 diff --git a/outaout.c b/outaout.c new file mode 100644 index 00000000..e0ada3c1 --- /dev/null +++ b/outaout.c @@ -0,0 +1,466 @@ +/* outaout.c output routines for the Netwide Assembler to produce + * Linux a.out object files + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "outform.h"
+
+#ifdef OF_AOUT
+
+struct Reloc {
+    struct Reloc *next;
+    long address;                      /* relative to _start_ of section */
+    long symbol;                       /* symbol number or -ve section id */
+    int bytes;                         /* 2 or 4 */
+    int relative;                      /* TRUE or FALSE */
+};
+
+struct Symbol {
+    long strpos;                       /* string table position of name */
+    int type;                          /* symbol type - see flags below */
+    long value;                        /* address, or COMMON variable size */
+};
+
+/*
+ * Section IDs - used in Reloc.symbol when negative, and in
+ * Symbol.type when positive.
+ */
+#define SECT_ABS 2                     /* absolute value */
+#define SECT_TEXT 4                    /* text section */
+#define SECT_DATA 6                    /* data section */
+#define SECT_BSS 8                     /* bss section */
+#define SECT_MASK 0xE                  /* mask out any of the above */
+
+/*
+ * Another flag used in Symbol.type.
+ */
+#define SYM_GLOBAL 1                   /* it's a global symbol */
+
+/*
+ * Bit more explanation of symbol types: SECT_xxx denotes a local
+ * symbol. SECT_xxx|SYM_GLOBAL denotes a global symbol, defined in
+ * this module. Just SYM_GLOBAL, with zero value, denotes an
+ * external symbol referenced in this module. And just SYM_GLOBAL,
+ * but with a non-zero value, declares a C `common' variable, of
+ * size `value'.
+ */
+
+struct Section {
+    struct SAA *data;
+    unsigned long len, size, nrelocs;
+    long index;
+    struct Reloc *head, **tail;
+};
+
+static struct Section stext, sdata;
+static unsigned long bsslen;
+static long bssindex;
+
+static struct SAA *syms;
+static unsigned long nsyms;
+
+static struct RAA *bsym;
+
+static struct SAA *strs;
+static unsigned long strslen;
+
+static FILE *aoutfp;
+static efunc error;
+
+static void aout_write(void);
+static void aout_write_relocs(struct Reloc *);
+static void aout_write_syms(void);
+static void aout_sect_write(struct Section *, unsigned char *, unsigned long);
+static void aout_pad_sections(void);
+static void aout_fixup_relocs(struct Section *);
+
+/*
+ * Set up empty text/data sections, symbol and string tables, and
+ * allocate the three segment numbers used for text, data and bss.
+ */
+static void aout_init(FILE *fp, efunc errfunc, ldfunc ldef) {
+    aoutfp = fp;
+    error = errfunc;
+    (void) ldef;                       /* placate optimisers */
+    stext.data = saa_init(1L); stext.head = NULL; stext.tail = &stext.head;
+    sdata.data = saa_init(1L); sdata.head = NULL; sdata.tail = &sdata.head;
+    stext.len = stext.size = sdata.len = sdata.size = bsslen = 0;
+    stext.nrelocs = sdata.nrelocs = 0;
+    stext.index = seg_alloc();
+    sdata.index = seg_alloc();
+    bssindex = seg_alloc();
+    syms = saa_init((long)sizeof(struct Symbol));
+    nsyms = 0;
+    bsym = raa_init();
+    strs = saa_init(1L);
+    strslen = 0;
+}
+
+/*
+ * Finish the object: pad the sections, patch the relocated fields,
+ * write the file, close it and release everything we allocated.
+ */
+static void aout_cleanup(void) {
+    struct Reloc *r;
+
+    aout_pad_sections();
+    aout_fixup_relocs(&stext);
+    aout_fixup_relocs(&sdata);
+    aout_write();
+    fclose (aoutfp);
+    saa_free (stext.data);
+    while (stext.head) {
+        r = stext.head;
+        stext.head = stext.head->next;
+        nasm_free (r);
+    }
+    saa_free (sdata.data);
+    while (sdata.head) {
+        r = sdata.head;
+        sdata.head = sdata.head->next;
+        nasm_free (r);
+    }
+    saa_free (syms);
+    raa_free (bsym);
+    saa_free (strs);
+}
+
+/*
+ * Map a section name to its segment number. A NULL name selects
+ * the default section (.text) and sets `*bits' to 32; unknown names
+ * yield NO_SEG.
+ */
+static long aout_section_names (char *name, int pass, int *bits) {
+    /*
+     * Default to 32 bits.
+     */
+    if (!name)
+        *bits = 32;
+
+    if (!name)
+        return stext.index;
+
+    if (!strcmp(name, ".text"))
+        return stext.index;
+    else if (!strcmp(name, ".data"))
+        return sdata.index;
+    else if (!strcmp(name, ".bss"))
+        return bssindex;
+    else
+        return NO_SEG;
+}
+
+/*
+ * Record a label in the symbol and string tables. Names starting
+ * with `..' are ignored. A segment that is none of ours marks the
+ * symbol as external, and its symbol number is remembered in `bsym'
+ * so relocations against that segment can find it.
+ */
+static void aout_deflabel (char *name, long segment, long offset,
+                           int is_global) {
+    int pos = strslen+4;               /* +4: table starts with its length */
+    struct Symbol *sym;
+
+    if (name[0] == '.' && name[1] == '.') {
+        return;
+    }
+
+    saa_wbytes (strs, name, (long)(1+strlen(name)));
+    strslen += 1+strlen(name);
+
+    sym = saa_wstruct (syms);
+
+    sym->strpos = pos;
+    sym->type = is_global ? SYM_GLOBAL : 0;
+    if (segment == NO_SEG)
+        sym->type |= SECT_ABS;
+    else if (segment == stext.index)
+        sym->type |= SECT_TEXT;
+    else if (segment == sdata.index)
+        sym->type |= SECT_DATA;
+    else if (segment == bssindex)
+        sym->type |= SECT_BSS;
+    else
+        sym->type = SYM_GLOBAL;
+    if (is_global == 2)
+        sym->value = offset;
+    else
+        sym->value = (sym->type == SYM_GLOBAL ? 0 : offset);
+
+    /*
+     * define the references from external-symbol segment numbers
+     * to these symbol records.
+     */
+    if (segment != NO_SEG && segment != stext.index &&
+        segment != sdata.index && segment != bssindex)
+        bsym = raa_write (bsym, segment, nsyms);
+
+    nsyms++;
+}
+
+/*
+ * Append a relocation at the current end of `sect', against either
+ * one of our own sections (stored as a negative section id) or the
+ * external symbol registered for `segment'.
+ */
+static void aout_add_reloc (struct Section *sect, long segment,
+                            int relative, int bytes) {
+    struct Reloc *r;
+
+    r = *sect->tail = nasm_malloc(sizeof(struct Reloc));
+    sect->tail = &r->next;
+    r->next = NULL;
+
+    r->address = sect->len;
+    r->symbol = (segment == NO_SEG ? -SECT_ABS :
+                 segment == stext.index ? -SECT_TEXT :
+                 segment == sdata.index ? -SECT_DATA :
+                 segment == bssindex ? -SECT_BSS :
+                 raa_read(bsym, segment));
+    r->relative = relative;
+    r->bytes = bytes;
+
+    sect->nrelocs++;
+}
+
+/*
+ * Output-driver entry point: emit `data' of kind `type' into the
+ * section numbered `segto', adding a relocation when the value
+ * refers to `segment'. WRT is not supported and is reported and
+ * then ignored.
+ */
+static void aout_out (long segto, void *data, unsigned long type,
+                      long segment, long wrt) {
+    struct Section *s;
+    long realbytes = type & OUT_SIZMASK;
+    unsigned char mydata[4], *p;
+
+    if (wrt != NO_SEG) {
+        wrt = NO_SEG;                  /* continue to do _something_ */
+        error (ERR_NONFATAL, "WRT not supported by a.out output format");
+    }
+
+    type &= OUT_TYPMASK;
+
+    /*
+     * handle absolute-assembly (structure definitions)
+     */
+    if (segto == NO_SEG) {
+        if (type != OUT_RESERVE)
+            error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]"
+                   " space");
+        return;
+    }
+
+    if (segto == stext.index)
+        s = &stext;
+    else if (segto == sdata.index)
+        s = &sdata;
+    else if (segto == bssindex)
+        s = NULL;
+    else {
+        /* segto is a long, so it needs %ld rather than %d */
+        error(ERR_WARNING, "attempt to assemble code in"
+              " segment %ld: defaulting to `.text'", segto);
+        s = &stext;
+    }
+
+    if (!s && type != OUT_RESERVE) {
+        error(ERR_WARNING, "attempt to initialise memory in the"
+              " BSS section: ignored");
+        if (type == OUT_REL2ADR)
+            realbytes = 2;
+        else if (type == OUT_REL4ADR)
+            realbytes = 4;
+        bsslen += realbytes;
+        return;
+    }
+
+    if (type == OUT_RESERVE) {
+        if (s) {
+            error(ERR_WARNING, "uninitialised space declared in"
+                  " %s section: zeroing",
+                  (segto == stext.index ? "code" : "data"));
+            aout_sect_write (s, NULL, realbytes);
+        } else
+            bsslen += realbytes;
+    } else if (type == OUT_RAWDATA) {
+        if (segment != NO_SEG)
+            error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG");
+        aout_sect_write (s, data, realbytes);
+    } else if (type == OUT_ADDRESS) {
+        if (segment != NO_SEG) {
+            if (segment % 2) {
+                error(ERR_NONFATAL, "a.out format does not support"
+                      " segment base references");
+            } else
+                aout_add_reloc (s, segment, FALSE, realbytes);
+        }
+        p = mydata;
+        if (realbytes == 2)
+            WRITESHORT (p, *(long *)data);
+        else
+            WRITELONG (p, *(long *)data);
+        aout_sect_write (s, mydata, realbytes);
+    } else if (type == OUT_REL2ADR) {
+        if (segment == segto)
+            error(ERR_PANIC, "intra-segment OUT_REL2ADR");
+        if (segment != NO_SEG && segment % 2) {
+            error(ERR_NONFATAL, "a.out format does not support"
+                  " segment base references");
+        } else
+            aout_add_reloc (s, segment, TRUE, 2);
+        p = mydata;
+        WRITESHORT (p, *(long*)data-(realbytes + s->len));
+        aout_sect_write (s, mydata, 2L);
+    } else if (type == OUT_REL4ADR) {
+        if (segment == segto)
+            error(ERR_PANIC, "intra-segment OUT_REL4ADR");
+        if (segment != NO_SEG && segment % 2) {
+            error(ERR_NONFATAL, "a.out format does not support"
+                  " segment base references");
+        } else
+            aout_add_reloc (s, segment, TRUE, 4);
+        p = mydata;
+        WRITELONG (p, *(long*)data-(realbytes + s->len));
+        aout_sect_write (s, mydata, 4L);
+    }
+}
+
+static void aout_pad_sections(void) {
+    static unsigned char pad[] = { 0x90, 0x90, 0x90, 0x90 };
+    /*
+     * Pad each of the text and data sections with NOPs until their
+     * length is a multiple of four. (NOP == 0x90.) Also increase
+     * the length of the BSS section similarly.
+     */
+    aout_sect_write (&stext, pad, (-stext.len) & 3);
+    aout_sect_write (&sdata, pad, (-sdata.len) & 3);
+    bsslen = (bsslen + 3) & ~3;
+}
+
+/*
+ * a.out files have the curious property that all references to
+ * things in the data or bss sections are done by addresses which
+ * are actually relative to the start of the _text_ section, in the
+ * _file_. (No relation to what happens after linking. No idea why
+ * this should be so. It's very strange.) So we have to go through
+ * the relocation table, _after_ the final size of each section is
+ * known, and fix up the relocations pointed to.
+ */
+static void aout_fixup_relocs(struct Section *sect) {
+    struct Reloc *r;
+
+    saa_rewind (sect->data);
+    for (r = sect->head; r; r = r->next) {
+        unsigned char *p, *q, blk[4];
+        long l;
+
+        saa_fread (sect->data, r->address, blk, (long)r->bytes);
+        p = q = blk;
+        l = *p++;
+        l += ((long)*p++) << 8;
+        if (r->bytes == 4) {
+            l += ((long)*p++) << 16;
+            l += ((long)*p++) << 24;
+        }
+        if (r->symbol == -SECT_DATA)
+            l += stext.len;
+        else if (r->symbol == -SECT_BSS)
+            l += stext.len + sdata.len;
+        if (r->bytes == 4)
+            WRITELONG(q, l);
+        else
+            WRITESHORT(q, l);
+        saa_fwrite (sect->data, r->address, blk, (long)r->bytes);
+    }
+}
+
+/*
+ * Write the complete object file: header, text, data, both
+ * relocation tables, the symbol table and the string table.
+ */
+static void aout_write(void) {
+    /*
+     * Emit the a.out header.
+     */
+    fwritelong (0x640107, aoutfp);     /* OMAGIC, M_386, no flags */
+    fwritelong (stext.len, aoutfp);
+    fwritelong (sdata.len, aoutfp);
+    fwritelong (bsslen, aoutfp);
+    fwritelong (nsyms * 12, aoutfp);   /* length of symbol table */
+    fwritelong (0L, aoutfp);           /* object files have no entry point */
+    fwritelong (stext.nrelocs * 8, aoutfp);   /* size of text relocs */
+    fwritelong (sdata.nrelocs * 8, aoutfp);   /* size of data relocs */
+
+    /*
+     * Write out the code section and the data section.
+     */
+    saa_fpwrite (stext.data, aoutfp);
+    saa_fpwrite (sdata.data, aoutfp);
+
+    /*
+     * Write out the relocations.
+     */
+    aout_write_relocs (stext.head);
+    aout_write_relocs (sdata.head);
+
+    /*
+     * Write the symbol table.
+     */
+    aout_write_syms ();
+
+    /*
+     * And the string table.
+     */
+    fwritelong (strslen+4, aoutfp);    /* length includes length count */
+    saa_fpwrite (strs, aoutfp);
+}
+
+/*
+ * Write one relocation list as two longs per entry: the address,
+ * then a word holding the symbol number (or section id) in the low
+ * bits plus the external, pc-relative and size flag bits.
+ */
+static void aout_write_relocs (struct Reloc *r) {
+    while (r) {
+        unsigned long word2;
+
+        fwritelong (r->address, aoutfp);
+
+        if (r->symbol >= 0)
+            word2 = r->symbol | 0x8000000;
+        else
+            word2 = -r->symbol;
+        if (r->relative)
+            word2 |= 0x1000000;
+        word2 |= (r->bytes == 2 ? 0x2000000 : 0x4000000);
+        fwritelong (word2, aoutfp);
+
+        r = r->next;
+    }
+}
+
+/*
+ * Write the symbol table: three longs (string position, type,
+ * value) for each of the `nsyms' recorded symbols.
+ */
+static void aout_write_syms (void) {
+    unsigned long i;
+
+    saa_rewind (syms);
+    for (i=0; i<nsyms; i++) {
+        struct Symbol *sym = saa_rstruct(syms);
+        fwritelong (sym->strpos, aoutfp);
+        fwritelong ((long)sym->type, aoutfp);
+        /*
+         * Fix up the symbol value now we know the final section
+         * sizes.
+         */
+        if ((sym->type & SECT_MASK) == SECT_DATA)
+            sym->value += stext.len;
+        if ((sym->type & SECT_MASK) == SECT_BSS)
+            sym->value += stext.len + sdata.len;
+        fwritelong (sym->value, aoutfp);
+    }
+}
+
+/*
+ * Append `len' bytes to a section and keep its length in step. A
+ * NULL `data' is passed straight through to saa_wbytes.
+ */
+static void aout_sect_write (struct Section *sect,
+                             unsigned char *data, unsigned long len) {
+    saa_wbytes (sect->data, data, len);
+    sect->len += len;
+}
+
+static long aout_segbase (long segment) {
+    return segment;
+}
+
+static int aout_directive (char *directive, char *value, int pass) {
+    return 0;
+}
+
+static void aout_filename (char *inname, char *outname, efunc error) {
+    standard_extension (inname, outname, ".o", error);
+}
+
+struct ofmt of_aout = {
+    "GNU a.out (i386) object files (e.g. Linux)",
+    "aout",
+    aout_init,
+    aout_out,
+    aout_deflabel,
+    aout_section_names,
+    aout_segbase,
+    aout_directive,
+    aout_filename,
+    aout_cleanup
+};
+
+#endif /* OF_AOUT */
diff --git a/outas86.c b/outas86.c
new file mode 100644
index 00000000..82dedb2b
--- /dev/null
+++ b/outas86.c
@@ -0,0 +1,548 @@
+/* outas86.c   output routines for the Netwide Assembler to produce
+ *             Linux as86 (bin86-0.3) object files
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "nasm.h"
+#include "nasmlib.h"
+#include "outform.h"
+
+#ifdef OF_AS86
+
+struct Piece {
+    struct Piece *next;
+    int type;                          /* 0 = absolute, 1 = seg, 2 = sym */
+    long offset;                       /* relative offset */
+    int number;                        /* symbol/segment number (4=bss) */
+    long bytes;                        /* size of reloc or of absolute data */
+    int relative;                      /* TRUE or FALSE */
+};
+
+struct Symbol {
+    long strpos;                       /* string table position of name */
+    int flags;                         /* symbol flags */
+    int segment;                       /* 4=bss at this point */
+    long value;                        /* address, or COMMON variable size */
+};
+
+/*
+ * Section IDs - used in Piece.number and Symbol.segment.
+ */
+#define SECT_TEXT 0                    /* text section */
+#define SECT_DATA 3                    /* data section */
+#define SECT_BSS 4                     /* bss section */
+
+/*
+ * Flags used in Symbol.flags.
+ */
+#define SYM_ENTRY (1<<8)
+#define SYM_EXPORT (1<<7)
+#define SYM_IMPORT (1<<6)
+#define SYM_ABSOLUTE (1<<4)
+
+struct Section {
+    struct SAA *data;
+    unsigned long datalen, size, len;
+    long index;
+    struct Piece *head, *last, **tail;
+};
+
+static char as86_module[FILENAME_MAX];
+
+static struct Section stext, sdata;
+static unsigned long bsslen;
+static long bssindex;
+
+static struct SAA *syms;
+static unsigned long nsyms;
+
+static struct RAA *bsym;
+
+static struct SAA *strs;
+static unsigned long strslen;
+
+static int as86_reloc_size;
+
+static FILE *as86fp;
+static efunc error;
+
+static void as86_write(void);
+static void as86_write_section (struct Section *, int);
+static int as86_add_string (char *name);
+static void as86_sect_write(struct Section *, unsigned char *, unsigned long);
+
+static void as86_init(FILE *fp, efunc errfunc, ldfunc ldef) {
+    as86fp = fp;
+    error = errfunc;
+    (void) ldef;                       /* placate optimisers */
+    stext.data = saa_init(1L); stext.datalen = 0L;
+    stext.head = stext.last = NULL;
+    stext.tail = &stext.head;
+    sdata.data = saa_init(1L); sdata.datalen = 0L;
+    sdata.head = sdata.last = NULL;
+    sdata.tail = &sdata.head;
+    bsslen =
+        stext.len = stext.datalen = stext.size =
+        sdata.len = sdata.datalen = sdata.size = 0;
+    stext.index = seg_alloc();
+    sdata.index = seg_alloc();
+    bssindex = seg_alloc();
+    syms = saa_init((long)sizeof(struct Symbol));
+    nsyms = 0;
+    bsym = raa_init();
+    strs = saa_init(1L);
+    strslen = 0;
+
+    as86_add_string (as86_module);     /* module name goes first, at string offset 0 */
+}
+
+static void as86_cleanup(void) {
+    struct Piece *p;
+
+    as86_write();                      /* emit the file before tearing anything down */
+    fclose (as86fp);
+    saa_free (stext.data);
+    while (stext.head) {
+        p = stext.head;
+        stext.head = stext.head->next;
+        nasm_free (p);
+    }
+    saa_free (sdata.data);
+    while (sdata.head) {
+        p = sdata.head;
+        sdata.head = sdata.head->next;
+        nasm_free (p);
+    }
+    saa_free (syms);
+    raa_free (bsym);
+    saa_free (strs);
+}
+
+static long as86_section_names (char *name, int pass, int *bits) {
+    /*
+     * Default is 16 bits.
+     *
+     * A NULL name selects the default section (.text); any name
+     * other than .text, .data or .bss yields NO_SEG.
+     */
+    if (!name)
+        *bits = 16;
+
+    if (!name)
+        return stext.index;
+
+    if (!strcmp(name, ".text"))
+        return stext.index;
+    else if (!strcmp(name, ".data"))
+        return sdata.index;
+    else if (!strcmp(name, ".bss"))
+        return bssindex;
+    else
+        return NO_SEG;
+}
+
+static int as86_add_string (char *name) {
+    int pos = strslen;                 /* offset the new string will start at */
+    int length = strlen(name);
+
+    saa_wbytes (strs, name, (long)(length+1));   /* +1 stores the NUL too */
+    strslen += 1+length;
+
+    return pos;
+}
+
+static void as86_deflabel (char *name, long segment, long offset,
+                           int is_global) {
+    struct Symbol *sym;
+
+    if (name[0] == '.' && name[1] == '.') {
+        return;                        /* names starting `..' are not recorded */
+    }
+
+    sym = saa_wstruct (syms);
+
+    sym->strpos = as86_add_string (name);
+    sym->flags = 0;
+    if (segment == NO_SEG)
+        sym->flags |= SYM_ABSOLUTE, sym->segment = 0;
+    else if (segment == stext.index)
+        sym->segment = SECT_TEXT;
+    else if (segment == sdata.index)
+        sym->segment = SECT_DATA;
+    else if (segment == bssindex)
+        sym->segment = SECT_BSS;
+    else {
+        sym->flags |= SYM_IMPORT;      /* not one of our segments: external */
+        sym->segment = 15;
+    }
+
+    if (is_global == 2)
+        sym->segment = 3;              /* already have IMPORT */
+
+    if (is_global && !(sym->flags & SYM_IMPORT))
+        sym->flags |= SYM_EXPORT;
+
+    sym->value = offset;
+
+    /*
+     * define the references from external-symbol segment numbers
+     * to these symbol records.
+     */
+    if (segment != NO_SEG && segment != stext.index &&
+        segment != sdata.index && segment != bssindex)
+        bsym = raa_write (bsym, segment, nsyms);
+
+    nsyms++;
+}
+
+static void as86_add_piece (struct Section *sect, int type, long offset,
+                            long segment, long bytes, int relative) {
+    struct Piece *p;
+
+    sect->len += bytes;
+
+    if (type == 0 && sect->last && sect->last->type == 0) {
+        sect->last->bytes += bytes;    /* merge consecutive absolute pieces */
+        return;
+    }
+
+    p = sect->last = *sect->tail = nasm_malloc(sizeof(struct Piece));
+    sect->tail = &p->next;
+    p->next = NULL;
+
+    p->type = type;
+    p->offset = offset;
+    p->bytes = bytes;
+    p->relative = relative;
+
+    /* p->number is only assigned for type 1 pieces */
+    if (type == 1 && segment == stext.index)
+        p->number = SECT_TEXT;
+    else if (type == 1 && segment == sdata.index)
+        p->number = SECT_DATA;
+    else if (type == 1 && segment == bssindex)
+        p->number = SECT_BSS;
+    else if (type == 1)
+        p->number = raa_read (bsym, segment), p->type = 2;   /* external: becomes a symbol piece */
+}
+
+static void as86_out (long segto, void *data, unsigned long type,
+                      long segment, long wrt) {
+    struct Section *s;
+    long realbytes = type & OUT_SIZMASK;
+    long offset;
+    unsigned char mydata[4], *p;
+
+    if (wrt != NO_SEG) {
+        wrt = NO_SEG;                  /* continue to do _something_ */
+        error (ERR_NONFATAL, "WRT not supported by as86 output format");
+    }
+
+    type &= OUT_TYPMASK;
+
+    /*
+     * handle absolute-assembly (structure definitions)
+     */
+    if (segto == NO_SEG) {
+        if (type != OUT_RESERVE)
+            error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]"
+                   " space");
+        return;
+    }
+
+    if (segto == stext.index)
+        s = &stext;
+    else if (segto == sdata.index)
+        s = &sdata;
+    else if (segto == bssindex)
+        s = NULL;
+    else {
+        error(ERR_WARNING, "attempt to assemble code in"
+              " segment %d: defaulting to `.text'", segto);
+        s = &stext;
+    }
+
+    if (!s && type != OUT_RESERVE) {
+        error(ERR_WARNING, "attempt to initialise memory in the"
+              " BSS section: ignored");
+        if (type == OUT_REL2ADR)
+            realbytes = 2;
+        else if (type == OUT_REL4ADR)
+            realbytes = 4;
+        bsslen +=
realbytes; + return; + } + + if (type == OUT_RESERVE) { + if (s) { + error(ERR_WARNING, "uninitialised space declared in" + " %s section: zeroing", + (segto == stext.index ? "code" : "data")); + as86_sect_write (s, NULL, realbytes); + as86_add_piece (s, 0, 0L, 0L, realbytes, 0); + } else + bsslen += realbytes; + } else if (type == OUT_RAWDATA) { + if (segment != NO_SEG) + error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG"); + as86_sect_write (s, data, realbytes); + as86_add_piece (s, 0, 0L, 0L, realbytes, 0); + } else if (type == OUT_ADDRESS) { + if (segment != NO_SEG) { + if (segment % 2) { + error(ERR_NONFATAL, "as86 format does not support" + " segment base references"); + } else{ + offset = * (long *) data; + as86_add_piece (s, 1, offset, segment, realbytes, 0); + } + } else { + p = mydata; + WRITELONG (p, * (long *) data); + as86_sect_write (s, data, realbytes); + as86_add_piece (s, 0, 0L, 0L, realbytes, 0); + } + } else if (type == OUT_REL2ADR) { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL2ADR"); + if (segment != NO_SEG) { + if (segment % 2) { + error(ERR_NONFATAL, "as86 format does not support" + " segment base references"); + } else { + offset = * (long *) data; + as86_add_piece (s, 1, offset-realbytes+2, segment, 2L, 1); + } + } + } else if (type == OUT_REL4ADR) { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL4ADR"); + if (segment != NO_SEG) { + if (segment % 2) { + error(ERR_NONFATAL, "as86 format does not support" + " segment base references"); + } else { + offset = * (long *) data; + as86_add_piece (s, 1, offset-realbytes+4, segment, 4L, 1); + } + } + } +} + +static void as86_write(void) { + int i; + long symlen, seglen, segsize; + + /* + * First, go through the symbol records working out how big + * each will be. Also fix up BSS references at this time, and + * set the flags words up completely. 
+ */ + symlen = 0; + saa_rewind (syms); + for (i = 0; i < nsyms; i++) { + struct Symbol *sym = saa_rstruct (syms); + if (sym->segment == SECT_BSS) + sym->segment = SECT_DATA, sym->value += sdata.len; + sym->flags |= sym->segment; + if (sym->value == 0) + sym->flags |= 0 << 14, symlen += 4; + else if (sym->value >= 0 && sym->value <= 255) + sym->flags |= 1 << 14, symlen += 5; + else if (sym->value >= 0 && sym->value <= 65535) + sym->flags |= 2 << 14, symlen += 6; + else + sym->flags |= 3 << 14, symlen += 8; + } + + /* + * Now do the same for the segments, and get the segment size + * descriptor word at the same time. + */ + seglen = segsize = 0; + if ((unsigned long) stext.len > 65535) + segsize |= 0x03000000, seglen += 4; + else + segsize |= 0x02000000, seglen += 2; + if ((unsigned long) sdata.len > 65535) + segsize |= 0xC0000000, seglen += 4; + else + segsize |= 0x80000000, seglen += 2; + + /* + * Emit the as86 header. + */ + fwritelong (0x000186A3, as86fp); + fputc (0x2A, as86fp); + fwritelong (27+symlen+seglen+strslen, as86fp); /* header length */ + fwritelong (stext.len+sdata.len, as86fp); + fwriteshort (strslen, as86fp); + fwriteshort (0, as86fp); /* class = revision = 0 */ + fwritelong (0x55555555, as86fp); /* segment max sizes: always this */ + fwritelong (segsize, as86fp); /* segment size descriptors */ + if (segsize & 0x01000000) + fwritelong (stext.len, as86fp); + else + fwriteshort (stext.len, as86fp); + if (segsize & 0x40000000) + fwritelong (sdata.len, as86fp); + else + fwriteshort (sdata.len, as86fp); + fwriteshort (nsyms, as86fp); + + /* + * Write the symbol table. 
+ */ + saa_rewind (syms); + for (i = 0; i < nsyms; i++) { + struct Symbol *sym = saa_rstruct (syms); + fwriteshort (sym->strpos, as86fp); + fwriteshort (sym->flags, as86fp); + switch (sym->flags & (3<<14)) { + case 0<<14: break; + case 1<<14: fputc (sym->value, as86fp); break; + case 2<<14: fwriteshort (sym->value, as86fp); break; + case 3<<14: fwritelong (sym->value, as86fp); break; + } + } + + /* + * Write out the string table. + */ + saa_fpwrite (strs, as86fp); + + /* + * Write the program text. + */ + as86_reloc_size = -1; + as86_write_section (&stext, SECT_TEXT); + as86_write_section (&sdata, SECT_DATA); + fputc (0, as86fp); /* termination */ +} + +static void as86_set_rsize (int size) { + if (as86_reloc_size != size) { + switch (as86_reloc_size = size) { + case 1: fputc (0x01, as86fp); break; /* shouldn't happen */ + case 2: fputc (0x02, as86fp); break; + case 4: fputc (0x03, as86fp); break; + default: error (ERR_PANIC, "bizarre relocation size %d", size); + } + } +} + +static void as86_write_section (struct Section *sect, int index) { + struct Piece *p; + unsigned long s; + long length; + + fputc (0x20+index, as86fp); /* select the right section */ + + saa_rewind (sect->data); + + for (p = sect->head; p; p = p->next) + switch (p->type) { + case 0: + /* + * Absolute data. Emit it in chunks of at most 64 + * bytes. + */ + length = p->bytes; + do { + char buf[64]; + long tmplen = (length > 64 ? 64 : length); + fputc (0x40 | (tmplen & 0x3F), as86fp); + saa_rnbytes (sect->data, buf, tmplen); + fwrite (buf, 1, tmplen, as86fp); + length -= tmplen; + } while (length > 0); + break; + case 1: + /* + * A segment-type relocation. First fix up the BSS. + */ + if (p->number == SECT_BSS) + p->number = SECT_DATA, p->offset += sdata.len; + as86_set_rsize (p->bytes); + fputc (0x80 | (p->relative ? 
0x20 : 0) | p->number, as86fp); + if (as86_reloc_size == 2) + fwriteshort (p->offset, as86fp); + else + fwritelong (p->offset, as86fp); + break; + case 2: + /* + * A symbol-type relocation. + */ + as86_set_rsize (p->bytes); + s = p->offset; + if (s > 65535) + s = 3; + else if (s > 255) + s = 2; + else if (s > 0) + s = 1; + else + s = 0; + fputc (0xC0 | + (p->relative ? 0x20 : 0) | + (p->number > 255 ? 0x04 : 0) | s, as86fp); + if (p->number > 255) + fwriteshort (p->number, as86fp); + else + fputc (p->number, as86fp); + switch ((int)s) { + case 0: break; + case 1: fputc (p->offset, as86fp); break; + case 2: fwriteshort (p->offset, as86fp); break; + case 3: fwritelong (p->offset, as86fp); break; + } + break; + } +} + +static void as86_sect_write (struct Section *sect, + unsigned char *data, unsigned long len) { + saa_wbytes (sect->data, data, len); + sect->datalen += len; +} + +static long as86_segbase (long segment) { + return segment; +} + +static int as86_directive (char *directive, char *value, int pass) { + return 0; +} + +static void as86_filename (char *inname, char *outname, efunc error) { + char *p; + + if ( (p = strrchr (inname, '.')) != NULL) { + strncpy (as86_module, inname, p-inname); + as86_module[p-inname] = '\0'; + } else + strcpy (as86_module, inname); + + standard_extension (inname, outname, ".o", error); +} + +struct ofmt of_as86 = { + "Linux as86 (bin86 version 0.3) object files", + "as86", + as86_init, + as86_out, + as86_deflabel, + as86_section_names, + as86_segbase, + as86_directive, + as86_filename, + as86_cleanup +}; + +#endif /* OF_AS86 */ diff --git a/outbin.c b/outbin.c new file mode 100644 index 00000000..82c85106 --- /dev/null +++ b/outbin.c @@ -0,0 +1,303 @@ +/* outbin.c output routines for the Netwide Assembler to produce + * flat-form binary files + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. 
The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include +#include +#include +#include "nasm.h" +#include "nasmlib.h" +#include "outform.h" + +#ifdef OF_BIN + +static FILE *fp; +static efunc error; + +static struct Section { + struct SAA *contents; + long length; + long index; +} textsect, datasect; +static long bsslen, bssindex; + +static struct Reloc { + struct Reloc *next; + long posn; + long bytes; + long secref; + long secrel; + struct Section *target; +} *relocs, **reloctail; + +static int start_point; + +static void add_reloc (struct Section *s, long bytes, long secref, + long secrel) { + struct Reloc *r; + + r = *reloctail = nasm_malloc(sizeof(struct Reloc)); + reloctail = &r->next; + r->next = NULL; + r->posn = s->length; + r->bytes = bytes; + r->secref = secref; + r->secrel = secrel; + r->target = s; +} + +static void bin_init (FILE *afp, efunc errfunc, ldfunc ldef) { + fp = afp; + + error = errfunc; + (void) ldef; /* placate optimisers */ + + start_point = 0; /* default */ + textsect.contents = saa_init(1L); + datasect.contents = saa_init(1L); + textsect.length = datasect.length = 0; + textsect.index = seg_alloc(); + datasect.index = seg_alloc(); + bsslen = 0; + bssindex = seg_alloc(); + relocs = NULL; + reloctail = &relocs; +} + +static void bin_cleanup (void) { + struct Reloc *r; + long datapos, dataalign, bsspos; + + datapos = (start_point + textsect.length + 3) & ~3;/* align on 4 bytes */ + dataalign = datapos - (start_point + textsect.length); + + saa_rewind (textsect.contents); + saa_rewind (datasect.contents); + + bsspos = (datapos + datasect.length + 3) & ~3; + + for (r = relocs; r; r = r->next) { + unsigned char *p, *q, mydata[4]; + long l; + + saa_fread (r->target->contents, r->posn, mydata, r->bytes); + p = q = mydata; + l = *p++; + l += ((long)*p++) << 8; + if (r->bytes == 4) { + l += ((long)*p++) << 16; + l += ((long)*p++) << 24; + } + + if (r->secref == 
textsect.index) + l += start_point; + else if (r->secref == datasect.index) + l += datapos; + else if (r->secref == bssindex) + l += bsspos; + + if (r->secrel == textsect.index) + l -= start_point; + else if (r->secrel == datasect.index) + l -= datapos; + else if (r->secrel == bssindex) + l -= bsspos; + + if (r->bytes == 4) + WRITELONG(q, l); + else + WRITESHORT(q, l); + saa_fwrite (r->target->contents, r->posn, mydata, r->bytes); + } + saa_fpwrite (textsect.contents, fp); + if (datasect.length > 0) { + fwrite ("\0\0\0\0", dataalign, 1, fp); + saa_fpwrite (datasect.contents, fp); + } + fclose (fp); + saa_free (textsect.contents); + saa_free (datasect.contents); + while (relocs) { + r = relocs->next; + nasm_free (relocs); + relocs = r; + } +} + +static void bin_out (long segto, void *data, unsigned long type, + long segment, long wrt) { + unsigned char *p, mydata[4]; + struct Section *s; + long realbytes; + + if (wrt != NO_SEG) { + wrt = NO_SEG; /* continue to do _something_ */ + error (ERR_NONFATAL, "WRT not supported by binary output format"); + } + + /* + * handle absolute-assembly (structure definitions) + */ + if (segto == NO_SEG) { + if ((type & OUT_TYPMASK) != OUT_RESERVE) + error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]" + " space"); + return; + } + + if (segto == bssindex) { /* BSS */ + if ((type & OUT_TYPMASK) != OUT_RESERVE) + error(ERR_WARNING, "attempt to initialise memory in the" + " BSS section: ignored"); + s = NULL; + } else if (segto == textsect.index) { + s = &textsect; + } else if (segto == datasect.index) { + s = &datasect; + } else { + error(ERR_WARNING, "attempt to assemble code in" + " segment %d: defaulting to `.text'", segto); + s = &textsect; + } + + if ((type & OUT_TYPMASK) == OUT_ADDRESS) { + if (segment != NO_SEG && + segment != textsect.index && + segment != datasect.index && + segment != bssindex) { + if (segment % 2) + error(ERR_NONFATAL, "binary output format does not support" + " segment base references"); + else + 
error(ERR_NONFATAL, "binary output format does not support" + " external references"); + segment = NO_SEG; + } + if (s) { + if (segment != NO_SEG) + add_reloc (s, type & OUT_SIZMASK, segment, -1L); + p = mydata; + if ((type & OUT_SIZMASK) == 4) + WRITELONG (p, *(long *)data); + else + WRITESHORT (p, *(long *)data); + saa_wbytes (s->contents, mydata, type & OUT_SIZMASK); + s->length += type & OUT_SIZMASK; + } else + bsslen += type & OUT_SIZMASK; + } else if ((type & OUT_TYPMASK) == OUT_RAWDATA) { + type &= OUT_SIZMASK; + p = data; + if (s) { + saa_wbytes (s->contents, data, type); + s->length += type; + } else + bsslen += type; + } else if ((type & OUT_TYPMASK) == OUT_RESERVE) { + if (s) { + error(ERR_WARNING, "uninitialised space declared in" + " %s section: zeroing", + (segto == textsect.index ? "code" : "data")); + } + type &= OUT_SIZMASK; + if (s) { + saa_wbytes (s->contents, NULL, type); + s->length += type; + } else + bsslen += type; + } else if ((type & OUT_TYPMASK) == OUT_REL2ADR || + (type & OUT_TYPMASK) == OUT_REL4ADR) { + realbytes = ((type & OUT_TYPMASK) == OUT_REL4ADR ? 
4 : 2); + if (segment != NO_SEG && + segment != textsect.index && + segment != datasect.index && + segment != bssindex) { + if (segment % 2) + error(ERR_NONFATAL, "binary output format does not support" + " segment base references"); + else + error(ERR_NONFATAL, "binary output format does not support" + " external references"); + segment = NO_SEG; + } + if (s) { + add_reloc (s, realbytes, segment, segto); + p = mydata; + if (realbytes == 4) + WRITELONG (p, *(long*)data - realbytes - s->length); + else + WRITESHORT (p, *(long*)data - realbytes - s->length); + saa_wbytes (s->contents, mydata, realbytes); + s->length += realbytes; + } else + bsslen += realbytes; + } +} + +static void bin_deflabel (char *name, long segment, long offset, + int is_global) { + if (is_global == 2) { + error (ERR_NONFATAL, "binary output format does not support common" + " variables"); + } +} + +static long bin_secname (char *name, int pass, int *bits) { + /* + * Default is 16 bits. + */ + if (!name) + *bits = 16; + + if (!name) + return textsect.index; + + if (!strcmp(name, ".text")) + return textsect.index; + else if (!strcmp(name, ".data")) + return datasect.index; + else if (!strcmp(name, ".bss")) + return bssindex; + else + return NO_SEG; +} + +static long bin_segbase (long segment) { + return segment; +} + +static int bin_directive (char *directive, char *value, int pass) { + int rn_error; + + if (!strcmp(directive, "org")) { + start_point = readnum (value, &rn_error); + if (rn_error) + error (ERR_NONFATAL, "argument to ORG should be numeric"); + return 1; + } else + return 0; +} + +static void bin_filename (char *inname, char *outname, efunc error) { + standard_extension (inname, outname, "", error); +} + +struct ofmt of_bin = { + "flat-form binary files (e.g. 
DOS .COM, .SYS)", + "bin", + bin_init, + bin_out, + bin_deflabel, + bin_secname, + bin_segbase, + bin_directive, + bin_filename, + bin_cleanup +}; + +#endif /* OF_BIN */ diff --git a/outcoff.c b/outcoff.c new file mode 100644 index 00000000..c3ae712d --- /dev/null +++ b/outcoff.c @@ -0,0 +1,611 @@ +/* outcoff.c output routines for the Netwide Assembler to produce + * COFF object files (for DJGPP and Win32) + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include +#include +#include +#include +#include + +#include "nasm.h" +#include "nasmlib.h" +#include "outform.h" + +#if defined(OF_COFF) || defined(OF_WIN32) + +/* + * Notes on COFF: + * + * (0) When I say `standard COFF' below, I mean `COFF as output and + * used by DJGPP'. I assume DJGPP gets it right. + * + * (1) Win32 appears to interpret the term `relative relocation' + * differently from standard COFF. Standard COFF understands a + * relative relocation to mean that during relocation you add the + * address of the symbol you're referencing, and subtract the base + * address of the section you're in. Win32 COFF, by contrast, seems + * to add the address of the symbol and then subtract the address + * of THE BYTE AFTER THE RELOCATED DWORD. Hence the two formats are + * subtly incompatible. + * + * (2) Win32 doesn't bother putting any flags in the header flags + * field (at offset 0x12 into the file). + * + * (3) Win32 puts some weird flags into the section header table. + * It uses flags 0x80000000 (writable), 0x40000000 (readable) and + * 0x20000000 (executable) in the expected combinations, which + * standard COFF doesn't seem to bother with, but it also does + * something else strange: it also flags code sections as + * 0x00500000 and data/bss as 0x00300000. 
Even Microsoft's + * documentation doesn't explain what these things mean. I just go + * ahead and use them anyway - it seems to work. + * + * (4) Both standard COFF and Win32 COFF seem to use the DWORD + * field directly after the section name in the section header + * table for something strange: they store what the address of the + * section start point _would_ be, if you laid all the sections end + * to end starting at zero. Dunno why. Microsoft's documentation + * lists this field as "Virtual Size of Section", which doesn't + * seem to fit at all. In fact, Win32 even includes non-linked + * sections such as .drectve in this calculation. Not that I can be + * bothered with those things anyway. + * + * (5) Standard COFF does something very strange to common + * variables: the relocation point for a common variable is as far + * _before_ the variable as its size stretches out _after_ it. So + * we must fix up common variable references. Win32 seems to be + * sensible on this one. + */ + +/* Flag which version of COFF we are currently outputting. */ +static int win32; + +struct Reloc { + struct Reloc *next; + long address; /* relative to _start_ of section */ + long symbol; /* symbol number */ + int relative; /* TRUE or FALSE */ +}; + +struct Symbol { + char name[9]; + long strpos; /* string table position of name */ + int section; /* section number where it's defined + * - in COFF codes, not NASM codes */ + int is_global; /* is it a global symbol or not? 
*/ + long value; /* address, or COMMON variable size */ +}; + +static FILE *coffp; +static efunc error; +static char coff_infile[FILENAME_MAX]; + +struct Section { + struct SAA *data; + unsigned long len; + int nrelocs; + long index; + struct Reloc *head, **tail; +}; + +static struct Section stext, sdata; +static unsigned long bsslen; +static long bssindex; + +static struct SAA *syms; +static unsigned long nsyms; + +static struct RAA *bsym, *symval; + +static struct SAA *strs; +static unsigned long strslen; + +/* + * The symbol table contains a double entry for the file name, a + * double entry for each of the three sections, and an absolute + * symbol referencing address zero, followed by the _real_ symbols. + * That's nine extra symbols. + */ +#define SYM_INITIAL 9 + +/* + * Symbol table indices we can relocate relative to. + */ +#define SYM_ABS_SEG 8 +#define SYM_TEXT_SEG 2 +#define SYM_DATA_SEG 4 +#define SYM_BSS_SEG 6 + +/* + * The section header table ends at this offset: 0x14 for the + * header, plus 0x28 for each of three sections. 
+ */ +#define COFF_HDRS_END 0x8c + +static void coff_gen_init(FILE *, efunc); +static void coff_sect_write (struct Section *, unsigned char *, + unsigned long); +static void coff_write (void); +static void coff_section_header (char *, long, long, long, long, int, long); +static void coff_write_relocs (struct Section *); +static void coff_write_symbols (void); + +static void coff_win32_init(FILE *fp, efunc errfunc, ldfunc ldef) { + win32 = TRUE; + (void) ldef; /* placate optimisers */ + coff_gen_init(fp, errfunc); +} + +static void coff_std_init(FILE *fp, efunc errfunc, ldfunc ldef) { + win32 = FALSE; + (void) ldef; /* placate optimisers */ + coff_gen_init(fp, errfunc); +} + +static void coff_gen_init(FILE *fp, efunc errfunc) { + coffp = fp; + error = errfunc; + stext.data = saa_init(1L); stext.head = NULL; stext.tail = &stext.head; + sdata.data = saa_init(1L); sdata.head = NULL; sdata.tail = &sdata.head; + stext.len = sdata.len = bsslen = 0; + stext.nrelocs = sdata.nrelocs = 0; + stext.index = seg_alloc(); + sdata.index = seg_alloc(); + bssindex = seg_alloc(); + syms = saa_init((long)sizeof(struct Symbol)); + nsyms = 0; + bsym = raa_init(); + symval = raa_init(); + strs = saa_init(1L); + strslen = 0; +} + +static void coff_cleanup(void) { + struct Reloc *r; + + coff_write(); + fclose (coffp); + saa_free (stext.data); + while (stext.head) { + r = stext.head; + stext.head = stext.head->next; + nasm_free (r); + } + saa_free (sdata.data); + while (sdata.head) { + r = sdata.head; + sdata.head = sdata.head->next; + nasm_free (r); + } + saa_free (syms); + raa_free (bsym); + raa_free (symval); + saa_free (strs); +} + +static long coff_section_names (char *name, int pass, int *bits) { + /* + * Default is 32 bits. 
+ */ + if (!name) + *bits = 32; + + if (!name) + return stext.index; + + if (!strcmp(name, ".text")) + return stext.index; + else if (!strcmp(name, ".data")) + return sdata.index; + else if (!strcmp(name, ".bss")) + return bssindex; + else + return NO_SEG; +} + +static void coff_deflabel (char *name, long segment, long offset, + int is_global) { + int pos = strslen+4; + struct Symbol *sym; + + if (name[0] == '.' && name[1] == '.') { + return; + } + + if (strlen(name) > 8) { + saa_wbytes (strs, name, (long)(1+strlen(name))); + strslen += 1+strlen(name); + } else + pos = -1; + + sym = saa_wstruct (syms); + + sym->strpos = pos; + if (pos == -1) + strcpy (sym->name, name); + sym->is_global = !!is_global; + if (segment == NO_SEG) + sym->section = -1; /* absolute symbol */ + else if (segment == stext.index) + sym->section = 1; /* .text */ + else if (segment == sdata.index) + sym->section = 2; /* .data */ + else if (segment == bssindex) + sym->section = 3; /* .bss */ + else { + sym->section = 0; /* undefined */ + sym->is_global = TRUE; + } + if (is_global == 2) + sym->value = offset; + else + sym->value = (sym->section == 0 ? 0 : offset); + + /* + * define the references from external-symbol segment numbers + * to these symbol records. + */ + if (segment != NO_SEG && segment != stext.index && + segment != sdata.index && segment != bssindex) + bsym = raa_write (bsym, segment, nsyms); + + if (segment != NO_SEG) + symval = raa_write (symval, segment, sym->section ? 0 : sym->value); + + nsyms++; +} + +static long coff_add_reloc (struct Section *sect, long segment, + int relative) { + struct Reloc *r; + + r = *sect->tail = nasm_malloc(sizeof(struct Reloc)); + sect->tail = &r->next; + r->next = NULL; + + r->address = sect->len; + r->symbol = (segment == NO_SEG ? SYM_ABS_SEG : + segment == stext.index ? SYM_TEXT_SEG : + segment == sdata.index ? SYM_DATA_SEG : + segment == bssindex ? 
SYM_BSS_SEG : + raa_read (bsym, segment) + SYM_INITIAL); + r->relative = relative; + + sect->nrelocs++; + + /* + * Return the fixup for standard COFF common variables. + */ + if (r->symbol >= SYM_INITIAL && !win32) + return raa_read (symval, segment); + else + return 0; +} + +static void coff_out (long segto, void *data, unsigned long type, + long segment, long wrt) { + struct Section *s; + long realbytes = type & OUT_SIZMASK; + unsigned char mydata[4], *p; + + if (wrt != NO_SEG) { + wrt = NO_SEG; /* continue to do _something_ */ + error (ERR_NONFATAL, "WRT not supported by COFF output formats"); + } + + type &= OUT_TYPMASK; + + /* + * handle absolute-assembly (structure definitions) + */ + if (segto == NO_SEG) { + if (type != OUT_RESERVE) + error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]" + " space"); + return; + } + + if (segto == stext.index) + s = &stext; + else if (segto == sdata.index) + s = &sdata; + else if (segto == bssindex) + s = NULL; + else { + error(ERR_WARNING, "attempt to assemble code in" + " segment %d: defaulting to `.text'", segto); + s = &stext; + } + + if (!s && type != OUT_RESERVE) { + error(ERR_WARNING, "attempt to initialise memory in the" + " BSS section: ignored"); + if (type == OUT_REL2ADR) + realbytes = 2; + else if (type == OUT_REL4ADR) + realbytes = 4; + bsslen += realbytes; + return; + } + + if (type == OUT_RESERVE) { + if (s) { + error(ERR_WARNING, "uninitialised space declared in" + " %s section: zeroing", + (segto == stext.index ? 
"code" : "data")); + coff_sect_write (s, NULL, realbytes); + } else + bsslen += realbytes; + } else if (type == OUT_RAWDATA) { + if (segment != NO_SEG) + error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG"); + coff_sect_write (s, data, realbytes); + } else if (type == OUT_ADDRESS) { + if (realbytes == 2 && (segment != NO_SEG || wrt != NO_SEG)) + error(ERR_NONFATAL, "COFF format does not support 16-bit" + " relocations"); + else { + long fix = 0; + if (segment != NO_SEG || wrt != NO_SEG) { + if (wrt != NO_SEG) { + error(ERR_NONFATAL, "COFF format does not support" + " WRT types"); + } else if (segment % 2) { + error(ERR_NONFATAL, "COFF format does not support" + " segment base references"); + } else + fix = coff_add_reloc (s, segment, FALSE); + } + p = mydata; + WRITELONG (p, *(long *)data + fix); + coff_sect_write (s, mydata, realbytes); + } + } else if (type == OUT_REL2ADR) { + error(ERR_NONFATAL, "COFF format does not support 16-bit" + " relocations"); + } else if (type == OUT_REL4ADR) { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL4ADR"); + else if (segment == NO_SEG && win32) + error(ERR_NONFATAL, "Win32 COFF does not correctly support" + " relative references to absolute addresses"); + else { + long fix = 0; + if (segment != NO_SEG && segment % 2) { + error(ERR_NONFATAL, "COFF format does not support" + " segment base references"); + } else + fix = coff_add_reloc (s, segment, TRUE); + p = mydata; + if (win32) { + WRITELONG (p, *(long*)data + 4 - realbytes + fix); + } else { + WRITELONG (p, *(long*)data-(realbytes + s->len) + fix); + } + coff_sect_write (s, mydata, 4L); + } + } +} + +static void coff_sect_write (struct Section *sect, + unsigned char *data, unsigned long len) { + saa_wbytes (sect->data, data, len); + sect->len += len; +} + +static int coff_directives (char *directive, char *value, int pass) { + return 0; +} + +static void coff_write (void) { + long textpos, textrelpos, datapos, datarelpos, sympos; + + /* + * Work out how 
big the file will get. + */ + textpos = COFF_HDRS_END; + textrelpos = textpos + stext.len; + datapos = textrelpos + stext.nrelocs * 10; + datarelpos = datapos + sdata.len; + sympos = datarelpos + sdata.nrelocs * 10; + + /* + * Output the COFF header. + */ + fwriteshort (0x14C, coffp); /* MACHINE_i386 */ + fwriteshort (3, coffp); /* number of sections */ + fwritelong (time(NULL), coffp); /* time stamp */ + fwritelong (sympos, coffp); + fwritelong (nsyms + SYM_INITIAL, coffp); + fwriteshort (0, coffp); /* no optional header */ + /* Flags: 32-bit, no line numbers. Win32 doesn't even bother with them. */ + fwriteshort (win32 ? 0 : 0x104, coffp); + + /* + * Output the section headers. + */ + + coff_section_header (".text", 0L, stext.len, textpos, + textrelpos, stext.nrelocs, + (win32 ? 0x60500020L : 0x20L)); + coff_section_header (".data", stext.len, sdata.len, datapos, + datarelpos, sdata.nrelocs, + (win32 ? 0xC0300040L : 0x40L)); + coff_section_header (".bss", stext.len+sdata.len, bsslen, 0L, 0L, 0, + (win32 ? 0xC0300080L : 0x80L)); + + /* + * Output the text section, and its relocations. + */ + saa_fpwrite (stext.data, coffp); + coff_write_relocs (&stext); + + /* + * Output the data section, and its relocations. + */ + saa_fpwrite (sdata.data, coffp); + coff_write_relocs (&sdata); + + /* + * Output the symbol and string tables. 
+ */ + coff_write_symbols(); + fwritelong (strslen+4, coffp); /* length includes length count */ + saa_fpwrite (strs, coffp); +} + +static void coff_section_header (char *name, long vsize, + long datalen, long datapos, + long relpos, int nrelocs, long flags) { + char padname[8]; + + memset (padname, 0, 8); + strncpy (padname, name, 8); + fwrite (padname, 8, 1, coffp); + fwritelong (vsize, coffp); + fwritelong (0L, coffp); /* RVA/offset - we ignore */ + fwritelong (datalen, coffp); + fwritelong (datapos, coffp); + fwritelong (relpos, coffp); + fwritelong (0L, coffp); /* no line numbers - we don't do 'em */ + fwriteshort (nrelocs, coffp); + fwriteshort (0, coffp); /* again, no line numbers */ + fwritelong (flags, coffp); +} + +static void coff_write_relocs (struct Section *s) { + struct Reloc *r; + + for (r = s->head; r; r = r->next) { + fwritelong (r->address, coffp); + fwritelong (r->symbol, coffp); + /* + * Strange: Microsoft's COFF documentation says 0x03 for an + * absolute relocation, but both Visual C++ and DJGPP agree + * that in fact it's 0x06. I'll use 0x06 until someone + * argues. + */ + fwriteshort (r->relative ? 0x14 : 0x06, coffp); + } +} + +static void coff_symbol (char *name, long strpos, long value, + int section, int type, int aux) { + char padname[8]; + + if (name) { + memset (padname, 0, 8); + strncpy (padname, name, 8); + fwrite (padname, 8, 1, coffp); + } else { + fwritelong (0L, coffp); + fwritelong (strpos, coffp); + } + fwritelong (value, coffp); + fwriteshort (section, coffp); + fwriteshort (0, coffp); + fputc (type, coffp); + fputc (aux, coffp); +} + +static void coff_write_symbols (void) { + char filename[18]; + int i; + + /* + * The `.file' record, and the file name auxiliary record. + */ + coff_symbol (".file", 0L, 0L, -2, 0x67, 1); + memset (filename, 0, 18); + strncpy (filename, coff_infile, 18); + fwrite (filename, 18, 1, coffp); + + /* + * The section records, with their auxiliaries. 
+ */ + memset (filename, 0, 18); /* useful zeroed buffer */ + + coff_symbol (".text", 0L, 0L, 1, 3, 1); + fwritelong (stext.len, coffp); + fwriteshort (stext.nrelocs, coffp); + fwrite (filename, 12, 1, coffp); + coff_symbol (".data", 0L, 0L, 2, 3, 1); + fwritelong (sdata.len, coffp); + fwriteshort (sdata.nrelocs, coffp); + fwrite (filename, 12, 1, coffp); + coff_symbol (".bss", 0L, 0L, 3, 3, 1); + fwritelong (bsslen, coffp); + fwrite (filename, 14, 1, coffp); + + /* + * The absolute symbol, for relative-to-absolute relocations. + */ + coff_symbol (".absolut", 0L, 0L, -1, 3, 0); + + /* + * The real symbols. + */ + saa_rewind (syms); + for (i=0; istrpos == -1 ? sym->name : NULL, + sym->strpos, sym->value, sym->section, + sym->is_global ? 2 : 3, 0); + } +} + +static long coff_segbase (long segment) { + return segment; +} + +static void coff_std_filename (char *inname, char *outname, efunc error) { + strcpy(coff_infile, inname); + standard_extension (inname, outname, ".o", error); +} + +static void coff_win32_filename (char *inname, char *outname, efunc error) { + strcpy(coff_infile, inname); + standard_extension (inname, outname, ".obj", error); +} + +#endif /* defined(OF_COFF) || defined(OF_WIN32) */ + +#ifdef OF_COFF + +struct ofmt of_coff = { + "COFF (i386) object files (e.g. 
DJGPP for DOS)", + "coff", + coff_std_init, + coff_out, + coff_deflabel, + coff_section_names, + coff_segbase, + coff_directives, + coff_std_filename, + coff_cleanup +}; + +#endif + +#ifdef OF_WIN32 + +struct ofmt of_win32 = { + "Microsoft Win32 (i386) object files", + "win32", + coff_win32_init, + coff_out, + coff_deflabel, + coff_section_names, + coff_segbase, + coff_directives, + coff_win32_filename, + coff_cleanup +}; + +#endif diff --git a/outdbg.c b/outdbg.c new file mode 100644 index 00000000..a55d3db8 --- /dev/null +++ b/outdbg.c @@ -0,0 +1,138 @@ +/* outdbg.c output routines for the Netwide Assembler to produce + * a debugging trace + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include +#include +#include +#include + +#include "nasm.h" +#include "nasmlib.h" +#include "outform.h" + +#ifdef OF_DBG + +FILE *dbgf; +efunc dbgef; + +int segcode,segdata,segbss; + +static void dbg_init(FILE *fp, efunc errfunc, ldfunc ldef) +{ + dbgf = fp; + dbgef = errfunc; + (void) ldef; + segcode = seg_alloc(); + segdata = seg_alloc(); + segbss = seg_alloc(); + fprintf(fp,"NASM Output format debug dump - code=%d,data=%d,bss=%d\n", + segcode,segdata,segbss); +} + +static void dbg_cleanup(void) +{ + fclose(dbgf); +} + +static long dbg_section_names (char *name, int pass, int *bits) +{ + /* + * We must have an initial default: let's make it 16. + */ + if (!name) + *bits = 16; + + if (!name) + return 0; + + if (!strcmp(name, ".text")) + return segcode; + else if (!strcmp(name, ".data")) + return segdata; + else if (!strcmp(name, ".bss")) + return segbss; + else + return NO_SEG; +} + +static void dbg_deflabel (char *name, long segment, long offset, + int is_global) { + fprintf(dbgf,"deflabel %s := %08lx:%08lx %s (%d)\n",name,segment,offset, + is_global ? 
"global" : "local", is_global); +} + +static void dbg_out (long segto, void *data, unsigned long type, + long segment, long wrt) { + long realbytes = type & OUT_SIZMASK; + long ldata; + int id; + + type &= OUT_TYPMASK; + + fprintf(dbgf,"out to %lx, len = %ld: ",segto,realbytes); + + switch(type) { + case OUT_RESERVE: + fprintf(dbgf,"reserved.\n"); break; + case OUT_RAWDATA: + fprintf(dbgf,"raw data = "); + while (realbytes--) { + id = *(unsigned char *)data; + data = (char *)data + 1; + fprintf(dbgf,"%02x ",id); + } + fprintf(dbgf,"\n"); break; + case OUT_ADDRESS: + ldata = 0; /* placate gcc */ + if (realbytes == 1) + ldata = *((char *)data); + else if (realbytes == 2) + ldata = *((short *)data); + else if (realbytes == 4) + ldata = *((long *)data); + fprintf(dbgf,"addr %08lx (seg %08lx, wrt %08lx)\n",ldata, + segment,wrt);break; + case OUT_REL2ADR: + fprintf(dbgf,"rel2adr %04x (seg %08lx)\n",(int)*(short *)data,segment); + break; + case OUT_REL4ADR: + fprintf(dbgf,"rel4adr %08lx (seg %08lx)\n",*(long *)data,segment); + break; + default: + fprintf(dbgf,"unknown\n"); + break; + } +} + +static long dbg_segbase(long segment) { + return segment; +} + +static int dbg_directive (char *directive, char *value, int pass) { + return 0; +} + +static void dbg_filename (char *inname, char *outname, efunc error) { + standard_extension (inname, outname, ".dbg", error); +} + +struct ofmt of_dbg = { + "Trace of all info passed to output stage", + "dbg", + dbg_init, + dbg_out, + dbg_deflabel, + dbg_section_names, + dbg_segbase, + dbg_directive, + dbg_filename, + dbg_cleanup +}; + +#endif /* OF_DBG */ diff --git a/outelf.c b/outelf.c new file mode 100644 index 00000000..b84bae3a --- /dev/null +++ b/outelf.c @@ -0,0 +1,620 @@ +/* outelf.c output routines for the Netwide Assembler to produce + * ELF32 (i386 of course) object file format + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. 
The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include +#include +#include +#include + +#include "nasm.h" +#include "nasmlib.h" +#include "outform.h" + +#ifdef OF_ELF + +struct Reloc { + struct Reloc *next; + long address; /* relative to _start_ of section */ + long symbol; /* ELF symbol info thingy */ + int relative; /* TRUE or FALSE */ +}; + +struct Symbol { + long strpos; /* string table position of name */ + long section; /* section ID of the symbol */ + int type; /* TRUE or FALSE */ + long value; /* address, or COMMON variable size */ +}; + +struct Section { + struct SAA *data; + unsigned long len, size, nrelocs; + long index; + struct Reloc *head, **tail; +}; + +static struct Section stext, sdata; +static unsigned long bsslen; +static long bssindex; + +static struct SAA *syms; +static unsigned long nlocals, nglobs; + +static struct RAA *bsym; + +static struct SAA *strs; +static unsigned long strslen; + +static FILE *elffp; +static efunc error; + +static char elf_module[FILENAME_MAX]; + +#define SHN_ABS 0xFFF1 +#define SHN_COMMON 0xFFF2 +#define SHN_UNDEF 0 + +#define SYM_SECTION 0x04 +#define SYM_GLOBAL 0x10 + +#define GLOBAL_TEMP_BASE 6 /* bigger than any constant sym id */ + +#define SEG_ALIGN 16 /* alignment of sections in file */ +#define SEG_ALIGN_1 (SEG_ALIGN-1) + +static const char align_str[SEG_ALIGN] = ""; /* ANSI will pad this with 0s */ + +#define ELF_MAX_SECTIONS 16 /* really 10, but let's play safe */ +static struct ELF_SECTDATA { + void *data; + long len; + int is_saa; +} elf_sects[ELF_MAX_SECTIONS]; +static int elf_nsect; +static long elf_foffs; + +static void elf_write(void); +static void elf_sect_write(struct Section *, unsigned char *, unsigned long); +static void elf_section_header (int, int, int, void *, int, long, + int, int, int, int); +static void elf_write_sections (void); +static struct SAA *elf_build_symtab (long *, long *); +static struct SAA 
*elf_build_reltab (long *, struct Reloc *); + +static void elf_init(FILE *fp, efunc errfunc, ldfunc ldef) { + elffp = fp; + error = errfunc; + (void) ldef; /* placate optimisers */ + stext.data = saa_init(1L); stext.head = NULL; stext.tail = &stext.head; + sdata.data = saa_init(1L); sdata.head = NULL; sdata.tail = &sdata.head; + stext.len = stext.size = sdata.len = sdata.size = bsslen = 0; + stext.nrelocs = sdata.nrelocs = 0; + stext.index = seg_alloc(); + sdata.index = seg_alloc(); + bssindex = seg_alloc(); + syms = saa_init((long)sizeof(struct Symbol)); + nlocals = nglobs = 0; + bsym = raa_init(); + + strs = saa_init(1L); + saa_wbytes (strs, "\0", 1L); + saa_wbytes (strs, elf_module, (long)(strlen(elf_module)+1)); + strslen = 2+strlen(elf_module); +} + +static void elf_cleanup(void) { + struct Reloc *r; + + elf_write(); + fclose (elffp); + saa_free (stext.data); + while (stext.head) { + r = stext.head; + stext.head = stext.head->next; + nasm_free (r); + } + saa_free (sdata.data); + while (sdata.head) { + r = sdata.head; + sdata.head = sdata.head->next; + nasm_free (r); + } + saa_free (syms); + raa_free (bsym); + saa_free (strs); +} + +static long elf_section_names (char *name, int pass, int *bits) { + /* + * Default is 32 bits. + */ + if (!name) + *bits = 32; + + if (!name) + return stext.index; + + if (!strcmp(name, ".text")) + return stext.index; + else if (!strcmp(name, ".data")) + return sdata.index; + else if (!strcmp(name, ".bss")) + return bssindex; + else + return NO_SEG; +} + +static void elf_deflabel (char *name, long segment, long offset, + int is_global) { + int pos = strslen; + struct Symbol *sym; + + if (name[0] == '.' && name[1] == '.') { + return; + } + + saa_wbytes (strs, name, (long)(1+strlen(name))); + strslen += 1+strlen(name); + + sym = saa_wstruct (syms); + + sym->strpos = pos; + sym->type = is_global ? 
SYM_GLOBAL : 0; + if (segment == NO_SEG) + sym->section = SHN_ABS; + else if (segment == stext.index) + sym->section = 1; + else if (segment == sdata.index) + sym->section = 2; + else if (segment == bssindex) + sym->section = 3; + else + sym->section = SHN_UNDEF; + + if (is_global == 2) { + sym->value = offset; + sym->section = SHN_COMMON; + } else + sym->value = (sym->section == SHN_UNDEF ? 0 : offset); + + if (sym->type == SYM_GLOBAL) { + if (sym->section == SHN_UNDEF || sym->section == SHN_COMMON) + bsym = raa_write (bsym, segment, nglobs); + nglobs++; + } else + nlocals++; +} + +static void elf_add_reloc (struct Section *sect, long segment, + int relative) { + struct Reloc *r; + + r = *sect->tail = nasm_malloc(sizeof(struct Reloc)); + sect->tail = &r->next; + r->next = NULL; + + r->address = sect->len; + r->symbol = (segment == NO_SEG ? 5 : + segment == stext.index ? 2 : + segment == sdata.index ? 3 : + segment == bssindex ? 4 : + GLOBAL_TEMP_BASE + raa_read(bsym, segment)); + r->relative = relative; + + sect->nrelocs++; +} + +static void elf_out (long segto, void *data, unsigned long type, + long segment, long wrt) { + struct Section *s; + long realbytes = type & OUT_SIZMASK; + unsigned char mydata[4], *p; + + if (wrt != NO_SEG) { + wrt = NO_SEG; /* continue to do _something_ */ + error (ERR_NONFATAL, "WRT not supported by ELF output format"); + } + + type &= OUT_TYPMASK; + + /* + * handle absolute-assembly (structure definitions) + */ + if (segto == NO_SEG) { + if (type != OUT_RESERVE) + error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]" + " space"); + return; + } + + if (segto == stext.index) + s = &stext; + else if (segto == sdata.index) + s = &sdata; + else if (segto == bssindex) + s = NULL; + else { + error(ERR_WARNING, "attempt to assemble code in" + " segment %d: defaulting to `.text'", segto); + s = &stext; + } + + if (!s && type != OUT_RESERVE) { + error(ERR_WARNING, "attempt to initialise memory in the" + " BSS section: ignored"); + if 
(type == OUT_REL2ADR) + realbytes = 2; + else if (type == OUT_REL4ADR) + realbytes = 4; + bsslen += realbytes; + return; + } + + if (type == OUT_RESERVE) { + if (s) { + error(ERR_WARNING, "uninitialised space declared in" + " %s section: zeroing", + (segto == stext.index ? "code" : "data")); + elf_sect_write (s, NULL, realbytes); + } else + bsslen += realbytes; + } else if (type == OUT_RAWDATA) { + if (segment != NO_SEG) + error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG"); + elf_sect_write (s, data, realbytes); + } else if (type == OUT_ADDRESS) { + if (wrt != NO_SEG) + error(ERR_NONFATAL, "ELF format does not support WRT types"); + if (segment != NO_SEG) { + if (segment % 2) { + error(ERR_NONFATAL, "ELF format does not support" + " segment base references"); + } else + elf_add_reloc (s, segment, FALSE); + } + p = mydata; + if (realbytes == 2 && segment != NO_SEG) + error (ERR_NONFATAL, "ELF format does not support 16-bit" + " relocations"); + WRITELONG (p, *(long *)data); + elf_sect_write (s, mydata, realbytes); + } else if (type == OUT_REL2ADR) { + error (ERR_NONFATAL, "ELF format does not support 16-bit" + " relocations"); + } else if (type == OUT_REL4ADR) { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL4ADR"); + if (segment != NO_SEG && segment % 2) { + error(ERR_NONFATAL, "ELF format does not support" + " segment base references"); + } else + elf_add_reloc (s, segment, TRUE); + p = mydata; + WRITELONG (p, *(long*)data - realbytes); + elf_sect_write (s, mydata, 4L); + } +} + +static void elf_write(void) { + int nsections, align; + char shstrtab[80], *p; + int shstrtablen, commlen; + char comment[64]; + + struct SAA *symtab, *reltext, *reldata; + long symtablen, symtablocal, reltextlen, reldatalen; + + /* + * Work out how many sections we will have. 
+ * + * Fixed sections are: + * SHN_UNDEF .text .data .bss .comment .shstrtab .symtab .strtab + * + * Optional sections are: + * .rel.text .rel.data + * + * (.rel.bss makes very little sense;-) + */ + nsections = 8; + *shstrtab = '\0'; + shstrtablen = 1; + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".text"); + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".data"); + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".bss"); + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".comment"); + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".shstrtab"); + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".symtab"); + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".strtab"); + if (stext.head) { + nsections++; + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".rel.text"); + } + if (sdata.head) { + nsections++; + shstrtablen += 1+sprintf(shstrtab+shstrtablen, ".rel.data"); + } + + /* + * Do the comment. + */ + *comment = '\0'; + commlen = 2+sprintf(comment+1, "The Netwide Assembler %s", NASM_VER); + + /* + * Output the ELF header. 
+ */ + fwrite ("\177ELF\1\1\1\0\0\0\0\0\0\0\0\0", 16, 1, elffp); + fwriteshort (1, elffp); /* ET_REL relocatable file */ + fwriteshort (3, elffp); /* EM_386 processor ID */ + fwritelong (1L, elffp); /* EV_CURRENT file format version */ + fwritelong (0L, elffp); /* no entry point */ + fwritelong (0L, elffp); /* no program header table */ + fwritelong (0x40L, elffp); /* section headers straight after + * ELF header plus alignment */ + fwritelong (0L, elffp); /* 386 defines no special flags */ + fwriteshort (0x34, elffp); /* size of ELF header */ + fwriteshort (0, elffp); /* no program header table, again */ + fwriteshort (0, elffp); /* still no program header table */ + fwriteshort (0x28, elffp); /* size of section header */ + fwriteshort (nsections, elffp); /* number of sections */ + fwriteshort (5, elffp); /* string table section index for + * section header table */ + fwritelong (0L, elffp); /* align to 0x40 bytes */ + fwritelong (0L, elffp); + fwritelong (0L, elffp); + + /* + * Build the symbol table and relocation tables. + */ + symtab = elf_build_symtab (&symtablen, &symtablocal); + reltext = elf_build_reltab (&reltextlen, stext.head); + reldata = elf_build_reltab (&reldatalen, sdata.head); + + /* + * Now output the section header table. 
+ */ + + elf_foffs = 0x40 + 0x28 * nsections; + align = ((elf_foffs+SEG_ALIGN_1) & ~SEG_ALIGN_1) - elf_foffs; + elf_foffs += align; + elf_nsect = 0; + + elf_section_header (0, 0, 0, NULL, FALSE, 0L, 0, 0, 0, 0); /* SHN_UNDEF */ + p = shstrtab+1; + elf_section_header (p - shstrtab, 1, 6, stext.data, TRUE, + stext.len, 0, 0, 16, 0); /* .text */ + p += strlen(p)+1; + elf_section_header (p - shstrtab, 1, 3, sdata.data, TRUE, + sdata.len, 0, 0, 4, 0); /* .data */ + p += strlen(p)+1; + elf_section_header (p - shstrtab, 8, 3, NULL, TRUE, + bsslen, 0, 0, 4, 0); /* .bss */ + p += strlen(p)+1; + elf_section_header (p - shstrtab, 1, 0, comment, FALSE, + (long)commlen, 0, 0, 1, 0);/* .comment */ + p += strlen(p)+1; + elf_section_header (p - shstrtab, 3, 0, shstrtab, FALSE, + (long)shstrtablen, 0, 0, 1, 0);/* .shstrtab */ + p += strlen(p)+1; + elf_section_header (p - shstrtab, 2, 0, symtab, TRUE, + symtablen, 7, symtablocal, 4, 16);/* .symtab */ + p += strlen(p)+1; + elf_section_header (p - shstrtab, 3, 0, strs, TRUE, + strslen, 0, 0, 1, 0); /* .strtab */ + if (reltext) { + p += strlen(p)+1; + elf_section_header (p - shstrtab, 9, 0, reltext, TRUE, + reltextlen, 6, 1, 4, 8); /* .rel.text */ + } + if (reldata) { + p += strlen(p)+1; + elf_section_header (p - shstrtab, 9, 0, reldata, TRUE, + reldatalen, 6, 2, 4, 8); /* .rel.data */ + } + + fwrite (align_str, align, 1, elffp); + + /* + * Now output the sections. + */ + elf_write_sections(); + + saa_free (symtab); + if (reltext) + saa_free (reltext); + if (reldata) + saa_free (reldata); +} + +static struct SAA *elf_build_symtab (long *len, long *local) { + struct SAA *s = saa_init(1L); + struct Symbol *sym; + unsigned char entry[16], *p; + int i; + + *len = *local = 0; + + /* + * First, an all-zeros entry, required by the ELF spec. + */ + saa_wbytes (s, NULL, 16L); /* null symbol table entry */ + *len += 16; + (*local)++; + + /* + * Next, an entry for the file name. 
+ */ + p = entry; + WRITELONG (p, 1); /* we know it's 1st thing in strtab */ + WRITELONG (p, 0); /* no value */ + WRITELONG (p, 0); /* no size either */ + WRITESHORT (p, 4); /* type FILE */ + WRITESHORT (p, SHN_ABS); + saa_wbytes (s, entry, 16L); + *len += 16; + (*local)++; + + /* + * Now four standard symbols defining segments, for relocation + * purposes. + */ + for (i = 1; i <= 4; i++) { + p = entry; + WRITELONG (p, 0); /* no symbol name */ + WRITELONG (p, 0); /* offset zero */ + WRITELONG (p, 0); /* size zero */ + WRITESHORT (p, 3); /* local section-type thing */ + WRITESHORT (p, (i==4 ? SHN_ABS : i)); /* the section id */ + saa_wbytes (s, entry, 16L); + *len += 16; + (*local)++; + } + + /* + * Now the other local symbols. + */ + saa_rewind (syms); + while ( (sym = saa_rstruct (syms)) ) { + if (sym->type == SYM_GLOBAL) + continue; + p = entry; + WRITELONG (p, sym->strpos); + WRITELONG (p, sym->value); + if (sym->section == SHN_COMMON) + WRITELONG (p, sym->value); + else + WRITELONG (p, 0); + WRITESHORT (p, 0); /* local non-typed thing */ + WRITESHORT (p, sym->section); + saa_wbytes (s, entry, 16L); + *len += 16; + (*local)++; + } + + /* + * Now the global symbols. + */ + saa_rewind (syms); + while ( (sym = saa_rstruct (syms)) ) { + if (sym->type != SYM_GLOBAL) + continue; + p = entry; + WRITELONG (p, sym->strpos); + WRITELONG (p, sym->value); + if (sym->section == SHN_COMMON) + WRITELONG (p, sym->value); + else + WRITELONG (p, 0); + WRITESHORT (p, SYM_GLOBAL); /* global non-typed thing */ + WRITESHORT (p, sym->section); + saa_wbytes (s, entry, 16L); + *len += 16; + } + + return s; +} + +static struct SAA *elf_build_reltab (long *len, struct Reloc *r) { + struct SAA *s; + unsigned char *p, entry[8]; + + if (!r) + return NULL; + + s = saa_init(1L); + *len = 0; + + while (r) { + long sym = r->symbol; + + if (sym >= GLOBAL_TEMP_BASE) + sym += -GLOBAL_TEMP_BASE + 6 + nlocals; + + p = entry; + WRITELONG (p, r->address); + WRITELONG (p, (sym << 8) + (r->relative ? 
2 : 1)); + saa_wbytes (s, entry, 8L); + *len += 8; + + r = r->next; + } + + return s; +} + +static void elf_section_header (int name, int type, int flags, + void *data, int is_saa, long datalen, + int link, int info, int align, int eltsize) { + elf_sects[elf_nsect].data = data; + elf_sects[elf_nsect].len = datalen; + elf_sects[elf_nsect].is_saa = is_saa; + elf_nsect++; + + fwritelong ((long)name, elffp); + fwritelong ((long)type, elffp); + fwritelong ((long)flags, elffp); + fwritelong (0L, elffp); /* no address, ever, in object files */ + fwritelong (type == 0 ? 0L : elf_foffs, elffp); + fwritelong (datalen, elffp); + if (data) + elf_foffs += (datalen+SEG_ALIGN_1) & ~SEG_ALIGN_1; + fwritelong ((long)link, elffp); + fwritelong ((long)info, elffp); + fwritelong ((long)align, elffp); + fwritelong ((long)eltsize, elffp); +} + +static void elf_write_sections (void) { + int i; + for (i = 0; i < elf_nsect; i++) + if (elf_sects[i].data) { + long len = elf_sects[i].len; + long reallen = (len+SEG_ALIGN_1) & ~SEG_ALIGN_1; + long align = reallen - len; + if (elf_sects[i].is_saa) + saa_fpwrite (elf_sects[i].data, elffp); + else + fwrite (elf_sects[i].data, len, 1, elffp); + fwrite (align_str, align, 1, elffp); + } +} + +static void elf_sect_write (struct Section *sect, + unsigned char *data, unsigned long len) { + saa_wbytes (sect->data, data, len); + sect->len += len; +} + +static long elf_segbase (long segment) { + return segment; +} + +static int elf_directive (char *directive, char *value, int pass) { + return 0; +} + +static void elf_filename (char *inname, char *outname, efunc error) { + strcpy(elf_module, inname); + standard_extension (inname, outname, ".o", error); +} + +struct ofmt of_elf = { + "ELF32 (i386) object files (e.g. 
Linux)", + "elf", + elf_init, + elf_out, + elf_deflabel, + elf_section_names, + elf_segbase, + elf_directive, + elf_filename, + elf_cleanup +}; + +#endif /* OF_ELF */ diff --git a/outform.c b/outform.c new file mode 100644 index 00000000..154c63f7 --- /dev/null +++ b/outform.c @@ -0,0 +1,42 @@ +/* outform.c manages a list of output formats, and associates + * them with their relevant drivers. Also has a + * routine to find the correct driver given a name + * for it + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include +#include +#include "outform.h" + +static struct ofmt *drivers[MAX_OUTPUT_FORMATS]; +static int ndrivers = 0; + +struct ofmt *ofmt_find(char *name) /* find driver */ +{ + int i; + + /* Linear search of the registered drivers by short name; NULL when nothing matches. NOTE(review): the loop header and the start of the comparison were lost in extraction and are restored here from the surviving tokens - confirm against the upstream file. */ for (i=0; i<ndrivers; i++) + if (!strcmp(name, drivers[i]->shortname)) + return drivers[i]; + + return NULL; +} + +void ofmt_list(struct ofmt *deffmt) +{ + int i; + /* NOTE(review): everything between the loop's '<' and the first '->' is missing from this copy (extraction damage), including a call and its format string; it cannot be rebuilt from this file - restore from upstream. */ for (i=0; ishortname, + drivers[i]->fullname); +} + +void ofmt_register (struct ofmt *info) { /* Add a driver to the table searched by ofmt_find. */ + if (ndrivers < MAX_OUTPUT_FORMATS) /* drivers[] has MAX_OUTPUT_FORMATS slots: a registration beyond that is dropped instead of being written past the end of the array (the void interface gives no way to report it) */ drivers[ndrivers++] = info; +} diff --git a/outform.h b/outform.h new file mode 100644 index 00000000..48b8276a --- /dev/null +++ b/outform.h @@ -0,0 +1,167 @@ +/* outform.h header file for binding output format drivers to the + * remainder of the code in the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +/* + * This header file allows configuration of which output formats + * get compiled into the NASM binary. You can configure by defining + * various preprocessor symbols beginning with "OF_", either on the + * compiler command line or at the top of this file. 
+ * + * OF_ONLY -- only include specified object formats + * OF_name -- ensure that output format 'name' is included + * OF_NO_name -- remove output format 'name' + * OF_DOS -- ensure that 'obj', 'bin' & 'win32' are included. + * OF_UNIX -- ensure that 'aout', 'coff' and 'elf' are in. + * OF_OTHERS -- ensure that 'bin', 'as86' & 'rdf' are in. + * OF_ALL -- ensure that all formats are included. + * + * OF_DEFAULT=of_name -- ensure that 'name' is the default format. + * + * eg: -DOF_UNIX -DOF_ELF -DOF_DEFAULT=of_elf would be a suitable config + * for an average linux system. + * + * Default config = -DOF_ALL -DOF_DEFAULT=of_bin + * + * You probably only want to set these options while compiling 'nasm.c'. */ + +#ifndef NASM_OUTFORM_H +#define NASM_OUTFORM_H + +#include "nasm.h" + +#define MAX_OUTPUT_FORMATS 16 + +struct ofmt *ofmt_find(char *name); +void ofmt_list(struct ofmt *deffmt); +void ofmt_register (struct ofmt *); + +/* -------------- USER MODIFIABLE PART ---------------- */ + +/* + * Insert #defines here in accordance with the configuration + * instructions above. + * + * E.g. + * + * #define OF_ONLY + * #define OF_OBJ + * #define OF_BIN + * + * for a 16-bit DOS assembler with no extraneous formats. + */ + +/* ------------ END USER MODIFIABLE PART -------------- */ + +/* ====configurable info begins here==== */ +/* formats configurable: + * bin,obj,elf,aout,coff,win32,as86,rdf */ + +/* process options... */ + +#ifndef OF_ONLY +#ifndef OF_ALL +#define OF_ALL /* default is to have all formats */ +#endif +#endif + +#ifdef OF_ALL /* set all formats on... */ +#ifndef OF_BIN +#define OF_BIN +#endif +#ifndef OF_OBJ +#define OF_OBJ +#endif +#ifndef OF_ELF +#define OF_ELF +#endif +#ifndef OF_COFF +#define OF_COFF +#endif +#ifndef OF_AOUT +#define OF_AOUT +#endif +#ifndef OF_WIN32 +#define OF_WIN32 +#endif +#ifndef OF_AS86 +#define OF_AS86 +#endif +#ifndef OF_RDF +#define OF_RDF +#endif +#endif /* OF_ALL */ + +/* turn on groups of formats specified.... 
*/ +#ifdef OF_DOS +#ifndef OF_OBJ +#define OF_OBJ +#endif +#ifndef OF_BIN +#define OF_BIN +#endif +#ifndef OF_WIN32 +#define OF_WIN32 +#endif +#endif + +#ifdef OF_UNIX +#ifndef OF_AOUT +#define OF_AOUT +#endif +#ifndef OF_COFF +#define OF_COFF +#endif +#ifndef OF_ELF +#define OF_ELF +#endif +#endif + +#ifdef OF_OTHERS +#ifndef OF_BIN +#define OF_BIN +#endif +#ifndef OF_AS86 +#define OF_AS86 +#endif +#ifndef OF_RDF +#define OF_RDF +#endif +#endif + +/* finally... override any format specifically specifed to be off */ +#ifdef OF_NO_BIN +#undef OF_BIN +#endif +#ifdef OF_NO_OBJ +#undef OF_OBJ +#endif +#ifdef OF_NO_ELF +#undef OF_ELF +#endif +#ifdef OF_NO_AOUT +#undef OF_AOUT +#endif +#ifdef OF_NO_COFF +#undef OF_COFF +#endif +#ifdef OF_NO_WIN32 +#undef OF_WIN32 +#endif +#ifdef OF_NO_AS86 +#undef OF_AS86 +#endif +#ifdef OF_NO_RDF +#undef OF_RDF +#endif + +#ifndef OF_DEFAULT +#define OF_DEFAULT of_bin +#endif + +#endif /* NASM_OUTFORM_H */ diff --git a/outobj.c b/outobj.c new file mode 100644 index 00000000..b33b72de --- /dev/null +++ b/outobj.c @@ -0,0 +1,1229 @@ +/* outobj.c output routines for the Netwide Assembler to produce + * Microsoft 16-bit .OBJ object files + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#include +#include +#include +#include + +#include "nasm.h" +#include "nasmlib.h" +#include "outform.h" + +#ifdef OF_OBJ + +static char obj_infile[FILENAME_MAX]; +static int obj_uppercase; + +static efunc error; +static ldfunc deflabel; +static FILE *ofp; +static long first_seg; +static int any_segs; + +#define LEDATA_MAX 1024 /* maximum size of LEDATA record */ +#define RECORD_MAX 1024 /* maximum size of _any_ record */ +#define GROUP_MAX 256 /* we won't _realistically_ have more + * than this many segs in a group */ +#define EXT_BLKSIZ 256 /* block size for externals list */ + +static unsigned char record[RECORD_MAX], *recptr; + +static struct Public { + struct Public *next; + char *name; + long offset; + long segment; /* only if it's far-absolute */ +} *fpubhead, **fpubtail; + +static struct External { + struct External *next; + char *name; + long commonsize; +} *exthead, **exttail; + +static int externals; + +static struct ExtBack { + struct ExtBack *next; + int index[EXT_BLKSIZ]; +} *ebhead, **ebtail; + +static struct Segment { + struct Segment *next; + long index; /* the NASM segment id */ + long obj_index; /* the OBJ-file segment index */ + struct Group *grp; /* the group it belongs to */ + long currentpos; + long align; /* can be SEG_ABS + absolute addr */ + enum { + CMB_PRIVATE = 0, + CMB_PUBLIC = 2, + CMB_STACK = 5, + CMB_COMMON = 6 + } combine; + long use32; /* is this segment 32-bit? */ + struct Public *pubhead, **pubtail; + char *name; + char *segclass, *overlay; /* `class' is a C++ keyword :-) */ +} *seghead, **segtail, *obj_seg_needs_update; + +static struct Group { + struct Group *next; + char *name; + long index; /* NASM segment id */ + long obj_index; /* OBJ-file group index */ + long nentries; /* number of elements... */ + long nindices; /* ...and number of index elts... 
*/ + union { + long index; + char *name; + } segs[GROUP_MAX]; /* ...in this */ +} *grphead, **grptail, *obj_grp_needs_update; + +static struct ObjData { + struct ObjData *next; + int nonempty; + struct Segment *seg; + long startpos; + int letype, ftype; + unsigned char ledata[LEDATA_MAX], *lptr; + unsigned char fixupp[RECORD_MAX], *fptr; +} *datahead, *datacurr, **datatail; + +static long obj_entry_seg, obj_entry_ofs; + +enum RecordID { /* record ID codes */ + + THEADR = 0x80, /* module header */ + COMENT = 0x88, /* comment record */ + + LNAMES = 0x96, /* list of names */ + + SEGDEF = 0x98, /* segment definition */ + GRPDEF = 0x9A, /* group definition */ + EXTDEF = 0x8C, /* external definition */ + PUBDEF = 0x90, /* public definition */ + COMDEF = 0xB0, /* common definition */ + + LEDATA = 0xA0, /* logical enumerated data */ + FIXUPP = 0x9C, /* fixups (relocations) */ + + MODEND = 0x8A /* module end */ +}; + +extern struct ofmt of_obj; + +static long obj_ledata_space(struct Segment *); +static int obj_fixup_free(struct Segment *); +static void obj_ledata_new(struct Segment *); +static void obj_ledata_commit(void); +static void obj_write_fixup (struct ObjData *, int, int, long, long, long); +static long obj_segment (char *, int, int *); +static void obj_write_file(void); +static unsigned char *obj_write_data(unsigned char *, unsigned char *, int); +static unsigned char *obj_write_byte(unsigned char *, int); +static unsigned char *obj_write_word(unsigned char *, int); +static unsigned char *obj_write_dword(unsigned char *, long); +static unsigned char *obj_write_rword(unsigned char *, int); +static unsigned char *obj_write_name(unsigned char *, char *); +static unsigned char *obj_write_index(unsigned char *, int); +static unsigned char *obj_write_value(unsigned char *, unsigned long); +static void obj_record(int, unsigned char *, unsigned char *); + +static void obj_init (FILE *fp, efunc errfunc, ldfunc ldef) { + ofp = fp; + error = errfunc; + deflabel = ldef; + 
first_seg = seg_alloc(); + any_segs = FALSE; + fpubhead = NULL; + fpubtail = &fpubhead; + exthead = NULL; + exttail = &exthead; + externals = 0; + ebhead = NULL; + ebtail = &ebhead; + seghead = obj_seg_needs_update = NULL; + segtail = &seghead; + grphead = obj_grp_needs_update = NULL; + grptail = &grphead; + datahead = datacurr = NULL; + datatail = &datahead; + obj_entry_seg = NO_SEG; + obj_uppercase = FALSE; +} + +static void obj_cleanup (void) { + obj_write_file(); + fclose (ofp); + while (seghead) { + struct Segment *segtmp = seghead; + seghead = seghead->next; + while (segtmp->pubhead) { + struct Public *pubtmp = segtmp->pubhead; + segtmp->pubhead = pubtmp->next; + nasm_free (pubtmp); + } + nasm_free (segtmp); + } + while (fpubhead) { + struct Public *pubtmp = fpubhead; + fpubhead = fpubhead->next; + nasm_free (pubtmp); + } + while (exthead) { + struct External *exttmp = exthead; + exthead = exthead->next; + nasm_free (exttmp); + } + while (ebhead) { + struct ExtBack *ebtmp = ebhead; + ebhead = ebhead->next; + nasm_free (ebtmp); + } + while (grphead) { + struct Group *grptmp = grphead; + grphead = grphead->next; + nasm_free (grptmp); + } + while (datahead) { + struct ObjData *datatmp = datahead; + datahead = datahead->next; + nasm_free (datatmp); + } +} + +static void obj_deflabel (char *name, long segment, + long offset, int is_global) { + /* + * We have three cases: + * + * (i) `segment' is a segment-base. If so, set the name field + * for the segment or group structure it refers to, and then + * return. + * + * (ii) `segment' is one of our segments, or a SEG_ABS segment. + * Save the label position for later output of a PUBDEF record. + * (Or a MODPUB, if we work out how.) + * + * (iii) `segment' is not one of our segments. Save the label + * position for later output of an EXTDEF, and also store a + * back-reference so that we can map later references to this + * segment number to the external index. 
+ */ + struct External *ext; + struct ExtBack *eb; + struct Segment *seg; + int i; + + /* + * First check for the double-period, signifying something + * unusual. + */ + if (name[0] == '.' && name[1] == '.') { + if (!strcmp(name, "..start")) { + obj_entry_seg = segment; + obj_entry_ofs = offset; + } + return; + } + + /* + * Case (i): + */ + if (obj_seg_needs_update) { + obj_seg_needs_update->name = name; + return; + } else if (obj_grp_needs_update) { + obj_grp_needs_update->name = name; + return; + } + if (segment < SEG_ABS && segment != NO_SEG && segment % 2) + return; + + if (segment >= SEG_ABS) { + /* + * SEG_ABS subcase of (ii). + */ + if (is_global) { + struct Public *pub; + + pub = *fpubtail = nasm_malloc(sizeof(*pub)); + fpubtail = &pub->next; + pub->next = NULL; + pub->name = name; + pub->offset = offset; + pub->segment = segment & ~SEG_ABS; + } + return; + } + + for (seg = seghead; seg; seg = seg->next) + if (seg->index == segment) { + /* + * Case (ii). Maybe MODPUB someday? + */ + if (is_global) { + struct Public *pub; + + pub = *seg->pubtail = nasm_malloc(sizeof(*pub)); + seg->pubtail = &pub->next; + pub->next = NULL; + pub->name = name; + pub->offset = offset; + } + return; + } + + /* + * Case (iii). 
+ */ + ext = *exttail = nasm_malloc(sizeof(*ext)); + ext->next = NULL; + exttail = &ext->next; + ext->name = name; + if (is_global == 2) + ext->commonsize = offset; + else + ext->commonsize = 0; + + i = segment/2; + eb = ebhead; + if (!eb) { + eb = *ebtail = nasm_malloc(sizeof(*eb)); + eb->next = NULL; + ebtail = &eb->next; + } + while (i > EXT_BLKSIZ) { + if (eb && eb->next) + eb = eb->next; + else { + eb = *ebtail = nasm_malloc(sizeof(*eb)); + eb->next = NULL; + ebtail = &eb->next; + } + i -= EXT_BLKSIZ; + } + eb->index[i] = ++externals; +} + +static void obj_out (long segto, void *data, unsigned long type, + long segment, long wrt) { + long size, realtype; + unsigned char *ucdata; + long ldata; + struct Segment *seg; + + /* + * handle absolute-assembly (structure definitions) + */ + if (segto == NO_SEG) { + if ((type & OUT_TYPMASK) != OUT_RESERVE) + error (ERR_NONFATAL, "attempt to assemble code in [ABSOLUTE]" + " space"); + return; + } + + /* + * If `any_segs' is still FALSE, we must define a default + * segment. + */ + if (!any_segs) { + int tempint; /* ignored */ + if (segto != obj_segment("__NASMDEFSEG", 2, &tempint)) + error (ERR_PANIC, "strange segment conditions in OBJ driver"); + } + + /* + * Find the segment we are targetting. 
+ */ + for (seg = seghead; seg; seg = seg->next) + if (seg->index == segto) + break; + if (!seg) + error (ERR_PANIC, "code directed to nonexistent segment?"); + + size = type & OUT_SIZMASK; + realtype = type & OUT_TYPMASK; + if (realtype == OUT_RAWDATA) { + ucdata = data; + while (size > 0) { + long len = obj_ledata_space(seg); + if (len == 0) { + obj_ledata_new(seg); + len = obj_ledata_space(seg); + } + if (len > size) + len = size; + datacurr->lptr = obj_write_data (datacurr->lptr, ucdata, len); + datacurr->nonempty = TRUE; + ucdata += len; + size -= len; + seg->currentpos += len; + } + } else if (realtype == OUT_ADDRESS || realtype == OUT_REL2ADR || + realtype == OUT_REL4ADR) { + if (segment == NO_SEG && realtype != OUT_ADDRESS) + error(ERR_NONFATAL, "relative call to absolute address not" + " supported by OBJ format"); + if (segment >= SEG_ABS) + error(ERR_NONFATAL, "far-absolute relocations not supported" + " by OBJ format"); + ldata = *(long *)data; + if (realtype == OUT_REL2ADR) + ldata += (size-2); + if (realtype == OUT_REL4ADR) + ldata += (size-4); + if (obj_ledata_space(seg) < 4 || !obj_fixup_free(seg)) + obj_ledata_new(seg); + if (size == 2) + datacurr->lptr = obj_write_word (datacurr->lptr, ldata); + else + datacurr->lptr = obj_write_dword (datacurr->lptr, ldata); + datacurr->nonempty = TRUE; + if (segment != NO_SEG) + obj_write_fixup (datacurr, size, + (realtype == OUT_REL2ADR ? 
0 : 0x4000), + segment, wrt, + (seg->currentpos - datacurr->startpos)); + seg->currentpos += size; + } else if (realtype == OUT_RESERVE) { + obj_ledata_commit(); + seg->currentpos += size; + } +} + +static long obj_ledata_space(struct Segment *segto) { + if (datacurr && datacurr->seg == segto) + return datacurr->ledata + LEDATA_MAX - datacurr->lptr; + else + return 0; +} + +static int obj_fixup_free(struct Segment *segto) { + if (datacurr && datacurr->seg == segto) + return (datacurr->fixupp + RECORD_MAX - datacurr->fptr) > 8; + else + return 0; +} + +static void obj_ledata_new(struct Segment *segto) { + datacurr = *datatail = nasm_malloc(sizeof(*datacurr)); + datacurr->next = NULL; + datatail = &datacurr->next; + datacurr->nonempty = FALSE; + datacurr->lptr = datacurr->ledata; + datacurr->fptr = datacurr->fixupp; + datacurr->seg = segto; + if (segto->use32) + datacurr->letype = LEDATA+1; + else + datacurr->letype = LEDATA; + datacurr->startpos = segto->currentpos; + datacurr->ftype = FIXUPP; + + datacurr->lptr = obj_write_index (datacurr->lptr, segto->obj_index); + if (datacurr->letype == LEDATA) + datacurr->lptr = obj_write_word (datacurr->lptr, segto->currentpos); + else + datacurr->lptr = obj_write_dword (datacurr->lptr, segto->currentpos); +} + +static void obj_ledata_commit(void) { + datacurr = NULL; +} + +static void obj_write_fixup (struct ObjData *data, int bytes, + int segrel, long seg, long wrt, + long offset) { + int locat, method; + int base; + long tidx, fidx; + struct Segment *s = NULL; + struct Group *g = NULL; + + locat = 0x8000 | segrel | offset; + if (seg % 2) { + base = TRUE; + locat |= 0x800; + seg--; + if (bytes != 2) + error(ERR_NONFATAL, "OBJ format can only handle 2-byte" + " segment base references"); + } else { + base = FALSE; + if (bytes == 2) + locat |= 0x400; + else { + locat |= 0x2400; + data->ftype = FIXUPP+1; /* need new-style FIXUPP record */ + } + } + data->fptr = obj_write_rword (data->fptr, locat); + + tidx = fidx = -1, method = 
0; /* placate optimisers */ + + /* + * See if we can find the segment ID in our segment list. If + * so, we have a T4 (LSEG) target. + */ + for (s = seghead; s; s = s->next) + if (s->index == seg) + break; + if (s) + method = 4, tidx = s->obj_index; + else { + for (g = grphead; g; g = g->next) + if (g->index == seg) + break; + if (g) + method = 5, tidx = g->obj_index; + else { + long i = seg/2; + struct ExtBack *eb = ebhead; + while (i > EXT_BLKSIZ) { + if (eb) + eb = eb->next; + else + break; + i -= EXT_BLKSIZ; + } + if (eb) + method = 6, tidx = eb->index[i]; + else + error(ERR_PANIC, + "unrecognised segment value in obj_write_fixup"); + } + } + + /* + * If no WRT given, assume the natural default, which is method + * F5 unless we are doing an OFFSET fixup for a grouped + * segment, in which case we require F1 (group). + */ + if (wrt == NO_SEG) { + if (!base && s && s->grp) + method |= 0x10, fidx = s->grp->obj_index; + else + method |= 0x50, fidx = -1; + } else { + /* + * See if we can find the WRT-segment ID in our segment + * list. If so, we have a F0 (LSEG) frame. 
+ */ + for (s = seghead; s; s = s->next) + if (s->index == wrt-1) + break; + if (s) + method |= 0x00, fidx = s->obj_index; + else { + for (g = grphead; g; g = g->next) + if (g->index == wrt-1) + break; + if (g) + method |= 0x10, fidx = g->obj_index; + else { + long i = wrt/2; + struct ExtBack *eb = ebhead; + while (i > EXT_BLKSIZ) { + if (eb) + eb = eb->next; + else + break; + i -= EXT_BLKSIZ; + } + if (eb) + method |= 0x20, fidx = eb->index[i]; + else + error(ERR_PANIC, + "unrecognised WRT value in obj_write_fixup"); + } + } + } + + data->fptr = obj_write_byte (data->fptr, method); + if (fidx != -1) + data->fptr = obj_write_index (data->fptr, fidx); + data->fptr = obj_write_index (data->fptr, tidx); +} + +static long obj_segment (char *name, int pass, int *bits) { + /* + * We call the label manager here to define a name for the new + * segment, and when our _own_ label-definition stub gets + * called in return, it should register the new segment name + * using the pointer it gets passed. That way we save memory, + * by sponging off the label manager. + */ + if (!name) { + *bits = 16; + return first_seg; + } else { + struct Segment *seg; + struct Group *grp; + int obj_idx, i, attrs, rn_error; + char *p; + + /* + * Look for segment attributes. + */ + attrs = 0; + p = name; + while (*p && !isspace(*p)) + p++; + if (*p) { + *p++ = '\0'; + while (*p && isspace(*p)) + *p++ = '\0'; + } + while (*p) { + while (*p && !isspace(*p)) + p++; + if (*p) { + *p++ = '\0'; + while (*p && isspace(*p)) + *p++ = '\0'; + } + + attrs++; + } + + obj_idx = 1; + for (seg = seghead; seg; seg = seg->next) { + obj_idx++; + if (!strcmp(seg->name, name)) { + if (attrs > 0 && pass == 1) + error(ERR_WARNING, "segment attributes specified on" + " redeclaration of segment: ignoring"); + if (seg->use32) + *bits = 32; + else + *bits = 16; + return seg->index; + } + } + + *segtail = seg = nasm_malloc(sizeof(*seg)); + seg->next = NULL; + segtail = &seg->next; + seg->index = (any_segs ? 
seg_alloc() : first_seg); + seg->obj_index = obj_idx; + seg->grp = NULL; + any_segs = TRUE; + seg->name = NULL; + seg->currentpos = 0; + seg->align = 1; /* default */ + seg->use32 = FALSE; /* default */ + seg->combine = CMB_PUBLIC; /* default */ + seg->segclass = seg->overlay = NULL; + seg->pubhead = NULL; + seg->pubtail = &seg->pubhead; + + /* + * Process the segment attributes. + */ + p = name; + while (attrs--) { + p += strlen(p); + while (!*p) p++; + + /* + * `p' contains a segment attribute. + */ + if (!nasm_stricmp(p, "private")) + seg->combine = CMB_PRIVATE; + else if (!nasm_stricmp(p, "public")) + seg->combine = CMB_PUBLIC; + else if (!nasm_stricmp(p, "common")) + seg->combine = CMB_COMMON; + else if (!nasm_stricmp(p, "stack")) + seg->combine = CMB_STACK; + else if (!nasm_stricmp(p, "use16")) + seg->use32 = FALSE; + else if (!nasm_stricmp(p, "use32")) + seg->use32 = TRUE; + else if (!nasm_strnicmp(p, "class=", 6)) + seg->segclass = nasm_strdup(p+6); + else if (!nasm_strnicmp(p, "overlay=", 8)) + seg->overlay = nasm_strdup(p+8); + else if (!nasm_strnicmp(p, "align=", 6)) { + seg->align = readnum(p+6, &rn_error); + if (rn_error) { + seg->align = 1; + error (ERR_NONFATAL, "segment alignment should be" + " numeric"); + } + switch ((int) seg->align) { + case 1: /* BYTE */ + case 2: /* WORD */ + case 4: /* DWORD */ + case 16: /* PARA */ + case 256: /* PAGE */ + break; + case 8: + error(ERR_WARNING, "OBJ format does not support alignment" + " of 8: rounding up to 16"); + seg->align = 16; + break; + case 32: + case 64: + case 128: + error(ERR_WARNING, "OBJ format does not support alignment" + " of %d: rounding up to 256", seg->align); + seg->align = 256; + break; + default: + error(ERR_NONFATAL, "invalid alignment value %d", + seg->align); + seg->align = 1; + break; + } + } else if (!nasm_strnicmp(p, "absolute=", 9)) { + seg->align = SEG_ABS + readnum(p+9, &rn_error); + if (rn_error) + error (ERR_NONFATAL, "argument to `absolute' segment" + " attribute should be 
numeric"); + } + } + + obj_seg_needs_update = seg; + if (seg->align >= SEG_ABS) + deflabel (name, NO_SEG, seg->align - SEG_ABS, &of_obj, error); + else + deflabel (name, seg->index+1, 0L, &of_obj, error); + obj_seg_needs_update = NULL; + + /* + * See if this segment is defined in any groups. + */ + for (grp = grphead; grp; grp = grp->next) { + for (i = grp->nindices; i < grp->nentries; i++) { + if (!strcmp(grp->segs[i].name, seg->name)) { + nasm_free (grp->segs[i].name); + grp->segs[i] = grp->segs[grp->nindices]; + grp->segs[grp->nindices++].index = seg->obj_index; + if (seg->grp) + error(ERR_WARNING, "segment `%s' is already part of" + " a group: first one takes precedence", + seg->name); + else + seg->grp = grp; + } + } + } + + if (seg->use32) + *bits = 32; + else + *bits = 16; + return seg->index; + } +} + +static int obj_directive (char *directive, char *value, int pass) { + if (!strcmp(directive, "group")) { + char *p, *q; + if (pass == 1) { + struct Group *grp; + struct Segment *seg; + int obj_idx; + + q = value; + while (*q && !isspace(*q)) + q++; + if (isspace(*q)) { + *q++ = '\0'; + while (*q && isspace(*q)) + q++; + } + if (!*q) { + error(ERR_NONFATAL, "GROUP directive contains no segments"); + return 1; + } + + obj_idx = 1; + for (grp = grphead; grp; grp = grp->next) { + obj_idx++; + if (!strcmp(grp->name, value)) { + error(ERR_NONFATAL, "group `%s' defined twice", value); + return 1; + } + } + + *grptail = grp = nasm_malloc(sizeof(*grp)); + grp->next = NULL; + grptail = &grp->next; + grp->index = seg_alloc(); + grp->obj_index = obj_idx; + grp->nindices = grp->nentries = 0; + grp->name = NULL; + + obj_grp_needs_update = grp; + deflabel (value, grp->index+1, 0L, &of_obj, error); + obj_grp_needs_update = NULL; + + while (*q) { + p = q; + while (*q && !isspace(*q)) + q++; + if (isspace(*q)) { + *q++ = '\0'; + while (*q && isspace(*q)) + q++; + } + /* + * Now p contains a segment name. Find it. 
+ */ + for (seg = seghead; seg; seg = seg->next) + if (!strcmp(seg->name, p)) + break; + if (seg) { + /* + * We have a segment index. Shift a name entry + * to the end of the array to make room. + */ + grp->segs[grp->nentries++] = grp->segs[grp->nindices]; + grp->segs[grp->nindices++].index = seg->obj_index; + if (seg->grp) + error(ERR_WARNING, "segment `%s' is already part of" + " a group: first one takes precedence", + seg->name); + else + seg->grp = grp; + } else { + /* + * We have an as-yet undefined segment. + * Remember its name, for later. + */ + grp->segs[grp->nentries++].name = nasm_strdup(p); + } + } + } + return 1; + } + if (!strcmp(directive, "uppercase")) { + obj_uppercase = TRUE; + return 1; + } + return 0; +} + +static long obj_segbase (long segment) { + struct Segment *seg; + + /* + * Find the segment in our list. + */ + for (seg = seghead; seg; seg = seg->next) + if (seg->index == segment-1) + break; + + if (!seg) + return segment; /* not one of ours - leave it alone */ + + if (seg->align >= SEG_ABS) + return seg->align; /* absolute segment */ + if (seg->grp) + return seg->grp->index+1; /* grouped segment */ + + return segment; /* no special treatment */ +} + +static void obj_filename (char *inname, char *outname, efunc error) { + strcpy(obj_infile, inname); + standard_extension (inname, outname, ".obj", error); +} + +static void obj_write_file (void) { + struct Segment *seg; + struct Group *grp; + struct Public *pub; + struct External *ext; + struct ObjData *data; + static unsigned char boast[] = "The Netwide Assembler " NASM_VER; + int lname_idx, rectype; + + /* + * Write the THEADR module header. + */ + recptr = record; + recptr = obj_write_name (recptr, obj_infile); + obj_record (THEADR, record, recptr); + + /* + * Write the NASM boast comment. 
+ */ + recptr = record; + recptr = obj_write_rword (recptr, 0); /* comment type zero */ + recptr = obj_write_data (recptr, boast, sizeof(boast)-1); + obj_record (COMENT, record, recptr); + + /* + * Write the first LNAMES record, containing LNAME one, which + * is null. Also initialise the LNAME counter. + */ + recptr = record; + recptr = obj_write_name (recptr, ""); + obj_record (LNAMES, record, recptr); + lname_idx = 2; + + /* + * Write the SEGDEF records. Each has an associated LNAMES + * record. + */ + for (seg = seghead; seg; seg = seg->next) { + int new_segdef; /* do we use the newer record type? */ + int acbp; + int sn, cn, on; /* seg, class, overlay LNAME idx */ + + if (seg->use32 || seg->currentpos >= 0x10000) + new_segdef = TRUE; + else + new_segdef = FALSE; + + recptr = record; + recptr = obj_write_name (recptr, seg->name); + sn = lname_idx++; + if (seg->segclass) { + recptr = obj_write_name (recptr, seg->segclass); + cn = lname_idx++; + } else + cn = 1; + if (seg->overlay) { + recptr = obj_write_name (recptr, seg->overlay); + on = lname_idx++; + } else + on = 1; + obj_record (LNAMES, record, recptr); + + acbp = (seg->combine << 2); /* C field */ + + if (seg->currentpos >= 0x10000 && !new_segdef) + acbp |= 0x02; /* B bit */ + + if (seg->use32) + acbp |= 0x01; /* P bit is Use32 flag */ + + /* A field */ + if (seg->align >= SEG_ABS) + acbp |= 0x00; + else if (seg->align >= 256) { + if (seg->align > 256) + error(ERR_NONFATAL, "segment `%s' requires more alignment" + " than OBJ format supports", seg->name); + acbp |= 0x80; + } else if (seg->align >= 16) { + acbp |= 0x60; + } else if (seg->align >= 4) { + acbp |= 0xA0; + } else if (seg->align >= 2) { + acbp |= 0x40; + } else + acbp |= 0x20; + + recptr = record; + recptr = obj_write_byte (recptr, acbp); + if (seg->align & SEG_ABS) { + recptr = obj_write_word (recptr, seg->align - SEG_ABS); + recptr = obj_write_byte (recptr, 0); + } + if (new_segdef) + recptr = obj_write_dword (recptr, seg->currentpos); + else + 
recptr = obj_write_word (recptr, seg->currentpos & 0xFFFF); + recptr = obj_write_index (recptr, sn); + recptr = obj_write_index (recptr, cn); + recptr = obj_write_index (recptr, on); + if (new_segdef) + obj_record (SEGDEF+1, record, recptr); + else + obj_record (SEGDEF, record, recptr); + } + + /* + * Write some LNAMES for the group names. lname_idx is left + * alone here - it will catch up when we write the GRPDEFs. + */ + recptr = record; + for (grp = grphead; grp; grp = grp->next) { + recptr = obj_write_name (recptr, grp->name); + if (recptr - record > 1024) { + obj_record (LNAMES, record, recptr); + recptr = record; + } + } + if (recptr > record) + obj_record (LNAMES, record, recptr); + + /* + * Write the GRPDEF records. + */ + for (grp = grphead; grp; grp = grp->next) { + int i; + + if (grp->nindices != grp->nentries) { + for (i = grp->nindices; i < grp->nentries; i++) { + error(ERR_NONFATAL, "group `%s' contains undefined segment" + " `%s'", grp->name, grp->segs[i].name); + nasm_free (grp->segs[i].name); + grp->segs[i].name = NULL; + } + } + recptr = record; + recptr = obj_write_index (recptr, lname_idx++); + for (i = 0; i < grp->nindices; i++) { + recptr = obj_write_byte (recptr, 0xFF); + recptr = obj_write_index (recptr, grp->segs[i].index); + } + obj_record (GRPDEF, record, recptr); + } + + /* + * Write the PUBDEF records: first the ones in the segments, + * then the far-absolutes. + */ + for (seg = seghead; seg; seg = seg->next) { + int any; + + recptr = record; + recptr = obj_write_index (recptr, seg->grp ? 
seg->grp->obj_index : 0); + recptr = obj_write_index (recptr, seg->obj_index); + any = FALSE; + if (seg->use32) + rectype = PUBDEF+1; + else + rectype = PUBDEF; + for (pub = seg->pubhead; pub; pub = pub->next) { + if (recptr - record + strlen(pub->name) > 1024) { + if (any) + obj_record (rectype, record, recptr); + recptr = record; + recptr = obj_write_index (recptr, 0); + recptr = obj_write_index (recptr, seg->obj_index); + } + recptr = obj_write_name (recptr, pub->name); + if (seg->use32) + recptr = obj_write_dword (recptr, pub->offset); + else + recptr = obj_write_word (recptr, pub->offset); + recptr = obj_write_index (recptr, 0); + any = TRUE; + } + if (any) + obj_record (rectype, record, recptr); + } + for (pub = fpubhead; pub; pub = pub->next) { /* pub-crawl :-) */ + recptr = record; + recptr = obj_write_index (recptr, 0); /* no group */ + recptr = obj_write_index (recptr, 0); /* no segment either */ + recptr = obj_write_word (recptr, pub->segment); + recptr = obj_write_name (recptr, pub->name); + recptr = obj_write_word (recptr, pub->offset); + recptr = obj_write_index (recptr, 0); + obj_record (PUBDEF, record, recptr); + } + + /* + * Write the EXTDEF and COMDEF records, in order. 
+ */ + recptr = record; + for (ext = exthead; ext; ext = ext->next) { + if (ext->commonsize == 0) { + recptr = obj_write_name (recptr, ext->name); + recptr = obj_write_index (recptr, 0); + if (recptr - record > 1024) { + obj_record (EXTDEF, record, recptr); + recptr = record; + } + } else { + if (recptr > record) + obj_record (EXTDEF, record, recptr); + recptr = record; + if (ext->commonsize > 0) { + recptr = obj_write_name (recptr, ext->name); + recptr = obj_write_index (recptr, 0); + recptr = obj_write_byte (recptr, 0x61);/* far communal */ + recptr = obj_write_value (recptr, 1L); + recptr = obj_write_value (recptr, ext->commonsize); + obj_record (COMDEF, record, recptr); + } else if (ext->commonsize < 0) { + recptr = obj_write_name (recptr, ext->name); + recptr = obj_write_index (recptr, 0); + recptr = obj_write_byte (recptr, 0x62);/* near communal */ + recptr = obj_write_value (recptr, ext->commonsize); + obj_record (COMDEF, record, recptr); + } + recptr = record; + } + } + if (recptr > record) + obj_record (EXTDEF, record, recptr); + + /* + * Write a COMENT record stating that the linker's first pass + * may stop processing at this point. + */ + recptr = record; + recptr = obj_write_rword (recptr, 0x40A2); + recptr = obj_write_byte (recptr, 1); + obj_record (COMENT, record, recptr); + + /* + * Write the LEDATA/FIXUPP pairs. + */ + for (data = datahead; data; data = data->next) { + if (data->nonempty) { + obj_record (data->letype, data->ledata, data->lptr); + if (data->fptr != data->fixupp) + obj_record (FIXUPP, data->fixupp, data->fptr); + } + } + + /* + * Write the MODEND module end marker. + */ + recptr = record; + rectype = MODEND; + if (obj_entry_seg != NO_SEG) { + recptr = obj_write_byte (recptr, 0xC1); + /* + * Find the segment in the segment list. 
+ */ + for (seg = seghead; seg; seg = seg->next) { + if (seg->index == obj_entry_seg) { + if (seg->grp) { + recptr = obj_write_byte (recptr, 0x10); + recptr = obj_write_index (recptr, seg->grp->obj_index); + } else { + recptr = obj_write_byte (recptr, 0x50); + } + recptr = obj_write_index (recptr, seg->obj_index); + if (seg->use32) { + rectype = MODEND+1; + recptr = obj_write_dword (recptr, obj_entry_ofs); + } else + recptr = obj_write_word (recptr, obj_entry_ofs); + break; + } + } + if (!seg) + error(ERR_NONFATAL, "entry point is not in this module"); + } else + recptr = obj_write_byte (recptr, 0); + obj_record (rectype, record, recptr); +} + +static unsigned char *obj_write_data(unsigned char *ptr, + unsigned char *data, int len) { + while (len--) + *ptr++ = *data++; + return ptr; +} + +static unsigned char *obj_write_byte(unsigned char *ptr, int data) { + *ptr++ = data; + return ptr; +} + +static unsigned char *obj_write_word(unsigned char *ptr, int data) { + *ptr++ = data & 0xFF; + *ptr++ = (data >> 8) & 0xFF; + return ptr; +} + +static unsigned char *obj_write_dword(unsigned char *ptr, long data) { + *ptr++ = data & 0xFF; + *ptr++ = (data >> 8) & 0xFF; + *ptr++ = (data >> 16) & 0xFF; + *ptr++ = (data >> 24) & 0xFF; + return ptr; +} + +static unsigned char *obj_write_rword(unsigned char *ptr, int data) { + *ptr++ = (data >> 8) & 0xFF; + *ptr++ = data & 0xFF; + return ptr; +} + +static unsigned char *obj_write_name(unsigned char *ptr, char *data) { + *ptr++ = strlen(data); + if (obj_uppercase) { + while (*data) { + *ptr++ = (unsigned char) toupper(*data); + data++; + } + } else { + while (*data) + *ptr++ = (unsigned char) *data++; + } + return ptr; +} + +static unsigned char *obj_write_index(unsigned char *ptr, int data) { + if (data < 128) + *ptr++ = data; + else { + *ptr++ = 0x80 | ((data >> 8) & 0x7F); + *ptr++ = data & 0xFF; + } + return ptr; +} + +static unsigned char *obj_write_value(unsigned char *ptr, + unsigned long data) { + if (data <= 128) + *ptr++ = 
data; + else if (data <= 0xFFFF) { + *ptr++ = 129; + *ptr++ = data & 0xFF; + *ptr++ = (data >> 8) & 0xFF; + } else if (data <= 0xFFFFFF) { + *ptr++ = 132; + *ptr++ = data & 0xFF; + *ptr++ = (data >> 8) & 0xFF; + *ptr++ = (data >> 16) & 0xFF; + } else { + *ptr++ = 136; + *ptr++ = data & 0xFF; + *ptr++ = (data >> 8) & 0xFF; + *ptr++ = (data >> 16) & 0xFF; + *ptr++ = (data >> 24) & 0xFF; + } + return ptr; +} + +static void obj_record(int type, unsigned char *start, unsigned char *end) { + unsigned long cksum, len; + + cksum = type; + fputc (type, ofp); + len = end-start+1; + cksum += (len & 0xFF) + ((len>>8) & 0xFF); + fwriteshort (len, ofp); + fwrite (start, 1, end-start, ofp); + while (start < end) + cksum += *start++; + fputc ( (-cksum) & 0xFF, ofp); +} + +struct ofmt of_obj = { + "Microsoft MS-DOS 16-bit object files", + "obj", + obj_init, + obj_out, + obj_deflabel, + obj_segment, + obj_segbase, + obj_directive, + obj_filename, + obj_cleanup +}; + +#endif /* OF_OBJ */ diff --git a/outrdf.c b/outrdf.c new file mode 100644 index 00000000..24fd4808 --- /dev/null +++ b/outrdf.c @@ -0,0 +1,467 @@ +/* outrdf.c output routines for the Netwide Assembler to produce + * RDOFF format object files (which are intended mainly + * for use in proprietary projects, as the code to load and + * execute them is very simple). They will also be used + * for device drivers and possibly some executable files + * in the MOSCOW operating system. See Rdoff.txt for + * details. + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#include +#include +#include +#include +#include + +#include "nasm.h" +#include "nasmlib.h" +#include "outform.h" + +#ifdef OF_RDF + +typedef short int16; /* not sure if this will be required to be altered + at all... 
best to typedef it just in case */

const char *RDOFFId = "RDOFF1"; /* magic string written at the start of RDOFF files */

/* The record types that can be found in the RDOFF header.  The structs
 * below are in-memory staging areas only: the write_*_rec() functions
 * serialise them field by field, so struct padding never reaches the
 * file. */

/* Note that whenever a segment is referred to in the RDOFF file, its number
 * is always half of the segment number that NASM uses to refer to it; this
 * is because NASM only allocates even numbered segments, so as to not
 * waste any of the 16 bits of segment number written to the file - this
 * allows up to 65533 external labels to be defined; otherwise it would be
 * 32764. */

/* Relocation record (written by write_reloc_rec, 9 bytes on disk). */
struct RelocRec {
    char type;          /* must be 1 */
    char segment;       /* only 0 for code, or 1 for data supported,
                         * but add 64 for relative refs (ie do not require
                         * reloc @ loadtime, only linkage) */
    long offset;        /* from start of segment in which reference is loc'd;
                         * written as 4 little-endian bytes */
    char length;        /* 1 2 or 4 bytes */
    int16 refseg;       /* segment to which the reference refers; halved
                         * by write_reloc_rec before being written */
};

/* Import (external symbol) record, written by write_import_rec. */
struct ImportRec {
    char type;          /* must be 2 */
    int16 segment;      /* segment number allocated to the label for reloc
                         * records - label is assumed to be at offset zero
                         * in this segment, so linker must fix up with offset
                         * of segment and of offset within segment */
    char label[33];     /* zero terminated... should be written to file until
                         * the zero, but not after it - max len = 32 chars */
};

/* Export (global symbol) record, written by write_export_rec. */
struct ExportRec {
    char type;          /* must be 3 */
    char segment;       /* segment referred to (0/1) */
    long offset;        /* offset within segment */
    char label[33];     /* zero terminated as above. max len = 32 chars */
};

/* Load-time library record.  No code that emits this record is visible
 * in this part of the file. */
struct DLLRec {
    char type;          /* must be 4 */
    char libname[128];  /* name of library to link with at load time */
};

/* BSS reservation record, written by write_bss_rec. */
struct BSSRec {
    char type;          /* must be 5 */
    long amount;        /* number of bytes BSS to reserve */
};

/* Code for managing the buffers needed to keep code and data in separate
 * sections until they are ready to be written to the file.
 * We'd better hope that it all fits in memory else we're buggered...
*/ + +#define BUF_BLOCK_LEN 4088 /* selected to match page size (4096) + * on 80x86 machines for efficiency */ + +typedef struct memorybuffer { + int length; + char buffer[BUF_BLOCK_LEN]; + struct memorybuffer *next; +} memorybuffer; + +memorybuffer * newmembuf(){ + memorybuffer * t; + + t = nasm_malloc(sizeof(memorybuffer)); + + t->length = 0; + t->next = NULL; + return t; +} + +void membufwrite(memorybuffer *b, void *data, int bytes) { + int16 w; + long l; + + if (b->next) { /* memory buffer full - use next buffer */ + membufwrite(b->next,data,bytes); + return; + } + if ((bytes < 0 && b->length - bytes > BUF_BLOCK_LEN) + || (bytes > 0 && b->length + bytes > BUF_BLOCK_LEN)) { + + /* buffer full and no next allocated... allocate and initialise next + * buffer */ + + b->next = newmembuf(); + membufwrite(b->next,data,bytes); + } + + switch(bytes) { + case -4: /* convert to little-endian */ + l = * (long *) data ; + b->buffer[b->length++] = l & 0xFF; + l >>= 8 ; + b->buffer[b->length++] = l & 0xFF; + l >>= 8 ; + b->buffer[b->length++] = l & 0xFF; + l >>= 8 ; + b->buffer[b->length++] = l & 0xFF; + break; + + case -2: + w = * (int16 *) data ; + b->buffer[b->length++] = w & 0xFF; + w >>= 8 ; + b->buffer[b->length++] = w & 0xFF; + break; + + default: + while(bytes--) { + b->buffer[b->length++] = *(* (unsigned char **) &data); + + (* (unsigned char **) &data)++ ; + } + break; + } +} + +void membufdump(memorybuffer *b,FILE *fp) +{ + if (!b) return; + + fwrite (b->buffer, 1, b->length, fp); + + membufdump(b->next,fp); +} + +int membuflength(memorybuffer *b) +{ + if (!b) return 0; + return b->length + membuflength(b->next); +} + +void freemembuf(memorybuffer *b) +{ + if (!b) return; + freemembuf(b->next); + nasm_free(b); +} + +/*********************************************************************** + * Actual code to deal with RDOFF ouput format begins here... 
+ */ + +/* global variables set during the initialisation phase */ + +memorybuffer *seg[2]; /* seg 0 = code, seg 1 = data */ +memorybuffer *header; /* relocation/import/export records */ + +FILE *ofile; + +int seg_warned; +static efunc error; + +int segtext,segdata,segbss; +long bsslength; + +static void rdf_init(FILE *fp, efunc errfunc, ldfunc ldef) +{ + ofile = fp; + error = errfunc; + seg[0] = newmembuf(); + seg[1] = newmembuf(); + header = newmembuf(); + segtext = seg_alloc(); + segdata = seg_alloc(); + segbss = seg_alloc(); + if (segtext != 0 || segdata != 2 || segbss != 4) + error(ERR_PANIC,"rdf segment numbers not allocated as expected (%d,%d,%d)", + segtext,segdata,segbss); + bsslength=0; +} + +static long rdf_section_names(char *name, int pass, int *bits) +{ + /* + * Default is 32 bits. + */ + if (!name) + *bits = 32; + + if (!name) return 0; + if (!strcmp(name, ".text")) return 0; + else if (!strcmp(name, ".data")) return 2; + else if (!strcmp(name, ".bss")) return 4; + else + return NO_SEG; +} + +static void write_reloc_rec(struct RelocRec *r) +{ + r->refseg >>= 1; /* adjust segment nos to RDF rather than NASM */ + + membufwrite(header,&r->type,1); + membufwrite(header,&r->segment,1); + membufwrite(header,&r->offset,-4); + membufwrite(header,&r->length,1); + membufwrite(header,&r->refseg,-2); /* 9 bytes written */ +} + +static void write_export_rec(struct ExportRec *r) +{ + r->segment >>= 1; + + membufwrite(header,&r->type,1); + membufwrite(header,&r->segment,1); + membufwrite(header,&r->offset,-4); + membufwrite(header,r->label,strlen(r->label) + 1); +} + +static void write_import_rec(struct ImportRec *r) +{ + r->segment >>= 1; + + membufwrite(header,&r->type,1); + membufwrite(header,&r->segment,-2); + membufwrite(header,r->label,strlen(r->label) + 1); +} + +static void write_bss_rec(struct BSSRec *r) +{ + membufwrite(header,&r->type,1); + membufwrite(header,&r->amount,-4); +} + +static void rdf_deflabel(char *name, long segment, long offset, int 
is_global) +{ + struct ExportRec r; + struct ImportRec ri; + + if (is_global && segment > 4) { + error(ERR_WARNING,"common declarations not supported... using extern"); + is_global = 0; + } + + if (is_global) { + r.type = 3; + r.segment = segment; + r.offset = offset; + strncpy(r.label,name,32); + r.label[32] = 0; + write_export_rec(&r); + } + + if (segment > 4) { /* EXTERN declaration */ + ri.type = 2; + ri.segment = segment; + strncpy(ri.label,name,32); + ri.label[32] = 0; + write_import_rec(&ri); + } +} + +static void rdf_out (long segto, void *data, unsigned long type, + long segment, long wrt) +{ + long bytes = type & OUT_SIZMASK; + struct RelocRec rr; + unsigned char databuf[4],*pd; + + segto >>= 1; /* convert NASM segment no to RDF number */ + + if (segto != 0 && segto != 1 && segto != 2) { + error(ERR_NONFATAL,"specified segment not supported by rdf output format"); + return; + } + + if (wrt != NO_SEG) { + wrt = NO_SEG; /* continue to do _something_ */ + error (ERR_NONFATAL, "WRT not supported by rdf output format"); + } + + type &= OUT_TYPMASK; + + if (segto == 2 && type != OUT_RESERVE) + { + error(ERR_NONFATAL, "BSS segments may not be initialised"); + + /* just reserve the space for now... */ + + if (type == OUT_REL2ADR) + bytes = 2; + else + bytes = 4; + type = OUT_RESERVE; + } + + if (type == OUT_RESERVE) { + if (segto == 2) /* BSS segment space reserverd */ + bsslength += bytes; + else + while (bytes --) + membufwrite(seg[segto],databuf,1); + } + else if (type == OUT_RAWDATA) { + if (segment != NO_SEG) + error(ERR_PANIC, "OUT_RAWDATA with other than NO_SEG"); + membufwrite(seg[segto],data,bytes); + } + else if (type == OUT_ADDRESS) { + + /* if segment == NO_SEG then we are writing an address of an + object within the same segment - do not produce reloc rec. 
*/ + + if (segment != NO_SEG) + { + + /* it's an address, so we must write a relocation record */ + + rr.type = 1; /* type signature */ + rr.segment = segto; /* segment we're currently in */ + rr.offset = membuflength(seg[segto]); /* current offset */ + rr.length = bytes; /* length of reference */ + rr.refseg = segment; /* segment referred to */ + write_reloc_rec(&rr); + } + + pd = databuf; /* convert address to little-endian */ + if (bytes == 2) + WRITESHORT (pd, *(long *)data); + else + WRITELONG (pd, *(long *)data); + + membufwrite(seg[segto],databuf,bytes); + + } + else if (type == OUT_REL2ADR) + { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL2ADR"); + if (segment != NO_SEG && segment % 2) { + error(ERR_NONFATAL, "rdf format does not support segment base refs"); + } + + rr.type = 1; /* type signature */ + rr.segment = segto+64; /* segment we're currently in + rel flag */ + rr.offset = membuflength(seg[segto]); /* current offset */ + rr.length = 2; /* length of reference */ + rr.refseg = segment; /* segment referred to */ + write_reloc_rec(&rr); + + /* work out what to put in the code: offset of the end of this operand, + * subtracted from any data specified, so that loader can just add + * address of imported symbol onto it to get address relative to end of + * instruction: import_address + data(offset) - end_of_instrn */ + + rr.offset = *(long *)data -(rr.offset + bytes); + + membufwrite(seg[segto],&rr.offset,-2); + } + else if (type == OUT_REL4ADR) + { + if (segment == segto) + error(ERR_PANIC, "intra-segment OUT_REL4ADR"); + if (segment != NO_SEG && segment % 2) { + error(ERR_NONFATAL, "rdf format does not support segment base refs"); + } + + rr.type = 1; /* type signature */ + rr.segment = segto+64; /* segment we're currently in + rel tag */ + rr.offset = membuflength(seg[segto]); /* current offset */ + rr.length = 4; /* length of reference */ + rr.refseg = segment; /* segment referred to */ + write_reloc_rec(&rr); + + rr.offset = *(long 
*)data -(rr.offset + bytes);
+    membufwrite(seg[segto],&rr.offset,-4);
+  }
+}
+
+/*
+ * Finish the RDOFF output file. Writes the "RDOFF1" signature, then
+ * the header, code segment (seg[0]) and data segment (seg[1]), each
+ * preceded by its length stored in four bytes with WRITELONG. If any
+ * BSS space was requested, a BSS record is written first (through
+ * write_bss_rec) so that it is counted in the header length.
+ * Finally all three buffers are freed and the file is closed.
+ */
+static void rdf_cleanup (void) {
+  long l;
+  unsigned char b[4],*d;
+  struct BSSRec bs;
+
+
+  /* should write imported & exported symbol declarations to header here */
+
+  /* generate the output file... */
+  fwrite("RDOFF1",6,1,ofile);  /* file type magic number */
+
+  if (bsslength != 0)  /* reserve BSS */
+  {
+      bs.type = 5;
+      bs.amount = bsslength;
+      write_bss_rec(&bs);
+  }
+
+  l = membuflength(header);d=b;
+  WRITELONG(d,l);
+
+  fwrite(b,4,1,ofile);  /* write length of header */
+  membufdump(header,ofile);  /* dump header */
+
+  l = membuflength(seg[0]);d=b;  /* code segment */
+  WRITELONG(d,l);
+
+  fwrite(b,4,1,ofile);
+  membufdump(seg[0],ofile);
+
+  l = membuflength(seg[1]);d=b;  /* data segment */
+  WRITELONG(d,l);
+
+  fwrite(b,4,1,ofile);
+  membufdump(seg[1],ofile);
+
+  freemembuf(header);
+  freemembuf(seg[0]);
+  freemembuf(seg[1]);
+  fclose(ofile);
+}
+
+/* Segment-base hook of the output format: always returns 0. */
+static long rdf_segbase (long segment) {
+  return 0;
+}
+
+/*
+ * Directive hook of the output format: returns 0 for every directive.
+ * NOTE(review): presumably 0 means "not recognised" - confirm against
+ * the caller.
+ */
+static int rdf_directive (char *directive, char *value, int pass) {
+  return 0;
+}
+
+/* Derive the output file name from the input name, using ".rdf". */
+static void rdf_filename (char *inname, char *outname, efunc error) {
+  standard_extension(inname,outname,".rdf",error);
+}
+
+/* Output-format descriptor for RDOFF: names followed by the hooks. */
+struct ofmt of_rdf = {
+  "Relocatable Dynamic Object File Format v1.1",
+  "rdf",
+  rdf_init,
+  rdf_out,
+  rdf_deflabel,
+  rdf_section_names,
+  rdf_segbase,
+  rdf_directive,
+  rdf_filename,
+  rdf_cleanup
+};
+
+#endif /* OF_RDF */
diff --git a/parser.c b/parser.c
new file mode 100644
index 00000000..14c7a5ba
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,1306 @@
+/* parser.c source line parser for the Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
+ * Julian Hall. All rights reserved. The software is
+ * redistributable under the licence given in the file "Licence"
+ * distributed in the NASM archive.
+ * + * initial version 27/iii/95 by Simon Tatham + */ + +#include +#include +#include +#include +#include + +#include "nasm.h" +#include "nasmlib.h" +#include "parser.h" +#include "float.h" + +#include "names.c" + + +static long reg_flags[] = { /* sizes and special flags */ + 0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL, + REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8, + REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG, + REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX, + REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS, + MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, + REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG, + FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG, + REG_TREG +}; + +enum { /* special tokens */ + S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO, + S_TWORD, S_WORD +}; + +static char *special_names[] = { /* and the actual text */ + "byte", "dword", "far", "long", "near", "qword", "short", "to", + "tword", "word" +}; + +static char *prefix_names[] = { + "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne", + "repnz", "repz", "times" +}; + +/* + * Evaluator datatype. Expressions, within the evaluator, are + * stored as an array of these beasts, terminated by a record with + * type==0. Mostly, it's a vector type: each type denotes some kind + * of a component, and the value denotes the multiple of that + * component present in the expression. The exception is the WRT + * type, whose `value' field denotes the segment to which the + * expression is relative. These segments will be segment-base + * types, i.e. either odd segment values or SEG_ABS types. So it is + * still valid to assume that anything with a `value' field of zero + * is insignificant. 
+ */ +typedef struct { + long type; /* a register, or EXPR_xxx */ + long value; /* must be >= 32 bits */ +} expr; + +static void eval_reset(void); +static expr *evaluate(int); + +/* + * ASSUMPTION MADE HERE. The number of distinct register names + * (i.e. possible "type" fields for an expr structure) does not + * exceed 126. + */ +#define EXPR_SIMPLE 126 +#define EXPR_WRT 127 +#define EXPR_SEGBASE 128 + +static int is_reloc(expr *); +static int is_simple(expr *); +static int is_really_simple (expr *); +static long reloc_value(expr *); +static long reloc_seg(expr *); +static long reloc_wrt(expr *); + +enum { /* token types, other than chars */ + TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM, + TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL, + TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT, + TOKEN_FLOAT +}; + +struct tokenval { + long t_integer, t_inttwo; + char *t_charptr; +}; + +static char tempstorage[1024], *q; +static int bsi (char *string, char **array, int size);/* binary search */ + +static int nexttoken (void); +static int is_comma_next (void); + +static char *bufptr; +static int i; +static struct tokenval tokval; +static lfunc labelfunc; +static efunc error; +static char *label; +static struct ofmt *outfmt; + +static long seg, ofs; + +insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, + char *buffer, insn *result, struct ofmt *output, + efunc errfunc) { + int operand; + int critical; + + q = tempstorage; + bufptr = buffer; + labelfunc = lookup_label; + outfmt = output; + error = errfunc; + seg = segment; + ofs = offset; + label = ""; + + i = nexttoken(); + + result->eops = NULL; /* must do this, whatever happens */ + + if (i==0) { /* blank line - ignore */ + result->label = NULL; /* so, no label on it */ + result->opcode = -1; /* and no instruction either */ + return result; + } + if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX && + (i!=TOKEN_REG || (REG_SREG & 
~reg_flags[tokval.t_integer]))) { + error (ERR_NONFATAL, "label or instruction expected" + " at start of line"); + result->label = NULL; + result->opcode = -1; + return result; + } + + if (i == TOKEN_ID) { /* there's a label here */ + label = result->label = tokval.t_charptr; + i = nexttoken(); + if (i == ':') { /* skip over the optional colon */ + i = nexttoken(); + } + } else /* no label; so, moving swiftly on */ + result->label = NULL; + + if (i==0) { + result->opcode = -1; /* this line contains just a label */ + return result; + } + + result->nprefix = 0; + result->times = 1; + + while (i == TOKEN_PREFIX || + (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) { + /* + * Handle special case: the TIMES prefix. + */ + if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) { + expr *value; + + i = nexttoken(); + eval_reset(); + value = evaluate (pass); + if (!value) { /* but, error in evaluator */ + result->opcode = -1; /* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } + if (!is_simple (value)) { + error (ERR_NONFATAL, + "non-constant argument supplied to TIMES"); + result->times = 1; + } else + result->times = value->value; + } else { + if (result->nprefix == MAXPREFIX) + error (ERR_NONFATAL, + "instruction has more than %d prefixes", MAXPREFIX); + else + result->prefixes[result->nprefix++] = tokval.t_integer; + i = nexttoken(); + } + } + + if (i != TOKEN_INSN) { + error (ERR_NONFATAL, "parser: instruction expected"); + result->opcode = -1; + return result; + } + + result->opcode = tokval.t_integer; + result->condition = tokval.t_inttwo; + + /* + * RESB, RESW and RESD cannot be satisfied with incorrectly + * evaluated operands, since the correct values _must_ be known + * on the first pass. Hence, even in pass one, we set the + * `critical' flag on calling evaluate(), so that it will bomb + * out on undefined symbols. Nasty, but there's nothing we can + * do about it. 
+ * + * For the moment, EQU has the same difficulty, so we'll + * include that. + */ + if (result->opcode == I_RESB || + result->opcode == I_RESW || + result->opcode == I_RESD || + result->opcode == I_RESQ || + result->opcode == I_REST || + result->opcode == I_EQU) + critical = pass; + else + critical = (pass==2 ? 2 : 0); + + if (result->opcode == I_DB || + result->opcode == I_DW || + result->opcode == I_DD || + result->opcode == I_DQ || + result->opcode == I_DT) { + extop *eop, **tail = &result->eops; + int oper_num = 0; + + /* + * Begin to read the DB/DW/DD/DQ/DT operands. + */ + while (1) { + i = nexttoken(); + if (i == 0) + break; + eop = *tail = nasm_malloc(sizeof(extop)); + tail = &eop->next; + eop->next = NULL; + eop->type = EOT_NOTHING; + oper_num++; + + if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) { + eop->type = EOT_DB_STRING; + eop->stringval = tokval.t_charptr; + eop->stringlen = tokval.t_inttwo; + i = nexttoken(); /* eat the comma */ + continue; + } + + if (i == TOKEN_FLOAT || i == '-') { + long sign = +1L; + + if (i == '-') { + char *save = bufptr; + i = nexttoken(); + sign = -1L; + if (i != TOKEN_FLOAT) { + bufptr = save; + i = '-'; + } + } + + if (i == TOKEN_FLOAT) { + eop->type = EOT_DB_STRING; + eop->stringval = q; + if (result->opcode == I_DD) + eop->stringlen = 4; + else if (result->opcode == I_DQ) + eop->stringlen = 8; + else if (result->opcode == I_DT) + eop->stringlen = 10; + else { + error(ERR_NONFATAL, "floating-point constant" + " encountered in `D%c' instruction", + result->opcode == I_DW ? 
'W' : 'B'); + eop->type = EOT_NOTHING; + } + q += eop->stringlen; + if (!float_const (tokval.t_charptr, sign, + (unsigned char *)eop->stringval, + eop->stringlen, error)) + eop->type = EOT_NOTHING; + i = nexttoken(); /* eat the comma */ + continue; + } + } + + /* anything else */ { + expr *value; + eval_reset(); + value = evaluate (critical); + if (!value) { /* but, error in evaluator */ + result->opcode = -1;/* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } + if (is_reloc(value)) { + eop->type = EOT_DB_NUMBER; + eop->offset = reloc_value(value); + eop->segment = reloc_seg(value); + eop->wrt = reloc_wrt(value); + } else { + error (ERR_NONFATAL, + "`%s' operand %d: expression is not simple" + " or relocatable", + insn_names[result->opcode], oper_num); + } + } + } + return result; + } + + /* right. Now we begin to parse the operands. There may be up to three + * of these, separated by commas, and terminated by a zero token. */ + + for (operand = 0; operand < 3; operand++) { + expr *seg, *value; /* used most of the time */ + int mref; /* is this going to be a memory ref? 
*/ + + result->oprs[operand].addr_size = 0;/* have to zero this whatever */ + i = nexttoken(); + if (i == 0) break; /* end of operands: get out of here */ + result->oprs[operand].type = 0; /* so far, no override */ + while (i == TOKEN_SPECIAL) {/* size specifiers */ + switch ((int)tokval.t_integer) { + case S_BYTE: + result->oprs[operand].type |= BITS8; + break; + case S_WORD: + result->oprs[operand].type |= BITS16; + break; + case S_DWORD: + case S_LONG: + result->oprs[operand].type |= BITS32; + break; + case S_QWORD: + result->oprs[operand].type |= BITS64; + break; + case S_TWORD: + result->oprs[operand].type |= BITS80; + break; + case S_TO: + result->oprs[operand].type |= TO; + break; + case S_FAR: + result->oprs[operand].type |= FAR; + break; + case S_NEAR: + result->oprs[operand].type |= NEAR; + break; + case S_SHORT: + result->oprs[operand].type |= SHORT; + break; + } + i = nexttoken(); + } + + if (i == '[') { /* memory reference */ + i = nexttoken(); + mref = TRUE; + if (i == TOKEN_SPECIAL) { /* check for address size override */ + switch ((int)tokval.t_integer) { + case S_WORD: + result->oprs[operand].addr_size = 16; + break; + case S_DWORD: + case S_LONG: + result->oprs[operand].addr_size = 32; + break; + default: + error (ERR_NONFATAL, "invalid size specification in" + " effective address"); + } + i = nexttoken(); + } + } else /* immediate operand, or register */ + mref = FALSE; + + eval_reset(); + + value = evaluate (critical); + if (!value) { /* error in evaluator */ + result->opcode = -1; /* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } + if (i == ':' && mref) { /* it was seg:offset */ + seg = value; /* so shift this into the segment */ + i = nexttoken(); /* then skip the colon */ + if (i == TOKEN_SPECIAL) { /* another check for size override */ + switch ((int)tokval.t_integer) { + case S_WORD: + result->oprs[operand].addr_size = 16; + break; + case S_DWORD: + case S_LONG: + result->oprs[operand].addr_size = 32; + 
break; + default: + error (ERR_NONFATAL, "invalid size specification in" + " effective address"); + } + i = nexttoken(); + } + value = evaluate (critical); + /* and get the offset */ + if (!value) { /* but, error in evaluator */ + result->opcode = -1; /* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } + } else seg = NULL; + if (mref) { /* find ] at the end */ + if (i != ']') { + error (ERR_NONFATAL, "parser: expecting ]"); + do { /* error recovery again */ + i = nexttoken(); + } while (i != 0 && i != ','); + } else /* we got the required ] */ + i = nexttoken(); + } else { /* immediate operand */ + if (i != 0 && i != ',' && i != ':') { + error (ERR_NONFATAL, "comma or end of line expected"); + do { /* error recovery */ + i = nexttoken(); + } while (i != 0 && i != ','); + } else if (i == ':') { + result->oprs[operand].type |= COLON; + } + } + + /* now convert the exprs returned from evaluate() into operand + * descriptions... */ + + if (mref) { /* it's a memory reference */ + expr *e = value; + int b, i, s; /* basereg, indexreg, scale */ + long o; /* offset */ + + if (seg) { /* segment override */ + if (seg[1].type!=0 || seg->value!=1 || + REG_SREG & ~reg_flags[seg->type]) + error (ERR_NONFATAL, "invalid segment override"); + else if (result->nprefix == MAXPREFIX) + error (ERR_NONFATAL, + "instruction has more than %d prefixes", + MAXPREFIX); + else + result->prefixes[result->nprefix++] = seg->type; + } + + b = i = -1, o = s = 0; + + if (e->type < EXPR_SIMPLE) { /* this bit's a register */ + if (e->value == 1) /* in fact it can be basereg */ + b = e->type; + else /* no, it has to be indexreg */ + i = e->type, s = e->value; + e++; + } + if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */ + if (e->value != 1) { /* it has to be indexreg */ + if (i != -1) { /* but it can't be */ + error(ERR_NONFATAL, "invalid effective address"); + result->opcode = -1; + return result; + } else + i = e->type, s = e->value; + } else { /* it 
can be basereg */ + if (b != -1) /* or can it? */ + i = e->type, s = 1; + else + b = e->type; + } + e++; + } + if (e->type != 0) { /* is there an offset? */ + if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */ + error (ERR_NONFATAL, "invalid effective address"); + result->opcode = -1; + return result; + } else { + if (e->type == EXPR_SIMPLE) { + o = e->value; + e++; + } + if (e->type == EXPR_WRT) { + result->oprs[operand].wrt = e->value; + e++; + } else + result->oprs[operand].wrt = NO_SEG; + if (e->type != 0) { /* is there a segment id? */ + if (e->type < EXPR_SEGBASE) { + error (ERR_NONFATAL, + "invalid effective address"); + result->opcode = -1; + return result; + } else + result->oprs[operand].segment = (e->type - + EXPR_SEGBASE); + e++; + } else + result->oprs[operand].segment = NO_SEG; + } + } else { + o = 0; + result->oprs[operand].wrt = NO_SEG; + result->oprs[operand].segment = NO_SEG; + } + + if (e->type != 0) { /* there'd better be nothing left! */ + error (ERR_NONFATAL, "invalid effective address"); + result->opcode = -1; + return result; + } + + result->oprs[operand].type |= MEMORY; + if (b==-1 && (i==-1 || s==0)) + result->oprs[operand].type |= MEM_OFFS; + result->oprs[operand].basereg = b; + result->oprs[operand].indexreg = i; + result->oprs[operand].scale = s; + result->oprs[operand].offset = o; + } else { /* it's not a memory reference */ + if (is_reloc(value)) { /* it's immediate */ + result->oprs[operand].type |= IMMEDIATE; + result->oprs[operand].offset = reloc_value(value); + result->oprs[operand].segment = reloc_seg(value); + result->oprs[operand].wrt = reloc_wrt(value); + if (is_simple(value) && reloc_value(value)==1) + result->oprs[operand].type |= UNITY; + } else { /* it's a register */ + if (value->type>=EXPR_SIMPLE || value->value!=1) { + error (ERR_NONFATAL, "invalid operand type"); + result->opcode = -1; + return result; + } + /* clear overrides, except TO which applies to FPU regs */ + result->oprs[operand].type &= TO; + 
result->oprs[operand].type |= REGISTER; + result->oprs[operand].type |= reg_flags[value->type]; + result->oprs[operand].basereg = value->type; + } + } + } + + result->operands = operand; /* set operand count */ + + while (operand<3) /* clear remaining operands */ + result->oprs[operand++].type = 0; + + /* + * Transform RESW, RESD, RESQ, REST into RESB. + */ + switch (result->opcode) { + case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break; + case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break; + case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break; + case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break; + } + + return result; +} + +static int is_comma_next (void) { + char *p; + + p = bufptr; + while (isspace(*p)) p++; + return (*p == ',' || *p == ';' || !*p); +} + +/* isidstart matches any character that may start an identifier, and isidchar + * matches any character that may appear at places other than the start of an + * identifier. E.g. a period may only appear at the start of an identifier + * (for local labels), whereas a number may appear anywhere *but* at the + * start. */ + +#define isidstart(c) ( isalpha(c) || (c)=='_' || (c)=='.' || (c)=='?' ) +#define isidchar(c) ( isidstart(c) || isdigit(c) || (c)=='$' || (c)=='#' \ + || (c)=='@' || (c)=='~' ) + +/* Ditto for numeric constants. */ + +#define isnumstart(c) ( isdigit(c) || (c)=='$' ) +#define isnumchar(c) ( isalnum(c) ) + +/* This returns the numeric value of a given 'digit'. */ + +#define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0') + +/* + * This tokeniser routine has only one side effect, that of + * updating `bufptr'. Hence by saving `bufptr', lookahead may be + * performed. 
+ */ + +static int nexttoken (void) { + char ourcopy[256], *r, *s; + + while (isspace(*bufptr)) bufptr++; + if (!*bufptr) return 0; + + /* we have a token; either an id, a number or a char */ + if (isidstart(*bufptr) || + (*bufptr == '$' && isidstart(bufptr[1]))) { + /* now we've got an identifier */ + int i; + int is_sym = FALSE; + + if (*bufptr == '$') { + is_sym = TRUE; + bufptr++; + } + + tokval.t_charptr = q; + *q++ = *bufptr++; + while (isidchar(*bufptr)) *q++ = *bufptr++; + *q++ = '\0'; + for (s=tokval.t_charptr, r=ourcopy; *s; s++) + *r++ = tolower (*s); + *r = '\0'; + if (is_sym) + return TOKEN_ID; /* bypass all other checks */ + /* right, so we have an identifier sitting in temp storage. now, + * is it actually a register or instruction name, or what? */ + if ((tokval.t_integer=bsi(ourcopy, reg_names, + elements(reg_names)))>=0) + return TOKEN_REG; + if ((tokval.t_integer=bsi(ourcopy, insn_names, + elements(insn_names)))>=0) + return TOKEN_INSN; + for (i=0; i=0) + return TOKEN_INSN; + } + if ((tokval.t_integer=bsi(ourcopy, prefix_names, + elements(prefix_names)))>=0) { + tokval.t_integer += PREFIX_ENUM_START; + return TOKEN_PREFIX; + } + if ((tokval.t_integer=bsi(ourcopy, special_names, + elements(special_names)))>=0) + return TOKEN_SPECIAL; + if (!strcmp(ourcopy, "seg")) + return TOKEN_SEG; + if (!strcmp(ourcopy, "wrt")) + return TOKEN_WRT; + return TOKEN_ID; + } else if (*bufptr == '$' && !isnumchar(bufptr[1])) { + /* + * It's a $ sign with no following hex number; this must + * mean it's a Here token ($), evaluating to the current + * assembly location, or a Base token ($$), evaluating to + * the base of the current segment. 
+ */ + bufptr++; + if (*bufptr == '$') { + bufptr++; + return TOKEN_BASE; + } + return TOKEN_HERE; + } else if (isnumstart(*bufptr)) { /* now we've got a number */ + char *r = q; + int rn_error; + + *q++ = *bufptr++; + while (isnumchar(*bufptr)) { + *q++ = *bufptr++; + } + if (*bufptr == '.') { + /* + * a floating point constant + */ + *q++ = *bufptr++; + while (isnumchar(*bufptr)) { + *q++ = *bufptr++; + } + *q++ = '\0'; + tokval.t_charptr = r; + return TOKEN_FLOAT; + } + *q++ = '\0'; + tokval.t_integer = readnum(r, &rn_error); + if (rn_error) + return TOKEN_ERRNUM; /* some malformation occurred */ + tokval.t_charptr = NULL; + return TOKEN_NUM; + } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */ + char quote = *bufptr++, *r; + r = tokval.t_charptr = bufptr; + while (*bufptr && *bufptr != quote) bufptr++; + tokval.t_inttwo = bufptr - r; /* store full version */ + if (!*bufptr) + return TOKEN_ERRNUM; /* unmatched quotes */ + tokval.t_integer = 0; + r = bufptr++; /* skip over final quote */ + while (quote != *--r) { + tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r; + } + return TOKEN_NUM; + } else if (*bufptr == ';') { /* a comment has happened - stay */ + return 0; + } else if ((*bufptr == '>' || *bufptr == '<' || + *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) { + bufptr += 2; + return (bufptr[-2] == '>' ? TOKEN_SHR : + bufptr[-2] == '<' ? TOKEN_SHL : + bufptr[-2] == '/' ? TOKEN_SDIV : + TOKEN_SMOD); + } else /* just an ordinary char */ + return (unsigned char) (*bufptr++); +} + +/* return index of "string" in "array", or -1 if no match. 
*/ +static int bsi (char *string, char **array, int size) { + int i = -1, j = size; /* always, i < index < j */ + while (j-i >= 2) { + int k = (i+j)/2; + int l = strcmp(string, array[k]); + if (l<0) /* it's in the first half */ + j = k; + else if (l>0) /* it's in the second half */ + i = k; + else /* we've got it :) */ + return k; + } + return -1; /* we haven't got it :( */ +} + +void cleanup_insn (insn *i) { + extop *e; + + while (i->eops) { + e = i->eops; + i->eops = i->eops->next; + nasm_free (e); + } +} + +/* ------------- Evaluator begins here ------------------ */ + +static expr exprtempstorage[1024], *tempptr; /* store exprs in here */ + +/* + * Add two vector datatypes. We have some bizarre behaviour on far- + * absolute segment types: we preserve them during addition _only_ + * if one of the segments is a truly pure scalar. + */ +static expr *add_vectors(expr *p, expr *q) { + expr *r = tempptr; + int preserve; + + preserve = is_really_simple(p) || is_really_simple(q); + + while (p->type && q->type && + p->type < EXPR_SEGBASE+SEG_ABS && + q->type < EXPR_SEGBASE+SEG_ABS) + if (p->type > q->type) { + tempptr->type = q->type; + tempptr->value = q->value; + tempptr++, q++; + } else if (p->type < q->type) { + tempptr->type = p->type; + tempptr->value = p->value; + tempptr++, p++; + } else { /* *p and *q have same type */ + tempptr->type = p->type; + tempptr->value = p->value + q->value; + tempptr++, p++, q++; + } + while (p->type && + (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) { + tempptr->type = p->type; + tempptr->value = p->value; + tempptr++, p++; + } + while (q->type && + (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) { + tempptr->type = q->type; + tempptr->value = q->value; + tempptr++, q++; + } + (tempptr++)->type = 0; + + return r; +} + +/* + * Multiply a vector by a scalar. Strip far-absolute segment part + * if present. 
+ */ +static expr *scalar_mult(expr *vect, long scalar) { + expr *p = vect; + + while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) { + p->value = scalar * (p->value); + p++; + } + p->type = 0; + + return vect; +} + +static expr *scalarvect (long scalar) { + expr *p = tempptr; + tempptr->type = EXPR_SIMPLE; + tempptr->value = scalar; + tempptr++; + tempptr->type = 0; + tempptr++; + return p; +} + +/* + * Return TRUE if the argument is a simple scalar. (Or a far- + * absolute, which counts.) + */ +static int is_simple (expr *vect) { + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type != EXPR_SIMPLE) + return 0; + do { + vect++; + } while (vect->type && !vect->value); + if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0; + return 1; +} + +/* + * Return TRUE if the argument is a simple scalar, _NOT_ a far- + * absolute. + */ +static int is_really_simple (expr *vect) { + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type != EXPR_SIMPLE) + return 0; + do { + vect++; + } while (vect->type && !vect->value); + if (vect->type) return 0; + return 1; +} + +/* + * Return TRUE if the argument is relocatable (i.e. a simple + * scalar, plus at most one segment-base, plus possibly a WRT). + */ +static int is_reloc (expr *vect) { + while (vect->type && !vect->value) + vect++; + if (!vect->type) + return 1; + if (vect->type < EXPR_SIMPLE) + return 0; + if (vect->type == EXPR_SIMPLE) { + do { + vect++; + } while (vect->type && !vect->value); + if (!vect->type) + return 1; + } + do { + vect++; + } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); + if (!vect->type) + return 1; + return 1; +} + +/* + * Return the scalar part of a relocatable vector. (Including + * simple scalar vectors - those qualify as relocatable.) 
+ */ +static long reloc_value (expr *vect) { + while (vect->type && !vect->value) + vect++; + if (!vect->type) return 0; + if (vect->type == EXPR_SIMPLE) + return vect->value; + else + return 0; +} + +/* + * Return the segment number of a relocatable vector, or NO_SEG for + * simple scalars. + */ +static long reloc_seg (expr *vect) { + while (vect->type && (vect->type == EXPR_WRT || !vect->value)) + vect++; + if (vect->type == EXPR_SIMPLE) { + do { + vect++; + } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); + } + if (!vect->type) + return NO_SEG; + else + return vect->type - EXPR_SEGBASE; +} + +/* + * Return the WRT segment number of a relocatable vector, or NO_SEG + * if no WRT part is present. + */ +static long reloc_wrt (expr *vect) { + while (vect->type && vect->type < EXPR_WRT) + vect++; + if (vect->type == EXPR_WRT) { + return vect->value; + } else + return NO_SEG; +} + +static void eval_reset(void) { + tempptr = exprtempstorage; /* initialise temporary storage */ +} + +/* + * The SEG operator: calculate the segment part of a relocatable + * value. Return NULL, as usual, if an error occurs. Report the + * error too. + */ +static expr *segment_part (expr *e) { + long seg; + + if (!is_reloc(e)) { + error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); + return NULL; + } + + seg = reloc_seg(e); + if (seg == NO_SEG) { + error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value"); + return NULL; + } else if (seg & SEG_ABS) + return scalarvect(seg & ~SEG_ABS); + else { + expr *f = tempptr++; + tempptr++->type = 0; + f->type = EXPR_SEGBASE+outfmt->segbase(seg+1); + f->value = 1; + return f; + } +} + +/* + * Recursive-descent parser. Called with a single boolean operand, + * which is TRUE if the evaluation is critical (i.e. unresolved + * symbols are an error condition). Must update the global `i' to + * reflect the token after the parsed string. May return NULL. 
+ *
+ * evaluate() should report its own errors: on return it is assumed
+ * that if NULL has been returned, the error has already been
+ * reported.
+ */
+
+/*
+ * Grammar parsed is:
+ *
+ * expr  : expr0 [ WRT expr6 ]
+ * expr0 : expr1 [ {|} expr1]
+ * expr1 : expr2 [ {^} expr2]
+ * expr2 : expr3 [ {&} expr3]
+ * expr3 : expr4 [ {<<,>>} expr4...]
+ * expr4 : expr5 [ {+,-} expr5...]
+ * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
+ * expr6 : { ~,+,-,SEG } expr6
+ *       | (expr0)
+ *       | symbol
+ *       | $
+ *       | number
+ */
+
+static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int);
+static expr *expr4(int), *expr5(int), *expr6(int);
+
+/*
+ * Each exprN below parses one precedence level, starting at the
+ * global token `i' and leaving `i' at the first token it did not
+ * consume. NULL is returned once an error has made the result
+ * unusable. The bitwise and shift levels report non-scalar operands
+ * but still carry on with the scalar parts.
+ */
+
+/* Bitwise OR. */
+static expr *expr0(int critical) {
+    expr *e, *f;
+
+    e = expr1(critical);
+    if (!e)
+        return NULL;
+    while (i == '|') {
+        i = nexttoken();
+        f = expr1(critical);
+        if (!f)
+            return NULL;
+        if (!is_simple(e) || !is_simple(f)) {
+            error(ERR_NONFATAL, "`|' operator may only be applied to"
+                  " scalar values");
+        }
+        e = scalarvect (reloc_value(e) | reloc_value(f));
+    }
+    return e;
+}
+
+/* Bitwise XOR. */
+static expr *expr1(int critical) {
+    expr *e, *f;
+
+    e = expr2(critical);
+    if (!e)
+        return NULL;
+    while (i == '^') {
+        i = nexttoken();
+        f = expr2(critical);
+        if (!f)
+            return NULL;
+        if (!is_simple(e) || !is_simple(f)) {
+            error(ERR_NONFATAL, "`^' operator may only be applied to"
+                  " scalar values");
+        }
+        e = scalarvect (reloc_value(e) ^ reloc_value(f));
+    }
+    return e;
+}
+
+/* Bitwise AND. */
+static expr *expr2(int critical) {
+    expr *e, *f;
+
+    e = expr3(critical);
+    if (!e)
+        return NULL;
+    while (i == '&') {
+        i = nexttoken();
+        f = expr3(critical);
+        if (!f)
+            return NULL;
+        if (!is_simple(e) || !is_simple(f)) {
+            error(ERR_NONFATAL, "`&' operator may only be applied to"
+                  " scalar values");
+        }
+        e = scalarvect (reloc_value(e) & reloc_value(f));
+    }
+    return e;
+}
+
+/* Shifts; the right shift is done on the unsigned value. */
+static expr *expr3(int critical) {
+    expr *e, *f;
+
+    e = expr4(critical);
+    if (!e)
+        return NULL;
+    while (i == TOKEN_SHL || i == TOKEN_SHR) {
+        int j = i;
+        i = nexttoken();
+        f = expr4(critical);
+        if (!f)
+            return NULL;
+        if (!is_simple(e) || !is_simple(f)) {
+            error(ERR_NONFATAL, "shift operator may only be applied to"
+                  " scalar values");
+        }
+        switch (j) {
+          case TOKEN_SHL:
+            e = scalarvect (reloc_value(e) << reloc_value(f));
+            break;
+          case TOKEN_SHR:
+            e = scalarvect (((unsigned long)reloc_value(e)) >>
+                            reloc_value(f));
+            break;
+        }
+    }
+    return e;
+}
+
+/* Addition and subtraction; these work on whole vectors. */
+static expr *expr4(int critical) {
+    expr *e, *f;
+
+    e = expr5(critical);
+    if (!e)
+        return NULL;
+    while (i == '+' || i == '-') {
+        int j = i;
+        i = nexttoken();
+        f = expr5(critical);
+        if (!f)
+            return NULL;
+        switch (j) {
+          case '+':
+            e = add_vectors (e, f);
+            break;
+          case '-':
+            e = add_vectors (e, scalar_mult(f, -1L));
+            break;
+        }
+    }
+    return e;
+}
+
+/*
+ * Multiplication, and unsigned (/, %) and signed (//, %%) division
+ * and modulo. Multiplication needs one scalar operand; the other
+ * four need both operands to be scalar and the divisor non-zero.
+ */
+static expr *expr5(int critical) {
+    expr *e, *f;
+
+    e = expr6(critical);
+    if (!e)
+        return NULL;
+    /*
+     * '%' must be accepted here: the condition used to test '*'
+     * twice, so the unsigned modulo operator was never parsed.
+     */
+    while (i == '*' || i == '/' || i == '%' ||
+           i == TOKEN_SDIV || i == TOKEN_SMOD) {
+        int j = i;
+        i = nexttoken();
+        f = expr6(critical);
+        if (!f)
+            return NULL;
+        if (j != '*' && (!is_simple(e) || !is_simple(f))) {
+            error(ERR_NONFATAL, "division operator may only be applied to"
+                  " scalar values");
+            return NULL;
+        }
+        if (j != '*' && reloc_value(f) == 0) {
+            error(ERR_NONFATAL, "division by zero");
+            return NULL;
+        }
+        switch (j) {
+          case '*':
+            if (is_simple(e))
+                e = scalar_mult (f, reloc_value(e));
+            else if (is_simple(f))
+                e = scalar_mult (e, reloc_value(f));
+            else {
+                error(ERR_NONFATAL, "unable to multiply two "
+                      "non-scalar objects");
+                return NULL;
+            }
+            break;
+          case '/':
+            e = scalarvect (((unsigned long)reloc_value(e)) /
+                            ((unsigned long)reloc_value(f)));
+            break;
+          case '%':
+            e = scalarvect (((unsigned long)reloc_value(e)) %
+                            ((unsigned long)reloc_value(f)));
+            break;
+          case TOKEN_SDIV:
+            e = scalarvect (((signed long)reloc_value(e)) /
+                            ((signed long)reloc_value(f)));
+            break;
+          case TOKEN_SMOD:
+            e = scalarvect (((signed long)reloc_value(e)) %
+                            ((signed long)reloc_value(f)));
+            break;
+        }
+    }
+    return e;
+}
+
+static
expr *expr6(int critical) {
+    /*
+     * Innermost grammar level: the unary operators -, +, ~ and SEG,
+     * a parenthesised expression, or a primary (number, register,
+     * symbol, `$' or `$$'). Starts at the global token `i' and
+     * leaves `i' at the token after what was parsed. Returns NULL
+     * after reporting an error.
+     */
+    expr *e;
+    long label_seg, label_ofs;
+
+    if (i == '-') {
+        i = nexttoken();
+        e = expr6(critical);
+        if (!e)
+            return NULL;
+        return scalar_mult (e, -1L);
+    } else if (i == '+') {
+        i = nexttoken();
+        return expr6(critical);
+    } else if (i == '~') {
+        i = nexttoken();
+        e = expr6(critical);
+        if (!e)
+            return NULL;
+        if (!is_simple(e)) {
+            error(ERR_NONFATAL, "`~' operator may only be applied to"
+                  " scalar values");
+            return NULL;
+        }
+        return scalarvect(~reloc_value(e));
+    } else if (i == TOKEN_SEG) {
+        i = nexttoken();
+        e = expr6(critical);
+        if (!e)
+            return NULL;
+        return segment_part(e);
+    } else if (i == '(') {
+        i = nexttoken();
+        e = expr0(critical);
+        if (!e)
+            return NULL;
+        if (i != ')') {
+            error(ERR_NONFATAL, "expecting `)'");
+            return NULL;
+        }
+        i = nexttoken();
+        return e;
+    } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID ||
+               i == TOKEN_HERE || i == TOKEN_BASE) {
+        e = tempptr;  /* the new vector starts here */
+        switch (i) {
+          case TOKEN_NUM:
+            e->type = EXPR_SIMPLE;
+            e->value = tokval.t_integer;
+            break;
+          case TOKEN_REG:
+            e->type = tokval.t_integer;  /* the register is the type */
+            e->value = 1;
+            break;
+          case TOKEN_ID:
+          case TOKEN_HERE:
+          case TOKEN_BASE:
+            /*
+             * Since the whole line is parsed before the label it
+             * defines is given to the label manager, we have
+             * problems with lines such as
+             *
+             * end: TIMES 512-(end-start) DB 0
+             *
+             * where `end' is not known on pass one, despite not
+             * really being a forward reference, and due to
+             * criticality it is _needed_. Hence we check our label
+             * against the currently defined one, and do our own
+             * resolution of it if we have to.
+             */
+            if (i == TOKEN_BASE) {
+                label_seg = seg;
+                label_ofs = 0;
+            } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) {
+                label_seg = seg;
+                label_ofs = ofs;
+            } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) {
+                if (critical == 2) {
+                    error (ERR_NONFATAL, "symbol `%s' undefined",
+                           tokval.t_charptr);
+                    return NULL;
+                } else if (critical == 1) {
+                    error (ERR_NONFATAL, "symbol `%s' not defined before use",
+                           tokval.t_charptr);
+                    return NULL;
+                } else {
+                    /* not critical: use the current position for now */
+                    label_seg = seg;
+                    label_ofs = ofs;
+                }
+            }
+            e->type = EXPR_SIMPLE;
+            e->value = label_ofs;
+            if (label_seg!=NO_SEG) {
+                /* second term: one multiple of the label's segment */
+                tempptr++;
+                tempptr->type = EXPR_SEGBASE + label_seg;
+                tempptr->value = 1;
+            }
+            break;
+        }
+        tempptr++;
+        tempptr->type = 0;  /* terminate the vector */
+        tempptr++;
+        i = nexttoken();
+        return e;
+    } else {
+        error(ERR_NONFATAL, "expression syntax error");
+        return NULL;
+    }
+}
+
+/*
+ * Entry point of the evaluator: parse `expr0 [ WRT expr6 ]' starting
+ * at the global token `i'. `critical' is passed down to expr6(),
+ * where 1 and 2 turn an unresolved symbol into an error. Any
+ * far-absolute segment part is stripped from the result, and a WRT
+ * operand is added as an EXPR_WRT term. Returns NULL after
+ * reporting an error.
+ */
+static expr *evaluate (int critical) {
+    expr *e;
+    expr *f = NULL;
+
+    e = expr0 (critical);
+    if (!e)
+        return NULL;
+
+    if (i == TOKEN_WRT) {
+        if (!is_reloc(e)) {
+            error(ERR_NONFATAL, "invalid left-hand operand to WRT");
+            return NULL;
+        }
+        i = nexttoken();  /* eat the WRT */
+        f = expr6 (critical);
+        if (!f)
+            return NULL;
+    }
+    e = scalar_mult (e, 1L);  /* strip far-absolute segment part */
+    if (f) {
+        expr *g = tempptr++;
+        tempptr++->type = 0;
+        g->type = EXPR_WRT;
+        if (!is_reloc(f)) {
+            error(ERR_NONFATAL, "invalid right-hand operand to WRT");
+            return NULL;
+        }
+        g->value = reloc_seg(f);
+        if (g->value == NO_SEG)
+            /* a plain number on the right is taken as absolute */
+            g->value = reloc_value(f) | SEG_ABS;
+        else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) {
+            /* an even, non-absolute value is not a segment base */
+            error(ERR_NONFATAL, "invalid right-hand operand to WRT");
+            return NULL;
+        }
+        e = add_vectors (e, g);
+    }
+    return e;
+}
diff --git a/parser.h b/parser.h
new file mode 100644
index 00000000..82d52359
--- /dev/null
+++ b/parser.h
@@ -0,0 +1,18 @@
+/* parser.h header file for the parser module of version 0.1 of the
+ * Netwide Assembler
+ *
+ * The Netwide Assembler is copyright (C) 1996
Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#ifndef NASM_PARSER_H +#define NASM_PARSER_H + +insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, + char *buffer, insn *result, struct ofmt *output, + efunc error); +void cleanup_insn (insn *instruction); + +#endif diff --git a/rdoff/Makefile b/rdoff/Makefile new file mode 100644 index 00000000..2e55dde1 --- /dev/null +++ b/rdoff/Makefile @@ -0,0 +1,43 @@ +# Makefile for RDOFF object file utils; part of the Netwide Assembler +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. +# +# This Makefile is designed for use under Unix (probably fairly +# portably). + +CC = gcc +CCFLAGS = -c -O -g -Wall -ansi -pedantic -I.. +LINK = gcc +LINKFLAGS = -o +DLINKFLAGS = -o +LIBRARIES = +STRIP = strip +LDRDFLIBS = rdoff.o ../nasmlib.o symtab.o collectn.o +RDXLIBS = rdoff.o rdfload.o symtab.o collectn.o + +.c.o: + $(CC) $(CCFLAGS) $*.c + +all : rdfdump ldrdf rdx + +rdfdump : rdfdump.o + $(LINK) $(LINKFLAGS) rdfdump rdfdump.o +ldrdf : ldrdf.o $(LDRDFLIBS) + $(LINK) $(LINKFLAGS) ldrdf ldrdf.o $(LDRDFLIBS) +rdx : rdx.o $(RDXLIBS) + $(LINK) $(LINKFLAGS) rdx rdx.o $(RDXLIBS) + +rdfdump.o : rdfdump.c +rdoff.o : rdoff.c rdoff.h +ldrdf.o : ldrdf.c rdoff.h ../nasmlib.h symtab.h collectn.h +symtab.o : symtab.c symtab.h +collectn.o : collectn.c collectn.h +rdx.o : rdx.c rdoff.h rdfload.h symtab.h +rdfload.o : rdfload.c rdfload.h rdoff.h collectn.h symtab.h + +clean : + rm -f *.o *~ rdfdump ldrdf rdx + make -C test clean diff --git a/rdoff/collectn.c b/rdoff/collectn.c new file mode 100644 index 00000000..c265c95f --- /dev/null +++ b/rdoff/collectn.c @@ -0,0 +1,40 @@ +/* collectn.c Implements variable length pointer arrays 
[collections] + * + * This file is public domain. + */ + +#include "collectn.h" +#include + +void collection_init(Collection * c) +{ + int i; + + for (i = 0; i < 32; i++) c->p[i] = NULL; + c->next = NULL; +} + +void ** colln(Collection * c, int index) +{ + while (index >= 32) { + index -= 32; + if (c->next == NULL) { + c->next = malloc(sizeof(Collection)); + collection_init(c->next); + } + c = c->next; + } + return &(c->p[index]); +} + +void collection_reset(Collection *c) +{ + int i; + if (c->next) { + collection_reset(c->next); + free(c->next); + } + + c->next = NULL; + for (i = 0; i < 32; i++) c->p[i] = NULL; +} diff --git a/rdoff/collectn.h b/rdoff/collectn.h new file mode 100644 index 00000000..b3f2d52a --- /dev/null +++ b/rdoff/collectn.h @@ -0,0 +1,22 @@ +/* collectn.h Header file for 'collection' abstract data type + * + * This file is public domain, and does not come under the NASM license. + * It, along with 'collectn.c' implements what is basically a variable + * length array (of pointers) + */ + +#ifndef _COLLECTN_H +#define _COLLECTN_H + +typedef struct tagCollection { + void *p[32]; /* array of pointers to objects */ + + struct tagCollection *next; +} Collection; + +void collection_init(Collection * c); +void ** colln(Collection * c, int index); +void collection_reset(Collection * c); + +#endif + diff --git a/rdoff/ldrdf.c b/rdoff/ldrdf.c new file mode 100644 index 00000000..ce86b7e6 --- /dev/null +++ b/rdoff/ldrdf.c @@ -0,0 +1,540 @@ +/* ldrdf.c RDOFF Object File linker/loader main program + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +/* TODO: Make the system skip a module (other than the first) if none + * of the other specified modules contain a reference to it. 
+ * May require the system to make an extra pass of the modules to be + * loaded eliminating those that aren't required. + * + * Support libaries (.a files - requires a 'ranlib' type utility) + * + * -s option to strip resolved symbols from exports. + */ + +#include +#include + +#include "nasm.h" +#include "rdoff.h" +#include "nasmlib.h" +#include "symtab.h" +#include "collectn.h" + +#define LDRDF_VERSION "0.11" + +/* global variables - those to set options: */ + +int verbose = 0; /* reflects setting of command line switch */ +int align = 16; +int errors = 0; /* set by functions to cause halt after current + stage of processing */ + +/* the linked list of modules that must be loaded & linked */ + +struct modulenode { + rdffile f; /* the file */ + long coderel; /* module's code relocation factor */ + long datarel; /* module's data relocation factor */ + long bssrel; /* module's bss data reloc. factor */ + void * header; /* header location, if loaded */ + char * name; /* filename */ + struct modulenode *next; +}; + +struct modulenode *modules = NULL,*lastmodule = NULL; + +void *symtab; /* The symbol table */ + +rdf_headerbuf * newheader ; /* New header to be written to output */ + +/* loadmodule - find the characteristics of a module and add it to the + * list of those being linked together */ + +void loadmodule(char *filename) +{ + struct modulenode *prev; + if (! modules) { + modules = malloc(sizeof(struct modulenode)); + lastmodule = modules; + prev = NULL; + } + else { + lastmodule->next = malloc(sizeof(struct modulenode)); + prev = lastmodule; + lastmodule = lastmodule->next; + } + + if (! 
lastmodule) { + fputs("ldrdf: not enough memory\n",stderr); + exit(1); + } + + if (rdfopen(&lastmodule->f,filename)) { + rdfperror("ldrdf",filename); + exit(1); + } + + lastmodule->header = NULL; /* header hasn't been loaded */ + lastmodule->name = filename; + lastmodule->next = NULL; + + if (prev) { + lastmodule->coderel = prev->coderel + prev->f.code_len; + if (lastmodule->coderel % align != 0) + lastmodule->coderel += align - (lastmodule->coderel % align); + lastmodule->datarel = prev->datarel + prev->f.data_len; + if (lastmodule->datarel % align != 0) + lastmodule->datarel += align - (lastmodule->datarel % align); + } + else { + lastmodule->coderel = 0; + lastmodule->datarel = 0; + } + + if (verbose) + printf("%s code = %08lx (+%04lx), data = %08lx (+%04lx)\n",filename, + lastmodule->coderel,lastmodule->f.code_len, + lastmodule->datarel,lastmodule->f.data_len); + +} + +/* load_segments() allocates memory for & loads the code & data segs + * from the RDF modules + */ + +char *text,*data; +long textlength,datalength,bsslength; + +void load_segments(void) +{ + struct modulenode *mod; + + if (!modules) { + fprintf(stderr,"ldrdf: nothing to do\n"); + exit(0); + } + if (!lastmodule) { + fprintf(stderr,"ldrdf: panic: module list exists, but lastmodule=NULL\n"); + exit(3); + } + + if (verbose) + printf("loading modules into memory\n"); + + /* The following stops 16 bit DOS from crashing whilst attempting to + work using segments > 64K */ + if (sizeof(int) == 2) { /* expect a 'code has no effect' warning on 32 bit + platforms... 
*/ + if (lastmodule->coderel + lastmodule->f.code_len > 65535 || + lastmodule->datarel + lastmodule->f.data_len > 65535) { + fprintf(stderr,"ldrdf: segment length has exceeded 64K; use a 32 bit " + "version.\nldrdf: code size = %05lx, data size = %05lx\n", + lastmodule->coderel + lastmodule->f.code_len, + lastmodule->datarel + lastmodule->f.data_len); + exit(1); + } + } + + text = malloc(textlength = lastmodule->coderel + lastmodule->f.code_len); + data = malloc(datalength = lastmodule->datarel + lastmodule->f.data_len); + + if (!text || !data) { + fprintf(stderr,"ldrdf: out of memory\n"); + exit(1); + } + + mod = modules; + while (mod) { /* load the segments for each module */ + mod->header = malloc(mod->f.header_len); + if (!mod->header) { + fprintf(stderr,"ldrdf: out of memory\n"); + exit(1); + } + if (rdfloadseg(&mod->f,RDOFF_HEADER,mod->header) || + rdfloadseg(&mod->f,RDOFF_CODE,&text[mod->coderel]) || + rdfloadseg(&mod->f,RDOFF_DATA,&data[mod->datarel])) { + rdfperror("ldrdf",mod->name); + exit(1); + } + rdfclose(&mod->f); /* close file; segments remain */ + mod = mod->next; + } +} + +/* build_symbols() step through each module's header, and locate + * exported symbols, placing them in a global table + */ + +void build_symbols() +{ + struct modulenode *mod; + rdfheaderrec *r; + symtabEnt e; + long bssloc,cbBss; + + if (verbose) printf("building global symbol table:\n"); + newheader = rdfnewheader(); + + symtab = symtabNew(); + bssloc = 0; /* keep track of location of BSS symbols */ + + for (mod = modules; mod; mod = mod->next) + { + mod->bssrel = bssloc; + cbBss = 0; + rdfheaderrewind(&mod->f); + while ((r = rdfgetheaderrec(&mod->f))) + { + + if (r->type == 5) /* Allocate BSS */ + cbBss += r->b.amount; + + if (r->type != 3) continue; /* ignore all but export recs */ + + e.segment = r->e.segment; + e.offset = r->e.offset + + (e.segment == 0 ? mod->coderel : /* 0 -> code */ + e.segment == 1 ? 
mod->datarel : /* 1 -> data */ + mod->bssrel) ; /* 2 -> bss */ + e.flags = 0; + e.name = malloc(strlen(r->e.label) + 1); + if (! e.name) + { + fprintf(stderr,"ldrdf: out of memory\n"); + exit(1); + } + strcpy(e.name,r->e.label); + symtabInsert(symtab,&e); + } + bssloc += cbBss; + } + if (verbose) + { + symtabDump(symtab,stdout); + printf("BSS length = %ld bytes\n\n",bssloc); + } + bsslength = bssloc; +} + +/* link_segments() step through relocation records in each module's + * header, fixing up references. + */ + +void link_segments(void) +{ + struct modulenode *mod; + Collection imports; + symtabEnt *s; + long rel,relto = 0; /* placate gcc */ + char *seg; + rdfheaderrec *r; + int bRelative; + + if (verbose) printf("linking segments\n"); + + collection_init(&imports); + + for (mod = modules; mod; mod = mod->next) { + if (verbose >= 2) printf("* processing %s\n",mod->name); + rdfheaderrewind(&mod->f); + while((r = rdfgetheaderrec(&mod->f))) { + switch(r->type) { + case 1: /* relocation record */ + if (r->r.segment >= 64) { /* Relative relocation; */ + bRelative = 1; /* need to find location relative */ + r->r.segment -= 64; /* to start of this segment */ + relto = r->r.segment == 0 ? mod->coderel : mod->datarel; + } + else + bRelative = 0; /* non-relative - need to relocate + * at load time */ + + /* calculate absolute offset of reference, not rel to beginning of + segment */ + r->r.offset += r->r.segment == 0 ? mod->coderel : mod->datarel; + + /* calculate the relocation factor to apply to the operand - + the base address of one of this modules segments if referred + segment is 0 - 2, or the address of an imported symbol + otherwise. 
*/ + + if (r->r.refseg == 0) rel = mod->coderel; + else if (r->r.refseg == 1) rel = mod->datarel; + else if (r->r.refseg == 2) rel = mod->bssrel; + else { /* cross module link - find reference */ + s = *colln(&imports,r->r.refseg - 2); + if (!s) { + fprintf(stderr,"ldrdf: link to undefined segment %04x in" + " %s:%d\n", r->r.refseg,mod->name,r->r.segment); + errors = 1; + break; + } + rel = s->offset; + + r->r.refseg = s->segment; /* change referred segment, + so that new header is + correct */ + } + + if (bRelative) /* Relative - subtract current segment start */ + rel -= relto; + else + { /* Add new relocation header */ + rdfaddheader(newheader,r); + } + + /* Work out which segment we're making changes to ... */ + if (r->r.segment == 0) seg = text; + else if (r->r.segment == 1) seg = data; + else { + fprintf(stderr,"ldrdf: relocation in unknown segment %d in " + "%s\n", r->r.segment,mod->name); + errors = 1; + break; + } + + /* Add the relocation factor to the datum specified: */ + + if (verbose >= 3) + printf(" - relocating %d:%08lx by %08lx\n",r->r.segment, + r->r.offset,rel); + + /**** The following code is non-portable. Rewrite it... ****/ + switch(r->r.length) { + case 1: + seg[r->r.offset] += (char) rel; + break; + case 2: + *(int16 *)(seg + r->r.offset) += (int16) rel; + break; + case 4: + *(long *)(seg + r->r.offset) += rel; + break; + } + break; + + case 2: /* import record */ + s = symtabFind(symtab, r->i.label); + if (s == NULL) { + /* Need to add support for dynamic linkage */ + fprintf(stderr,"ldrdf: undefined symbol %s in module %s\n", + r->i.label,mod->name); + errors = 1; + } + else + { + *colln(&imports,r->i.segment - 2) = s; + if (verbose >= 2) + printf("imported %s as %04x\n", r->i.label, r->i.segment); + } + break; + + case 3: /* export; dump to output new version */ + s = symtabFind(symtab, r->e.label); + if (! s) continue; /* eh? probably doesn't matter... 
*/ + + r->e.offset = s->offset; + rdfaddheader(newheader,r); + break; + + case 4: /* DLL record */ + rdfaddheader(newheader,r); /* copy straight to output */ + break; + } + } + collection_reset(&imports); + } +} + +/* write_output() write linked program out to a file */ + +void write_output(char *filename) +{ + FILE * fp; + rdfheaderrec r; + + fp = fopen(filename,"wb"); + if (! fp) + { + fprintf(stderr,"ldrdf: could not open '%s' for writing\n",filename); + exit(1); + } + + + /* add BSS length count to header... */ + if (bsslength) + { + r.type = 5; + r.b.amount = bsslength; + rdfaddheader(newheader,&r); + } + + /* Write header */ + rdfwriteheader(fp,newheader); + rdfdoneheader(newheader); + newheader = NULL; + + /* Write text */ + if (fwrite(&textlength,1,4,fp) != 4 + || fwrite(text,1,textlength,fp) !=textlength) + { + fprintf(stderr,"ldrdf: error writing %s\n",filename); + exit(1); + } + + /* Write data */ + if (fwrite(&datalength,1,4,fp) != 4 || + fwrite(data,1,datalength,fp) != datalength) + { + fprintf (stderr,"ldrdf: error writing %s\n", filename); + exit(1); + } + fclose(fp); +} + + +/* main program: interpret command line, and pass parameters on to + * individual module loaders & the linker + * + * Command line format: + * ldrdf [-o outfile | -x] [-r xxxx] [-v] [--] infile [infile ...] + * + * Default action is to output a file named 'aout.rdx'. -x specifies + * that the linked object program should be executed, rather than + * written to a file. -r specifies that the object program should + * be prelocated at address 'xxxx'. This option cannot be used + * in conjunction with -x. 
+ */ + +const char *usagemsg = "usage:\n" +" ldrdf [-o outfile | -x] [-a x] [-v] [-p x] [--] infile [infile ...]\n\n" +" ldrdf -h displays this message\n" +" ldrdf -r displays version information\n\n" +" -o selects output filename (default is aout.rdx)\n" +" -x causes ldrdx to link & execute rather than write to file\n" +" -a x causes object program to be statically relocated to address 'x'\n" +" -v turns on verbose mode\n" +" -p x causes segments to be aligned (padded) to x byte boundaries\n" +" (default is 16 bytes)\n"; + +void usage(void) +{ + fputs(usagemsg,stderr); +} + +int main(int argc,char **argv) +{ + char *ofilename = "aout.rdx"; + long relocateaddr = -1; /* -1 if no relocation is to occur */ + int execute = 0; /* 1 to execute after linking, 0 otherwise */ + int procsw = 1; /* set to 0 by '--' */ + int tmp; + + if (argc == 1) { + usage(); + exit(1); + } + + /* process command line switches, and add modules specified to linked list + of modules, keeping track of total memory required to load them */ + + while(argv++,--argc) { + if (procsw && !strcmp(*argv,"-h")) { /* Help command */ + usage(); exit(1); + } + else if (procsw && !strcmp(*argv,"-r")) { + printf("ldrdf version %s (%s) (%s)\n",LDRDF_VERSION,_RDOFF_H, + sizeof(int) == 2 ? 
"16 bit" : "32 bit"); + exit(1); + } + else if (procsw && !strcmp(*argv,"-o")) { + ofilename = *++argv; + --argc; + if (execute) { + fprintf(stderr,"ldrdf: -o and -x switches incompatible\n"); + exit(1); + } + if (verbose > 1) printf("output filename set to '%s'\n",ofilename); + } + else if (procsw && !strcmp(*argv,"-x")) { + execute++; + if (verbose > 1) printf("will execute linked object\n"); + } + else if (procsw && !strcmp(*argv,"-a")) { + relocateaddr = readnum(*++argv,&tmp); + --argc; + if (tmp) { + fprintf(stderr,"ldrdf: error in parameter to '-a' switch: '%s'\n", + *argv); + exit(1); + } + if (execute) { + fprintf(stderr,"ldrdf: -a and -x switches incompatible\n"); + exit(1); + } + if (verbose) printf("will relocate to %08lx\n",relocateaddr); + } + else if (procsw && !strcmp(*argv,"-v")) { + verbose++; + if (verbose == 1) printf("verbose mode selected\n"); + } + else if (procsw && !strcmp(*argv,"-p")) { + align = readnum(*++argv,&tmp); + --argc; + if (tmp) { + fprintf(stderr,"ldrdf: error in parameter to '-p' switch: '%s'\n", + *argv); + exit(1); + } + if (align != 1 && align != 2 && align != 4 && align != 8 && align != 16 + && align != 32 && align != 256) { + fprintf(stderr,"ldrdf: %d is an invalid alignment factor - must be" + "1,2,4,8,16 or 256\n",align); + exit(1); + } + if (verbose > 1) printf("alignment %d selected\n",align); + } + else if (procsw && !strcmp(*argv,"--")) { + procsw = 0; + } + else { /* is a filename */ + if (verbose > 1) printf("processing module %s\n",*argv); + loadmodule(*argv); + } + } + + /* we should be scanning for unresolved references, and removing + unreferenced modules from the list of modules here, so that + we know about the final size once libraries have been linked in */ + + load_segments(); /* having calculated size of reqd segments, load + each rdoff module's segments into memory */ + + build_symbols(); /* build a global symbol table... + perhaps this should be done before load_segs? 
*/ + + link_segments(); /* step through each module's header, and resolve + references to the global symbol table. + This also does local address fixups. */ + + if (errors) { + fprintf(stderr,"ldrdf: there were errors - aborted\n"); + exit(errors); + } + if (execute) { + fprintf(stderr,"ldrdf: module execution not yet supported\n"); + exit(1); + } + if (relocateaddr != -1) { + fprintf(stderr,"ldrdf: static relocation not yet supported\n"); + exit(1); + } + + write_output(ofilename); + return 0; +} diff --git a/rdoff/rdf.doc b/rdoff/rdf.doc new file mode 100644 index 00000000..300c2bc5 --- /dev/null +++ b/rdoff/rdf.doc @@ -0,0 +1,99 @@ +RDOFF: Relocatable Dynamically-linked Object File Format +======================================================== + +RDOFF was designed initially to test the object-file production +interface to NASM. It soon became apparent that it could be enhanced +for use in serious applications due to its simplicity; code to load +and execute an RDOFF object module is very simple. It also contains +enhancements to allow it to be linked with a dynamic link library at +either run- or load- time, depending on how complex you wish to make +your loader. + +The RDOFF format (version 1.1, as produced by NASM v0.91) is defined +as follows: + +The first six bytes of the file contain the string 'RDOFF1'. Other +versions of the format may contain a last character other than +'1' - all little endian versions of the file will always contain an +ASCII character with value greater than 32. If RDOFF is used on a +big-endian machine at some point in the future, the version will be +encoded in decimal rather than ASCII, so will be below 32. + +All multi-byte fields following this are encoded in either little- or +big-endian format depending on the system described by this version +information.
Object files should be encoded in the endianness of +their target machine; files of incorrect endianness will be rejected +by the loader - this means that loaders do not need to convert +endianness, as RDOFF has been designed with simplicity of loading at +the forefront of the design requirements. + +The next 4 byte field is the length of the header in bytes. The +header consists of a sequence of variable length records. Each +record's type is identified by the first byte of the record. Record +types 1-4 are currently supported. Record type 5 will be added in +the near future, when I implement BSS segments. Record type 6 may be +to do with debugging, when I get debugging implemented. + +Type 1: Relocation +================== + +Offset Length Description +0 1 Type (contains 1) +1 1 Segment that contains reference (0 = text, 1 = data) + Add 64 to this number to indicate a relative linkage + to an external symbol (see notes) +2 4 Offset of reference +6 1 Length of reference (1,2 or 4 bytes) +7 2 Segment to which reference is made (0 = text, 1 = + data, 2 = BSS [when implemented]) others are external + symbols. + +Total length = 9 bytes + +Type 2: Symbol Import +===================== + +0 1 Type (2) +1 2 Segment number that will be used in references to this + symbol. +3 ? Null terminated string containing label (up to 32 + chars) to match against exports in linkage. + +Type 3: Symbol Export +===================== + +0 1 Type (3) +1 1 Segment containing object to be exported (0/1/2) +2 4 Offset within segment +6 ? Null terminate string containing label to export (32 + char maximum length) + +Type 4: Dynamic Link Library +============================ + +0 1 Type (4) +1 ? Library name (up to 128 chars) + +Type 5: Reserve BSS +=================== + +0 1 Type (5) +1 4 Amount of BSS space to reserve in bytes + +Total length: 5 bytes + +----------------------------------------------------------------------------- + +Following the header is the text (code) segment. 
This is preceded by +a 4-byte integer, which is its length in bytes. This is followed by +the length of the data segment (also 4 bytes), and finally the data +segment. + +Notes +===== + +Relative linking: The number stored at the address is offset +required from the imported symbol, with the address of the end of +the instruction subtracted from it. This means that the linker can +simply add the address of the label relative to the beginning of the +current segment to it. diff --git a/rdoff/rdfdump.c b/rdoff/rdfdump.c new file mode 100644 index 00000000..4d4f4df5 --- /dev/null +++ b/rdoff/rdfdump.c @@ -0,0 +1,156 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +FILE *infile; + +long translatelong(long in) { /* translate from little endian to + local representation */ + long r; + unsigned char *i; + + i = (unsigned char *)&in; /* view the first four bytes of the argument */ + r = i[3]; + r = (r << 8) + i[2]; + r = (r << 8) + i[1]; + r = (r << 8) + *i; + + return r; +} + +int translateshort(short in) { /* as translatelong, for a two-byte value */ + int r; + unsigned char *i; + + i = (unsigned char *)&in; + r = (i[1] << 8) + *i; + + return r; +} +void print_header(long length) { + unsigned char buf[129],t,s,l; + long o; + short rs; + + while (length > 0) { + fread(&t,1,1,infile); + switch(t) { + case 1: /* relocation record */ + fread(&s,1,1,infile); + fread(&o,4,1,infile); + fread(&l,1,1,infile); + fread(&rs,2,1,infile); + printf(" relocation: location (%04x:%08lx), length %d, " + "referred seg %04x\n",(int)s,translatelong(o),(int)l, + translateshort(rs)); + length -= 9; + break; + case 2: /* import record */ + fread(&rs,2,1,infile); + l = 0; + do { + fread(&buf[l],1,1,infile); + } while (buf[l++]); + printf(" import: segment %04x = %s\n",translateshort(rs),buf); + length -= l + 3; + break; + case 3: /* export record */ + fread(&s,1,1,infile); + fread(&o,4,1,infile); + l = 0; + do { + fread(&buf[l],1,1,infile); + } while (buf[l++]); + printf(" export: (%04x:%08lx) = %s\n",(int)s,translatelong(o),buf); + length -= l + 6; + break; + case 4: /* DLL record */ + l = 0; + do { + 
fread(&buf[l],1,1,infile); + } while (buf[l++]); + printf(" dll: %s\n",buf); + length -= l + 1; + break; + case 5: /* BSS reservation */ + /* the amount is four bytes wide: read it into the long o, not into + the one-byte l */ + fread(&o,4,1,infile); + printf(" bss reservation: %08lx bytes\n",translatelong(o)); + length -= 5; + break; + default: + printf(" unrecognised record (type %d)\n",(int)t); + length --; + } + } +} + +/* + * main: open the file named on the command line (after an optional -v), + * check the RDOFF signature and version, then print the header records + * and the text and data segment lengths; with -v the segment bytes are + * printed in hex as well. + */ +int main(int argc,char **argv) { + char id[7]; + long l; + int verbose = 0; + + puts("RDOFF Dump utility v1.1 (C) Copyright 1996 Julian R Hall"); + + if (argc < 2) { + fputs("Usage: rdfdump [-v] \n",stderr); + exit(1); + } + + if (! strcmp (argv[1], "-v") ) + { + verbose = 1; + if (argc < 3) + { + fputs("required parameter missing\n",stderr); + exit(1); + } + argv++; + } + + infile = fopen(argv[1],"rb"); + if (! infile) { + fprintf(stderr,"rdfdump: Could not open %s\n",argv[1]); + exit(1); + } + + fread(id,6,1,infile); + if (strncmp(id,"RDOFF",5)) { + fputs("rdfdump: File does not contain valid RDOFF header\n",stderr); + exit(1); + } + + printf("File %s: RDOFF version %c\n\n",argv[1],id[5]); + if (id[5] < '1' || id[5] > '1') { + fprintf(stderr,"rdfdump: unknown RDOFF version '%c'\n",id[5]); + exit(1); + } + + fread(&l,4,1,infile); + l = translatelong(l); + printf("Header (%ld bytes):\n",l); + print_header(l); + + fread(&l,4,1,infile); + l = translatelong(l); + printf("\nText segment length = %ld bytes\n",l); + while(l--) { + fread(id,1,1,infile); + if (verbose) printf(" %02x",(int) (unsigned char)id[0]); + } + if (verbose) printf("\n\n"); + + fread(&l,4,1,infile); + l = translatelong(l); + printf("Data segment length = %ld bytes\n",l); + + if (verbose) + { + while (l--) { + fread(id,1,1,infile); + printf(" %02x",(int) (unsigned char) id[0]); + } + printf("\n"); + } + fclose(infile); + return 0; +} diff --git a/rdoff/rdfload.c b/rdoff/rdfload.c new file mode 100644 index 00000000..ad340b36 --- /dev/null +++ b/rdoff/rdfload.c @@ -0,0 +1,173 @@ +/* rdfload.c RDOFF Object File loader library + * + * The Netwide Assembler is copyright (C) 
1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * Permission to use this file in your own projects is granted, as long + * as acknowledgement is given in an appropriate manner to its authors, + * with instructions of how to obtain a copy via ftp. + */ + +#include +#include + +#include "rdfload.h" +#include "symtab.h" +#include "rdoff.h" +#include "collectn.h" + +extern int rdf_errno; + +rdfmodule * rdfload(const char *filename) +{ + rdfmodule * f = malloc(sizeof(rdfmodule)); + long bsslength = 0; + char * hdr; + rdfheaderrec *r; + + if (f == NULL) + { + rdf_errno = 6; /* out of memory */ + return NULL; + } + + f->symtab = symtabNew(); + if (!f->symtab) + { + free(f); + rdf_errno = 6; + return NULL; + } + + /* open the file */ + if ( rdfopen( &(f->f), filename ) ) { + free(f); + return NULL; + } + + /* read in text and data segments, and header */ + + f->t = malloc (f->f.code_len); + f->d = malloc (f->f.data_len); /* BSS seg allocated later */ + hdr = malloc (f->f.header_len); + + if (! f->t || ! f->d || !hdr) { + rdf_errno = 6; + rdfclose(&f->f); + if (f->t) free(f->t); + if (f->d) free(f->d); + free(f); + return NULL; + } + + if ( rdfloadseg (&f->f,RDOFF_HEADER,hdr) || + rdfloadseg (&f->f,RDOFF_CODE,f->t) || + rdfloadseg (&f->f,RDOFF_DATA,f->d) ) + { + rdfclose(&f->f); + free(f->t); + free(f->d); + free(f); + free(hdr); + return NULL; + } + + rdfclose(&f->f); + + /* Allocate BSS segment; step through header and count BSS records */ + + while ( ( r = rdfgetheaderrec (&f->f) ) ) + { + if (r->type == 5) + bsslength += r->b.amount; + } + + f->b = malloc ( bsslength ); + if (! 
f->b ) + { + free(f->t); + free(f->d); + free(f); + free(hdr); + rdf_errno = 6; + return NULL; + } + + rdfheaderrewind (&f->f); + + f->textrel = (long)f->t; + f->datarel = (long)f->d; + f->bssrel = (long)f->b; + + return f; +} + +int rdf_relocate(rdfmodule * m) +{ + rdfheaderrec * r; + Collection imports; + symtabEnt e; + long rel; + unsigned char * seg; + + rdfheaderrewind ( & m->f ); + collection_init(&imports); + + while ( (r = rdfgetheaderrec ( & m->f ) ) ) + { + switch (r->type) + { + case 1: /* Relocation record */ + + /* calculate relocation factor */ + + if (r->r.refseg == 0) rel = m->textrel; + else if (r->r.refseg == 1) rel = m->datarel; + else if (r->r.refseg == 2) rel = m->bssrel; + else + /* We currently do not support load-time linkage. + This should be added some time soon... */ + + return 1; /* return error code */ + + if ((r->r.segment & 63) == 0) seg = m->t; + else if ((r->r.segment & 63) == 1) seg = m->d; + else + return 1; + + /* it doesn't matter in this case that the code is non-portable, + as the entire concept of executing a module like this is + non-portable */ + switch(r->r.length) { + case 1: + seg[r->r.offset] += (char) rel; + break; + case 2: + *(int16 *)(seg + r->r.offset) += (int16) rel; + break; + case 4: + *(long *)(seg + r->r.offset) += rel; + break; + } + break; + + case 3: /* export record - add to symtab */ + e.segment = r->e.segment; + e.offset = r->e.offset + + (e.segment == 0 ? m->textrel : /* 0 -> code */ + e.segment == 1 ? m->datarel : /* 1 -> data */ + m->bssrel) ; /* 2 -> bss */ + e.flags = 0; + e.name = malloc(strlen(r->e.label) + 1); + if (! 
e.name) + return 1; + + strcpy(e.name,r->e.label); + symtabInsert(m->symtab,&e); + break; + } + } + return 0; +} diff --git a/rdoff/rdfload.h b/rdoff/rdfload.h new file mode 100644 index 00000000..5e264b93 --- /dev/null +++ b/rdoff/rdfload.h @@ -0,0 +1,29 @@ +/* rdfload.h RDOFF Object File loader library header file + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + * + * See the file 'rdfload.c' for special license information for this + * file. + */ + +#ifndef _RDFLOAD_H +#define _RDFLOAD_H + +#include "rdoff.h" + +typedef struct RDFModuleStruct { + rdffile f; /* file structure */ + unsigned char * t, * d, * b; /* text, data, and bss segments */ + long textrel; + long datarel; + long bssrel; + void * symtab; +} rdfmodule; + +rdfmodule * rdfload(const char * filename); +int rdf_relocate(rdfmodule * m); + +#endif diff --git a/rdoff/rdoff.c b/rdoff/rdoff.c new file mode 100644 index 00000000..9a969ad0 --- /dev/null +++ b/rdoff/rdoff.c @@ -0,0 +1,367 @@ +/* rdoff.c library of routines for manipulating rdoff files + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +/* TODO: The functions in this module assume they are running + * on a little-endian machine. This should be fixed to + * make it portable. + */ + +#include +#include + +#include "rdoff.h" + +/* ======================================================================== + * Code for memory buffers (for delayed writing of header until we know + * how long it is). 
+ * ======================================================================== */ + + +memorybuffer * newmembuf(){ + memorybuffer * t; + + t = malloc(sizeof(memorybuffer)); + + t->length = 0; + t->next = NULL; + return t; +} + +/* Append data to the buffer chain starting at b. bytes > 0 copies that many + * raw bytes; bytes == -2 / -4 stores an int16 / long from *data in + * little-endian order. A write that does not fit in the last block goes + * entirely into a freshly allocated block. */ +void membufwrite(memorybuffer *b, void *data, int bytes) { + int16 w; + long l; + + if (b->next) { /* memory buffer full - use next buffer */ + membufwrite(b->next,data,bytes); + return; + } + if ((bytes < 0 && b->length - bytes > BUF_BLOCK_LEN) + || (bytes > 0 && b->length + bytes > BUF_BLOCK_LEN)) { + + /* buffer full and no next allocated... allocate and initialise next + * buffer */ + + b->next = newmembuf(); + membufwrite(b->next,data,bytes); + return; /* data now lives in the new block; falling through would + copy it a second time past the end of this full block */ + } + + switch(bytes) { + case -4: /* convert to little-endian */ + l = * (long *) data ; + b->buffer[b->length++] = l & 0xFF; + l >>= 8 ; + b->buffer[b->length++] = l & 0xFF; + l >>= 8 ; + b->buffer[b->length++] = l & 0xFF; + l >>= 8 ; + b->buffer[b->length++] = l & 0xFF; + break; + + case -2: + w = * (int16 *) data ; + b->buffer[b->length++] = w & 0xFF; + w >>= 8 ; + b->buffer[b->length++] = w & 0xFF; + break; + + default: + while(bytes--) { + b->buffer[b->length++] = *(* (unsigned char **) &data); + + (* (unsigned char **) &data)++ ; + } + break; + } +} + +void membufdump(memorybuffer *b,FILE *fp) +{ + if (!b) return; + + fwrite (b->buffer, 1, b->length, fp); + + membufdump(b->next,fp); +} + +int membuflength(memorybuffer *b) +{ + if (!b) return 0; + return b->length + membuflength(b->next); +} + +void freemembuf(memorybuffer *b) +{ + if (!b) return; + freemembuf(b->next); + free(b); +} + +/* ========================================================================= + General purpose routines and variables used by the library functions + ========================================================================= */ + +long translatelong(long in) { /* translate from little endian to + local representation */ + long r; + unsigned char *i; + + i = (unsigned char *)&in; + r = i[3]; + r = (r << 8) + i[2]; + 
r = (r << 8) + i[1]; + r = (r << 8) + *i; + + return r; +} + +const char *RDOFFId = "RDOFF1"; /* written to the start of RDOFF files */ + +const char *rdf_errors[7] = { + "no error occurred","could not open file","invalid file format", + "error reading file","unknown error","header not read", + "out of memory"}; + +int rdf_errno = 0; + +/* ======================================================================== + The library functions + ======================================================================== */ + +int rdfopen(rdffile *f, const char *name) +{ + char buf[8]; + + if (translatelong(0x01020304) != 0x01020304) + { /* fix this to be portable! */ + fputs("*** this program requires a little endian machine\n",stderr); + fprintf(stderr,"01020304h = %08lxh\n",translatelong(0x01020304)); + exit(3); + } + + + f->fp = fopen(name,"rb"); + if (!f->fp) return rdf_errno = 1; /* error 1: file open error */ + + fread(buf,6,1,f->fp); /* read header */ + buf[6] = 0; + + if (strcmp(buf,RDOFFId)) { + fclose(f->fp); + return rdf_errno = 2; /* error 2: invalid file format */ + } + + if (fread(&f->header_len,1,4,f->fp) != 4) { + fclose(f->fp); + return rdf_errno = 3; /* error 3: file read error */ + } + + if (fseek(f->fp,f->header_len,SEEK_CUR)) { + fclose(f->fp); + return rdf_errno = 2; /* seek past end of file...? 
+ */ + } + + if (fread(&f->code_len,1,4,f->fp) != 4) { + fclose(f->fp); + return rdf_errno = 3; + } + + f->code_ofs = ftell(f->fp); + if (fseek(f->fp,f->code_len,SEEK_CUR)) { + fclose(f->fp); + return rdf_errno = 2; + } + + if (fread(&f->data_len,1,4,f->fp) != 4) { + fclose(f->fp); + return rdf_errno = 3; + } + + f->data_ofs = ftell(f->fp); + rewind(f->fp); + f->header_loc = NULL; + return 0; +} + +int rdfclose(rdffile *f) +{ + fclose(f->fp); + return 0; +} + +void rdfperror(const char *app,const char *name) +{ + fprintf(stderr,"%s:%s: %s\n",app,name,rdf_errors[rdf_errno]); +} + +int rdfloadseg(rdffile *f,int segment,void *buffer) +{ + long fpos; + long slen; + + switch(segment) { + case RDOFF_HEADER: + fpos = 10; + slen = f->header_len; + f->header_loc = (char *)buffer; + f->header_fp = 0; + break; + case RDOFF_CODE: + fpos = f->code_ofs; + slen = f->code_len; + break; + case RDOFF_DATA: + fpos = f->data_ofs; + slen = f->data_len; + break; + default: + fpos = 0; + slen = 0; + } + + if (fseek(f->fp,fpos,SEEK_SET)) + return rdf_errno = 4; + + if (fread(buffer,1,slen,f->fp) != slen) + return rdf_errno = 3; + + return 0; +} + +/* Macros for reading integers from header in memory */ + +#define RI8(v) v = f->header_loc[f->header_fp++] +#define RI16(v) { v = (f->header_loc[f->header_fp] + \ + (f->header_loc[f->header_fp+1] << 8)); \ + f->header_fp += 2; } + +#define RI32(v) { v = (f->header_loc[f->header_fp] + \ + (f->header_loc[f->header_fp+1] << 8) + \ + (f->header_loc[f->header_fp+2] << 16) + \ + (f->header_loc[f->header_fp+3] << 24)); \ + f->header_fp += 4; } + +/* Read a zero-terminated string of at most max characters into str (which + * must hold max+1 bytes); uses the caller's local int i as the index. */ +#define RS(str,max) { for(i=0;i<max;i++){\ + RI8(str[i]); if (!str[i]) break;} str[i]=0; } + +/* Decode the next record from the in-memory header loaded by + * rdfloadseg(f,RDOFF_HEADER,...). Returns a pointer to static storage that + * is overwritten by the next call, or NULL at end of header, when the + * header has not been loaded (rdf_errno = 5), or on an unknown record + * type (rdf_errno = 2). */ +rdfheaderrec *rdfgetheaderrec(rdffile *f) +{ + static rdfheaderrec r; + int i; + + if (!f->header_loc) { + rdf_errno = 5; + return NULL; + } + + if (f->header_fp >= f->header_len) return 0; + + RI8(r.type); + switch(r.type) { + case 1: /* Relocation record */ + RI8(r.r.segment); + RI32(r.r.offset); + RI8(r.r.length); + RI16(r.r.refseg); + break; + + case 2: /* Imported symbol record */ + RI16(r.i.segment); + RS(r.i.label,32); + break; + + case 3: /* Exported symbol 
record */ + RI8(r.e.segment); + RI32(r.e.offset); + RS(r.e.label,32); + break; + + case 4: /* DLL record */ + RS(r.d.libname,127); + break; + + case 5: /* BSS reservation record */ + RI32(r.b.amount); + break; + + default: + rdf_errno = 2; /* invalid file */ + return NULL; + } + return &r; +} + +void rdfheaderrewind(rdffile *f) +{ + f->header_fp = 0; +} + + +rdf_headerbuf * rdfnewheader(void) +{ + return newmembuf(); +} + +int rdfaddheader(rdf_headerbuf * h, rdfheaderrec * r) +{ + switch (r->type) + { + case 1: + membufwrite(h,&r->type,1); + membufwrite(h,&r->r.segment,1); + membufwrite(h,&r->r.offset,-4); + membufwrite(h,&r->r.length,1); + membufwrite(h,&r->r.refseg,-2); /* 9 bytes written */ + break; + + case 2: /* import */ + membufwrite(h,&r->type,1); + membufwrite(h,&r->i.segment,-2); + membufwrite(h,&r->i.label,strlen(r->i.label) + 1); + break ; + + case 3: /* export */ + membufwrite(h,&r->type,1); + membufwrite(h,&r->e.segment,1); + membufwrite(h,&r->e.offset,-4); + membufwrite(h,&r->e.label,strlen(r->e.label) + 1); + break ; + + case 4: /* DLL */ + membufwrite(h,&r->type,1); + membufwrite(h,&r->d.libname,strlen(r->d.libname) + 1); + break ; + + case 5: /* BSS */ + membufwrite(h,&r->type,1); + membufwrite(h,&r->b.amount,-4); + break ; + + default: + return (rdf_errno = 2); + } + return 0; +} + +int rdfwriteheader(FILE * fp, rdf_headerbuf * h) +{ + long l; + + fwrite (RDOFFId, 1, strlen(RDOFFId), fp) ; + + l = translatelong ( membuflength (h) ); + fwrite (&l, 4, 1, fp); + + membufdump(h, fp); + + return 0; /* no error handling in here... CHANGE THIS! */ +} + +void rdfdoneheader(rdf_headerbuf * h) +{ + freemembuf(h); +} diff --git a/rdoff/rdoff.h b/rdoff/rdoff.h new file mode 100644 index 00000000..b022400b --- /dev/null +++ b/rdoff/rdoff.h @@ -0,0 +1,112 @@ +/* rdoff.h RDOFF Object File manipulation routines header file + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. 
The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +#ifndef _RDOFF_H +#define _RDOFF_H "RDOFF1 support routines v0.1" + +typedef short int16; /* not sure if this will be required to be altered + at all... best to typedef it just in case */ + +/* the records that can be found in the RDOFF header */ + +struct RelocRec { + char type; /* must be 1 */ + char segment; /* only 0 for code, or 1 for data supported, + but add 64 for relative refs (ie do not require + reloc @ loadtime, only linkage) */ + long offset; /* from start of segment in which reference is loc'd */ + char length; /* 1 2 or 4 bytes */ + int16 refseg; /* segment to which reference refers to */ +}; + +struct ImportRec { + char type; /* must be 2 */ + int16 segment; /* segment number allocated to the label for reloc + records - label is assumed to be at offset zero + in this segment, so linker must fix up with offset + of segment and of offset within segment */ + char label[33]; /* zero terminated... should be written to file until + the zero, but not after it - max len = 32 chars */ +}; + +struct ExportRec { + char type; /* must be 3 */ + char segment; /* segment referred to (0/1) */ + long offset; /* offset within segment */ + char label[33]; /* zero terminated as above. 
max len = 32 chars */ +}; + +struct DLLRec { + char type; /* must be 4 */ + char libname[128]; /* name of library to link with at load time */ +}; + +struct BSSRec { + char type; /* must be 5 */ + long amount; /* number of bytes BSS to reserve */ +}; + +typedef union RDFHeaderRec { + char type; /* invariant throughout all below */ + struct RelocRec r; /* type == 1 */ + struct ImportRec i; /* type == 2 */ + struct ExportRec e; /* type == 3 */ + struct DLLRec d; /* type == 4 */ + struct BSSRec b; /* type == 5 */ +} rdfheaderrec; + +typedef struct RDFFileInfo { + FILE *fp; /* file descriptor; must be open to use this struct */ + int rdoff_ver; /* should be 1; any higher => not guaranteed to work */ + long header_len; + long code_len; + long data_len; + long code_ofs; + long data_ofs; + char *header_loc; /* keep location of header */ + long header_fp; /* current location within header for reading */ +} rdffile; + +#define BUF_BLOCK_LEN 4088 /* selected to match page size (4096) + * on 80x86 machines for efficiency */ +typedef struct memorybuffer { + int length; + char buffer[BUF_BLOCK_LEN]; + struct memorybuffer *next; +} memorybuffer; + +typedef memorybuffer rdf_headerbuf; + +/* segments used by RDOFF, understood by rdoffloadseg */ +#define RDOFF_CODE 0 +#define RDOFF_DATA 1 +#define RDOFF_HEADER -1 +/* mask for 'segment' in relocation records to find if relative relocation */ +#define RDOFF_RELATIVEMASK 64 +/* mask to find actual segment value in relocation records */ +#define RDOFF_SEGMENTMASK 63 + +/* RDOFF file manipulation functions */ +int rdfopen(rdffile *f,const char *name); +int rdfclose(rdffile *f); +int rdfloadseg(rdffile *f,int segment,void *buffer); +rdfheaderrec *rdfgetheaderrec(rdffile *f); /* returns static storage */ +void rdfheaderrewind(rdffile *f); /* back to start of header */ +void rdfperror(const char *app,const char *name); + +/* functions to write a new RDOFF header to a file - + use rdfnewheader to allocate a header, rdfaddheader to add 
+ records to it, + rdfwriteheader to write 'RDOFF1', length of header, and the header itself + to a file, and then rdfdoneheader to dispose of the header */ + +rdf_headerbuf *rdfnewheader(void); +int rdfaddheader(rdf_headerbuf *h,rdfheaderrec *r); +int rdfwriteheader(FILE *fp,rdf_headerbuf *h); +void rdfdoneheader(rdf_headerbuf *h); + +#endif /* _RDOFF_H */ diff --git a/rdoff/rdx.c b/rdoff/rdx.c new file mode 100644 index 00000000..28ffc427 --- /dev/null +++ b/rdoff/rdx.c @@ -0,0 +1,61 @@ +/* rdx.c RDOFF Object File loader program + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +/* note: most of the actual work of this program is done by the modules + "rdfload.c", which loads and relocates the object file, and by "rdoff.c", + which contains general purpose routines to manipulate RDOFF object + files. You can use these files in your own program to load RDOFF objects + and execute the code in them in a similar way to what is shown here. */ + +#include +#include + +#include "rdfload.h" +#include "rdoff.h" +#include "symtab.h" + +typedef int (*main_fn) (int,char**); /* Main function prototype */ + +/* Load the RDOFF module named by argv[1], relocate it, look up its '_main' + * export and call it with the remaining command-line arguments. Exits with + * status 255 if the module cannot be loaded, relocated, or has no '_main'. */ +int main(int argc, char **argv) +{ + rdfmodule * m; + main_fn code; + symtabEnt * s; + + if (argc < 2) + { + puts("usage: rdf <filename> [params]\n"); + exit(255); + } + + m = rdfload(argv[1]); + + if (! m) + { + rdfperror("rdf",argv[1]); + exit(255); + } + + if (rdf_relocate(m)) /* in this instance, the default relocation + values will work fine, but they may need changing + in other cases... */ + { + /* non-zero means a record could not be relocated; running the + module anyway would jump into unrelocated code */ + fprintf(stderr,"rdx: could not relocate '%s'\n",argv[1]); + exit(255); + } + + s = symtabFind(m->symtab, "_main"); + if (! 
s) + { + fprintf(stderr,"rdx: could not find symbol '_main' in '%s'\n",argv[1]); + exit(255); + } + + code = (main_fn) s->offset; + + argv++, argc--; /* remove 'rdx' from command line */ + + return code(argc,argv); /* execute */ +} + diff --git a/rdoff/symtab.c b/rdoff/symtab.c new file mode 100644 index 00000000..c0ff3e56 --- /dev/null +++ b/rdoff/symtab.c @@ -0,0 +1,80 @@ +/* symtab.c Routines to maintain and manipulate a symbol table + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ +#include +#include + +#include "symtab.h" + +/* TODO: Implement a hash table, not this stupid implementation which + is too slow to be of practical use */ + +/* Private data types */ + +typedef struct tagSymtab { + symtabEnt ent; + struct tagSymtab * next; +} symtabList; + +typedef symtabList * _symtab; + +void *symtabNew(void) +{ + void *p = malloc(sizeof(_symtab)); + if (p == NULL) { + fprintf(stderr,"symtab: out of memory\n"); + exit(3); + } + *(_symtab *)p = NULL; + + return p; +} + +void symtabDone(void *symtab) +{ + /* DO SOMETHING HERE! */ +} + +void symtabInsert(void *symtab,symtabEnt *ent) +{ + symtabList *l = malloc(sizeof(symtabList)); + + if (l == NULL) { + fprintf(stderr,"symtab: out of memory\n"); + exit(3); + } + + l->ent = *ent; + l->next = *(_symtab *)symtab; + *(_symtab *)symtab = l; +} + +symtabEnt *symtabFind(void *symtab,char *name) +{ + symtabList *l = *(_symtab *)symtab; + + while (l) { + if (!strcmp(l->ent.name,name)) { + return &(l->ent); + } + l = l->next; + } + return NULL; +} + +void symtabDump(void *symtab,FILE *of) +{ + symtabList *l = *(_symtab *)symtab; + + while(l) { + fprintf(of,"%32s %s:%08lx (%ld)\n",l->ent.name, + l->ent.segment ? 
"data" : "code" , + l->ent.offset, l->ent.flags); + l = l->next; + } +} + diff --git a/rdoff/symtab.h b/rdoff/symtab.h new file mode 100644 index 00000000..5780d447 --- /dev/null +++ b/rdoff/symtab.h @@ -0,0 +1,22 @@ +/* symtab.h Header file for symbol table manipulation routines + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. + */ + +typedef struct { + char *name; + long segment; + long offset; + long flags; +} symtabEnt; + +void *symtabNew(void); +void symtabDone(void *symtab); +void symtabInsert(void *symtab,symtabEnt *ent); +symtabEnt *symtabFind(void *symtab,char *name); +void symtabDump(void *symtab,FILE *of); + + diff --git a/sync.c b/sync.c new file mode 100644 index 00000000..77212d8c --- /dev/null +++ b/sync.c @@ -0,0 +1,84 @@ +/* sync.c the Netwide Disassembler synchronisation processing module + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#include +#include + +#include "sync.h" + +#define SYNC_MAX 4096 /* max # of sync points */ + +/* Min-heap of sync points ordered by pos. The heap is 1-based (root at + * synx[1], children of i at 2i and 2i+1), so slot 0 is unused and the + * array needs SYNC_MAX+1 elements to hold SYNC_MAX entries. */ +static struct Sync { + unsigned long pos; + unsigned long length; +} synx[SYNC_MAX+1]; +static int nsynx; + +void init_sync(void) { + nsynx = 0; +} + +/* Insert a sync point (pos,length) into the heap; silently dropped when + * SYNC_MAX points are already stored. */ +void add_sync(unsigned long pos, unsigned long length) { + int i; + + if (nsynx == SYNC_MAX) + return; /* can't do anything - overflow */ + + nsynx++; + synx[nsynx].pos = pos; + synx[nsynx].length = length; + + for (i = nsynx; i > 1; i /= 2) { + if (synx[i/2].pos > synx[i].pos) { + struct Sync t; + t = synx[i/2]; /* structure copy */ + synx[i/2] = synx[i]; /* structure copy again */ + synx[i] = t; /* another structure copy */ + } + } +} + +/* Discard every sync point that ends at or before position, then return the + * pos of the earliest remaining one (its length via *length if non-NULL), + * or ULONG_MAX with *length = 0 when none remain. */ +unsigned long next_sync(unsigned long position, unsigned long *length) { + while (nsynx > 0 && synx[1].pos + synx[1].length <= position) { + int i, j; + struct Sync t; + t = synx[nsynx]; /* structure copy */ + synx[nsynx] = synx[1]; /* structure copy */ + synx[1] = t; /* ditto */ + + nsynx--; + + i = 1; + while (i*2 <= nsynx) { + j = i*2; + if (synx[j].pos < synx[i].pos && + (j+1 > nsynx || synx[j+1].pos > synx[j].pos)) { + t = synx[j]; /* structure copy */ + synx[j] = synx[i]; /* lots of these... */ + synx[i] = t; /* ...aren't there? */ + i = j; + } else if (j+1 <= nsynx && synx[j+1].pos < synx[i].pos) { + t = synx[j+1]; /* structure copy */ + synx[j+1] = synx[i]; /* structure copy */ + synx[i] = t; /* structure copy */ + i = j+1; + } else + break; + } + } + + if (nsynx > 0) { + if (length) + *length = synx[1].length; + return synx[1].pos; + } else { + if (length) + *length = 0L; + return ULONG_MAX; + } +} diff --git a/sync.h b/sync.h new file mode 100644 index 00000000..ecb92011 --- /dev/null +++ b/sync.h @@ -0,0 +1,16 @@ +/* sync.h header file for sync.c + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ */ + +#ifndef NASM_SYNC_H +#define NASM_SYNC_H + +void init_sync(void); +void add_sync(unsigned long position, unsigned long length); +unsigned long next_sync(unsigned long position, unsigned long *length); + +#endif diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 00000000..5f0e5c6f --- /dev/null +++ b/test/Makefile @@ -0,0 +1,2 @@ +clean: + rm -f *.o *.obj *.com bintest inctest diff --git a/test/aouttest.asm b/test/aouttest.asm new file mode 100644 index 00000000..c52f1120 --- /dev/null +++ b/test/aouttest.asm @@ -0,0 +1,83 @@ +; test source file for assembling to a.out +; build with: +; nasm -f aout aouttest.asm +; gcc -o aouttest aouttest.c aouttest.o +; (assuming your gcc is a.out) + +; This file should test the following: +; [1] Define and export a global text-section symbol +; [2] Define and export a global data-section symbol +; [3] Define and export a global BSS-section symbol +; [4] Define a non-global text-section symbol +; [5] Define a non-global data-section symbol +; [6] Define a non-global BSS-section symbol +; [7] Define a COMMON symbol +; [8] Define a NASM local label +; [9] Reference a NASM local label +; [10] Import an external symbol +; [11] Make a PC-relative call to an external symbol +; [12] Reference a text-section symbol in the text section +; [13] Reference a data-section symbol in the text section +; [14] Reference a BSS-section symbol in the text section +; [15] Reference a text-section symbol in the data section +; [16] Reference a data-section symbol in the data section +; [17] Reference a BSS-section symbol in the data section + +[BITS 32] +[GLOBAL _lrotate] ; [1] +[GLOBAL _greet] ; [1] +[GLOBAL _asmstr] ; [2] +[GLOBAL _textptr] ; [2] +[GLOBAL _selfptr] ; [2] +[GLOBAL _integer] ; [3] +[EXTERN _printf] ; [10] +[COMMON _commvar 4] ; [7] + +[SECTION .text] + +; prototype: long lrotate(long x, int num); +_lrotate: ; [1] + push ebp + mov ebp,esp + mov eax,[ebp+8] + mov ecx,[ebp+12] +.label rol eax,1 ; [4] [8] + loop 
.label ; [9] [12] + mov esp,ebp + pop ebp + ret + +; prototype: void greet(void); +_greet mov eax,[_integer] ; [14] + inc eax + mov [localint],eax ; [14] + push dword [_commvar] + mov eax,[localptr] ; [13] + push dword [eax] ; + push dword [_integer] ; [1] [14] + push dword _printfstr ; [13] + call _printf ; [11] + add esp,16 + ret + +[SECTION .data] + +; a string +_asmstr db 'hello, world', 0 ; [2] + +; a string for Printf +_printfstr db "integer==%d, localint==%d, commvar=%d" + db 10, 0 + +; some pointers +localptr dd localint ; [5] [17] +_textptr dd _greet ; [15] +_selfptr dd _selfptr ; [16] + +[SECTION .bss] + +; an integer +_integer resd 1 ; [3] + +; a local integer +localint resd 1 ; [6] diff --git a/test/aouttest.c b/test/aouttest.c new file mode 100644 index 00000000..9a8eba30 --- /dev/null +++ b/test/aouttest.c @@ -0,0 +1,35 @@ +/* + * test source file for assembling to a.out + * build with: + * nasm -f aout aouttest.asm + * gcc -o aouttest aouttest.c aouttest.o + * (assuming your gcc is a.out) + */ + +#include + +extern int lrotate(long, int); +extern void greet(void); +extern char asmstr[]; +extern void *selfptr; +extern void *textptr; +extern int integer, commvar; + +int main(void) { + + printf("Testing lrotate: should get 0x00400000, 0x00000001\n"); + printf("lrotate(0x00040000, 4) = 0x%08lx\n", lrotate(0x40000,4)); + printf("lrotate(0x00040000, 14) = 0x%08lx\n", lrotate(0x40000,14)); + + printf("This string should read `hello, world': `%s'\n", asmstr); + + printf("The integers here should be 1234, 1235 and 4321:\n"); + integer = 1234; + commvar = 4321; + greet(); + + printf("These pointers should be equal: %p and %p\n", + &greet, textptr); + + printf("So should these: %p and %p\n", selfptr, &selfptr); +} diff --git a/test/bintest.asm b/test/bintest.asm new file mode 100644 index 00000000..0a3c4ae9 --- /dev/null +++ b/test/bintest.asm @@ -0,0 +1,56 @@ +; test source file for assembling to binary files +; build with: +; nasm -f bin -o bintest.com 
bintest.asm + +; When run (as a DOS .COM file), this program should print +; hello, world +; on two successive lines, then exit cleanly. + +; This file should test the following: +; [1] Define a text-section symbol +; [2] Define a data-section symbol +; [3] Define a BSS-section symbol +; [4] Define a NASM local label +; [5] Reference a NASM local label +; [6] Reference a text-section symbol in the text section +; [7] Reference a data-section symbol in the text section +; [8] Reference a BSS-section symbol in the text section +; [9] Reference a text-section symbol in the data section +; [10] Reference a data-section symbol in the data section +; [11] Reference a BSS-section symbol in the data section + +[BITS 16] +[ORG 0x100] + +[SECTION .text] + + jmp start ; [6] + +end mov ax,0x4c00 ; [1] + int 0x21 + +start mov byte [bss_sym],',' ; [1] [8] + mov bx,[bssptr] ; [7] + mov al,[bx] + mov bx,[dataptr] ; [7] + mov [bx],al + mov cx,2 +.loop mov dx,datasym ; [1] [4] [7] + mov ah,9 + push cx + int 0x21 + pop cx + loop .loop ; [5] [6] + mov bx,[textptr] ; [7] + jmp bx + +[SECTION .data] + +datasym db 'hello world', 13, 10, '$' ; [2] +bssptr dw bss_sym ; [2] [11] +dataptr dw datasym+5 ; [2] [10] +textptr dw end ; [2] [9] + +[SECTION .bss] + +bss_sym resb 1 ; [3] diff --git a/test/cofftest.asm b/test/cofftest.asm new file mode 100644 index 00000000..bb843a15 --- /dev/null +++ b/test/cofftest.asm @@ -0,0 +1,82 @@ +; test source file for assembling to COFF +; build with (under DJGPP, for example): +; nasm -f coff cofftest.asm +; gcc -o cofftest cofftest.c cofftest.o + +; This file should test the following: +; [1] Define and export a global text-section symbol +; [2] Define and export a global data-section symbol +; [3] Define and export a global BSS-section symbol +; [4] Define a non-global text-section symbol +; [5] Define a non-global data-section symbol +; [6] Define a non-global BSS-section symbol +; [7] Define a COMMON symbol +; [8] Define a NASM local label +; [9] 
Reference a NASM local label +; [10] Import an external symbol +; [11] Make a PC-relative call to an external symbol +; [12] Reference a text-section symbol in the text section +; [13] Reference a data-section symbol in the text section +; [14] Reference a BSS-section symbol in the text section +; [15] Reference a text-section symbol in the data section +; [16] Reference a data-section symbol in the data section +; [17] Reference a BSS-section symbol in the data section + +[BITS 32] +[GLOBAL _lrotate] ; [1] +[GLOBAL _greet] ; [1] +[GLOBAL _asmstr] ; [2] +[GLOBAL _textptr] ; [2] +[GLOBAL _selfptr] ; [2] +[GLOBAL _integer] ; [3] +[EXTERN _printf] ; [10] +[COMMON _commvar 4] ; [7] + +[SECTION .text] + +; prototype: long lrotate(long x, int num); +_lrotate: ; [1] + push ebp + mov ebp,esp + mov eax,[ebp+8] + mov ecx,[ebp+12] +.label rol eax,1 ; [4] [8] + loop .label ; [9] [12] + mov esp,ebp + pop ebp + ret + +; prototype: void greet(void); +_greet mov eax,[_integer] ; [14] + inc eax + mov [localint],eax ; [14] + push dword [_commvar] + mov eax,[localptr] ; [13] + push dword [eax] + push dword [_integer] ; [1] [14] + push dword _printfstr ; [13] + call _printf ; [11] + add esp,16 + ret + +[SECTION .data] + +; a string +_asmstr db 'hello, world', 0 ; [2] + +; a string for Printf +_printfstr db "integer==%d, localint==%d, commvar=%d" + db 10, 0 + +; some pointers +localptr dd localint ; [5] [17] +_textptr dd _greet ; [15] +_selfptr dd _selfptr ; [16] + +[SECTION .bss] + +; an integer +_integer resd 1 ; [3] + +; a local integer +localint resd 1 ; [6] diff --git a/test/cofftest.c b/test/cofftest.c new file mode 100644 index 00000000..4dec0df9 --- /dev/null +++ b/test/cofftest.c @@ -0,0 +1,34 @@ +/* + * test source file for assembling to COFF + * build with (under DJGPP, for example): + * nasm -f coff cofftest.asm + * gcc -o cofftest cofftest.c cofftest.o + */ + +#include + +extern int lrotate(long, int); +extern void greet(void); +extern char asmstr[]; +extern void *selfptr; 
+extern void *textptr; +extern int integer, commvar; + +int main(void) { + + printf("Testing lrotate: should get 0x00400000, 0x00000001\n"); + printf("lrotate(0x00040000, 4) = 0x%08lx\n", lrotate(0x40000,4)); + printf("lrotate(0x00040000, 14) = 0x%08lx\n", lrotate(0x40000,14)); + + printf("This string should read `hello, world': `%s'\n", asmstr); + + printf("The integers here should be 1234, 1235 and 4321:\n"); + integer = 1234; + commvar = 4321; + greet(); + + printf("These pointers should be equal: %p and %p\n", + &greet, textptr); + + printf("So should these: %p and %p\n", selfptr, &selfptr); +} diff --git a/test/elftest.asm b/test/elftest.asm new file mode 100644 index 00000000..a6034a6f --- /dev/null +++ b/test/elftest.asm @@ -0,0 +1,83 @@ +; test source file for assembling to ELF +; build with: +; nasm -f elf elftest.asm +; gcc -o elftest elftest.c elftest.o +; (assuming your gcc is ELF) + +; This file should test the following: +; [1] Define and export a global text-section symbol +; [2] Define and export a global data-section symbol +; [3] Define and export a global BSS-section symbol +; [4] Define a non-global text-section symbol +; [5] Define a non-global data-section symbol +; [6] Define a non-global BSS-section symbol +; [7] Define a COMMON symbol +; [8] Define a NASM local label +; [9] Reference a NASM local label +; [10] Import an external symbol +; [11] Make a PC-relative call to an external symbol +; [12] Reference a text-section symbol in the text section +; [13] Reference a data-section symbol in the text section +; [14] Reference a BSS-section symbol in the text section +; [15] Reference a text-section symbol in the data section +; [16] Reference a data-section symbol in the data section +; [17] Reference a BSS-section symbol in the data section + +[BITS 32] +[GLOBAL lrotate] ; [1] +[GLOBAL greet] ; [1] +[GLOBAL asmstr] ; [2] +[GLOBAL textptr] ; [2] +[GLOBAL selfptr] ; [2] +[GLOBAL integer] ; [3] +[EXTERN printf] ; [10] +[COMMON commvar 4] ; [7] 
+ +[SECTION .text] + +; prototype: long lrotate(long x, int num); +lrotate: ; [1] + push ebp + mov ebp,esp + mov eax,[ebp+8] + mov ecx,[ebp+12] +.label rol eax,1 ; [4] [8] + loop .label ; [9] [12] + mov esp,ebp + pop ebp + ret + +; prototype: void greet(void); +greet mov eax,[integer] ; [14] + inc eax + mov [localint],eax ; [14] + push dword [commvar] + mov eax,[localptr] ; [13] + push dword [eax] + push dword [integer] ; [1] [14] + push dword printfstr ; [13] + call printf ; [11] + add esp,16 + ret + +[SECTION .data] + +; a string +asmstr db 'hello, world', 0 ; [2] + +; a string for Printf +printfstr db "integer==%d, localint==%d, commvar=%d" + db 10, 0 + +; some pointers +localptr dd localint ; [5] [17] +textptr dd greet ; [15] +selfptr dd selfptr ; [16] + +[SECTION .bss] + +; an integer +integer resd 1 ; [3] + +; a local integer +localint resd 1 ; [6] diff --git a/test/elftest.c b/test/elftest.c new file mode 100644 index 00000000..1965fcf8 --- /dev/null +++ b/test/elftest.c @@ -0,0 +1,35 @@ +/* + * test source file for assembling to ELF + * build with: + * nasm -f elf elftest.asm + * gcc -o elftest elftest.c elftest.o + * (assuming your gcc is ELF) + */ + +#include + +extern int lrotate(long, int); +extern void greet(void); +extern char asmstr[]; +extern void *selfptr; +extern void *textptr; +extern int integer, commvar; + +int main(void) { + + printf("Testing lrotate: should get 0x00400000, 0x00000001\n"); + printf("lrotate(0x00040000, 4) = 0x%08lx\n", lrotate(0x40000,4)); + printf("lrotate(0x00040000, 14) = 0x%08lx\n", lrotate(0x40000,14)); + + printf("This string should read `hello, world': `%s'\n", asmstr); + + printf("The integers here should be 1234, 1235 and 4321:\n"); + integer = 1234; + commvar = 4321; + greet(); + + printf("These pointers should be equal: %p and %p\n", + &greet, textptr); + + printf("So should these: %p and %p\n", selfptr, &selfptr); +} diff --git a/test/inc1.asm b/test/inc1.asm new file mode 100644 index 00000000..e9e5819b --- 
/dev/null +++ b/test/inc1.asm @@ -0,0 +1,4 @@ +; This file is part of the include test. +; See inctest.asm for build instructions. + +message: db 'hello, world',13,10,'$' diff --git a/test/inc2.asm b/test/inc2.asm new file mode 100644 index 00000000..c3ba2f75 --- /dev/null +++ b/test/inc2.asm @@ -0,0 +1,8 @@ +; This file is part of the include test. +; See inctest.asm for build instructions. + +_main: mov dx,message + mov ah,9 + int 21h + mov ax,4c00h + int 21h diff --git a/test/inctest.asm b/test/inctest.asm new file mode 100644 index 00000000..95ab40ff --- /dev/null +++ b/test/inctest.asm @@ -0,0 +1,15 @@ +; This file, plus inc1.asm and inc2.asm, test NASM's file inclusion +; mechanism. +; +; This produces a DOS .COM file: to assemble, use +; nasm -f bin inctest.asm -o inctest.com +; and when run, it should print `hello, world'. + +[BITS 16] +[ORG 0x100] + + jmp _main + +[INC inc1.asm] + +[INCLUDE inc2.asm] diff --git a/test/objlink.c b/test/objlink.c new file mode 100644 index 00000000..2f92f05e --- /dev/null +++ b/test/objlink.c @@ -0,0 +1,30 @@ +/* + * test source file for assembling to Microsoft 16-bit .OBJ + * build with (16-bit Microsoft C): + * nasm -f obj objtest.asm + * cl /AL objtest.obj objlink.c + * other compilers should work too, provided they handle large + * model in the same way as MS C + */ + +#include + +char text[] = "hello, world\n"; + +extern void function(char *); +extern int bsssym, commvar; +extern void *selfptr; +extern void *selfptr2; + +int main(void) { + printf("these should be identical: %p, %p\n", + (long) selfptr, (long) &selfptr); + printf("these should be equivalent but different: %p, %p\n", + (long) selfptr2, (long) &selfptr2); + printf("you should see \"hello, world\" twice:\n"); + bsssym = 0xF00D; + commvar = 0xD00F; + function(text); + printf("this should be 0xF00E: 0x%X\n", bsssym); + printf("this should be 0xD00E: 0x%X\n", commvar); +} diff --git a/test/objtest.asm b/test/objtest.asm new file mode 100644 index 
00000000..8530baee --- /dev/null +++ b/test/objtest.asm @@ -0,0 +1,82 @@ +; test source file for assembling to Microsoft 16-bit .OBJ +; build with (16-bit Microsoft C): +; nasm -f obj objtest.asm +; cl /AL objtest.obj objlink.c +; other compilers should work too, provided they handle large +; model in the same way as MS C + +; This file should test the following: +; [1] Define and export a global symbol +; [2] Define a non-global symbol +; [3] Define a common symbol +; [4] Define a NASM local label +; [5] Reference a NASM local label +; [6] Import an external symbol +; [7] Make a PC-relative relocated reference +; [8] Reference a symbol in the same section as itself +; [9] Reference a symbol in a different segment from itself +; [10] Define a segment group +; [11] Take the offset of a symbol in a grouped segment w.r.t. its segment +; [12] Reserve uninitialised data space in a segment +; [13] Directly take the segment address of a segment +; [14] Directly take the segment address of a group +; [15] Use SEG on a non-external +; [16] Use SEG on an external + +[bits 16] + +[global _bsssym] ; [1] +[global _function] ; [1] +[global _selfptr] ; [1] +[global _selfptr2] ; [1] +[common _commvar 2] ; [3] +[extern _printf] ; [6] + +[group mygroup mybss mydata] ; [10] +[group mygroup2 mycode mycode2] ; [10] + +[segment mycode private] + +_function push bp + mov bp,sp + push ds + mov ax,mygroup ; [14] + mov ds,ax + inc word [_bsssym] ; [9] + mov ax,seg _commvar + mov ds,ax + dec word [_commvar] + pop ds + mov ax,[bp+6] + mov dx,[bp+8] + push dx + push ax + push dx + push ax + call far [cs:.printf] ; [5] [8] + pop ax + pop ax + call trampoline ; [7] + pop ax + pop ax + mov sp,bp + pop bp + retf + +.printf dw _printf, seg _printf ; [2] [4] [16] + +[segment mycode2 private] + +trampoline: pop ax + push cs + push ax + jmp far _printf + +[segment mybss private] + +_bsssym resw 64 ; [12] + +[segment mydata private] + +_selfptr dw _selfptr, seg _selfptr ; [8] [15] +_selfptr2 dw 
_selfptr2 wrt mydata, mydata ; [11] [13] -- 2.11.4.GIT