From 6768eb71d8debde65562619c938b997aea1bd9f9 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 30 Apr 2002 20:52:26 +0000 Subject: [PATCH] NASM 0.95 --- Changes | 120 +++++++++++++- Makefile | 33 ++-- Makefile.bc2 | 57 +++++-- Makefile.bor | 6 +- Makefile.dos | 16 +- Makefile.dos => Makefile.sc | 197 ++++++++++++++++++++--- Makefile.dos => Makefile.vc | 39 ++--- Makefile.wc | 6 +- Makefile.wcw | 6 +- Readme | 48 ++++-- assemble.c | 202 ++++++++++++++++------- assemble.h | 3 +- disasm.c | 4 +- float.c | 16 +- insns.pl | 2 +- internal.doc | 21 ++- labels.c | 6 +- listing.c | 240 ++++++++++++++++++++++++++++ preproc.h => listing.h | 8 +- macros.c | 41 +++-- macros.pl | 27 ++++ misc/nasm.sl | 8 +- nasm.c | 340 +++++++++++++++++++++++++++++---------- nasm.doc | 382 +++++++++++++++++++++++++++++++++++++------- nasm.h | 98 +++++++++++- ndisasm.doc | 2 +- outaout.c | 3 +- outas86.c | 3 +- outbin.c | 5 + outcoff.c | 3 +- outdbg.c | 4 +- outelf.c | 20 ++- outform.c | 4 +- outform.h | 15 +- outobj.c | 107 ++++++++++--- outrdf.c | 7 +- parser.c | 38 +++-- preproc.c | 272 +++++++++++++++++++++++-------- preproc.h | 4 + rdoff/Makefile.sc | 112 +++++++++++++ rdoff/README | 3 +- standard.mac | 77 +++++++++ sync.c | 22 ++- test/objlink.c | 1 + 44 files changed, 2157 insertions(+), 471 deletions(-) copy Makefile.dos => Makefile.sc (55%) copy Makefile.dos => Makefile.vc (74%) create mode 100644 listing.c copy preproc.h => listing.h (67%) create mode 100644 macros.pl create mode 100644 rdoff/Makefile.sc create mode 100644 standard.mac diff --git a/Changes b/Changes index 8d1997b8..4a01227e 100644 --- a/Changes +++ b/Changes @@ -82,8 +82,8 @@ seg-fault under Linux. Included a new Borland C makefile, Makefile.bc2, donated by Fox Cutter . -0.94 not released yet ---------------------- +0.94 released April 1997 +------------------------ Major item: added the macro processor. 
@@ -124,4 +124,120 @@ Due to the advent of the preprocessor, the [INCLUDE] and [INC] directives have become obsolete. They are still supported in this version, with a warning, but won't be in the next. +Fixed a bug in OBJ format, which caused incorrect object records to +be output when absolute labels were made global. + Updates to RDOFF subdirectory, and changes to outrdf.c. + +0.95 released July 1997 +----------------------- + +Fixed yet another ELF bug. This one manifested if the user relied on +the default segment, and attempted to define global symbols without +first explicitly declaring the target segment. + +Added makefiles (for NASM and the RDF tools) to build Win32 console +apps under Symantec C++. Donated by Mark Junker. + +Added `macros.bas' and `insns.bas', QBasic versions of the Perl +scripts that convert `standard.mac' to `macros.c' and convert +`insns.dat' to `insnsa.c' and `insnsd.c'. Also thanks to Mark +Junker. + +Changed the disassembled forms of the conditional instructions so +that JB is now emitted as JC, and other similar changes. Suggested +list by Ulrich Doewich. + +Added `@' to the list of valid characters to begin an identifier +with. + +Documentary changes, notably the addition of the `Common Problems' +section in nasm.doc. + +Fixed a bug relating to 32-bit PC-relative fixups in OBJ. + +Fixed a bug in perm_copy() in labels.c which was causing exceptions +in cleanup_labels() on some systems. + +Positivity sanity check in TIMES argument changed from a warning to +an error following a further complaint. + +Changed the acceptable limits on byte and word operands to allow +things like `~10111001b' to work. + +Fixed a major problem in the preprocessor which caused seg-faults if +macro definitions contained blank lines or comment-only lines. + +Fixed inadequate error checking on the commas separating the +arguments to `db', `dw' etc. + +Fixed a crippling bug in the handling of macros with operand counts +defined with a `+' modifier. 
+ +Fixed a bug whereby object file formats which stored the input file +name in the output file (such as OBJ and COFF) weren't doing so +correctly when the output file name was specified on the command +line. + +Removed [INC] and [INCLUDE] support for good, since they were +obsolete anyway. + +Fixed a bug in OBJ which caused all fixups to be output in 16-bit +(old-format) FIXUPP records, rather than putting the 32-bit ones in +FIXUPP32 (new-format) records. + +Added, tentatively, OS/2 object file support (as a minor variant on +OBJ). + +Updates to Fox Cutter's Borland C makefile, Makefile.bc2. + +Removed a spurious second fclose() on the output file. + +Added the `-s' command line option to redirect all messages which +would go to stderr (errors, help text) to stdout instead. + +Added the `-w' command line option to selectively suppress some +classes of assembly warning messages. + +Added the `-p' pre-include and `-d' pre-define command-line options. + +Added an include file search path: the `-i' command line option. + +Fixed a silly little preprocessor bug whereby starting a line with a +`%!' environment-variable reference caused an `unknown directive' +error. + +Added the long-awaited listing file support: the `-l' command line +option. + +Fixed a problem with OBJ format whereby, in the absence of any +explicit segment definition, non-global symbols declared in the +implicit default segment generated spurious EXTDEF records in the +output. + +Added the NASM environment variable. + +From this version forward, Win32 console-mode binaries will be +included in the DOS distribution in addition to the 16-bit binaries. +Added Makefile.vc for this purpose. + +Added `return 0;' to test/objlink.c to prevent compiler warnings. + +Added the __NASM_MAJOR__ and __NASM_MINOR__ standard defines. + +Added an alternative memory-reference syntax in which prefixing an +operand with `&' is equivalent to enclosing it in square brackets, +at the request of Fox Cutter. 
+ +Errors in pass two now cause the program to return a non-zero error +code, which they didn't before. + +Fixed the single-line macro cycle detection, which didn't work at +all on macros with no parameters (caused an infinite loop). Also +changed the behaviour of single-line macro cycle detection to work +like cpp, so that macros like `extrn' as given in the documentation +can be implemented. + +Fixed the implementation of WRT, which was too restrictive in that +you couldn't do `mov ax,[di+abc wrt dgroup]' because (di+abc) wasn't +a relocatable reference. diff --git a/Makefile b/Makefile index 465d6fdd..7a9c54cc 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ NASMOBJS = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) \ assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) \ outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) \ outobj.$(OBJ) outas86.$(OBJ) outrdf.$(OBJ) outdbg.$(OBJ) \ - preproc.$(OBJ) + preproc.$(OBJ) listing.$(OBJ) NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \ insnsd.$(OBJ) @@ -45,26 +45,30 @@ nasm$(EXE): $(NASMOBJS) ndisasm$(EXE): $(NDISASMOBJS) $(LINK) $(DLINKFLAGS) $(NDISASMOBJS) $(LIBRARIES) -assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h +assemble.$(OBJ): assemble.c nasm.h nasmlib.h assemble.h insns.h disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c float.$(OBJ): float.c nasm.h insnsa.$(OBJ): insnsa.c nasm.h insns.h insnsd.$(OBJ): insnsd.c nasm.h insns.h labels.$(OBJ): labels.c nasm.h nasmlib.h -nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h +listing.$(OBJ): listing.c nasm.h nasmlib.h listing.h +macros.$(OBJ): macros.c +names.$(OBJ): names.c +nasm.$(OBJ): nasm.c nasm.h nasmlib.h preproc.h parser.h assemble.h labels.h \ + outform.h listing.h nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h -ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h -outas86.$(OBJ): outas86.c nasm.h nasmlib.h -outaout.$(OBJ): outaout.c nasm.h nasmlib.h -outbin.$(OBJ): outbin.c 
nasm.h nasmlib.h -outcoff.$(OBJ): outcoff.c nasm.h nasmlib.h -outdbg.$(OBJ): outdbg.c nasm.h nasmlib.h -outelf.$(OBJ): outelf.c nasm.h nasmlib.h -outobj.$(OBJ): outobj.c nasm.h nasmlib.h -outrdf.$(OBJ): outrdf.c nasm.h nasmlib.h +ndisasm.$(OBJ): ndisasm.c nasm.h nasmlib.h sync.h disasm.h +outaout.$(OBJ): outaout.c nasm.h nasmlib.h outform.h +outas86.$(OBJ): outas86.c nasm.h nasmlib.h outform.h +outbin.$(OBJ): outbin.c nasm.h nasmlib.h outform.h +outcoff.$(OBJ): outcoff.c nasm.h nasmlib.h outform.h +outdbg.$(OBJ): outdbg.c nasm.h nasmlib.h outform.h +outelf.$(OBJ): outelf.c nasm.h nasmlib.h outform.h outform.$(OBJ): outform.c outform.h nasm.h +outobj.$(OBJ): outobj.c nasm.h nasmlib.h outform.h +outrdf.$(OBJ): outrdf.c nasm.h nasmlib.h outform.h parser.$(OBJ): parser.c nasm.h nasmlib.h parser.h float.h names.c -preproc.$(OBJ): preproc.c macros.c preproc.h nasm.h nasmlib.h +preproc.$(OBJ): preproc.c nasm.h nasmlib.h macros.c sync.$(OBJ): sync.c sync.h # These two source files are automagically generated from a single @@ -94,7 +98,6 @@ clean : # unless you're using the Makefile under Linux, running bash, with # gzip, GNU tar and a sensible version of zip readily available. -DOSEXES = nasm.exe ndisasm.exe MANPAGES = nasm.man ndisasm.man .SUFFIXES: .man .1 @@ -102,5 +105,5 @@ MANPAGES = nasm.man ndisasm.man .1.man: -man ./$< | ul > $@ -dist: $(AUTOSRCS) $(MANPAGES) $(DOSEXES) clean +dist: $(AUTOSRCS) $(MANPAGES) clean makedist.sh diff --git a/Makefile.bc2 b/Makefile.bc2 index d2e9b52d..7daf4a43 100644 --- a/Makefile.bc2 +++ b/Makefile.bc2 @@ -27,20 +27,43 @@ # Libaries, if not you'll get some errors. Make sure to keep the trailing # backslash, as it's needed, and remeber to use \\ not \ as that will cause # some errors. +# +# Also inportant, if you get a DGROUP error when you compile NASM, remove +# or comment out the 'NASMSize=l' line, and uncoment (remove the #) from the +# NASMSize=h line. Then run 'make Clean' to delete the object files. 
Then run +# make again to re-build NASM as huge. +# +# History: +# 06/13/97: * Added the EXED variable for the location to put the EXE files. +# * Because different versions of Borland and Turbo C have +# different GROUPings for the DGROUP, some version, when you +# compile NASM, you will get a DGROUP overflow error, making it +# so NASM has to be compiled as huge. As this isn't a constant +# through systems (and apparently some version of Borland, +# compiling as huge causes some errors) the NASMSize variable +# has been added to specify what size of code you want to +# compile as and defaults to large. +# 06/16/97: * Added 'merge duplicate strings' to the options for compiles. + +NASMSize=l #Compile Nasm as Large +#NASMSize=h #Compile Nasm as Huge LIB =c:\\tc\\lib\\ #location standard libaries OBJD=obj\\ #directory to put OBJ files in +EXED=.\ #directory to put the EXE files. CC = tcc #compiler LINK = tlink #linker -CCFLAGS = /c /O /A /ml /n$(OBJD) #compiler flags for NASM +CCFLAGS = /d /c /O /A /m$(NASMSize) /n$(OBJD) #compiler flags for NASM + #/d=merge duplicate strings #/c=compile only #/O=Optimise jumps #/A=ANSI standard C - #/ml=Model Large + #/m$(NASMSize)=the model to use #/n$(OBJD)= put the OBJ files in the diectory given. 
-DCCFLAGS = /c /O /A /mh /n$(OBJD) #compiler flags for NDISASM +DCCFLAGS = /d /c /O /A /mh /n$(OBJD) #compiler flags for NDISASM + #/d=merge duplicate strings #/c=compile only #/O=Optimise jumps #/A=ANSI standard C @@ -71,7 +94,8 @@ DASM_ASM=$(CC) $(DCCFLAGS) $&.c #command line for NDISASM NASMOBJS = $(OBJD)nasm.$(OBJ) $(OBJD)nasmlib.$(OBJ) $(OBJD)float.$(OBJ) \ $(OBJD)insnsa.$(OBJ) $(OBJD)assemble.$(OBJ) $(OBJD)labels.$(OBJ) \ - $(OBJD)parser.$(OBJ) $(OBJD)outform.$(OBJ) $(OBJD)preproc.$(OBJ) + $(OBJD)parser.$(OBJ) $(OBJD)outform.$(OBJ) $(OBJD)preproc.$(OBJ) \ + $(OBJD)listing.$(OBJ) ################################################################ #The OBJ files that NDISASM is dependent on @@ -96,17 +120,17 @@ all : nasm$(EXE) ndisasm$(EXE) #NASM, NDISASM compile, I hope it's self explanitorie nasm$(EXE): $(NASMOBJS) $(OUTOBJ) - $(LINK) $(LINKFLAGS) @&&^ #command for the linker - $(LIB)c0l.obj $(NASMOBJS) $(OUTOBJ) #OBJ file list, - nasm$(EXE) #EXE file name + $(LINK) $(LINKFLAGS) @&&^ #command for the linker + $(LIB)c0$(NASMSize).obj $(NASMOBJS) $(OUTOBJ) #OBJ file list + $(EXED)nasm$(EXE) #EXE file name # No need of a map file - $(LIB)cl.lib $(LIBRARIES) #Libaries needed + $(LIB)c$(NASMSize).lib $(LIBRARIES) #Libaries needed ^ ndisasm$(EXE): $(NDISASMOBJS) $(LINK) $(LINKFLAGS) @&&^ #command for the linker $(LIB)c0h.obj $(NDISASMOBJS) #OBJ file list - ndisasm$(EXE) #EXE file name + $(EXED)ndisasm$(EXE) #EXE file name # No need of a map file $(LIB)ch.lib $(LIBRARIES) #Libaries needed ^ @@ -123,8 +147,11 @@ $(OBJD)float.$(OBJ): float.c nasm.h $(OBJD)labels.$(OBJ): labels.c nasm.h nasmlib.h $(NASM_ASM) -$(OBJD)nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h -outform.h +$(OBJD)listing.$(OBJ): listing.c nasm.h nasmlib.h listing.h + $(NASM_ASM) + +$(OBJD)nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h \ + listing.h outform.h $(NASM_ASM) $(OBJD)nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h @@ -155,10 +182,10 @@ $(OBJD)insnsd.$(OBJ): 
insnsd.c nasm.h insns.h $(DASM_ASM) # This is a kludge from the word go, as we can't use the nasmlib.obj compiled -# for NASM, as it's the wrong model size, so we have to compile it again, -# but in huge. +# for NASM, as it could be the wrong model size, so we have to compile it +# again as huge to make sure. # -# So as not to overwrite the nasmlib.obj for NASM (if I did, that +# So as not to overwrite the nasmlib.obj for NASM (if it did, that # could cause all kinds of problems) it compiles it into nasmlibd.obj. # # the -o... switch tells it the name to compile the obj file to, right here @@ -205,4 +232,4 @@ clean : del nasm$(EXE) del ndisasm$(EXE) -# Makefile created by Fox Cutter --01/21/97 +# Makefile created by Fox Cutter --01/27/97 diff --git a/Makefile.bor b/Makefile.bor index 7de37918..c415de00 100644 --- a/Makefile.bor +++ b/Makefile.bor @@ -27,7 +27,7 @@ NASMOBJS1 = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) NASMOBJS2 = assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) NASMOBJS3 = outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) NASMOBJS4 = outobj.$(OBJ) outas86.$(OBJ) outdbg.$(OBJ) outrdf.$(OBJ) -NASMOBJS5 = preproc.$(OBJ) +NASMOBJS5 = preproc.$(OBJ) listing.$(OBJ) NASMOBJS = $(NASMOBJS1) $(NASMOBJS2) $(NASMOBJS3) $(NASMOBJS4) $(NASMOBJS5) @@ -57,7 +57,9 @@ float.$(OBJ): float.c nasm.h insnsa.$(OBJ): insnsa.c nasm.h insns.h insnsd.$(OBJ): insnsd.c nasm.h insns.h labels.$(OBJ): labels.c nasm.h nasmlib.h -nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h +listing.$(OBJ): listing.c nasm.h nasmlib.h listing.h +nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h \ + listing.h outform.h nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h outas86.$(OBJ): outas86.c nasm.h nasmlib.h diff --git a/Makefile.dos b/Makefile.dos index 99a7a72f..18a3b363 100644 --- a/Makefile.dos +++ b/Makefile.dos @@ -10,8 +10,8 @@ # It's been tested with Microsoft C 5.x plus 
Borland Make. (Yes, I # know it's silly, but...) -CC = cl -CCFLAGS = /c /O /AL +CC = cl /c /O /AL +QCL = qcl /c /AL LINK = cl LINKFLAGS = LIBRARIES = @@ -19,13 +19,13 @@ EXE = .exe# OBJ = obj# .c.$(OBJ): - $(CC) $(CCFLAGS) $*.c + $(CC) $*.c NASMOBJS = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) \ assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) \ outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) \ outobj.$(OBJ) outas86.$(OBJ) outrdf.$(OBJ) outdbg.$(OBJ) \ - preproc.$(OBJ) + preproc.$(OBJ) listing.$(OBJ) NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \ insnsd.$(OBJ) @@ -45,7 +45,9 @@ assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c float.$(OBJ): float.c nasm.h labels.$(OBJ): labels.c nasm.h nasmlib.h -nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h +listing.$(OBJ): listing.c nasm.h nasmlib.h listing.h +nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h \ + listing.h outform.h nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h outas86.$(OBJ): outas86.c nasm.h nasmlib.h @@ -65,9 +67,9 @@ sync.$(OBJ): sync.c sync.h # CL proper; and we don't need any optimisation in these modules # since they're just data. insnsa.$(OBJ): insnsa.c nasm.h insns.h - qcl /c /AL insnsa.c + $(QCL) insnsa.c insnsd.$(OBJ): insnsd.c nasm.h insns.h - qcl /c /AL insnsd.c + $(QCL) insnsd.c clean : del *.obj diff --git a/Makefile.dos b/Makefile.sc similarity index 55% copy from Makefile.dos copy to Makefile.sc index 99a7a72f..e8386bfd 100644 --- a/Makefile.dos +++ b/Makefile.sc @@ -1,75 +1,222 @@ -# Makefile for the Netwide Assembler under 16-bit DOS +# Makefile for the Netwide Assembler under 32-bit Windows(tm) + # + # The Netwide Assembler is copyright (C) 1996 Simon Tatham and + # Julian Hall. All rights reserved. 
The software is + # redistributable under the licence given in the file "Licence" + # distributed in the NASM archive. + # -# This Makefile is designed to build NASM using a 16-bit DOS C -# compiler such as Microsoft C, provided you have a compatible MAKE. -# It's been tested with Microsoft C 5.x plus Borland Make. (Yes, I -# know it's silly, but...) - -CC = cl -CCFLAGS = /c /O /AL -LINK = cl -LINKFLAGS = + +# This Makefile is designed to build NASM using the 32-bit WIN32 C + +# compiler Symantec(tm) C++ 7.5, provided you have a MAKE-utility + +# that's compatible to SMAKE. + + + +CC = sc + +CCFLAGS = -c -a1 -mn -Nc -w2 -w7 -o+time -5 + +# -5 optimize for pentium (tm) + +# -c compile only + +# -o-all no optimizations (to avoid problems in disasm.c) + +# -o+time optimize for speed + +# -o+space optimize for size + +# -A1 byte alignment for structures + +# -mn compile for Win32 executable + +# -Nc create COMDAT records + +# -w2 possible unattended assignment: off + +# -w7 for loops with empty instruction-body + + + +LINK = link + +LINKFLAGS = /noi /exet:NT /su:console + +# /noignorecase all symbols are case-sensitive + +# /exet:NT Exetype: NT (Win32) + +# /su:console Subsystem: Console (Console-App) + + + LIBRARIES = -EXE = .exe# -OBJ = obj# + +EXE = .exe + +OBJ = obj + + .c.$(OBJ): - $(CC) $(CCFLAGS) $*.c + + $(CC) $(CCFLAGS) $*.c + + + + + +# + +# modules needed for different programs + +# + + NASMOBJS = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) \ + assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) \ + outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) \ + outobj.$(OBJ) outas86.$(OBJ) outrdf.$(OBJ) outdbg.$(OBJ) \ - preproc.$(OBJ) + + preproc.$(OBJ) listing.$(OBJ) + + NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \ + insnsd.$(OBJ) + + + + +# + +# programs to create + +# + + + all : nasm$(EXE) ndisasm$(EXE) + + + + +# + # We have to have a horrible kludge here to get round the 128 character -# limit, as usual... 
-LINKOBJS = a*.obj f*.obj insnsa.obj l*.obj na*.obj o*.obj p*.obj + +# limit, as usual... we'll simply use LNK-files :) + +# + nasm$(EXE): $(NASMOBJS) - cl /Fenasm.exe /F 4000 $(LINKOBJS) + + $(LINK) $(LINKFLAGS) @<< + +$(NASMOBJS) + +nasm.exe; + +<< + + ndisasm$(EXE): $(NDISASMOBJS) - cl /Fendisasm.exe $(NDISASMOBJS) -assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h + $(LINK) $(LINKFLAGS) @<< + +$(NDISASMOBJS) + +ndisasm.exe; + +<< + + + + + + + +# + +# modules for programs + +# + + + disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c + +assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h + float.$(OBJ): float.c nasm.h + labels.$(OBJ): labels.c nasm.h nasmlib.h -nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h + +listing.$(OBJ): listing.c nasm.h nasmlib.h listing.h + +nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h \ + + listing.h outform.h + nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h + ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h + outas86.$(OBJ): outas86.c nasm.h nasmlib.h + outaout.$(OBJ): outaout.c nasm.h nasmlib.h + outbin.$(OBJ): outbin.c nasm.h nasmlib.h + outcoff.$(OBJ): outcoff.c nasm.h nasmlib.h + outdbg.$(OBJ): outdbg.c nasm.h nasmlib.h + outelf.$(OBJ): outelf.c nasm.h nasmlib.h + outobj.$(OBJ): outobj.c nasm.h nasmlib.h + outrdf.$(OBJ): outrdf.c nasm.h nasmlib.h + outform.$(OBJ): outform.c outform.h nasm.h + parser.$(OBJ): parser.c nasm.h nasmlib.h parser.h float.h names.c + preproc.$(OBJ): preproc.c macros.c preproc.h nasm.h nasmlib.h + sync.$(OBJ): sync.c sync.h -# Another grotty hack: QC is less likely to run out of memory than -# CL proper; and we don't need any optimisation in these modules -# since they're just data. 
insnsa.$(OBJ): insnsa.c nasm.h insns.h - qcl /c /AL insnsa.c + insnsd.$(OBJ): insnsd.c nasm.h insns.h - qcl /c /AL insnsd.c + + + + + + clean : + del *.obj + del nasm$(EXE) + del ndisasm$(EXE) + diff --git a/Makefile.dos b/Makefile.vc similarity index 74% copy from Makefile.dos copy to Makefile.vc index 99a7a72f..0140a008 100644 --- a/Makefile.dos +++ b/Makefile.vc @@ -1,51 +1,52 @@ -# Makefile for the Netwide Assembler under 16-bit DOS +# Makefile for the Netwide Assembler under Win32 # # The Netwide Assembler is copyright (C) 1996 Simon Tatham and # Julian Hall. All rights reserved. The software is # redistributable under the licence given in the file "Licence" # distributed in the NASM archive. # -# This Makefile is designed to build NASM using a 16-bit DOS C -# compiler such as Microsoft C, provided you have a compatible MAKE. -# It's been tested with Microsoft C 5.x plus Borland Make. (Yes, I -# know it's silly, but...) +# This Makefile is designed to build NASM as a Win32 command- +# line executable. It's been tested with Visual C++ 1.10. -CC = cl -CCFLAGS = /c /O /AL +CC = cl /c /O +QCL = cl /c LINK = cl LINKFLAGS = LIBRARIES = EXE = .exe# OBJ = obj# +SUFFIX = w# # by default, this makefile produces nasmw.exe and ndisasmw.exe .c.$(OBJ): - $(CC) $(CCFLAGS) $*.c + $(CC) $*.c NASMOBJS = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) \ assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) \ outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) \ outobj.$(OBJ) outas86.$(OBJ) outrdf.$(OBJ) outdbg.$(OBJ) \ - preproc.$(OBJ) + preproc.$(OBJ) listing.$(OBJ) NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \ insnsd.$(OBJ) -all : nasm$(EXE) ndisasm$(EXE) +all : nasm$(SUFFIX)$(EXE) ndisasm$(SUFFIX)$(EXE) # We have to have a horrible kludge here to get round the 128 character # limit, as usual... 
LINKOBJS = a*.obj f*.obj insnsa.obj l*.obj na*.obj o*.obj p*.obj -nasm$(EXE): $(NASMOBJS) - cl /Fenasm.exe /F 4000 $(LINKOBJS) +nasm$(SUFFIX)$(EXE): $(NASMOBJS) + cl /Fenasm$(SUFFIX).exe $(LINKOBJS) -ndisasm$(EXE): $(NDISASMOBJS) - cl /Fendisasm.exe $(NDISASMOBJS) +ndisasm$(SUFFIX)$(EXE): $(NDISASMOBJS) + cl /Fendisasm$(SUFFIX).exe $(NDISASMOBJS) assemble.$(OBJ): assemble.c nasm.h assemble.h insns.h disasm.$(OBJ): disasm.c nasm.h disasm.h sync.h insns.h names.c float.$(OBJ): float.c nasm.h labels.$(OBJ): labels.c nasm.h nasmlib.h -nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h +listing.$(OBJ): listing.c nasm.h nasmlib.h listing.h +nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h \ + listing.h outform.h nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h outas86.$(OBJ): outas86.c nasm.h nasmlib.h @@ -65,11 +66,11 @@ sync.$(OBJ): sync.c sync.h # CL proper; and we don't need any optimisation in these modules # since they're just data. 
insnsa.$(OBJ): insnsa.c nasm.h insns.h - qcl /c /AL insnsa.c + $(QCL) insnsa.c insnsd.$(OBJ): insnsd.c nasm.h insns.h - qcl /c /AL insnsd.c + $(QCL) insnsd.c clean : del *.obj - del nasm$(EXE) - del ndisasm$(EXE) + del nasm$(SUFFIX)$(EXE) + del ndisasm$(SUFFIX)$(EXE) diff --git a/Makefile.wc b/Makefile.wc index f2184a67..ab08b049 100644 --- a/Makefile.wc +++ b/Makefile.wc @@ -42,7 +42,7 @@ NASMOBJS = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) \ assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) \ outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) \ outobj.$(OBJ) outas86.$(OBJ) outrdf.$(OBJ) outdbg.$(OBJ) \ - preproc.$(OBJ) + preproc.$(OBJ) listing.$(OBJ) NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \ insnsd.$(OBJ) @@ -92,7 +92,9 @@ float.$(OBJ): float.c nasm.h insnsa.$(OBJ): insnsa.c nasm.h insns.h insnsd.$(OBJ): insnsd.c nasm.h insns.h labels.$(OBJ): labels.c nasm.h nasmlib.h -nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h outform.h +listing.$(OBJ): listing.c nasm.h nasmlib.h listing.h +nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h \ + listing.h outform.h nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h outas86.$(OBJ): outas86.c nasm.h nasmlib.h diff --git a/Makefile.wcw b/Makefile.wcw index 4942df5a..25705ab7 100644 --- a/Makefile.wcw +++ b/Makefile.wcw @@ -42,7 +42,7 @@ NASMOBJS = nasm.$(OBJ) nasmlib.$(OBJ) float.$(OBJ) insnsa.$(OBJ) \ assemble.$(OBJ) labels.$(OBJ) parser.$(OBJ) outform.$(OBJ) \ outbin.$(OBJ) outaout.$(OBJ) outcoff.$(OBJ) outelf.$(OBJ) \ outobj.$(OBJ) outas86.$(OBJ) outrdf.$(OBJ) outdbg.$(OBJ) \ - preproc.$(OBJ) + preproc.$(OBJ) listing.$(OBJ) NDISASMOBJS = ndisasm.$(OBJ) disasm.$(OBJ) sync.$(OBJ) nasmlib.$(OBJ) \ insnsd.$(OBJ) @@ -92,7 +92,9 @@ float.$(OBJ): float.c nasm.h insnsa.$(OBJ): insnsa.c nasm.h insns.h insnsd.$(OBJ): insnsd.c nasm.h insns.h labels.$(OBJ): labels.c nasm.h nasmlib.h -nasm.$(OBJ): nasm.c 
nasm.h nasmlib.h parser.h assemble.h labels.h outform.h +listing.$(OBJ): listing.c nasm.h nasmlib.h listing.h +nasm.$(OBJ): nasm.c nasm.h nasmlib.h parser.h assemble.h labels.h \ + listing.h outform.h nasmlib.$(OBJ): nasmlib.c nasm.h nasmlib.h ndisasm.$(OBJ): ndisasm.c nasm.h sync.h disasm.h outas86.$(OBJ): outas86.c nasm.h nasmlib.h diff --git a/Readme b/Readme index d52c7bc4..97e8beb3 100644 --- a/Readme +++ b/Readme @@ -1,8 +1,8 @@ This is a distribution of NASM, the Netwide Assembler. NASM is a prototype general-purpose x86 assembler. It will currently output flat-form binary files, a.out, COFF and ELF Unix object files, -Microsoft 16-bit DOS and Win32 object files, the as86 object format, -and a home-grown format called RDF. +Microsoft Win32 and 16-bit DOS object files, OS/2 object files, the +as86 object format, and a home-grown format called RDF. Also included is NDISASM, a prototype x86 binary-file disassembler which uses the same instruction table as NASM. @@ -14,24 +14,38 @@ access). You may also want to copy the man page `nasm.1' (and maybe `ndisasm.1') to somewhere sensible. To rebuild the DOS sources, various makefiles are provided: -- Makefile.dos, the one I build the standard releases from, designed - for a hybrid system using Microsoft C and Borland Make (don't ask - why :-) -- Makefile.bor (for Borland C) -- Makefile.bc2 (also for Borland C, contributed by Fox Cutter - , may work better than Makefile.bor in some - cases). + +- Makefile.dos, the one I build the standard 16-bit releases from, + designed for a hybrid system using Microsoft C and Borland Make + (don't ask why :-) +- Makefile.vc, for Microsoft Visual C++ compiling to a Win32 + command-line application. This is the one I build the standard + Win32 release binaries from. + +- Makefile.bor, for Borland C. +- Makefile.bc2, also for Borland C, contributed by Fox Cutter. + Reported to work better than Makefile.bor on some systems. + +- Makefile.sc, for Symantec C++. 
Contributed by Mark Junker. - Makefile.wc, for Watcom C, compiling to a 32-bit extended DOS executable. Contributed by Dominik Behr. - Makefile.wcw, also for Watcom C, compiling to a Win32 command- line application. Also contributed by Dominik Behr. -I don't guarantee that any of those, other than Makefile.dos, work, -since I don't have the compilers to test them myself. Also be -warned: I have had various conflicting reports regarding building -NASM using Borland C. Several people have informed me that it -doesn't work except under Huge model, and one or two have said that -it doesn't work under Huge model either. +I can't guarantee that all of those makefiles work, because I don't +have all of those compilers. However, Makefile.dos and Makefile.vc +work on my system, and so do Makefile.bor and Makefile.bc2. + +Be careful with Borland C: there have been various conflicting +reports about how reliable the Huge memory model is. If you try to +compile NASM in Large model, you may get DGROUP overflows due to the +vast quantity of data in the instruction tables. I've had reports +from some people that Huge model doesn't work at all (and also +reports from others that it works fine), so if you don't want to try +moving to Huge, you could try adding the option `-dc' to the +compiler command line instead, which causes string literals to be +moved from DGROUP to the code segments and might make Large model +start working. (Either solution works for me.) Dominik Behr has also contributed the file misc/pmw.bat, which is a batch file to turn the output from Makefile.wc (NASM.EXE and @@ -85,5 +99,7 @@ information about the internal structure of NASM, see `internal.doc'. (In particular, _please_ read `internal.doc' before writing any code for us...) +The NASM web page is at http://www.cryogen.com/Nasm/ + Bug reports (and patches if you can) should be sent to - or . + or . 
diff --git a/assemble.c b/assemble.c index 3e4bc1f1..c6cc00a3 100644 --- a/assemble.c +++ b/assemble.c @@ -53,6 +53,7 @@ #include #include "nasm.h" +#include "nasmlib.h" #include "assemble.h" #include "insns.h" @@ -67,6 +68,7 @@ typedef struct { static efunc errfunc; static struct ofmt *outfmt; +static ListGen *list; static long calcsize (long, long, int, insn *, char *); static void gencode (long, long, int, insn *, char *, long); @@ -75,8 +77,49 @@ static int matches (struct itemplate *, insn *); static ea *process_ea (operand *, ea *, int, int, int); static int chsize (operand *, int); +/* + * This routine wraps the real output format's output routine, + * in order to pass a copy of the data off to the listing file + * generator at the same time. + */ +static void out (long offset, long segto, void *data, unsigned long type, + long segment, long wrt) { + if ((type & OUT_TYPMASK) == OUT_ADDRESS) { + if (segment != NO_SEG || wrt != NO_SEG) { + /* + * This address is relocated. We must write it as + * OUT_ADDRESS, so there's no work to be done here. + */ + list->output (offset, data, type); + } else { + unsigned char p[4], *q = p; + /* + * This is a non-relocated address, and we're going to + * convert it into RAWDATA format. 
+ */ + if ((type & OUT_SIZMASK) == 4) { + WRITELONG (q, * (long *) data); + list->output (offset, p, OUT_RAWDATA+4); + } else { + WRITESHORT (q, * (long *) data); + list->output (offset, p, OUT_RAWDATA+2); + } + } + } else if ((type & OUT_TYPMASK) == OUT_RAWDATA) { + list->output (offset, data, type); + } else if ((type & OUT_TYPMASK) == OUT_RESERVE) { + list->output (offset, NULL, type); + } else if ((type & OUT_TYPMASK) == OUT_REL2ADR || + (type & OUT_TYPMASK) == OUT_REL4ADR) { + list->output (offset, data, type); + } + + outfmt->output (segto, data, type, segment, wrt); +} + long assemble (long segment, long offset, int bits, - insn *instruction, struct ofmt *output, efunc error) { + insn *instruction, struct ofmt *output, efunc error, + ListGen *listgen) { int j, size_prob; long insn_end, itimes; long start = offset; @@ -84,6 +127,7 @@ long assemble (long segment, long offset, int bits, errfunc = error; /* to pass to other functions */ outfmt = output; /* likewise */ + list = listgen; /* and again */ if (instruction->opcode == -1) return 0; @@ -114,16 +158,16 @@ long assemble (long segment, long offset, int bits, "one-byte relocation attempted"); else { unsigned char c = e->offset; - outfmt->output (segment, &c, OUT_RAWDATA+1, - NO_SEG, NO_SEG); + out (offset, segment, &c, OUT_RAWDATA+1, + NO_SEG, NO_SEG); } } else if (wsize > 5) { errfunc (ERR_NONFATAL, "integer supplied to a D%c" " instruction", wsize==8 ? 
'Q' : 'T'); } else - outfmt->output (segment, &e->offset, - OUT_ADDRESS+wsize, e->segment, - e->wrt); + out (offset, segment, &e->offset, + OUT_ADDRESS+wsize, e->segment, + e->wrt); offset += wsize; } else if (e->type == EOT_DB_STRING) { int align; @@ -131,15 +175,25 @@ long assemble (long segment, long offset, int bits, align = (-e->stringlen) % wsize; if (align < 0) align += wsize; - outfmt->output (segment, e->stringval, - OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG); + out (offset, segment, e->stringval, + OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG); if (align) - outfmt->output (segment, "\0\0\0\0", - OUT_RAWDATA+align, NO_SEG, NO_SEG); + out (offset, segment, "\0\0\0\0", + OUT_RAWDATA+align, NO_SEG, NO_SEG); offset += e->stringlen + align; } } + if (t > 0 && t == instruction->times-1) { + /* + * Dummy call to list->output to give the offset to the + * listing module. + */ + list->output (offset, NULL, OUT_RAWDATA); + list->uplevel (LIST_TIMES); + } } + if (instruction->times > 1) + list->downlevel (LIST_TIMES); return offset - start; } @@ -170,6 +224,12 @@ long assemble (long segment, long offset, int bits, len > instruction->eops->next->next->offset) len = instruction->eops->next->next->offset; } + /* + * Dummy call to list->output to give the offset to the + * listing module. + */ + list->output (offset, NULL, OUT_RAWDATA); + list->uplevel(LIST_INCBIN); while (t--) { fseek (fp, (instruction->eops->next ? @@ -189,11 +249,21 @@ long assemble (long segment, long offset, int bits, " reading file `%s'", fname); return 0; /* it doesn't much matter... */ } - outfmt->output (segment, buf, OUT_RAWDATA+m, - NO_SEG, NO_SEG); + out (offset, segment, buf, OUT_RAWDATA+m, + NO_SEG, NO_SEG); l -= m; } } + list->downlevel(LIST_INCBIN); + if (instruction->times > 1) { + /* + * Dummy call to list->output to give the offset to the + * listing module. 
+ */ + list->output (offset, NULL, OUT_RAWDATA); + list->uplevel(LIST_TIMES); + list->downlevel(LIST_TIMES); + } fclose (fp); return instruction->times * len; } @@ -257,13 +327,23 @@ long assemble (long segment, long offset, int bits, "invalid instruction prefix"); } if (c != 0) - outfmt->output (segment, &c, OUT_RAWDATA+1, - NO_SEG, NO_SEG); + out (offset, segment, &c, OUT_RAWDATA+1, + NO_SEG, NO_SEG); offset++; } gencode (segment, offset, bits, instruction, codes, insn_end); offset += insn_size; + if (itimes > 0 && itimes == instruction->times-1) { + /* + * Dummy call to list->output to give the offset to the + * listing module. + */ + list->output (offset, NULL, OUT_RAWDATA); + list->uplevel (LIST_TIMES); + } } + if (instruction->times > 1) + list->downlevel (LIST_TIMES); return offset - start; } else if (m > 0) { size_prob = m; @@ -473,7 +553,7 @@ static void gencode (long segment, long offset, int bits, while (*codes) switch (c = *codes++) { case 01: case 02: case 03: - outfmt->output (segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG); + out (offset, segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG); codes += c; offset += c; break; @@ -486,7 +566,7 @@ static void gencode (long segment, long offset, int bits, default: errfunc (ERR_PANIC, "bizarre 8086 segment register received"); } - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); offset++; break; case 05: case 07: @@ -496,48 +576,48 @@ static void gencode (long segment, long offset, int bits, default: errfunc (ERR_PANIC, "bizarre 386 segment register received"); } - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); offset++; break; case 010: case 011: case 012: bytes[0] = *codes++ + regval(&ins->oprs[c-010]); - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); offset += 1; break; case 
017: bytes[0] = 0; - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); offset += 1; break; case 014: case 015: case 016: if (ins->oprs[c-014].offset < -128 || ins->oprs[c-014].offset > 127) errfunc (ERR_WARNING, "signed byte value exceeds bounds"); bytes[0] = ins->oprs[c-014].offset; - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); offset += 1; break; case 020: case 021: case 022: - if (ins->oprs[c-020].offset < -128 || ins->oprs[c-020].offset > 255) + if (ins->oprs[c-020].offset < -256 || ins->oprs[c-020].offset > 255) errfunc (ERR_WARNING, "byte value exceeds bounds"); bytes[0] = ins->oprs[c-020].offset; - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); offset += 1; break; case 024: case 025: case 026: if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255) errfunc (ERR_WARNING, "unsigned byte value exceeds bounds"); bytes[0] = ins->oprs[c-024].offset; - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); offset += 1; break; case 030: case 031: case 032: if (ins->oprs[c-030].segment == NO_SEG && ins->oprs[c-030].wrt == NO_SEG && - (ins->oprs[c-030].offset < -32768L || + (ins->oprs[c-030].offset < -65536L || ins->oprs[c-030].offset > 65535L)) errfunc (ERR_WARNING, "word value exceeds bounds"); data = ins->oprs[c-030].offset; - outfmt->output (segment, &data, OUT_ADDRESS+2, + out (offset, segment, &data, OUT_ADDRESS+2, ins->oprs[c-030].segment, ins->oprs[c-030].wrt); offset += 2; break; @@ -545,10 +625,10 @@ static void gencode (long segment, long offset, int bits, data = ins->oprs[c-034].offset; size = ((ins->oprs[c-034].addr_size ? ins->oprs[c-034].addr_size : bits) == 16 ? 
2 : 4); - if (size==16 && (data < -32768L || data > 65535L)) + if (size==16 && (data < -65536L || data > 65535L)) errfunc (ERR_WARNING, "word value exceeds bounds"); - outfmt->output (segment, &data, OUT_ADDRESS+size, - ins->oprs[c-034].segment, ins->oprs[c-034].wrt); + out (offset, segment, &data, OUT_ADDRESS+size, + ins->oprs[c-034].segment, ins->oprs[c-034].wrt); offset += size; break; case 037: @@ -556,15 +636,15 @@ static void gencode (long segment, long offset, int bits, errfunc (ERR_NONFATAL, "value referenced by FAR is not" " relocatable"); data = 0L; - outfmt->output (segment, &data, OUT_ADDRESS+2, - outfmt->segbase(1+ins->oprs[0].segment), + out (offset, segment, &data, OUT_ADDRESS+2, + outfmt->segbase(1+ins->oprs[0].segment), ins->oprs[0].wrt); offset += 2; break; case 040: case 041: case 042: data = ins->oprs[c-040].offset; - outfmt->output (segment, &data, OUT_ADDRESS+4, - ins->oprs[c-040].segment, ins->oprs[c-040].wrt); + out (offset, segment, &data, OUT_ADDRESS+4, + ins->oprs[c-040].segment, ins->oprs[c-040].wrt); offset += 4; break; case 050: case 051: case 052: @@ -574,17 +654,18 @@ static void gencode (long segment, long offset, int bits, if (data > 127 || data < -128) errfunc (ERR_NONFATAL, "short jump is out of range"); bytes[0] = data; - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); offset += 1; break; case 060: case 061: case 062: if (ins->oprs[c-060].segment != segment) { data = ins->oprs[c-060].offset; - outfmt->output (segment, &data, OUT_REL2ADR+insn_end-offset, - ins->oprs[c-060].segment, ins->oprs[c-060].wrt); + out (offset, segment, &data, OUT_REL2ADR+insn_end-offset, + ins->oprs[c-060].segment, ins->oprs[c-060].wrt); } else { data = ins->oprs[c-060].offset - insn_end; - outfmt->output (segment, &data, OUT_ADDRESS+2, NO_SEG, NO_SEG); + out (offset, segment, &data, + OUT_ADDRESS+2, NO_SEG, NO_SEG); } offset += 2; break; @@ -594,30 +675,33 @@ static void 
gencode (long segment, long offset, int bits, if (ins->oprs[c-064].segment != segment) { data = ins->oprs[c-064].offset; size = (bits == 16 ? OUT_REL2ADR : OUT_REL4ADR); - outfmt->output (segment, &data, size+insn_end-offset, - ins->oprs[c-064].segment, ins->oprs[c-064].wrt); + out (offset, segment, &data, size+insn_end-offset, + ins->oprs[c-064].segment, ins->oprs[c-064].wrt); size = (bits == 16 ? 2 : 4); } else { data = ins->oprs[c-064].offset - insn_end; - outfmt->output (segment, &data, OUT_ADDRESS+size, NO_SEG, NO_SEG); + out (offset, segment, &data, + OUT_ADDRESS+size, NO_SEG, NO_SEG); } offset += size; break; case 070: case 071: case 072: if (ins->oprs[c-070].segment != segment) { data = ins->oprs[c-070].offset; - outfmt->output (segment, &data, OUT_REL4ADR+insn_end-offset, - ins->oprs[c-070].segment, ins->oprs[c-070].wrt); + out (offset, segment, &data, OUT_REL4ADR+insn_end-offset, + ins->oprs[c-070].segment, ins->oprs[c-070].wrt); } else { data = ins->oprs[c-070].offset - insn_end; - outfmt->output (segment, &data, OUT_ADDRESS+4, NO_SEG, NO_SEG); + out (offset, segment, &data, + OUT_ADDRESS+4, NO_SEG, NO_SEG); } offset += 4; break; case 0300: case 0301: case 0302: if (chsize (&ins->oprs[c-0300], bits)) { *bytes = 0x67; - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, + OUT_RAWDATA+1, NO_SEG, NO_SEG); offset += 1; } else offset += 0; @@ -625,7 +709,8 @@ static void gencode (long segment, long offset, int bits, case 0310: if (bits==32) { *bytes = 0x67; - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, + OUT_RAWDATA+1, NO_SEG, NO_SEG); offset += 1; } else offset += 0; @@ -633,7 +718,8 @@ static void gencode (long segment, long offset, int bits, case 0311: if (bits==16) { *bytes = 0x67; - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, + OUT_RAWDATA+1, NO_SEG, NO_SEG); offset += 1; } else offset += 0; @@ -643,7 +729,8 @@ 
static void gencode (long segment, long offset, int bits, case 0320: if (bits==32) { *bytes = 0x66; - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, + OUT_RAWDATA+1, NO_SEG, NO_SEG); offset += 1; } else offset += 0; @@ -651,7 +738,8 @@ static void gencode (long segment, long offset, int bits, case 0321: if (bits==16) { *bytes = 0x66; - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, + OUT_RAWDATA+1, NO_SEG, NO_SEG); offset += 1; } else offset += 0; @@ -660,7 +748,8 @@ static void gencode (long segment, long offset, int bits, break; case 0330: *bytes = *codes++ + condval[ins->condition]; - outfmt->output (segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG); + out (offset, segment, bytes, + OUT_RAWDATA+1, NO_SEG, NO_SEG); offset += 1; break; case 0340: case 0341: case 0342: @@ -668,7 +757,8 @@ static void gencode (long segment, long offset, int bits, errfunc (ERR_PANIC, "non-constant BSS size in pass two"); else { long size = ins->oprs[0].offset << (c-0340); - outfmt->output (segment, NULL, OUT_RESERVE+size, NO_SEG, NO_SEG); + out (offset, segment, NULL, + OUT_RESERVE+size, NO_SEG, NO_SEG); offset += size; } break; @@ -694,8 +784,8 @@ static void gencode (long segment, long offset, int bits, /* * the cast in the next line is to placate MS C... 
*/ - outfmt->output (segment, bytes, OUT_RAWDATA+(long)(p-bytes), - NO_SEG, NO_SEG); + out (offset, segment, bytes, OUT_RAWDATA+(long)(p-bytes), + NO_SEG, NO_SEG); s = p-bytes; switch (ea_data.bytes) { @@ -703,16 +793,16 @@ static void gencode (long segment, long offset, int bits, break; case 1: *bytes = ins->oprs[(c>>3)&7].offset; - outfmt->output (segment, bytes, OUT_RAWDATA+1, - NO_SEG, NO_SEG); + out (offset, segment, bytes, OUT_RAWDATA+1, + NO_SEG, NO_SEG); s++; break; case 2: case 4: data = ins->oprs[(c>>3)&7].offset; - outfmt->output (segment, &data, OUT_ADDRESS+ea_data.bytes, - ins->oprs[(c>>3)&7].segment, - ins->oprs[(c>>3)&7].wrt); + out (offset, segment, &data, + OUT_ADDRESS+ea_data.bytes, + ins->oprs[(c>>3)&7].segment, ins->oprs[(c>>3)&7].wrt); s += ea_data.bytes; break; } diff --git a/assemble.h b/assemble.h index cb93a2c5..2ead91f9 100644 --- a/assemble.h +++ b/assemble.h @@ -12,6 +12,7 @@ long insn_size (long segment, long offset, int bits, insn *instruction, efunc error); long assemble (long segment, long offset, int bits, - insn *instruction, struct ofmt *output, efunc error); + insn *instruction, struct ofmt *output, efunc error, + ListGen *listgen); #endif diff --git a/disasm.c b/disasm.c index 705c57c8..c4c0dc12 100644 --- a/disasm.c +++ b/disasm.c @@ -96,8 +96,8 @@ static int whichreg(long regflags, int regval) { static char *whichcond(int condval) { static int conds[] = { - C_O, C_NO, C_B, C_AE, C_E, C_NE, C_BE, C_A, - C_S, C_NS, C_PE, C_PO, C_L, C_GE, C_LE, C_G + C_O, C_NO, C_C, C_NC, C_Z, C_NZ, C_NA, C_A, + C_S, C_NS, C_PE, C_PO, C_L, C_NL, C_NG, C_G }; return conditions[conds[condval]]; } diff --git a/float.c b/float.c index e9b7f4a3..1f66ca61 100644 --- a/float.c +++ b/float.c @@ -55,7 +55,8 @@ static int multiply(unsigned short *to, unsigned short *from) { } } -static void flconvert(char *string, unsigned short *mant, long *exponent) { +static void flconvert(char *string, unsigned short *mant, long *exponent, + efunc error) { char 
digits[MANT_DIGITS], *p, *q, *r; unsigned short mult[MANT_WORDS], *m, bit; long tenpwr, twopwr; @@ -69,7 +70,8 @@ static void flconvert(char *string, unsigned short *mant, long *exponent) { if (!seendot) seendot = TRUE; else { - fprintf(stderr, "too many periods!\n"); + error (ERR_NONFATAL, + "too many periods in floating-point constant"); return; } } else if (*string >= '0' && *string <= '9') { @@ -84,7 +86,9 @@ static void flconvert(char *string, unsigned short *mant, long *exponent) { tenpwr++; } } else { - fprintf(stderr, "`%c' is invalid char\n", *string); + error (ERR_NONFATAL, + "floating-point constant: `%c' is invalid character", + *string); return; } string++; @@ -209,7 +213,7 @@ static int to_double(char *str, long sign, unsigned char *result, sign = (sign < 0 ? 0x8000L : 0L); - flconvert (str, mant, &exponent); + flconvert (str, mant, &exponent, error); if (mant[0] & 0x8000) { /* * Non-zero. @@ -269,7 +273,7 @@ static int to_float(char *str, long sign, unsigned char *result, sign = (sign < 0 ? 0x8000L : 0L); - flconvert (str, mant, &exponent); + flconvert (str, mant, &exponent, error); if (mant[0] & 0x8000) { /* * Non-zero. @@ -322,7 +326,7 @@ static int to_ldoub(char *str, long sign, unsigned char *result, sign = (sign < 0 ? 0x8000L : 0L); - flconvert (str, mant, &exponent); + flconvert (str, mant, &exponent, error); if (mant[0] & 0x8000) { /* * Non-zero. 
diff --git a/insns.pl b/insns.pl index 14586390..def84bd7 100644 --- a/insns.pl +++ b/insns.pl @@ -138,7 +138,7 @@ sub format { # \17 means byte zero # \330 means byte plus condition code # \0 or \340 mean give up and return empty set -sub startbyte { # FIXME we cheat, for now :-) +sub startbyte { local ($codes) = @_; local $word, @range; diff --git a/internal.doc b/internal.doc index 9f84bb5a..8a73aa11 100644 --- a/internal.doc +++ b/internal.doc @@ -15,8 +15,10 @@ look like: | float.c | | | +--- assemble.c ---+ - nasm.c ---+ | +--- nasmlib.c - | insnsa.c | + | | | + nasm.c ---+ insnsa.c +--- nasmlib.c + | | + +--- listing.c ----+ | | +---- labels.c ----+ | | @@ -25,9 +27,9 @@ look like: +----- *out.c -----+ In other words, each of `preproc.c', `parser.c', `assemble.c', -`labels.c', `outform.c' and each of the output format modules -`*out.c' are independent modules, which do not inter-communicate -except through the main program. +`labels.c', `listing.c', `outform.c' and each of the output format +modules `*out.c' are independent modules, which do not directly +inter-communicate except through the main program. The Netwide *Disassembler* is not intended to be particularly portable or reusable or anything, however. So I won't bother @@ -167,6 +169,15 @@ The label manager module is (theoretically :) restartable: after calling `cleanup_labels', you can call `init_labels' again, and start a new assembly with a new set of symbols. +listing.c +--------- + +This file contains the listing file generator. The interface to the +module is through the one symbol it exports, `nasmlist', which is a +structure containing six function pointers. The calling semantics of +these functions isn't terribly well thought out, as yet, but it +works (just about) so it's going to get left alone for now... 
+ outform.c --------- diff --git a/labels.c b/labels.c index 7323c45c..7793a0a9 100644 --- a/labels.c +++ b/labels.c @@ -133,7 +133,8 @@ void define_label_stub (char *label, efunc error) { lptr = find_label (label, 1); if (!lptr) error (ERR_PANIC, "can't find label `%s' on pass two", label); - prevlabel = lptr->defn.label; + if (*label != '.') + prevlabel = lptr->defn.label; } } @@ -156,7 +157,7 @@ void define_label (char *label, long segment, long offset, if (label[0] != '.') /* not local, but not special either */ prevlabel = lptr->defn.label; - else if (!*prevlabel) + else if (label[1] != '.' && !*prevlabel) error(ERR_NONFATAL, "attempt to define a local label before any" " non-local labels"); @@ -282,6 +283,7 @@ static char *perm_copy (char *string1, char *string2) { if (perm_tail->size - perm_tail->usage < len) { perm_tail->next = (struct permts *)nasm_malloc(sizeof(struct permts)); perm_tail = perm_tail->next; + perm_tail->next = NULL; perm_tail->size = PERMTS_SIZE; perm_tail->usage = 0; } diff --git a/listing.c b/listing.c new file mode 100644 index 00000000..89b722a6 --- /dev/null +++ b/listing.c @@ -0,0 +1,240 @@ +/* listing.c listing file generator for the Netwide Assembler + * + * The Netwide Assembler is copyright (C) 1996 Simon Tatham and + * Julian Hall. All rights reserved. The software is + * redistributable under the licence given in the file "Licence" + * distributed in the NASM archive. 
+ * + * initial version 2/vii/97 by Simon Tatham + */ + +#include +#include +#include +#include +#include + +#include "nasm.h" +#include "nasmlib.h" +#include "listing.h" + +#define LIST_MAX_LEN 216 /* something sensible */ +#define LIST_INDENT 40 +#define LIST_HEXBIT 18 + +typedef struct MacroInhibit MacroInhibit; + +static struct MacroInhibit { + MacroInhibit *next; + int level; + int inhibiting; +} *mistack; + +static char xdigit[] = "0123456789ABCDEF"; + +#define HEX(a,b) (*(a)=xdigit[((b)>>4)&15],(a)[1]=xdigit[(b)&15]); + +static char listline[LIST_MAX_LEN]; +static int listlinep; + +static char listdata[2*LIST_INDENT]; /* we need less than that actually */ +static long listoffset; + +static long listlineno; + +static long listp; + +static int suppress; /* for INCBIN & TIMES special cases */ + +static int listlevel, listlevel_e; + +static FILE *listfp; + +static void list_emit (void) { + if (!listlinep && !listdata[0]) + return; + fprintf(listfp, "%6ld ", ++listlineno); + if (listdata[0]) + fprintf(listfp, "%08lX %-*s", listoffset, LIST_HEXBIT+1, listdata); + else + fprintf(listfp, "%*s", LIST_HEXBIT+10, ""); + if (listlevel_e) + fprintf(listfp, "%s<%d>", (listlevel < 10 ? 
" " : ""), listlevel_e); + else if (listlinep) + fprintf(listfp, " "); + if (listlinep) + fprintf(listfp, " %s", listline); + fputc('\n', listfp); + listlinep = FALSE; + listdata[0] = '\0'; +} + +static void list_init (char *fname, efunc error) { + listfp = fopen (fname, "w"); + if (!listfp) { + error (ERR_NONFATAL, "unable to open listing file `%s'", fname); + return; + } + *listline = '\0'; + listlineno = 0; + listp = TRUE; + listlevel = 0; + suppress = 0; + mistack = nasm_malloc(sizeof(MacroInhibit)); + mistack->next = NULL; + mistack->level = 0; + mistack->inhibiting = TRUE; +} + +static void list_cleanup (void) { + if (!listp) + return; + while (mistack) { + MacroInhibit *temp = mistack; + mistack = temp->next; + nasm_free (temp); + } + list_emit(); + fclose (listfp); +} + +static void list_out (long offset, char *str) { + if (strlen(listdata) + strlen(str) > LIST_HEXBIT) { + strcat(listdata, "-"); + list_emit(); + } + if (!listdata[0]) + listoffset = offset; + strcat(listdata, str); +} + +static void list_output (long offset, void *data, unsigned long type) { + long typ, size; + + if (!listp || suppress) + return; + + typ = type & OUT_TYPMASK; + size = type & OUT_SIZMASK; + + if (typ == OUT_RAWDATA) { + unsigned char *p = data; + char q[3]; + while (size--) { + HEX (q, *p); + q[2] = '\0'; + list_out (offset++, q); + p++; + } + } else if (typ == OUT_ADDRESS) { + unsigned long d = *(long *)data; + char q[11]; + unsigned char p[4], *r = p; + if (size == 4) { + q[0] = '['; q[9] = ']'; q[10] = '\0'; + WRITELONG (r, d); + HEX (q+1, p[0]); + HEX (q+3, p[1]); + HEX (q+5, p[2]); + HEX (q+7, p[3]); + list_out (offset, q); + } else { + q[0] = '['; q[5] = ']'; q[6] = '\0'; + WRITESHORT (r, d); + HEX (q+1, p[0]); + HEX (q+3, p[1]); + list_out (offset, q); + } + } else if (typ == OUT_REL2ADR) { + unsigned long d = *(long *)data; + char q[11]; + unsigned char p[4], *r = p; + q[0] = '('; q[5] = ')'; q[6] = '\0'; + WRITESHORT (r, d); + HEX (q+1, p[0]); + HEX (q+3, p[1]); + 
list_out (offset, q); + } else if (typ == OUT_REL4ADR) { + unsigned long d = *(long *)data; + char q[11]; + unsigned char p[4], *r = p; + q[0] = '('; q[9] = ')'; q[10] = '\0'; + WRITELONG (r, d); + HEX (q+1, p[0]); + HEX (q+3, p[1]); + HEX (q+5, p[2]); + HEX (q+7, p[3]); + list_out (offset, q); + } else if (typ == OUT_RESERVE) { + char q[20]; + sprintf(q, "", size); + list_out (offset, q); + } +} + +static void list_line (int type, char *line) { + if (!listp) + return; + if (mistack && mistack->inhibiting) { + if (type == LIST_MACRO) + return; + else { /* pop the m i stack */ + MacroInhibit *temp = mistack; + mistack = temp->next; + nasm_free (temp); + } + } + list_emit(); + listlinep = TRUE; + strncpy (listline, line, LIST_MAX_LEN-1); + listline[LIST_MAX_LEN-1] = '\0'; + listlevel_e = listlevel; +} + +static void list_uplevel (int type) { + if (!listp) + return; + if (type == LIST_INCBIN || type == LIST_TIMES) { + suppress |= (type == LIST_INCBIN ? 1 : 2); + list_out (listoffset, type == LIST_INCBIN ? "" : ""); + return; + } + listlevel++; + if (mistack && mistack->inhibiting && type == LIST_INCLUDE) { + MacroInhibit *temp = nasm_malloc(sizeof(MacroInhibit)); + temp->next = mistack; + temp->level = listlevel; + temp->inhibiting = FALSE; + mistack = temp; + } else if (type == LIST_MACRO_NOLIST) { + MacroInhibit *temp = nasm_malloc(sizeof(MacroInhibit)); + temp->next = mistack; + temp->level = listlevel; + temp->inhibiting = TRUE; + mistack = temp; + } +} + +static void list_downlevel (int type) { + if (!listp) + return; + if (type == LIST_INCBIN || type == LIST_TIMES) { + suppress &= ~(type == LIST_INCBIN ? 
1 : 2); + return; + } + listlevel--; + while (mistack && mistack->level > listlevel) { + MacroInhibit *temp = mistack; + mistack = temp->next; + nasm_free (temp); + } +} + +ListGen nasmlist = { + list_init, + list_cleanup, + list_output, + list_line, + list_uplevel, + list_downlevel +}; diff --git a/preproc.h b/listing.h similarity index 67% copy from preproc.h copy to listing.h index 550a66ea..c3ac153f 100644 --- a/preproc.h +++ b/listing.h @@ -1,4 +1,4 @@ -/* preproc.h header file for preproc.c +/* listing.h header file for listing.c * * The Netwide Assembler is copyright (C) 1996 Simon Tatham and * Julian Hall. All rights reserved. The software is @@ -6,9 +6,9 @@ * distributed in the NASM archive. */ -#ifndef NASM_PREPROC_H -#define NASM_PREPROC_H +#ifndef NASM_LISTING_H +#define NASM_LISTING_H -extern Preproc nasmpp; +extern ListGen nasmlist; #endif diff --git a/macros.c b/macros.c index 7509bcaa..c7e03ae1 100644 --- a/macros.c +++ b/macros.c @@ -1,51 +1,66 @@ /* This file auto-generated from standard.mac by macros.pl - don't edit it */ static char *stdmac[] = { + "%define __NASM_MAJOR__ 0", + "%define __NASM_MINOR__ 95", "%define __SECT__", - "%imacro section 1+", + "%imacro section 1+.nolist", "%define __SECT__ [section %1]", "__SECT__", "%endmacro", - "%imacro segment 1+", + "%imacro segment 1+.nolist", "%define __SECT__ [segment %1]", "__SECT__", "%endmacro", - "%imacro absolute 1+", + "%imacro absolute 1+.nolist", "%define __SECT__ [absolute %1]", "__SECT__", "%endmacro", - "%imacro struc 1", + "%imacro struc 1.nolist", "%push struc", "%define %$strucname %1", "[absolute 0]", "%endmacro", - "%imacro endstruc 0", + "%imacro endstruc 0.nolist", "%{$strucname}_size:", "%pop", "__SECT__", "%endmacro", - "%imacro extern 1+", + "%imacro istruc 1.nolist", + "%push istruc", + "%define %$strucname %1", + "%$strucstart:", + "%endmacro", + "%imacro at 1-2+.nolist", + "times %1-($-%$strucstart) db 0", + "%2", + "%endmacro", + "%imacro iend 0.nolist", + "times 
%{$strucname}_size-($-%$strucstart) db 0", + "%pop", + "%endmacro", + "%imacro extern 1+.nolist", "[extern %1]", "%endmacro", - "%imacro bits 1+", + "%imacro bits 1+.nolist", "[bits %1]", "%endmacro", - "%imacro global 1+", + "%imacro global 1+.nolist", "[global %1]", "%endmacro", - "%imacro common 1+", + "%imacro common 1+.nolist", "[common %1]", "%endmacro", - "%imacro org 1+", + "%imacro org 1+.nolist", "[org %1]", "%endmacro", - "%imacro group 1+", + "%imacro group 1+.nolist", "[group %1]", "%endmacro", - "%imacro uppercase 1+", + "%imacro uppercase 1+.nolist", "[uppercase %1]", "%endmacro", - "%imacro library 1+", + "%imacro library 1+.nolist", "[library %1]", "%endmacro", NULL diff --git a/macros.pl b/macros.pl new file mode 100644 index 00000000..733f7e1e --- /dev/null +++ b/macros.pl @@ -0,0 +1,27 @@ +#!/usr/bin/perl +# +# macros.pl produce macros.c from standard.mac +# +# The Netwide Assembler is copyright (C) 1996 Simon Tatham and +# Julian Hall. All rights reserved. The software is +# redistributable under the licence given in the file "Licence" +# distributed in the NASM archive. + +open INPUT,"standard.mac" || die "unable to open standard.mac\n"; +open OUTPUT,">macros.c" || die "unable to open macros.c\n"; + +print OUTPUT "/* This file auto-generated from standard.mac by macros.pl" . 
+ " - don't edit it */\n\nstatic char *stdmac[] = {\n"; + +while () { + chomp; + # this regexp ought to match anything at all, so why bother with + # a sensible error message ;-) + die "swirly thing alert" unless /^\s*((\s*([^"';\s]+|"[^"]*"|'[^']*'))*)/; + $_ = $1; + s/\\/\\\\/g; + s/"/\\"/g; + print OUTPUT " \"$_\",\n" if length > 0; +} + +print OUTPUT " NULL\n};\n" diff --git a/misc/nasm.sl b/misc/nasm.sl index 325e1b6a..c47d28b8 100644 --- a/misc/nasm.sl +++ b/misc/nasm.sl @@ -52,10 +52,10 @@ variable nasm_kw_6 = strncat("cmovaecmovbecmovgecmovlecmovnacmovnbcmovnc", "cmovpecmovpofcmovbfcmovefcmovufcomipfcompp", "fdivrpficompfidivrfisubrfldenvfldl2efldl2t", "fldlg2fldln2fpatanfprem1frstorfscalefsetpm", - "fstenvfsubrpfucomifucompinvlpgloopneloopnz", - "paddsbpaddswpmulhwpmullwpsubsbpsubswpushad", - "pushawpushfdpushfwsetnaesetnbesetngesetnle", - "wbinvd", 9); + "fstenvfsubrpfucomifucompincbininvlpgloopne", + "loopnzpaddsbpaddswpmulhwpmullwpsubsbpsubsw", + "pushadpushawpushfdpushfwsetnaesetnbesetnge", + "setnlewbinvd", 9); variable nasm_kw_7 = strncat("cmovnaecmovnbecmovngecmovnlecmpxchgfcmovbe", "fcmovnbfcmovnefcmovnufdecstpfincstpfrndint", "fsincosfucomipfucomppfxtractfyl2xp1loadall", diff --git a/nasm.c b/nasm.c index 75783969..1d663b36 100644 --- a/nasm.c +++ b/nasm.c @@ -19,6 +19,7 @@ #include "assemble.h" #include "labels.h" #include "outform.h" +#include "listing.h" static void report_error (int, char *, ...); static void parse_cmdline (int, char **); @@ -30,6 +31,7 @@ static void usage(void); static char *obuf; static char inname[FILENAME_MAX]; static char outname[FILENAME_MAX]; +static char listname[FILENAME_MAX]; static char realout[FILENAME_MAX]; static int lineno; /* for error reporting */ static int lineinc; /* set by [LINE] or [ONELINE] */ @@ -40,6 +42,8 @@ static struct ofmt *ofmt = NULL; static FILE *ofile = NULL; static int sb = 16; /* by default */ +static int use_stdout = FALSE; /* by default, errors to stderr */ + static long current_seg; 
static struct RAA *offsets; static long abs_offset; @@ -54,12 +58,37 @@ static int preprocess_only; static char currentfile[FILENAME_MAX]; /* + * Which of the suppressible warnings are suppressed. Entry zero + * doesn't do anything. Initial defaults are given here. + */ +static char suppressed[1+ERR_WARN_MAX] = { + 0, FALSE, TRUE +}; + +/* + * The option names for the suppressible warnings. As before, entry + * zero does nothing. + */ +static char *suppressed_names[1+ERR_WARN_MAX] = { + NULL, "macro-params", "orphan-labels" +}; + +/* + * The explanations for the suppressible warnings. As before, entry + * zero does nothing. + */ +static char *suppressed_what[1+ERR_WARN_MAX] = { + NULL, "macro calls with wrong no. of params", + "labels alone on lines without trailing `:'" +}; + +/* * This is a null preprocessor which just copies lines from input * to output. It's used when someone explicitly requests that NASM * not preprocess their source file. */ -static void no_pp_reset (char *, efunc); +static void no_pp_reset (char *, efunc, ListGen *); static char *no_pp_getline (void); static void no_pp_cleanup (void); static Preproc no_pp = { @@ -111,7 +140,7 @@ int main(int argc, char **argv) { "unable to open output file `%s'", outname); } else ofile = NULL; - preproc->reset (inname, report_error); + preproc->reset (inname, report_error, &nasmlist); strcpy(currentfile,inname); lineno = 0; lineinc = 1; @@ -130,8 +159,15 @@ int main(int argc, char **argv) { if (ofile && terminate_after_phase) remove(outname); } else { + /* + * We must call ofmt->filename _anyway_, even if the user + * has specified their own output file, because some + * formats (eg OBJ and COFF) use ofmt->filename to find out + * the name of the input file and then put that inside the + * file. 
+ */ + ofmt->filename (inname, realout, report_error); if (!*outname) { - ofmt->filename (inname, realout, report_error); strcpy(outname, realout); } @@ -140,15 +176,29 @@ int main(int argc, char **argv) { report_error (ERR_FATAL | ERR_NOFILE, "unable to open output file `%s'", outname); } + /* + * We must call init_labels() before ofmt->init() since + * some object formats will want to define labels in their + * init routines. (eg OS/2 defines the FLAT group) + */ + init_labels (); ofmt->init (ofile, report_error, define_label); assemble_file (inname); if (!terminate_after_phase) { ofmt->cleanup (); cleanup_labels (); } - fclose (ofile); - if (terminate_after_phase) + /* + * We had an fclose on the output file here, but we + * actually do that in all the object file drivers as well, + * so we're leaving out the one here. + * fclose (ofile); + */ + if (terminate_after_phase) { remove(outname); + if (listname[0]) + remove(listname); + } } if (want_usage) @@ -156,78 +206,171 @@ int main(int argc, char **argv) { raa_free (offsets); saa_free (forwrefs); - return 0; + if (terminate_after_phase) + return 1; + else + return 0; } -static void parse_cmdline(int argc, char **argv) { +static int process_arg (char *p, char *q) { char *param; + int i; + int advance = 0; - *inname = *outname = '\0'; - while (--argc) { - char *p = *++argv; - if (p[0]=='-') { - switch (p[1]) { - case 'o': /* these parameters take values */ - case 'f': - if (p[2]) /* the parameter's in the option */ - param = p+2; - else if (!argv[1]) { - report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, - "option `-%c' requires an argument", - p[1]); - break; - } else - --argc, param = *++argv; - if (p[1]=='o') { /* output file */ - strcpy (outname, param); - } else if (p[1]=='f') { /* output format */ - ofmt = ofmt_find(param); - if (!ofmt) { - report_error (ERR_FATAL | ERR_NOFILE | ERR_USAGE, - "unrecognised output format `%s'", - param); - } - } - break; - case 'h': - fprintf(stderr, - "usage: nasm [-o 
outfile] [-f format]" - " [-a] [-e] filename\n"); - fprintf(stderr, - " or nasm -r for version info\n\n"); - fprintf(stderr, - " -e means preprocess only; " - "-a means don't preprocess\n\n"); - fprintf(stderr, - "valid output formats for -f are" - " (`*' denotes default):\n"); - ofmt_list(ofmt); - exit (0); /* never need usage message here */ - break; - case 'r': - fprintf(stderr, "NASM version %s\n", NASM_VER); - exit (0); /* never need usage message here */ - break; - case 'e': /* preprocess only */ - preprocess_only = TRUE; - break; - case 'a': /* assemble only - don't preprocess */ - preproc = &no_pp; - break; - default: + if (!p || !p[0]) + return 0; + + if (p[0]=='-') { + switch (p[1]) { + case 's': + use_stdout = TRUE; + break; + case 'o': /* these parameters take values */ + case 'f': + case 'p': + case 'd': + case 'i': + case 'l': + if (p[2]) /* the parameter's in the option */ + param = p+2; + else if (!q) { report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, - "unrecognised option `-%c'", + "option `-%c' requires an argument", p[1]); break; + } else + advance = 1, param = q; + if (p[1]=='o') { /* output file */ + strcpy (outname, param); + } else if (p[1]=='f') { /* output format */ + ofmt = ofmt_find(param); + if (!ofmt) { + report_error (ERR_FATAL | ERR_NOFILE | ERR_USAGE, + "unrecognised output format `%s'", + param); + } + } else if (p[1]=='p') { /* pre-include */ + pp_pre_include (param); + } else if (p[1]=='d') { /* pre-define */ + pp_pre_define (param); + } else if (p[1]=='i') { /* include search path */ + pp_include_path (param); + } else if (p[1]=='l') { /* listing file */ + strcpy (listname, param); } - } else { - if (*inname) { + break; + case 'h': + fprintf(use_stdout ? stdout : stderr, + "usage: nasm [-o outfile] [-f format] [-l listfile]" + " [options...] filename\n"); + fprintf(use_stdout ? stdout : stderr, + " or nasm -r for version info\n\n"); + fprintf(use_stdout ? 
stdout : stderr, + " -e means preprocess only; " + "-a means don't preprocess\n"); + fprintf(use_stdout ? stdout : stderr, + " -s means send errors to stdout not stderr\n"); + fprintf(use_stdout ? stdout : stderr, + " -i adds a pathname to the include file" + " path\n -p pre-includes a file;" + " -d[= 1 ? argv[1] : NULL); + argv += i, argc -= i; } + if (!*inname) report_error (ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, "no input file specified"); @@ -239,12 +382,10 @@ static void assemble_file (char *fname) { int i, rn_error; long seg; - init_labels (); - /* pass one */ pass = 1; current_seg = ofmt->section(NULL, pass, &sb); - preproc->reset(fname, report_error); + preproc->reset(fname, report_error, &nasmlist); strcpy(currentfile,fname); lineno = 0; lineinc = 1; @@ -378,15 +519,19 @@ static void assemble_file (char *fname) { if (output_ins.opcode == I_EQU) { /* - * Special `..' EQUs get processed in pass two. + * Special `..' EQUs get processed in pass two, + * except `..@' macro-processor EQUs which are done + * in the normal place. */ if (!output_ins.label) report_error (ERR_NONFATAL, "EQU not preceded by label"); else if (output_ins.label[0] != '.' || - output_ins.label[1] != '.') { + output_ins.label[1] != '.' 
|| + output_ins.label[2] == '@') { if (output_ins.operands == 1 && - (output_ins.oprs[0].type & IMMEDIATE)) { + (output_ins.oprs[0].type & IMMEDIATE) && + output_ins.oprs[0].wrt == NO_SEG) { define_label (output_ins.label, output_ins.oprs[0].segment, output_ins.oprs[0].offset, @@ -395,8 +540,10 @@ static void assemble_file (char *fname) { (output_ins.oprs[0].type & IMMEDIATE) && (output_ins.oprs[0].type & COLON) && output_ins.oprs[0].segment == NO_SEG && + output_ins.oprs[0].wrt == NO_SEG && (output_ins.oprs[1].type & IMMEDIATE) && - output_ins.oprs[1].segment == NO_SEG) { + output_ins.oprs[1].segment == NO_SEG && + output_ins.oprs[1].wrt == NO_SEG) { define_label (output_ins.label, output_ins.oprs[0].offset | SEG_ABS, output_ins.oprs[1].offset, @@ -430,6 +577,8 @@ static void assemble_file (char *fname) { /* pass two */ pass = 2; saa_rewind (forwrefs); + if (*listname) + nasmlist.init(listname, report_error); { int *p = saa_rstruct (forwrefs); if (p) @@ -440,7 +589,7 @@ static void assemble_file (char *fname) { current_seg = ofmt->section(NULL, pass, &sb); raa_free (offsets); offsets = raa_init(); - preproc->reset(fname, report_error); + preproc->reset(fname, report_error, &nasmlist); strcpy(currentfile,fname); lineno = 0; lineinc = 1; @@ -542,10 +691,12 @@ static void assemble_file (char *fname) { define_label_stub (output_ins.label, report_error); if (output_ins.opcode == I_EQU) { /* - * Special `..' EQUs get processed here. + * Special `..' EQUs get processed here, except + * `..@' macro processor EQUs which are done above. */ if (output_ins.label[0] == '.' && - output_ins.label[1] == '.') { + output_ins.label[1] == '.' 
&& + output_ins.label[2] != '@') { if (output_ins.operands == 1 && (output_ins.oprs[0].type & IMMEDIATE)) { define_label (output_ins.label, @@ -567,13 +718,14 @@ static void assemble_file (char *fname) { } } offs += assemble (current_seg, offs, sb, - &output_ins, ofmt, report_error); + &output_ins, ofmt, report_error, &nasmlist); cleanup_insn (&output_ins); set_curr_ofs (offs); } nasm_free (line); } preproc->cleanup(); + nasmlist.cleanup(); } static int getkw (char *buf, char **value) { @@ -601,6 +753,7 @@ static int getkw (char *buf, char **value) { *value = buf; } else { *buf++ = '\0'; + while (isspace(*buf)) buf++; /* beppu - skip leading whitespace */ *value = buf; while (*buf!=']') buf++; *buf++ = '\0'; @@ -625,20 +778,28 @@ static int getkw (char *buf, char **value) { static void report_error (int severity, char *fmt, ...) { va_list ap; + /* + * See if it's a suppressed warning. + */ + if ((severity & ERR_MASK) == ERR_WARNING && + (severity & ERR_WARN_MASK) != 0 && + suppressed[ (severity & ERR_WARN_MASK) >> ERR_WARN_SHR ]) + return; /* and bail out if so */ + if (severity & ERR_NOFILE) - fputs ("nasm: ", stderr); + fputs ("nasm: ", use_stdout ? stdout : stderr); else - fprintf (stderr, "%s:%d: ", currentfile, + fprintf (use_stdout ? stdout : stderr, "%s:%d: ", currentfile, lineno + (severity & ERR_OFFBY1 ? lineinc : 0)); if ( (severity & ERR_MASK) == ERR_WARNING) - fputs ("warning: ", stderr); + fputs ("warning: ", use_stdout ? stdout : stderr); else if ( (severity & ERR_MASK) == ERR_PANIC) - fputs ("panic: ", stderr); + fputs ("panic: ", use_stdout ? stdout : stderr); va_start (ap, fmt); - vfprintf (stderr, fmt, ap); - fputc ('\n', stderr); + vfprintf (use_stdout ? stdout : stderr, fmt, ap); + fputc ('\n', use_stdout ? stdout : stderr); if (severity & ERR_USAGE) want_usage = TRUE; @@ -666,7 +827,7 @@ static void report_error (int severity, char *fmt, ...) 
{ } static void usage(void) { - fputs("type `nasm -h' for help\n", stderr); + fputs("type `nasm -h' for help\n", use_stdout ? stdout : stderr); } static void register_output_formats(void) { @@ -688,13 +849,16 @@ static void register_output_formats(void) { #ifdef OF_AS86 extern struct ofmt of_as86; #endif - /* DOS formats: OBJ, Win32 */ + /* DOS and DOS-ish formats: OBJ, OS/2, Win32 */ #ifdef OF_OBJ extern struct ofmt of_obj; #endif #ifdef OF_WIN32 extern struct ofmt of_win32; #endif +#ifdef OF_OS2 + extern struct ofmt of_os2; +#endif #ifdef OF_RDF extern struct ofmt of_rdf; #endif @@ -723,6 +887,9 @@ static void register_output_formats(void) { #ifdef OF_WIN32 ofmt_register (&of_win32); #endif +#ifdef OF_OS2 + ofmt_register (&of_os2); +#endif #ifdef OF_RDF ofmt_register (&of_rdf); #endif @@ -740,12 +907,13 @@ static void register_output_formats(void) { static FILE *no_pp_fp; static efunc no_pp_err; -static void no_pp_reset (char *file, efunc error) { +static void no_pp_reset (char *file, efunc error, ListGen *listgen) { no_pp_err = error; no_pp_fp = fopen(file, "r"); if (!no_pp_fp) no_pp_err (ERR_FATAL | ERR_NOFILE, "unable to open input file `%s'", file); + (void) listgen; /* placate compilers */ } static char *no_pp_getline (void) { diff --git a/nasm.doc b/nasm.doc index 0613e18e..264d5ba7 100644 --- a/nasm.doc +++ b/nasm.doc @@ -74,8 +74,15 @@ will assemble `myfile.asm' into an ELF object file `myfile.o'. And will assemble `myfile.asm' into a raw binary program `myfile.com'. -To get usage instructions from NASM, try typing `nasm -h'. This will -also list the available output file formats, and what they are. +To produce a listing file, with the hex codes output from NASM +displayed on the left of the original sources, use `-l' to give a +listing file name, for example: + + nasm -f coff myfile.asm -l myfile.lst + +To get further usage instructions from NASM, try typing `nasm -h'. +This will also list the available output file formats, and what they +are. 
If you use Linux but aren't sure whether your system is a.out or ELF, type `file /usr/bin/nasm' or wherever you put the NASM binary. @@ -95,6 +102,92 @@ Like Unix compilers and assemblers, NASM is silent unless it goes wrong: you won't see any output at all, unless it gives error messages. +If you define an environment variable called NASM, the program will +interpret it as a list of extra command-line options, processed +before the real command line. This is probably most useful for +defining an include-file search path by putting a lot of `-i' +options in the NASM variable. + +The variable's value will be considered to be a space-separated list +of options unless it begins with something other than a minus sign, +in which case the first character will be taken as the separator. +For example, if you want to define a macro whose value has a space +in it, then setting the NASM variable to `-dNAME="my name"' won't +work because the string will be split at the space into `-dNAME="my' +and `name"', but setting it to `|-dNAME="my name"' will be fine +because all further operands will be considered to be separated by +vertical bars and so the space has no special meaning. + +Quick Start for MASM Users +========================== + +If you're used to writing programs with MASM, or with TASM in +MASM-compatible (non-Ideal) mode, or with A86, this section attempts +to outline the major differences between MASM's syntax and NASM's. +If you're not already used to MASM, it's probably worth skipping +this section. + +One simple difference is that NASM is case-sensitive. It makes a +difference whether you call your label `foo', `Foo' or `FOO'. 
If +you're assembling to the `obj' MS-DOS output format (or `os2'), you +can invoke the `UPPERCASE' directive (documented below, in the +Output Formats section) and ensure that all symbols exported to +other code modules are forced to uppercase; but even then, _within_ +a single module, NASM will distinguish between labels differing only +in case. + +There are also differences in some of the instructions and register +names: for example, NASM calls the floating-point stack registers +`st0', `st1' and so on, rather than MASM's `ST(0)' notation or A86's +simple numeric `0'. And NASM doesn't support LODS, MOVS, STOS, SCAS, +CMPS, INS, or OUTS, but only supports the size-specified versions +LODSB, MOVSW, SCASD and so on. + +The _major_ difference, though, is the absence in NASM of variable +typing. MASM will notice when you declare a variable as `var dw 0', +and will remember that `var' is a WORD-type variable, so that +instructions such as `mov var,2' can be unambiguously given the WORD +size rather than BYTE or DWORD. NASM doesn't and won't do this. The +statement `var dw 0' merely defines `var' to be a label marking a +point in memory: no more and no less. It so happens that there are +two bytes of data following that point in memory before the next +line of code, but NASM doesn't remember or care. If you want to +store the number 2 in such a variable, you must specify the size of +the operation _always_: `mov word [var],2'. This is a deliberate +design decision, _not_ a bug, so please could people not send us +mail asking us to `fix' it... + +The above example also illustrates another important difference +between MASM and NASM syntax: the use of OFFSET and of square +brackets. In MASM, declaring `var dw 0' entitles you to code `mov +ax,var' to get at the _contents_ of the variable, and you must write +`mov ax,offset var' to get the _address_ of the variable. In NASM, +`mov ax,var' gives you the address, and to get at the contents you +must code `mov ax,[var]'. 
Again, this is a deliberate design +decision, since it brings consistency to the syntax: `mov ax,[var]' +and `mov ax,[bx]' both refer to the contents of memory and both have +square brackets, whereas neither `mov ax,bx' nor `mov ax,var' refers +to memory contents and so neither one has square brackets. + +This is even more confusing in A86, where declaring a label with a +trailing colon defines it to be a `label' as opposed to a `variable' +and causes A86 to adopt NASM-style semantics; so in A86, `mov +ax,var' has different behaviour depending on whether `var' was +declared as `var: dw 0' or `var dw 0'. NASM is very simple by +comparison: _everything_ is a label. The OFFSET keyword is not +required, and in fact constitutes a syntax error (though you can +code `%define offset' to suppress the error messages if you want), +and `var' always refers to the _address_ of the label whereas +`[var]' refers to the _contents_. + +As an addendum to this point of syntax, it's also worth noting that +the hybrid-style syntaxes supported by MASM and its clones, such as +`mov ax,table[bx]', where a memory reference is denoted by one +portion outside square brackets and another portion inside, are also +not supported by NASM. The correct syntax for the above is `mov +ax,[table+bx]'. Likewise, `mov ax,es:[di]' is wrong and `mov +ax,[es:di]' is right. + Writing Programs with NASM ========================== @@ -106,7 +199,11 @@ LABEL: INSTRUCTION OPERANDS ; COMMENT `LABEL' defines a label pointing to that point in the source. There are no restrictions on white space: labels may have white space before them, or not, as you please. The colon after the label is -also optional. +also optional. (Note that NASM can be made to give a warning when it +sees a label which is the only thing on a line with no trailing +colon, on the grounds that such a label might easily be a mistyped +instruction name. The command line option `-w+orphan-labels' will +enable this feature.) 
Valid characters in labels are letters, numbers, `_', `$', `#', `@', `~', `?', and `.'. The only characters which may be used as the @@ -271,6 +368,11 @@ Note that there is no effective difference between `times 100 resb 1' and `resb 100', except that the latter will be assembled about 100 times faster due to the internal structure of the assembler. +Note also that TIMES can't be applied to macros: the reason for this +is that TIMES is processed after the macro phase, which allows the +argument to TIMES to contain expressions such as `64-$+buffer' as +above. + Effective Addresses =================== @@ -334,6 +436,12 @@ for both of the above instructions, in an effort to save space. There is not, currently, any means for forcing NASM to generate the larger form of the instruction. +An alternative syntax is supported, in which prefixing an operand +with `&' is synonymous with enclosing it in square brackets. The +square bracket syntax is the recommended one, however, and is the +syntax generated by NDISASM. But, for example, `mov eax,&ebx+ecx' is +equivalent to `mov eax,[ebx+ecx]'. + Mixing 16 and 32 Bit Code: Unusual Instruction Sizes ==================================================== @@ -349,13 +457,13 @@ difficult instructions are things like far jumps. Suppose you are in a 16-bit segment, in protected mode, and you want to execute a far jump to a point in a 32-bit segment. You need to -code a 32-bit far jump in a 16-bit segment; not many assemblers I -know of will easily support this. NASM can, by means of the `word' -and `dword' specifiers. So you can code +code a 32-bit far jump in a 16-bit segment; not all assemblers will +easily support this. NASM can, by means of the `word' and `dword' +specifiers. 
So you can code - call 1234h:5678h ; this uses the default segment size - call word 1234h:5678h ; this is guaranteed to be 16-bit - call dword 1234h:56789ABCh ; and this is guaranteed 32-bit + jmp 1234h:5678h ; this uses the default segment size + jmp word 1234h:5678h ; this is guaranteed to be 16-bit + jmp dword 1234h:56789ABCh ; and this is guaranteed 32-bit and NASM will generate correct code for them. @@ -512,6 +620,11 @@ unary + and -, ~, SEG highest As usual, operators within a precedence level associate to the left (i.e. `2-3-4' evaluates the same way as `(2-3)-4'). +Note that since the `%' character is used by the preprocessor, it's +worth making sure that the `%' and `%%' operators are followed by a +space, to prevent the preprocessor trying to interpret them as +macro-related things. + A form of algebra is done by NASM when evaluating expressions: I have already stated that an effective address expression such as `[EAX*6-EAX]' will be recognised by NASM as algebraically equivalent @@ -537,24 +650,26 @@ to the beginning of the _segment_; if you can't guarantee that the segment itself begins on a four-byte boundary, this alignment is useless or worse. Be sure you know what kind of alignment you can guarantee to get out of your linker before you start trying to use -TIMES to align to page boundaries. (Of course, the OBJ file format -can happily cope with page alignment, provided you specify that -segment attribute.) +TIMES to align to page boundaries. (Of course, the `obj' and `os2' +file formats can happily cope with page alignment, provided you +specify that segment attribute.) SEG and WRT =========== NASM contains the capability for its object file formats (currently, -only `obj' makes use of this) to permit programs to directly refer -to the segment-base values of their segments. This is achieved -either by the object format defining the segment names as symbols -(`obj' does this), or by the use of the SEG operator. 
+only `obj' and its variant `os2' make use of this) to permit +programs to directly refer to the segment-base values of their +segments. This is achieved either by the object format defining the +segment names as symbols (`obj' and `os2' do this), or by the use of +the SEG operator. SEG is a unary prefix operator which, when applied to a symbol defined in a segment, will yield the segment base value of that -segment. (In `obj' format, symbols defined in segments which are -grouped are considered to be primarily a member of the _group_, not -the segment, and the return value of SEG reflects this.) +segment. (In `obj' and `os2' format, symbols defined in segments +which are grouped are considered to be primarily a member of the +_group_, not the segment, and the return value of SEG reflects +this.) SEG may be used for far pointers: it is guaranteed that for any symbol `sym', using the offset `sym' from the segment base `SEG sym' @@ -708,8 +823,8 @@ below. In 32-bit mode, instructions are prefixed with 0x66 or 0x67 prefixes when they use 16-bit data or addresses; in 16-bit mode, the reverse happens. NASM's default depends on the object format; the defaults -are documented with the formats. (See `obj', in particular, for some -unusual behaviour.) +are documented with the formats. (See `obj' and `os2', in +particular, for some unusual behaviour.) `SECTION name' or `SEGMENT name' changes which section the code you write will be assembled into. Acceptable section names vary between @@ -756,8 +871,8 @@ it refers to. `COMMON symbol size' defines a symbol as being common: it is declared to have the given size, and it is merged at link time with any declarations of the same symbol in other modules. This is not -_fully_ supported in the `obj' file format: see the section on `obj' -for details. +_fully_ supported in the `obj' or `os2' file format: see the section +on `obj' for details. `STRUC structure' begins the definition of a data structure, and `ENDSTRUC' ends it. 
The structure shown above may be defined, @@ -766,8 +881,8 @@ exactly equivalently, using STRUC as follows: struc st stLong resd 1 stWord resw 1 - stByte1 resb 1 - stByte2 resb 1 + stByte resb 1 + stStr resb 32 endstruc Notice that this code still defines the symbol `st_size' to be the @@ -777,6 +892,36 @@ remembering which section you were assembling in (whereas in the version using `ABSOLUTE' it was up to the programmer to sort that out). +`ISTRUC structure' begins the declaration of an initialised instance +of a data structure. You can then use the `AT' macro to assign +values to the structure members, and `IEND' to finish. So, for +example, given the structure `st' above: + + istruc st + at stLong, dd 0x1234 + at stWord, dw 23 + at stByte, db 'q' + at stStr, db 'hello, world', 13, 10, 0 + iend + +Note that there's nothing stopping the instruction after `at' from +overflowing on to the next line if you want. So the above example +could just as well have contained + + at stStr, db 'hello, world' + db 13, 10, 0 + +or even (if you prefer this style) + + at stStr + db 'hello, world' + db 13, 10, 0 + +Note also that the `ISTRUC' mechanism is implemented as a set of +macros, and uses TIMES internally to achieve its effect; so the +structure fields must be initialised in the same order as they were +defined in. + This is where user-level directives differ from primitives: the `SECTION' (and `SEGMENT') user-level directives don't just call the primitive versions, but they also `%define' the special preprocessor @@ -788,14 +933,9 @@ ENDSTRUC - they are implemented in terms of ABSOLUTE and SECTION. This also means that if you use STRUC before explicitly announcing a target section, you should explicitly announce one after ENDSTRUC. -The primitive directive [INCLUDE filename] (or the equivalent form -[INC filename]) is supported as a synonym for the preprocessor- -oriented `%include' form, but only temporarily: this usage will be -phased out in the next version of NASM. 
- Directives may also be specific to the output file format. At -present, the `bin' and `obj' formats define extra directives, which -are specified below. +present, the `bin', `obj' and `os2' formats define extra directives, +which are specified below. The Preprocessor ================ @@ -841,7 +981,30 @@ all to expand to `bar'. There is a mechanism which detects when a macro call has occurred as a result of a previous expansion of the same macro, to guard against circular references and infinite loops. If this happens, the -preprocessor will report an error. +preprocessor will only expand the first occurrence of the macro. +Hence: + + %define a(x) 1+a(x) + mov ax,a(3) ; becomes 1+a(3) and expands no further + +This can be useful for doing things like this: + + %macro extrn 1 ; see next section for explanation of `%macro' + extern _%1 + %define %1 _%1 + %endmacro + +which would avoid having to put leading underscores on external +variables, because you could just code + + extrn foo + mov ax,foo + +and it would expand as + + extern _foo + %define foo _foo + mov ax,foo ; becomes mov ax,_foo as required Single-line macros with parameters can be overloaded: it is possible to define two or more single-line macros with the same name, each @@ -852,6 +1015,19 @@ name _with_ parameters, and vice versa (though single-line macros may be redefined, keeping the same number of parameters, without error). +You can pre-define single-line macros using the `-d' option on the +NASM command line, such as + + nasm filename -dDEBUG + +(and then you might have various conditional-assembly bits under +`%ifdef DEBUG'), or possibly + + nasm filename -dTYPE=4 + +(which might allow you to re-assemble your code to do several +different things depending on the value of TYPE). + Multiple-line macros -------------------- @@ -875,6 +1051,16 @@ expects no parameters. 
Macros can be overloaded: if two macros are defined with the same name but different numbers of parameters, they will be treated as separate. Multi-line macros may not be redefined. +The assembler will usually generate a warning if you code a line +which looks like a macro call but involves a number of parameters +which the macro in question isn't ready to support. (For example, if +you code a macro `%macro foo 1' and also `%macro foo 3', then you +write `foo a,b', a warning will be generated.) This feature can be +disabled by the use of the command line option `-w-macro-params', +since sometimes it's intentional (for example, you might define +`%macro push 2' to allow you to push two registers at once; but +`push ax' shouldn't then generate a warning). + Macros taking parameters can be written using `%1', `%2' and so on to reference the parameters. So this code @@ -902,7 +1088,7 @@ with `%%'. So: This defines a different label in place of `%%skip' every time it's called. (Of course the above code could have easily been coded using `jnz $+3', but not in more complex cases...) The actual label -defined would be `macro.2345.skip', where 2345 is replaced by some +defined would be `..@2345.skip', where 2345 is replaced by some number that changes with each macro call. Users are warned to avoid defining labels of this shape themselves. @@ -923,7 +1109,7 @@ modifier on the `%macro' line: %endmacro fputs [filehandle], "hi there", 13, 10 -This declares `pstring' to be a macro that accepts _at least two_ +This declares `fputs' to be a macro that accepts _at least two_ parameters, and all parameters after the first one are lumped together as part of the last specified one (in this case %2). 
So in the macro call, `%1' expands to `[filehandle]' while `%2' expands to @@ -1002,9 +1188,9 @@ defined: which will expand to something like - jnae macro.1234.skip + jnae ..@1234.skip mov ax,bx - macro.1234.skip: + ..@1234.skip: Note that `%+1' will allow CXZ or ECXZ to be passed as condition codes, but `%-1' will of course be unable to invert them. @@ -1034,6 +1220,28 @@ Defaults may be omitted, in which case they are taken to be blank. `%endm' is a valid synonym for `%endmacro'. +The specification for the number of macro parameters can be suffixed +with `.nolist' if you don't want the macro to be explicitly expanded +in listing files: + + %macro ping 1-2+.nolist + ; some stuff + %endmacro + +Standard Macros and `%clear' +---------------------------- + +NASM defines a set of standard macros, before the input file gets +processed; these are primarily there in order to provide standard +language features (such as structure support). However, it's +conceivable that a user might want to write code that doesn't have +the standard macros defined; you can achieve this by using the +preprocessor directive `%clear' at the top of your program, which +will undefine _everything_ that's defined by the preprocessor. + +In particular, NASM defines the symbols `__NASM_MAJOR__' and +`__NASM_MINOR__' to be the major and minor version numbers of NASM. + Conditional Assembly -------------------- @@ -1054,9 +1262,12 @@ File Inclusion -------------- You can include a file using the `%include' directive. Included -files are only searched for in the current directory: there isn't -(yet - if there's demand for it it could be arranged) any default -search path for standard include files. +files are searched for in the current directory, and then in all +directories specified on the command line with the `-i' option. 
+(Note that the directories specified on the command line are +directly prepended to the filename, so they must include the +necessary trailing slash under DOS or Unix, or the equivalent on +other systems.) This, again, works like C: `%include' is used to include a file. Of course it's quite likely you'd want to do the normal sort of thing @@ -1075,6 +1286,10 @@ and then elsewhere so that it doesn't matter if the file accidentally gets included more than once. +You can force an include file to be included without using a +`%include' command, by specifying it as a pre-include file on the +command line using the `-p' option. + The Context Stack ----------------- @@ -1159,8 +1374,8 @@ Output Formats ============== The current output formats supported are `bin', `aout', `coff', -`elf', `as86', `obj', `win32', `rdf', and the debug pseudo-format -`dbg'. +`elf', `as86', `obj', `os2', `win32', `rdf', and the debug +pseudo-format `dbg'. `bin': flat-form binary ----------------------- @@ -1181,17 +1396,18 @@ NASM does not support the use of ORG to jump around inside an object file, like MASM does (see the `Bugs' section for a demonstration of the use of MASM's form of ORG to do something that NASM's won't do.) -Like almost all formats (not `obj'), the `bin' format defines the -section names `.text', `.data' and `.bss'. The layout is that -`.text' comes first in the output file, followed by `.data', and -notionally followed by `.bss'. So if you declare a BSS section in a -flat binary file, references to the BSS section will refer to space -past the end of the actual file. The `.data' and `.bss' sections are -considered to be aligned on four-byte boundaries: this is achieved -by inserting padding zero bytes between the end of the text section -and the start of the data, if there is data present. Of course if no -SECTION directives are present, everything will go into `.text', and -you will get nothing in the output except the code you wrote. 
+Like almost all formats (but not `obj' or `os2'), the `bin' format +defines the section names `.text', `.data' and `.bss'. The layout is +that `.text' comes first in the output file, followed by `.data', +and notionally followed by `.bss'. So if you declare a BSS section +in a flat binary file, references to the BSS section will refer to +space past the end of the actual file. The `.data' and `.bss' +sections are considered to be aligned on four-byte boundaries: this +is achieved by inserting padding zero bytes between the end of the +text section and the start of the data, if there is data present. Of +course if no SECTION directives are present, everything will go into +`.text', and you will get nothing in the output except the code you +wrote. `bin' silently ignores GLOBAL directives, and will also not complain at EXTERN ones. You only get an error if you actually _reference_ an @@ -1324,8 +1540,8 @@ to pass directives to the MS linker. Both `coff' and `win32' default to 32-bit assembly mode. -`obj': Microsoft 16-bit Object Module Format --------------------------------------------- +`obj' and `os2': Microsoft 16-bit Object Module Format +------------------------------------------------------ The `obj' format generates 16-bit Microsoft object files, suitable for feeding to 16-bit versions of Microsoft C, and probably @@ -1416,6 +1632,26 @@ place 32-bit code in a Use16 segment, you can use an explicit `BITS 32' override, but if you switch temporarily away from that segment, you will have to repeat the override after coming back to it. +If you're trying to build a .COM application by linking several .OBJ +files together, you need to put `resb 0x100' at the front of the +code segment in the first object file, since otherwise the linker +will get the linking wrong. 
+ +OS/2 uses an almost exactly similar file format to DOS, with a +couple of differences, principally that OS/2 defines a pseudo-group +called FLAT, containing no segments, and every relocation is made +relative to that (so it would be equivalent to writing `label WRT +FLAT' in place of `label' _throughout_ your code). Since this would +be inconvenient to write code for, NASM implements the `os2' variant +on `obj', which provides this FLAT group itself and automatically +makes the default relocation format relative to FLAT. + +NOTE TO OS/2 USERS: The OS/2 output format is new in NASM version +0.95. It hasn't been tested on any actual OS/2 systems, and I don't +know for sure that it'll work properly. Any OS/2 users are +encouraged to give it a thorough testing and report the results to +me. Thanks! + `as86': Linux as86 (bin86-0.3) ------------------------------ @@ -1448,14 +1684,46 @@ debugging purposes. It produces a debug dump of everything that the NASM assembly module feeds to the output driver, for the benefit of people trying to write their own output drivers. +Common Problems +=============== + +A few problems that people repeatedly ask me about are documented +here. + +NASM's design philosophy of generating exactly the code the +programmer asks for, without second-guessing or re-interpreting, has +been known to cause confusion in a couple of areas. + +Firstly, several people have complained that instructions such as +`add esp,4' are assembled in a form that allocates a full four-byte +offset field to store the `4' in, even though the instruction has a +shorter form with a single-byte offset field which would work in +this case. The answer is that NASM by design doesn't try to guess +which one of these forms you want: if you want one, you code one, +and if you want the other, you code the other. The other form is +`add esp, byte 4'. + +Secondly, and similarly, I've had repeated questions about +conditional jumps. 
The simple `jne label', in NASM, translates +directly to the old 8086 form of the conditional jump, in which the +offset can be up to 128 bytes (or thereabouts) in either direction. +NASM won't automatically generate `je $+3 / jmp label' for labels +that are further away, and neither will it generate the 386 long- +offset form of the instruction. If you want the 386-specific +conditional jump that's capable of reaching anywhere in the same +segment as the jump instruction, you want `jne near label'. If you +want an 8086-compatible `je' over another `jmp', code one +explicitly, or define a macro to do so. NASM doesn't do either of +these things for you, again by design. + Bugs ==== Apart from the missing features (correct OBJ COMMON support, ELF alignment, ELF PIC support, etc.), there are no _known_ bugs. However, any you find, with patches if possible, should be sent to - or , and we'll try to -fix them. + or , and we'll try to fix +them. Beware of Pentium-specific instructions: Intel have provided a macro file for MASM, to implement the eight or nine new Pentium opcodes as diff --git a/nasm.h b/nasm.h index 40c4e0e8..dfee14a9 100644 --- a/nasm.h +++ b/nasm.h @@ -12,8 +12,8 @@ #define NASM_NASM_H #define NASM_MAJOR_VER 0 -#define NASM_MINOR_VER 94 -#define NASM_VER "0.94" +#define NASM_MINOR_VER 95 +#define NASM_VER "0.95" #ifndef NULL #define NULL 0 @@ -66,6 +66,15 @@ typedef void (*efunc) (int severity, char *fmt, ...); #define ERR_OFFBY1 0x40 /* report error as being on the line * we're just _about_ to read, not * the one we've just read */ +/* + * These codes define specific types of suppressible warning. 
+ */ +#define ERR_WARN_MNP 0x0100 /* macro-num-parameters warning */ +#define ERR_WARN_OL 0x0200 /* orphan label (no colon, and + * alone on line) */ +#define ERR_WARN_MASK 0xFF00 /* the mask for this feature */ +#define ERR_WARN_SHR 8 /* how far to shift right */ +#define ERR_WARN_MAX 2 /* the highest numbered one */ /* * ----------------------- @@ -85,14 +94,73 @@ typedef void (*ldfunc) (char *label, long segment, long offset, struct ofmt *ofmt, efunc error); /* + * List-file generators should look like this: + */ +typedef struct { + /* + * Called to initialise the listing file generator. Before this + * is called, the other routines will silently do nothing when + * called. The `char *' parameter is the file name to write the + * listing to. + */ + void (*init) (char *, efunc); + + /* + * Called to clear stuff up and close the listing file. + */ + void (*cleanup) (void); + + /* + * Called to output binary data. Parameters are: the offset; + * the data; the data type. Data types are similar to the + * output-format interface, only OUT_ADDRESS will _always_ be + * displayed as if it's relocatable, so ensure that any non- + * relocatable address has been converted to OUT_RAWDATA by + * then. Note that OUT_RAWDATA+0 is a valid data type, and is a + * dummy call used to give the listing generator an offset to + * work with when doing things like uplevel(LIST_TIMES) or + * uplevel(LIST_INCBIN). + */ + void (*output) (long, void *, unsigned long); + + /* + * Called to send a text line to the listing generator. The + * `int' parameter is LIST_READ or LIST_MACRO depending on + * whether the line came directly from an input file or is the + * result of a multi-line macro expansion. + */ + void (*line) (int, char *); + + /* + * Called to change one of the various levelled mechanisms in + * the listing generator. 
LIST_INCLUDE and LIST_MACRO can be + * used to increase the nesting level of include files and + * macro expansions; LIST_TIMES and LIST_INCBIN switch on the + * two binary-output-suppression mechanisms for large-scale + * pseudo-instructions. + * + * LIST_MACRO_NOLIST is synonymous with LIST_MACRO except that + * it indicates the beginning of the expansion of a `nolist' + * macro, so anything under that level won't be expanded unless + * it includes another file. + */ + void (*uplevel) (int); + + /* + * Reverse the effects of uplevel. + */ + void (*downlevel) (int); +} ListGen; + +/* * Preprocessors ought to look like this: */ typedef struct { /* - * Called at the start of a pass; given a file name and an - * error reporting function. + * Called at the start of a pass; given a file name, an error + * reporting function and a listing generator to talk to. */ - void (*reset) (char *, efunc); + void (*reset) (char *, efunc, ListGen *); /* * Called to fetch a line of preprocessed source. The line @@ -120,9 +188,10 @@ typedef struct { * (for local labels), whereas a number may appear anywhere *but* at the * start. */ -#define isidstart(c) ( isalpha(c) || (c)=='_' || (c)=='.' || (c)=='?' ) +#define isidstart(c) ( isalpha(c) || (c)=='_' || (c)=='.' || (c)=='?' \ + || (c)=='@' ) #define isidchar(c) ( isidstart(c) || isdigit(c) || (c)=='$' || (c)=='#' \ - || (c)=='@' || (c)=='~' ) + || (c)=='~' ) /* Ditto for numeric constants. */ @@ -134,6 +203,14 @@ typedef struct { #define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0') /* + * Data-type flags that get passed to listing-file routines. + */ +enum { + LIST_READ, LIST_MACRO, LIST_MACRO_NOLIST, LIST_INCLUDE, + LIST_INCBIN, LIST_TIMES +}; + +/* * ----------------------------------------------------------- * Format of the `insn' structure returned from `parser.c' and * passed into `assemble.c' @@ -381,6 +458,13 @@ struct ofmt { * which case `offset' holds the _size_ of the variable). 
* Anything else is available for the output driver to use * internally. + * + * This routine explicitly _is_ allowed to call the label + * manager to define further symbols, if it wants to, even + * though it's been called _from_ the label manager. That much + * re-entrancy is guaranteed in the label manager. However, the + * label manager will in turn call this routine, so it should + * be prepared to be re-entrant itself. */ void (*symdef) (char *name, long segment, long offset, int is_global); diff --git a/ndisasm.doc b/ndisasm.doc index 5b5374af..76367f5f 100644 --- a/ndisasm.doc +++ b/ndisasm.doc @@ -174,7 +174,7 @@ Bugs and Improvements ===================== There are no known bugs. However, any you find, with patches if -possible, should be sent to or +possible, should be sent to or , and we'll try to fix them. Feel free to send contributions and new features as well. diff --git a/outaout.c b/outaout.c index 72b98bfb..2b5a381c 100644 --- a/outaout.c +++ b/outaout.c @@ -153,7 +153,8 @@ static void aout_deflabel (char *name, long segment, long offset, int pos = strslen+4; struct Symbol *sym; - if (name[0] == '.' && name[1] == '.') { + if (name[0] == '.' && name[1] == '.' && name[2] != '@') { + error (ERR_NONFATAL, "unrecognised special symbol `%s'", name); return; } diff --git a/outas86.c b/outas86.c index 9bc0379a..dd0656f2 100644 --- a/outas86.c +++ b/outas86.c @@ -161,7 +161,8 @@ static void as86_deflabel (char *name, long segment, long offset, int is_global) { struct Symbol *sym; - if (name[0] == '.' && name[1] == '.') { + if (name[0] == '.' && name[1] == '.' && name[2] != '@') { + error (ERR_NONFATAL, "unrecognised special symbol `%s'", name); return; } diff --git a/outbin.c b/outbin.c index 362e4f84..da82abc5 100644 --- a/outbin.c +++ b/outbin.c @@ -241,6 +241,11 @@ static void bin_out (long segto, void *data, unsigned long type, static void bin_deflabel (char *name, long segment, long offset, int is_global) { + if (name[0] == '.' && name[1] == '.' 
&& name[2] != '@') { + error (ERR_NONFATAL, "unrecognised special symbol `%s'", name); + return; + } + if (is_global == 2) { error (ERR_NONFATAL, "binary output format does not support common" " variables"); diff --git a/outcoff.c b/outcoff.c index 7257fc64..21b9bac4 100644 --- a/outcoff.c +++ b/outcoff.c @@ -285,7 +285,8 @@ static void coff_deflabel (char *name, long segment, long offset, int pos = strslen+4; struct Symbol *sym; - if (name[0] == '.' && name[1] == '.') { + if (name[0] == '.' && name[1] == '.' && name[2] != '@') { + error (ERR_NONFATAL, "unrecognised special symbol `%s'", name); return; } diff --git a/outdbg.c b/outdbg.c index a55d3db8..e37ebdbf 100644 --- a/outdbg.c +++ b/outdbg.c @@ -63,8 +63,8 @@ static long dbg_section_names (char *name, int pass, int *bits) static void dbg_deflabel (char *name, long segment, long offset, int is_global) { - fprintf(dbgf,"deflabel %s := %08lx:%08lx %s (%d)\n",name,segment,offset, - is_global ? "global" : "local", is_global); + fprintf(dbgf,"deflabel %s := %08lx:%08lx %s (%d)\n",name,segment,offset, + is_global ? "global" : "local", is_global); } static void dbg_out (long segto, void *data, unsigned long type, diff --git a/outelf.c b/outelf.c index a413c01b..3c7f2763 100644 --- a/outelf.c +++ b/outelf.c @@ -290,7 +290,8 @@ static void elf_deflabel (char *name, long segment, long offset, int pos = strslen; struct Symbol *sym; - if (name[0] == '.' && name[1] == '.') { + if (name[0] == '.' && name[1] == '.' 
&& name[2] != '@') { + error (ERR_NONFATAL, "unrecognised special symbol `%s'", name); return; } @@ -306,11 +307,18 @@ static void elf_deflabel (char *name, long segment, long offset, else { int i; sym->section = SHN_UNDEF; - for (i=0; i<nsects; i++) - if (segment == sects[i]->index) { - sym->section = i+1; - break; - } + if (nsects == 0 && segment == def_seg) { + int tempint; + if (segment != elf_section_names (".text", 2, &tempint)) + error (ERR_PANIC, "strange segment conditions in ELF driver"); + sym->section = nsects; + } else { + for (i=0; i<nsects; i++) + if (segment == sects[i]->index) { + sym->section = i+1; + break; + } + } } if (is_global == 2) { diff --git a/outform.c b/outform.c index 154c63f7..09202de3 100644 --- a/outform.c +++ b/outform.c @@ -27,11 +27,11 @@ struct ofmt *ofmt_find(char *name) /* find driver */ return NULL; } -void ofmt_list(struct ofmt *deffmt) +void ofmt_list(struct ofmt *deffmt, FILE *fp) { int i; for (i=0; ishortname, drivers[i]->fullname); diff --git a/outform.h b/outform.h index 48b8276a..a77e534e 100644 --- a/outform.h +++ b/outform.h @@ -18,7 +18,7 @@ * OF_NO_name -- remove output format 'name' * OF_DOS -- ensure that 'obj', 'bin' & 'win32' are included. * OF_UNIX -- ensure that 'aout', 'coff' and 'elf' are in. - * OF_OTHERS -- ensure that 'bin', 'as86' & 'rdf' are in. + * OF_OTHERS -- ensure that 'bin', 'as86', 'os2' & 'rdf' are in. * OF_ALL -- ensure that all formats are included. * * OF_DEFAULT=of_name -- ensure that 'name' is the default format.
@@ -37,8 +37,8 @@ #define MAX_OUTPUT_FORMATS 16 -struct ofmt *ofmt_find(char *name); -void ofmt_list(struct ofmt *deffmt); +struct ofmt *ofmt_find(char *); +void ofmt_list(struct ofmt *, FILE *); void ofmt_register (struct ofmt *); /* -------------- USER MODIFIABLE PART ---------------- */ @@ -77,6 +77,9 @@ void ofmt_register (struct ofmt *); #ifndef OF_OBJ #define OF_OBJ #endif +#ifndef OF_OS2 +#define OF_OS2 +#endif #ifndef OF_ELF #define OF_ELF #endif @@ -132,6 +135,9 @@ void ofmt_register (struct ofmt *); #ifndef OF_RDF #define OF_RDF #endif +#ifndef OF_OS2 +#define OF_OS2 +#endif #endif /* finally... override any format specifically specifed to be off */ @@ -159,6 +165,9 @@ void ofmt_register (struct ofmt *); #ifdef OF_NO_RDF #undef OF_RDF #endif +#ifdef OF_NO_OS2 +#undef OF_OS2 +#endif #ifndef OF_DEFAULT #define OF_DEFAULT of_bin diff --git a/outobj.c b/outobj.c index 68f1745b..544ec663 100644 --- a/outobj.c +++ b/outobj.c @@ -85,7 +85,7 @@ static struct Group { long index; char *name; } segs[GROUP_MAX]; /* ...in this */ -} *grphead, **grptail, *obj_grp_needs_update; +} *grphead, **grptail, *obj_grp_needs_update, *defgrp; static struct ObjData { struct ObjData *next; @@ -99,6 +99,8 @@ static struct ObjData { static long obj_entry_seg, obj_entry_ofs; +static int os2; + enum RecordID { /* record ID codes */ THEADR = 0x80, /* module header */ @@ -136,6 +138,7 @@ static unsigned char *obj_write_name(unsigned char *, char *); static unsigned char *obj_write_index(unsigned char *, int); static unsigned char *obj_write_value(unsigned char *, unsigned long); static void obj_record(int, unsigned char *, unsigned char *); +static int obj_directive (char *, char *, int); static void obj_init (FILE *fp, efunc errfunc, ldfunc ldef) { ofp = fp; @@ -158,6 +161,22 @@ static void obj_init (FILE *fp, efunc errfunc, ldfunc ldef) { datatail = &datahead; obj_entry_seg = NO_SEG; obj_uppercase = FALSE; + + if (os2) { + obj_directive ("group", "FLAT", 1); + defgrp = grphead; + } 
else + defgrp = NULL; +} + +static void dos_init (FILE *fp, efunc errfunc, ldfunc ldef) { + os2 = FALSE; + obj_init (fp, errfunc, ldef); +} + +static void os2_init (FILE *fp, efunc errfunc, ldfunc ldef) { + os2 = TRUE; + obj_init (fp, errfunc, ldef); } static void obj_cleanup (void) { @@ -227,12 +246,13 @@ static void obj_deflabel (char *name, long segment, * First check for the double-period, signifying something * unusual. */ - if (name[0] == '.' && name[1] == '.') { + if (name[0] == '.' && name[1] == '.' && name[2] != '@') { if (!strcmp(name, "..start")) { obj_entry_seg = segment; obj_entry_ofs = offset; + return; } - return; + error (ERR_NONFATAL, "unrecognised special symbol `%s'", name); } /* @@ -265,6 +285,17 @@ static void obj_deflabel (char *name, long segment, return; } + /* + * If `any_segs' is still FALSE, we might need to define a + * default segment, if they're trying to declare a label in + * `first_seg'. + */ + if (!any_segs && segment == first_seg) { + int tempint; /* ignored */ + if (segment != obj_segment("__NASMDEFSEG", 2, &tempint)) + error (ERR_PANIC, "strange segment conditions in OBJ driver"); + } + for (seg = seghead; seg; seg = seg->next) if (seg->index == segment) { /* @@ -272,7 +303,6 @@ static void obj_deflabel (char *name, long segment, */ if (is_global) { struct Public *pub; - pub = *seg->pubtail = nasm_malloc(sizeof(*pub)); seg->pubtail = &pub->next; pub->next = NULL; @@ -390,7 +420,8 @@ static void obj_out (long segto, void *data, unsigned long type, datacurr->nonempty = TRUE; if (segment != NO_SEG) obj_write_fixup (datacurr, size, - (realtype == OUT_REL2ADR ? 0 : 0x4000), + (realtype == OUT_REL2ADR || + realtype == OUT_REL4ADR ? 
0 : 0x4000), segment, wrt, (seg->currentpos - datacurr->startpos)); seg->currentpos += size; @@ -506,10 +537,14 @@ static void obj_write_fixup (struct ObjData *data, int bytes, /* * If no WRT given, assume the natural default, which is method * F5 unless we are doing an OFFSET fixup for a grouped - * segment, in which case we require F1 (group). + * segment, in which case we require F1 (group). Oh, and in + * OS/2 mode we're in F1 (group) on `defgrp' _always_, by + * default. */ if (wrt == NO_SEG) { - if (!base && s && s->grp) + if (os2) + method |= 0x10, fidx = defgrp->obj_index; + else if (!base && s && s->grp) method |= 0x10, fidx = s->grp->obj_index; else method |= 0x50, fidx = -1; @@ -731,7 +766,7 @@ static long obj_segment (char *name, int pass, int *bits) { static int obj_directive (char *directive, char *value, int pass) { if (!strcmp(directive, "group")) { - char *p, *q; + char *p, *q, *v; if (pass == 1) { struct Group *grp; struct Segment *seg; @@ -740,6 +775,7 @@ static int obj_directive (char *directive, char *value, int pass) { q = value; while (*q == '.') q++; /* hack, but a documented one */ + v = q; while (*q && !isspace(*q)) q++; if (isspace(*q)) { @@ -747,16 +783,23 @@ static int obj_directive (char *directive, char *value, int pass) { while (*q && isspace(*q)) q++; } - if (!*q) { - error(ERR_NONFATAL, "GROUP directive contains no segments"); - return 1; - } + /* + * Here we used to sanity-check the group directive to + * ensure nobody tried to declare a group containing no + * segments. However, OS/2 does this as standard + * practice, so the sanity check has been removed. 
+ * + * if (!*q) { + * error(ERR_NONFATAL,"GROUP directive contains no segments"); + * return 1; + * } + */ obj_idx = 1; for (grp = grphead; grp; grp = grp->next) { obj_idx++; - if (!strcmp(grp->name, value)) { - error(ERR_NONFATAL, "group `%s' defined twice", value); + if (!strcmp(grp->name, v)) { + error(ERR_NONFATAL, "group `%s' defined twice", v); return 1; } } @@ -770,7 +813,7 @@ static int obj_directive (char *directive, char *value, int pass) { grp->name = NULL; obj_grp_needs_update = grp; - deflabel (value, grp->index+1, 0L, &of_obj, error); + deflabel (v, grp->index+1, 0L, &of_obj, error); obj_grp_needs_update = NULL; while (*q) { @@ -1072,12 +1115,17 @@ static void obj_write_file (void) { /* * Write a COMENT record stating that the linker's first pass - * may stop processing at this point. + * may stop processing at this point. Exception is if we're in + * OS/2 mode and our MODEND record specifies a start point, in + * which case, according to the OS/2 documentation, this COMENT + * should be omitted. */ - recptr = record; - recptr = obj_write_rword (recptr, 0x40A2); - recptr = obj_write_byte (recptr, 1); - obj_record (COMENT, record, recptr); + if (!os2 || obj_entry_seg == NO_SEG) { + recptr = record; + recptr = obj_write_rword (recptr, 0x40A2); + recptr = obj_write_byte (recptr, 1); + obj_record (COMENT, record, recptr); + } /* * Write the LEDATA/FIXUPP pairs. 
@@ -1086,7 +1134,7 @@ static void obj_write_file (void) { if (data->nonempty) { obj_record (data->letype, data->ledata, data->lptr); if (data->fptr != data->fixupp) - obj_record (FIXUPP, data->fixupp, data->fptr); + obj_record (data->ftype, data->fixupp, data->fptr); } } @@ -1218,9 +1266,22 @@ static void obj_record(int type, unsigned char *start, unsigned char *end) { } struct ofmt of_obj = { - "Microsoft MS-DOS 16-bit object files", + "Microsoft MS-DOS 16-bit OMF object files", "obj", - obj_init, + dos_init, + obj_out, + obj_deflabel, + obj_segment, + obj_segbase, + obj_directive, + obj_filename, + obj_cleanup +}; + +struct ofmt of_os2 = { + "OS/2 object files (variant of OMF)", + "os2", + os2_init, obj_out, obj_deflabel, obj_segment, diff --git a/outrdf.c b/outrdf.c index 2a70db84..dd877751 100644 --- a/outrdf.c +++ b/outrdf.c @@ -91,7 +91,7 @@ typedef struct memorybuffer { struct memorybuffer *next; } memorybuffer; -static memorybuffer * newmembuf(){ +static memorybuffer * newmembuf(void){ memorybuffer * t; t = nasm_malloc(sizeof(memorybuffer)); @@ -269,6 +269,11 @@ static void rdf_deflabel(char *name, long segment, long offset, int is_global) static int warned_common = 0; #endif + if (name[0] == '.' && name[1] == '.' && name[2] != '@') { + error (ERR_NONFATAL, "unrecognised special symbol `%s'", name); + return; + } + if (is_global && segment > 4) { #ifdef VERBOSE_WARNINGS if (! 
warned_common) { diff --git a/parser.c b/parser.c index 2cd7f1d0..f031ec0e 100644 --- a/parser.c +++ b/parser.c @@ -153,6 +153,9 @@ insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, i = nexttoken(); if (i == ':') { /* skip over the optional colon */ i = nexttoken(); + } else if (i == 0 && pass == 1) { + error (ERR_WARNING|ERR_WARN_OL, + "label alone on a line without a colon might be in error"); } } else /* no label; so, moving swiftly on */ result->label = NULL; @@ -187,7 +190,7 @@ insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, } else { result->times = value->value; if (value->value < 0) - error(ERR_WARNING, "TIMES value %d is negative", + error(ERR_NONFATAL, "TIMES value %d is negative", value->value); } } else { @@ -318,6 +321,21 @@ insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, insn_names[result->opcode], oper_num); } } + + /* + * We're about to call nexttoken(), which will eat the + * comma that we're currently sitting on between + * arguments. However, we'd better check first that it + * _is_ a comma. + */ + if (i == 0) /* also could be EOL */ + break; + if (i != ',') { + error (ERR_NONFATAL, "comma expected after `%s' operand %d", + insn_names[result->opcode], oper_num); + result->opcode = -1;/* unrecoverable parse error: */ + return result; /* ignore this instruction */ + } } if (result->opcode == I_INCBIN) { @@ -358,6 +376,7 @@ insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, for (operand = 0; operand < 3; operand++) { expr *seg, *value; /* used most of the time */ int mref; /* is this going to be a memory ref? */ + int bracket; /* is it a [] mref, or a & mref? 
*/ result->oprs[operand].addr_size = 0;/* have to zero this whatever */ i = nexttoken(); @@ -397,9 +416,10 @@ insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, i = nexttoken(); } - if (i == '[') { /* memory reference */ - i = nexttoken(); + if (i == '[' || i == '&') { /* memory reference */ mref = TRUE; + bracket = (i == '['); + i = nexttoken(); if (i == TOKEN_SPECIAL) { /* check for address size override */ switch ((int)tokval.t_integer) { case S_WORD: @@ -415,8 +435,10 @@ insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, } i = nexttoken(); } - } else /* immediate operand, or register */ + } else { /* immediate operand, or register */ mref = FALSE; + bracket = FALSE; /* placate optimisers */ + } eval_reset(); @@ -454,7 +476,7 @@ insn *parse_line (long segment, long offset, lfunc lookup_label, int pass, return result; /* ignore this instruction */ } } else seg = NULL; - if (mref) { /* find ] at the end */ + if (mref && bracket) { /* find ] at the end */ if (i != ']') { error (ERR_NONFATAL, "parser: expecting ]"); do { /* error recovery again */ @@ -910,6 +932,8 @@ static int is_reloc (expr *vect) { if (!vect->type) return 1; } + if (vect->type != EXPR_WRT && vect->value != 0 && vect->value != 1) + return 0; /* segment base multiplier non-unity */ do { vect++; } while (vect->type && (vect->type == EXPR_WRT || !vect->value)); @@ -1313,10 +1337,6 @@ static expr *evaluate (int critical) { return NULL; if (i == TOKEN_WRT) { - if (!is_reloc(e)) { - error(ERR_NONFATAL, "invalid left-hand operand to WRT"); - return NULL; - } i = nexttoken(); /* eat the WRT */ f = expr6 (critical); if (!f) diff --git a/preproc.c b/preproc.c index cd8c6170..574e852f 100644 --- a/preproc.c +++ b/preproc.c @@ -24,6 +24,7 @@ typedef struct Token Token; typedef struct Line Line; typedef struct Include Include; typedef struct Cond Cond; +typedef struct IncPath IncPath; /* * Store the definition of a single-line macro. 
@@ -46,6 +47,7 @@ struct MMacro { int casesense; int nparam_min, nparam_max; int plus; /* is the last parameter greedy? */ + int nolist; /* is this macro listing-inhibited? */ int in_progress; Token **defaults, *dlist; Line *expansion; @@ -79,6 +81,11 @@ struct Context { * the token representing `x' will have its type changed to * TOK_SMAC_PARAM, but the one representing `y' will be * TOK_SMAC_PARAM+1. + * + * TOK_INTERNAL_STRING is a dirty hack: it's a single string token + * which doesn't need quotes around it. Used in the pre-include + * mechanism as an alternative to trying to find a sensible type of + * quote to use on the filename we were passed. */ struct Token { Token *next; @@ -88,7 +95,8 @@ struct Token { }; enum { TOK_WHITESPACE = 1, TOK_COMMENT, TOK_ID, TOK_PREPROC_ID, TOK_STRING, - TOK_NUMBER, TOK_SMAC_END, TOK_OTHER, TOK_PS_OTHER, TOK_SMAC_PARAM + TOK_NUMBER, TOK_SMAC_END, TOK_OTHER, TOK_PS_OTHER, TOK_SMAC_PARAM, + TOK_INTERNAL_STRING }; /* @@ -110,8 +118,8 @@ enum { * markers delimiting the end of the expansion of a given macro. * This is for use in the cycle-tracking code. Such structures have * `finishes' non-NULL, and `first' NULL. All others have - * `finishes' NULL, but `first' may still be non-NULL if the line - * is blank. + * `finishes' NULL, but `first' may still be NULL if the line is + * blank. */ struct Line { Line *next; @@ -133,6 +141,16 @@ struct Include { }; /* + * Include search path. This is simply a list of strings which get + * prepended, in turn, to the name of an include file, in an + * attempt to find the file if it's not in the current directory. + */ +struct IncPath { + IncPath *next; + char *path; +}; + +/* * Conditional assembly: we maintain a separate stack of these for * each level of file inclusion. 
(The only reason we keep the * stacks separate is to ensure that a stray `%endif' in a file @@ -195,6 +213,7 @@ static int inverse_ccs[] = { static Context *cstk; static Include *istk; +static IncPath *ipath = NULL; static efunc error; @@ -202,6 +221,10 @@ static unsigned long unique; /* unique identifier numbers */ static char *linesync, *outline; +static Line *predef = NULL; + +static ListGen *list; + /* * The number of hash values we use for the macro lookup tables. */ @@ -235,13 +258,10 @@ static MMacro *defining; static char **stdmacpos; /* - * The pre-preprocessing stage... This function has two purposes: - * firstly, it translates line number indications as they emerge - * from GNU cpp (`# lineno "file" flags') into NASM preprocessor - * line number indications (`%line lineno file'), and secondly, it - * converts [INCLUDE] and [INC] old-style inclusion directives into - * the new-style `%include' form (though in the next version it - * won't do that any more). + * The pre-preprocessing stage... This function translates line + * number indications as they emerge from GNU cpp (`# lineno "file" + * flags') into NASM preprocessor line number indications (`%line + * lineno file'). 
*/ static char *prepreproc(char *line) { int lineno, fnlen; @@ -258,31 +278,8 @@ static char *prepreproc(char *line) { line = nasm_malloc(20+fnlen); sprintf(line, "%%line %d %.*s", lineno, fnlen, fname); nasm_free (oldline); - return line; - } else if (!nasm_strnicmp(line, "[include", 8)) { - oldline = line; - fname = oldline+8; - fname += strspn(fname, " \t"); - fnlen = strcspn(fname, "]"); - line = nasm_malloc(20+fnlen); - sprintf(line, "%%include \"%.*s\"", fnlen, fname); - error (ERR_WARNING|ERR_OFFBY1, "use of [INCLUDE] is being phased out;" - " suggest `%%include'"); - nasm_free (oldline); - return line; - } else if (!nasm_strnicmp(line, "[inc", 4)) { - oldline = line; - fname = oldline+4; - fname += strspn(fname, " \t"); - fnlen = strcspn(fname, "]"); - line = nasm_malloc(20+fnlen); - sprintf(line, "%%include \"%.*s\"", fnlen, fname); - error (ERR_WARNING|ERR_OFFBY1, "use of [INC] is being phased out;" - " suggest `%%include'"); - nasm_free (oldline); - return line; - } else - return line; + } + return line; } /* @@ -384,9 +381,38 @@ static char *read_line (void) { int bufsize; if (stdmacpos) { - if (*stdmacpos) - return nasm_strdup(*stdmacpos++); - else { + if (*stdmacpos) { + char *ret = nasm_strdup(*stdmacpos++); + /* + * Nasty hack: here we push the contents of `predef' on + * to the top-level expansion stack, since this is the + * most convenient way to implement the pre-include and + * pre-define features. 
+ */ + if (!*stdmacpos) { + Line *pd, *l; + Token *head, **tail, *t, *tt; + + for (pd = predef; pd; pd = pd->next) { + head = NULL; + tail = &head; + for (t = pd->first; t; t = t->next) { + tt = *tail = nasm_malloc(sizeof(Token)); + tt->next = NULL; + tail = &tt->next; + tt->type = t->type; + tt->text = nasm_strdup(t->text); + tt->mac = t->mac; /* always NULL here, in fact */ + } + l = nasm_malloc(sizeof(Line)); + l->next = istk->expansion; + l->first = head; + l->finishes = FALSE; + istk->expansion = l; + } + } + return ret; + } else { stdmacpos = NULL; line_sync(); } @@ -428,6 +454,8 @@ static char *read_line (void) { */ buffer[strcspn(buffer, "\032")] = '\0'; + list->line (LIST_READ, buffer); + return buffer; } @@ -600,6 +628,37 @@ static int mstrcmp(char *p, char *q, int casesense) { } /* + * Open an include file. This routine must always return a valid + * file pointer if it returns - it's responsible for throwing an + * ERR_FATAL and bombing out completely if not. It should also try + * the include path one by one until it finds the file or reaches + * the end of the path. + */ +static FILE *inc_fopen(char *file) { + FILE *fp; + char *prefix = "", *combine; + IncPath *ip = ipath; + int len = strlen(file); + + do { + combine = nasm_malloc(strlen(prefix)+len+1); + strcpy(combine, prefix); + strcat(combine, file); + fp = fopen(combine, "r"); + nasm_free (combine); + if (fp) + return fp; + prefix = ip ? ip->path : NULL; + if (ip) + ip = ip->next; + } while (prefix); + + error (ERR_FATAL|ERR_OFFBY1, + "unable to open include file `%s'", file); + return NULL; /* never reached - placate compilers */ +} + +/* * Determine if we should warn on defining a single-line macro of * name `name', with `nparam' parameters. If nparam is 0, will * return TRUE if _any_ single-line macro of that name is defined. 
@@ -741,7 +800,7 @@ static int do_directive (Token *tline) { if (tline && tline->type == TOK_WHITESPACE) tline = tline->next; if (!tline || tline->type != TOK_PREPROC_ID || - (tline->text[1] == '%' || tline->text[1] == '$')) + (tline->text[1]=='%' || tline->text[1]=='$' || tline->text[1]=='!')) return 0; i = -1; @@ -790,7 +849,7 @@ static int do_directive (Token *tline) { case PP_CLEAR: if (tline->next) error(ERR_WARNING|ERR_OFFBY1, - "trailing garbage after `%%pop' ignored"); + "trailing garbage after `%%clear' ignored"); for (j=0; jnext; if (tline && tline->type == TOK_WHITESPACE) tline = tline->next; - if (!tline || tline->type != TOK_STRING) { + if (!tline || (tline->type != TOK_STRING && + tline->type != TOK_INTERNAL_STRING)) { error(ERR_NONFATAL|ERR_OFFBY1, "`%%include' expects a file name"); return 3; /* but we did _something_ */ } if (tline->next) error(ERR_WARNING|ERR_OFFBY1, "trailing garbage after `%%include' ignored"); - p = tline->text+1; /* point past the quote to the name */ - p[strlen(p)-1] = '\0'; /* remove the trailing quote */ + if (tline->type != TOK_INTERNAL_STRING) { + p = tline->text+1; /* point past the quote to the name */ + p[strlen(p)-1] = '\0'; /* remove the trailing quote */ + } else + p = tline->text; /* internal_string is easier */ inc = nasm_malloc(sizeof(Include)); inc->next = istk; inc->conds = NULL; - inc->fp = fopen(p, "r"); + inc->fp = inc_fopen(p); inc->fname = nasm_strdup(p); inc->lineno = inc->lineinc = 1; inc->expansion = NULL; - if (!inc->fp) - error (ERR_FATAL|ERR_OFFBY1, - "unable to open include file `%s'", p); istk = inc; + list->uplevel (LIST_INCLUDE); return 5; case PP_PUSH: @@ -1078,6 +1139,7 @@ static int do_directive (Token *tline) { defining->name = nasm_strdup(tline->text); defining->casesense = (i == PP_MACRO); defining->plus = FALSE; + defining->nolist = FALSE; defining->in_progress = FALSE; tline = tline->next; if (tline && tline->type == TOK_WHITESPACE) @@ -1117,6 +1179,11 @@ static int do_directive (Token 
*tline) { tline = tline->next; defining->plus = TRUE; } + if (tline && tline->next && tline->next->type == TOK_ID && + !nasm_stricmp(tline->next->text, ".nolist")) { + tline = tline->next; + defining->nolist = TRUE; + } mmac = mmacros[hash(defining->name)]; while (mmac) { if (!strcmp(mmac->name, defining->name) && @@ -1253,7 +1320,7 @@ static int do_directive (Token *tline) { * Good. We now have a macro name, a parameter count, and a * token list (in reverse order) for an expansion. We ought * to be OK just to create an SMacro, store it, and let - * tlist_free have the rest of the line (which we have + * free_tlist have the rest of the line (which we have * carefully re-terminated after chopping off the expansion * from the end). */ @@ -1375,7 +1442,7 @@ static Token *expand_smacro (Token *tline) { if (c) { q = t->text+1; q += strspn(q, "$"); - sprintf(buffer, "macro.%lu.", c->number); + sprintf(buffer, "..@%lu.", c->number); p = nasm_malloc (strlen(buffer)+strlen(q)+1); strcpy (p, buffer); strcat (p, q); @@ -1411,10 +1478,12 @@ static Token *expand_smacro (Token *tline) { for (m = head; m; m = m->next) if (!mstrcmp(m->name, p, m->casesense)) break; - if (!m) { + if (!m || m->in_progress) { /* - * Didn't find one: this can't be a macro call. Copy it - * through and ignore it. + * Either we didn't find a macro, so this can't be a + * macro call, or we found a macro which was already in + * progress, in which case we don't _treat_ this as a + * macro call. Copy it through and ignore it. 
*/ tline->type = TOK_PS_OTHER; /* so it will get copied above */ continue; @@ -1515,7 +1584,7 @@ static Token *expand_smacro (Token *tline) { break; } if (!m) { - error (ERR_WARNING|ERR_OFFBY1, + error (ERR_WARNING|ERR_OFFBY1|ERR_WARN_MNP, "macro `%s' exists, but not taking %d parameters", mstart->text, nparam); nasm_free (params); @@ -1524,15 +1593,6 @@ static Token *expand_smacro (Token *tline) { tline->type = TOK_PS_OTHER; continue; } - if (m->in_progress) { - error (ERR_NONFATAL, "self-reference in single-line macro" - " `%s'", mstart->text); - nasm_free (params); - nasm_free (paramsize); - tline = mstart; - tline->type = TOK_PS_OTHER; - continue; - } } /* * Expand the macro: we are placed on the last token of the @@ -1713,7 +1773,7 @@ static MMacro *is_mmacro (Token *tline, Token ***params_array) { * After all that, we didn't find one with the right number of * parameters. Issue a warning, and fail to expand the macro. */ - error (ERR_WARNING|ERR_OFFBY1, + error (ERR_WARNING|ERR_OFFBY1|ERR_WARN_MNP, "macro `%s' exists, but not taking %d parameters", tline->text, nparam); nasm_free (params); @@ -1783,7 +1843,7 @@ static int expand_mmacro (Token *tline) { for (i = 0; params[i]; i++) { int brace = FALSE; - int comma = !m->plus; + int comma = (!m->plus || i < nparam-1); t = params[i]; if (t && t->type == TOK_WHITESPACE) @@ -1796,12 +1856,12 @@ static int expand_mmacro (Token *tline) { if (!t) /* end of param because EOL */ break; if (comma && t->type == TOK_OTHER && !strcmp(t->text, ",")) - break; /* ... because we have hit a comma */ + break; /* ... because we have hit a comma */ if (comma && t->type == TOK_WHITESPACE && t->next->type == TOK_OTHER && !strcmp(t->next->text, ",")) break; /* ... or a space then a comma */ if (brace && t->type == TOK_OTHER && !strcmp(t->text, "}")) - break; /* ... or a brace */ + break; /* ... 
or a brace */ t = t->next; paramlen[i]++; } @@ -1833,6 +1893,7 @@ static int expand_mmacro (Token *tline) { ll = nasm_malloc(sizeof(Line)); ll->next = istk->expansion; ll->finishes = NULL; + ll->first = NULL; tail = &ll->first; for (t = l->first; t; t = t->next) { @@ -1851,7 +1912,7 @@ static int expand_mmacro (Token *tline) { switch (t->text[1]) { case '%': type = TOK_ID; - sprintf(tmpbuf, "macro.%lu.", unique); + sprintf(tmpbuf, "..@%lu.", unique); text = nasm_malloc(strlen(tmpbuf)+strlen(t->text+2)+1); strcpy(text, tmpbuf); strcat(text, t->text+2); @@ -1923,6 +1984,7 @@ static int expand_mmacro (Token *tline) { } istk->expansion = ll; + } /* @@ -1942,10 +2004,12 @@ static int expand_mmacro (Token *tline) { nasm_free (params); free_tlist (tline); + list->uplevel (m->nolist ? LIST_MACRO_NOLIST : LIST_MACRO); + return need_sync ? 2 : 1; } -static void pp_reset (char *file, efunc errfunc) { +static void pp_reset (char *file, efunc errfunc, ListGen *listgen) { int h; error = errfunc; @@ -1967,6 +2031,7 @@ static void pp_reset (char *file, efunc errfunc) { } unique = 0; stdmacpos = stdmac; + list = listgen; } static char *pp_getline (void) { @@ -1988,18 +2053,22 @@ static char *pp_getline (void) { tline = NULL; while (istk->expansion && istk->expansion->finishes) { Line *l = istk->expansion; - tline = l->first; l->finishes->in_progress = FALSE; istk->expansion = l->next; nasm_free (l); + list->downlevel (LIST_MACRO); if (!istk->expansion) line_sync(); } if (istk->expansion) { + char *p; Line *l = istk->expansion; tline = l->first; istk->expansion = l->next; nasm_free (l); + p = detoken(tline); + list->line (LIST_MACRO, p); + nasm_free(p); if (!istk->expansion) line_sync(); } else { @@ -2015,6 +2084,7 @@ static char *pp_getline (void) { error(ERR_FATAL, "expected `%%endif' before end of file"); i = istk; istk = istk->next; + list->downlevel (LIST_INCLUDE); nasm_free (i->fname); nasm_free (i); if (!istk) @@ -2141,6 +2211,70 @@ static void pp_cleanup (void) { ctx_pop(); 
} +void pp_include_path (char *path) { + IncPath *i; + + i = nasm_malloc(sizeof(IncPath)); + i->path = nasm_strdup(path); + i->next = ipath; + + ipath = i; +} + +void pp_pre_include (char *fname) { + Token *inc, *space, *name; + Line *l; + + inc = nasm_malloc(sizeof(Token)); + inc->next = space = nasm_malloc(sizeof(Token)); + space->next = name = nasm_malloc(sizeof(Token)); + name->next = NULL; + + inc->type = TOK_PREPROC_ID; + inc->text = nasm_strdup("%include"); + space->type = TOK_WHITESPACE; + space->text = nasm_strdup(" "); + name->type = TOK_INTERNAL_STRING; + name->text = nasm_strdup(fname); + + inc->mac = space->mac = name->mac = NULL; + + l = nasm_malloc(sizeof(Line)); + l->next = predef; + l->first = inc; + l->finishes = FALSE; + predef = l; +} + +void pp_pre_define (char *definition) { + Token *def, *space, *name; + Line *l; + char *equals; + + equals = strchr(definition, '='); + + def = nasm_malloc(sizeof(Token)); + def->next = space = nasm_malloc(sizeof(Token)); + if (equals) + *equals = ' '; + space->next = name = tokenise(definition); + if (equals) + *equals = '='; + + def->type = TOK_PREPROC_ID; + def->text = nasm_strdup("%define"); + space->type = TOK_WHITESPACE; + space->text = nasm_strdup(" "); + + def->mac = space->mac = NULL; + + l = nasm_malloc(sizeof(Line)); + l->next = predef; + l->first = def; + l->finishes = FALSE; + predef = l; +} + Preproc nasmpp = { pp_reset, pp_getline, diff --git a/preproc.h b/preproc.h index 550a66ea..fea3e8cb 100644 --- a/preproc.h +++ b/preproc.h @@ -9,6 +9,10 @@ #ifndef NASM_PREPROC_H #define NASM_PREPROC_H +void pp_include_path (char *); +void pp_pre_include (char *); +void pp_pre_define (char *); + extern Preproc nasmpp; #endif diff --git a/rdoff/Makefile.sc b/rdoff/Makefile.sc new file mode 100644 index 00000000..816cc98e --- /dev/null +++ b/rdoff/Makefile.sc @@ -0,0 +1,112 @@ +# Makefile for RDOFF object file utils; part of the Netwide Assembler + +# + +# The Netwide Assembler is copyright (C) 1996 Simon 
Tatham and + +# Julian Hall. All rights reserved. The software is + +# redistributable under the licence given in the file "Licence" + +# distributed in the NASM archive. + +# + +# This Makefile is designed for use under Unix (probably fairly + +# portably). + + + +CC = sc + +CCFLAGS = -I..\ -c -a1 -mn -Nc -w2 -w7 -o+time -5 + +LINK = link + +LINKFLAGS = /noi /exet:NT /su:console + + + +OBJ=obj + +EXE=.exe + + + +NASMLIB = ..\nasmlib.$(OBJ) + +NASMLIB_H = ..\nasmlib.h + +LDRDFLIBS = rdoff.$(OBJ) $(NASMLIB) symtab.$(OBJ) collectn.$(OBJ) rdlib.$(OBJ) + +RDXLIBS = rdoff.$(OBJ) rdfload.$(OBJ) symtab.$(OBJ) collectn.$(OBJ) + + + +.c.$(OBJ): + + $(CC) $(CCFLAGS) $*.c + + + +all : rdfdump$(EXE) ldrdf$(EXE) rdx$(EXE) rdflib$(EXE) rdf2bin$(EXE) rdf2com$(EXE) + + + +rdfdump$(EXE) : rdfdump.$(OBJ) + + $(LINK) $(LINKFLAGS) rdfdump.$(OBJ), rdfdump$(EXE); + +ldrdf$(EXE) : ldrdf.$(OBJ) $(LDRDFLIBS) + + $(LINK) $(LINKFLAGS) ldrdf.$(OBJ) $(LDRDFLIBS), ldrdf$(EXE); + +rdx$(EXE) : rdx.$(OBJ) $(RDXLIBS) + + $(LINK) $(LINKFLAGS) rdx.$(OBJ) $(RDXLIBS), rdx$(EXE); + +rdflib$(EXE) : rdflib.$(OBJ) + + $(LINK) $(LINKFLAGS) rdflib.$(OBJ), rdflib$(EXE); + +rdf2bin$(EXE) : rdf2bin.$(OBJ) $(RDXLIBS) $(NASMLIB) + + $(LINK) $(LINKFLAGS) rdf2bin.$(OBJ) $(RDXLIBS) $(NASMLIB), rdf2bin$(EXE); + +rdf2com$(EXE) : rdf2bin$(EXE) + + copy rdf2bin$(EXE) rdf2com$(EXE) + + + +rdf2bin.$(OBJ) : rdf2bin.c + +rdfdump.$(OBJ) : rdfdump.c + +rdoff.$(OBJ) : rdoff.c rdoff.h + +ldrdf.$(OBJ) : ldrdf.c rdoff.h $(NASMLIB_H) symtab.h collectn.h rdlib.h + +symtab.$(OBJ) : symtab.c symtab.h + +collectn.$(OBJ) : collectn.c collectn.h + +rdx.$(OBJ) : rdx.c rdoff.h rdfload.h symtab.h + +rdfload.$(OBJ) : rdfload.c rdfload.h rdoff.h collectn.h symtab.h + +rdlib.$(OBJ) : rdlib.c rdlib.h + +rdflib.$(OBJ) : rdflib.c + + + +clean : + + del *.$(OBJ) rdfdump$(EXE) ldrdf$(EXE) rdx$(EXE) rdflib$(EXE) rdf2bin$(EXE) + + + + + diff --git a/rdoff/README b/rdoff/README index 54f3b2ae..bea5ecbe 100644 --- a/rdoff/README +++ b/rdoff/README @@ 
-81,5 +81,4 @@ amply documented in the source code... look at 'rdflib.c' and 'rdlib.c', and the relevant sections of 'ldrdf.c' to see how libraries can be handled). -Julian Hall (jules@dcs.warwick.ac.uk) - +Julian Hall diff --git a/standard.mac b/standard.mac new file mode 100644 index 00000000..5653ba62 --- /dev/null +++ b/standard.mac @@ -0,0 +1,77 @@ +; Standard macro set for NASM 0.95 + +%define __NASM_MAJOR__ 0 +%define __NASM_MINOR__ 95 + +%define __SECT__ ; it ought to be defined, even if as nothing + +%imacro section 1+.nolist +%define __SECT__ [section %1] + __SECT__ +%endmacro +%imacro segment 1+.nolist +%define __SECT__ [segment %1] + __SECT__ +%endmacro + +%imacro absolute 1+.nolist +%define __SECT__ [absolute %1] + __SECT__ +%endmacro + +%imacro struc 1.nolist +%push struc +%define %$strucname %1 +[absolute 0] +%endmacro +%imacro endstruc 0.nolist +%{$strucname}_size: +%pop +__SECT__ +%endmacro + +%imacro istruc 1.nolist +%push istruc +%define %$strucname %1 +%$strucstart: +%endmacro +%imacro at 1-2+.nolist + times %1-($-%$strucstart) db 0 + %2 +%endmacro +%imacro iend 0.nolist + times %{$strucname}_size-($-%$strucstart) db 0 +%pop +%endmacro + +%imacro extern 1+.nolist +[extern %1] +%endmacro + +%imacro bits 1+.nolist +[bits %1] +%endmacro + +%imacro global 1+.nolist +[global %1] +%endmacro + +%imacro common 1+.nolist +[common %1] +%endmacro + +%imacro org 1+.nolist +[org %1] +%endmacro + +%imacro group 1+.nolist +[group %1] +%endmacro + +%imacro uppercase 1+.nolist +[uppercase %1] +%endmacro + +%imacro library 1+.nolist +[library %1] +%endmacro diff --git a/sync.c b/sync.c index 71665915..7acba0ee 100644 --- a/sync.c +++ b/sync.c @@ -7,6 +7,7 @@ */ #include +#include #include #include "sync.h" @@ -21,10 +22,29 @@ static struct Sync { unsigned long pos; unsigned long length; -} synx[SYNC_MAX+1]; /* synx[0] never used - who cares :) */ +} *synx; static int nsynx; void init_sync(void) { + /* + * I'd like to allocate an array of size SYNC_MAX, then write 
+ * `synx--' which would allow numbering the array from one + * instead of zero without wasting memory. Sadly I don't trust + * this to work in 16-bit Large model, so it's staying the way + * it is. Btw, we don't care about freeing this array, since it + * has to last for the duration of the program and will then be + * auto-freed on exit. And I'm lazy ;-) + * + * Speaking of 16-bit Large model, that's also the reason I'm + * not declaring this array statically - by doing it + * dynamically I avoid problems with the total size of DGROUP + * in Borland C. + */ + synx = malloc((SYNC_MAX+1) * sizeof(*synx)); + if (!synx) { + fprintf(stderr, "ndisasm: not enough memory for sync array\n"); + exit(1); + } nsynx = 0; } diff --git a/test/objlink.c b/test/objlink.c index 2f92f05e..9898e015 100644 --- a/test/objlink.c +++ b/test/objlink.c @@ -27,4 +27,5 @@ int main(void) { function(text); printf("this should be 0xF00E: 0x%X\n", bsssym); printf("this should be 0xD00E: 0x%X\n", commvar); + return 0; } -- 2.11.4.GIT