2 --[[-------------------------------------------------------------------
5 Compresses Lua source code by removing unnecessary characters.
7 Copyright (c) 2005 Kein-Hong Man <khman@users.sf.net>
8 The COPYRIGHT file describes the conditions under which this
9 software may be distributed (basically a Lua 5-style license.)
11 http://luaforge.net/projects/luasrcdiet/
12 (TODO) http://www.geocities.com/keinhong/luasrcdiet.html
13 See the ChangeLog for more information.
15 -----------------------------------------------------------------------
16 -- * See the README file and script comments for notes and caveats.
17 -----------------------------------------------------------------------
20 --[[-------------------------------------------------------------------
21 -- description and help texts
22 --]]-------------------------------------------------------------------
-- NOTE(review): the assignments wrapping these two text blocks were
-- lost in this copy (the raw text is not valid Lua); restored as long
-- strings. 'title' and 'USAGE' are globals: 'usage' is built from
-- USAGE below and both are printed by the CLI code. Spacing inside the
-- option list is as found -- confirm alignment against upstream.
title = [[
LuaSrcDiet: Puts your Lua 5 source code on a diet
Version 0.9.1 (20050816) Copyright (c) 2005 Kein-Hong Man
The COPYRIGHT file describes the conditions under which this
software may be distributed (basically a Lua 5-style license.)
]]

USAGE = [[
usage: %s [options] [filenames]

-h, --help prints usage information
-o <file> specify file name to write output
--quiet do not display statistics
--read-only read file and print token stats
--keep-lines preserve line numbering
--maximum maximize reduction of source
--dump dump raw tokens from lexer
-- stop handling arguments

>%s myscript.lua -o myscript_.lua
]]
-- for embedding, we won't set arg[0]
-- Pick the invocation name shown in the usage text: when run from the
-- command line arg[0] holds the script name; when embedded it doesn't.
if arg[0] then exec = "lua LuaSrcDiet.lua" else exec = "LuaSrcDiet" end
-- 'usage' and 'exec' are intentionally global; 'usage' is printed
-- later by the command-line interface code.
usage = string.format(USAGE, exec, exec)
--[[-------------------------------------------------------------------
-- llex is a port of the Lua 5.0.2 lexer (llex.*) to Lua, with the
-- token output modified and the code simplified for LuaSrcDiet.
-----------------------------------------------------------------------
-- Instead of returning a number, llex:lex() returns strings, like
-- "TK_EOS". The other values returned are the original snippet of
-- source and the "value" of the lexed token, if applicable.
-----------------------------------------------------------------------
-- * Prep lexer with llex:setinput(), llex will close the file handle.
-- * For LuaSrcDiet, llex has been changed:
--   TK_* returns classes of tokens, made less specific
--   "TK_OP" -> operators and punctuations, "TK_KEYWORD" -> keywords
--   "TK_EOL" -> end-of-lines, "TK_SPACE" -> whitespace
--   "TK_COMMENT" -> comments, "TK_LCOMMENT" -> block comments
-----------------------------------------------------------------------
--]]-------------------------------------------------------------------
-----------------------------------------------------------------------
-- llex initialization stuff
-----------------------------------------------------------------------

-- NOTE(review): the statement creating the llex table was lost in this
-- copy; recreate it defensively without clobbering an existing table.
llex = llex or {}

llex.EOZ = -1 -- end of stream marker

llex.keywords = -- Lua 5 keywords
"and break do else elseif end false for function if in local \
nil not or repeat return then true until while "

llex.str2tok = {} -- for matching keywords
-- string.gfind is the Lua 5.0 name; fall back to gmatch so this also
-- runs on Lua 5.1+ (the closing 'end' of this loop was restored too)
for v in (string.gfind or string.gmatch)(llex.keywords, "[^%s]+") do
  llex.str2tok[v] = true
end
--[[-------------------------------------------------------------------
-- Support functions for Lua lexer (mainly error handling)
-- * REMOVED functions luaX_errorline, luaX_errorline, luaX_token2str,
--   luaX_syntaxerror, either unused or simplified.
-----------------------------------------------------------------------
--]]-------------------------------------------------------------------
-----------------------------------------------------------------------
-- raises a lexer error when a counter exceeds its limit
-- NOTE(review): the 'if val > limit then' guard and the closing 'end's
-- were lost in this copy and have been restored ('val' was otherwise
-- unused) -- confirm against upstream LuaSrcDiet.
-----------------------------------------------------------------------
function llex:checklimit(val, limit, msg)
  if val > limit then
    msg = string.format("too many %s (limit=%d)", msg, limit)
    -- luaX_syntaxerror merged here; removed token reference
    error(string.format("%s:%d: %s", self.source, self.line, msg))
  end
end
-----------------------------------------------------------------------
-- raises an error decorated with source name, line number and the
-- offending token text (closing 'end' restored -- it was lost in copy)
-----------------------------------------------------------------------
function llex:error(s, token)
  -- luaX_errorline merged here
  error(string.format("%s:%d: %s near '%s'", self.source, self.line, s, token))
end
-----------------------------------------------------------------------
-- error wrapper: reports the given token, or falls back to the current
-- token buffer when no token was supplied (closing 'end' restored)
-----------------------------------------------------------------------
function llex:lexerror(s, token)
  if token then self:error(s, token) else self:error(s, self.buff) end
end
--[[-------------------------------------------------------------------
-- Principal input, output stream functions: nextc, save
-- * self.c and self.ch are identical, self.ch is the string version
-- * lexer has a token buffer, buff, intended for the lexed value, and
--   another buffer, obuff, for the original characters -- it's not a
--   very efficient method, but we want both, just in case
-----------------------------------------------------------------------
--]]-------------------------------------------------------------------
-----------------------------------------------------------------------
-- returns the next character as a number
-- * refills self.ibuf line-by-line from stream self.z, or signals EOZ
-- NOTE(review): several lines of this function were lost in this copy
-- (stream close, 'self.ipos = 1' reset, early returns and the closing
-- 'end's); the restored lines are marked below -- confirm against the
-- upstream LuaSrcDiet source.
-----------------------------------------------------------------------
function llex:nextc()
  if self.ipos > self.ilen then
    if self.z then -- read from specified stream
      self.ibuf = self.z:read("*l")
      if self.ibuf == nil then -- close stream
        self.z:close()                  -- NOTE(review): restored
        self.z = nil                    -- NOTE(review): restored
        self.c = self.EOZ; self.ch = ""
        return                          -- NOTE(review): restored
      else -- preprocess source line
        self.ibuf = self.ibuf.."\n"
        self.ipos = 1                   -- NOTE(review): restored
        self.ilen = string.len(self.ibuf)
        -- then grabs the first char (below)
      end
    else -- end of string chunk
      self.c = self.EOZ; self.ch = ""
      return                            -- NOTE(review): restored
    end
  end
  self.c = string.byte(self.ibuf, self.ipos) -- return a character
  self.ch = string.char(self.c)
  self.ipos = self.ipos + 1
end
-----------------------------------------------------------------------
-- ADDED initialize token buffers
-- NOTE(review): the body was lost in this copy; restored from the
-- documented contract (buff holds the lexed value, obuff the original
-- characters; both start empty) -- confirm against upstream.
-----------------------------------------------------------------------
function llex:initbuff()
  self.buff = ""
  self.obuff = ""
end
-----------------------------------------------------------------------
-- saves given character into buffer, c must be a string
-- (closing 'end' restored -- it was lost in this copy)
-----------------------------------------------------------------------
function llex:save(c)
  self.buff = self.buff..c
end
-----------------------------------------------------------------------
-- ADDED saves original character into buffer
-- (closing 'end' restored -- it was lost in this copy)
-----------------------------------------------------------------------
function llex:osave(c)
  self.obuff = self.obuff..c
end
-----------------------------------------------------------------------
-- save current character and grabs next character
-- NOTE(review): the body was lost in this copy; restored from the
-- contract: mirror the current char into both buffers, then advance.
-- Confirm against the upstream LuaSrcDiet source.
-----------------------------------------------------------------------
function llex:save_and_next()
  self:save(self.ch)
  self:osave(self.ch)
  self:nextc()
end
-----------------------------------------------------------------------
-- move on to next line, updating line number count
-- (closing 'end' restored -- it was lost in this copy)
-----------------------------------------------------------------------
function llex:inclinenumber()
  self:nextc() -- skip EOL
  self.line = self.line + 1
  -- number of lines is limited to MAXINT
  self:checklimit(self.line, 2147483645, "lines in a chunk")
end
--[[-------------------------------------------------------------------
-- Initialize lexer to a particular stream (handle) or string
-----------------------------------------------------------------------
--]]-------------------------------------------------------------------
-----------------------------------------------------------------------
-- input stream initialization (file handle)
-- NOTE(review): several field initializations, the default-source body
-- and the closing 'end's were lost in this copy; restored lines are
-- marked -- confirm against the upstream LuaSrcDiet source.
-----------------------------------------------------------------------
function llex:setinput(z, source)
  self.ibuf = ""              -- NOTE(review): restored -- input buffer
  self.ilen = 0 -- length
  self.z = z -- input stream
  self.ipos = 1 -- position
  self.line = 1               -- NOTE(review): restored -- line counter
  self.source = source        -- NOTE(review): restored
  if not self.source then -- default source name
    self.source = "?"         -- NOTE(review): restored
  end
  self:nextc() -- read first char
  -- shbang handling moved to llex()
end
-----------------------------------------------------------------------
-- input stream initialization (string)
-- NOTE(review): the 'self.ibuf = chunk' line and closing 'end' were
-- lost in this copy and have been restored ('chunk' was otherwise
-- unused, and ilen is computed from ibuf).
-----------------------------------------------------------------------
function llex:setstring(chunk, source)
  self.ibuf = chunk
  self.ilen = string.len(self.ibuf) -- length
  self:setinput(nil, source)
end
--[[-------------------------------------------------------------------
-- Main Lua lexer functions
-----------------------------------------------------------------------
--]]-------------------------------------------------------------------
-----------------------------------------------------------------------
-- grab a class of characters: save-and-advance while the current char
-- matches pattern 'pat'
-- NOTE(review): loop body and closing 'end's were lost in this copy;
-- restored from the function's contract -- confirm against upstream.
-----------------------------------------------------------------------
function llex:readloop(pat)
  while string.find(self.ch, pat) do
    self:save_and_next()
  end
end
-----------------------------------------------------------------------
-- grab characters until end-of-line (or end of stream)
-- NOTE(review): loop body and closing 'end's were lost in this copy;
-- restored from the function's contract -- confirm against upstream.
-----------------------------------------------------------------------
function llex:readtoeol()
  while self.ch ~= '\n' and self.c ~= self.EOZ do
    self:save_and_next()
  end
end
-----------------------------------------------------------------------
-- read a numeral; 'comma' is truthy when the caller already consumed a
-- leading '.' (dot/number disambiguation in the main lexer)
-----------------------------------------------------------------------
-- NOTE(review): this copy is truncated -- the digit-reading loops, an
-- apparent 'if comma then' guard around the buffer seeding, and the
-- closing 'end's are missing. Code tokens are kept exactly as found;
-- reconstruct against the upstream LuaSrcDiet source before running.
function llex:read_numeral(comma)
-- seed both buffers with the consumed '.' (probably guarded by a lost
-- 'if comma then' -- TODO confirm)
self.buff = '.'; self.obuff = '.'
if self.ch == '.' then
if self.ch == '.' then
-- a third '.' cannot be told apart from the concat operator
self:lexerror("ambiguous syntax (decimal point x string concatenation)")
if self.ch == 'e' or self.ch == 'E' then
self:save_and_next() -- read 'E'
if self.ch == '+' or self.ch == '-' then
self:save_and_next() -- optional exponent sign
-- tonumber validates the collected text; nil means malformed
local value = tonumber(self.buff)
self:lexerror("malformed number")
return self.obuff, value
-----------------------------------------------------------------------
-- read a long string or long comment
-----------------------------------------------------------------------
-- NOTE(review): this copy is truncated -- the main 'while' loop header,
-- the nesting increment/decrement around 'cont', the EOL counting (the
-- 'eols' value returned below is never declared here), branch bodies
-- and closing 'end's are missing. Tokens are kept exactly as found;
-- reconstruct against the upstream LuaSrcDiet source before running.
function llex:read_long_string(comment)
local cont = 0 -- nesting
self.buff = "[" -- save first '['
self.obuff = self.buff
self:save_and_next() -- pass the second '['
if self.ch == '\n' then -- string starts with a newline?
self:inclinenumber() -- skip it
-- case -----------------------------------------------------------
if self.c == self.EOZ then -- EOZ
-- 'comment' selects which message is raised (guard line lost)
self:lexerror("unfinished long comment", "<eof>")
self:lexerror("unfinished long string", "<eof>")
-- case -----------------------------------------------------------
elseif self.ch == '[' then
if self.ch == '[' then
-- case -----------------------------------------------------------
elseif self.ch == ']' then
if self.ch == ']' then
if cont == 0 then break end
-- case -----------------------------------------------------------
elseif self.ch == '\n' then
-- case -----------------------------------------------------------
-- endcase --------------------------------------------------------
self:save_and_next() -- skip the second ']'
-- long comments report the newline count; long strings report the
-- text between the brackets
return self.obuff, eols
return self.obuff, string.sub(self.buff, 3, -3)
-----------------------------------------------------------------------
-- read a quoted string delimited by 'del' (single or double quote),
-- translating backslash escapes into buff while obuff keeps the
-- original characters
-----------------------------------------------------------------------
-- NOTE(review): this copy is truncated -- buffer initialization, the
-- numeric-escape 'repeat' header and 'local c, i' declarations ('c'
-- and 'i' below are otherwise undeclared), several branch bodies and
-- closing 'end's are missing. Tokens are kept exactly as found;
-- reconstruct against the upstream LuaSrcDiet source before running.
function llex:read_string(del)
while self.ch ~= del do
-- case -----------------------------------------------------------
if self.c == self.EOZ then
self:lexerror("unfinished string", "<eof>")
-- case -----------------------------------------------------------
elseif self.ch == '\n' then
self:lexerror("unfinished string")
-- case -----------------------------------------------------------
elseif self.ch == '\\' then
self:nextc() -- do not save the '\'
if self.c ~= self.EOZ then -- will raise an error next loop
-- map escape letter to its control character by position
local i = string.find("\nabfnrtv", self.ch, 1, 1)
self:save(string.sub("\n\a\b\f\n\r\t\v", i, i))
elseif string.find(self.ch, "%d") == nil then
-- escaped punctuation
self:save_and_next() -- handles \\, \", \', and \?
-- numeric escape: accumulate up to three decimal digits
c = 10 * c + self.ch -- (coerced)
until (i >= 3 or not string.find(self.ch, "%d"))
if c > 255 then -- UCHAR_MAX
self:lexerror("escape sequence too large")
self:save(string.char(c))
-- case -----------------------------------------------------------
-- endcase --------------------------------------------------------
self:save_and_next() -- skip delimiter
return self.obuff, string.sub(self.buff, 2, -2)
--[[-------------------------------------------------------------------
-- Lexer feeder function for parser
-- * As we are not actually parsing the token stream, we return a token
--   class, the original snippet, and the token's value (for strings and
--   numbers.) Most tokens just pass through LuaSrcDiet processing...
-----------------------------------------------------------------------
--]]-------------------------------------------------------------------
404 -----------------------------------------------------------------------
405 -- lex function enhanced to return the snippets required for processing
406 -- * basically adds: TK_COMMENT, TK_LCOMMENT, TK_EOL, TK_SPACE
407 -----------------------------------------------------------------------
-- localized for speed inside the dispatch below
local strfind = string.find
-- NOTE(review): the 'function llex:lex()' header, its outer loop,
-- initbuff/readtoeol/readloop calls, the 'local c = self.ch' binding
-- used by the operator branches, and nearly all closing 'end's were
-- lost in this copy. Tokens are kept exactly as found; reconstruct
-- against the upstream LuaSrcDiet source before running.
-- case -----------------------------------------------------------
if self.line == 1 and self.ipos == 2 -- shbang handling
and self.ch == '#' then -- skip first line
return "TK_COMMENT", self.obuff
-- case -----------------------------------------------------------
if self.ch == '\n' then -- end of line
return "TK_EOL", '\n'
-- case -----------------------------------------------------------
elseif self.ch == '-' then -- comment
if self.ch ~= '-' then -- '-' operator
-- else is a comment '--' or '--[['
if self.ch == '[' then
if self.ch == '[' then -- block comment
return "TK_LCOMMENT", self:read_long_string(1) -- long comment
else -- short comment
return "TK_COMMENT", self.obuff
else -- short comment
return "TK_COMMENT", self.obuff
-- case -----------------------------------------------------------
elseif self.ch == '[' then -- literal string
if self.ch ~= '[' then
return "TK_STRING", self:read_long_string()
-- case -----------------------------------------------------------
elseif self.ch == "\"" or self.ch == "\'" then -- strings
return "TK_STRING", self:read_string(self.ch)
-- case -----------------------------------------------------------
elseif self.ch == '.' then -- dot, concat,
self:nextc() -- or number
if self.ch == '.' then
if self.ch == '.' then
return "TK_OP", '...'
elseif strfind(self.ch, "%d") == nil then
return "TK_NUMBER", self:read_numeral(1)
-- case -----------------------------------------------------------
elseif self.c == self.EOZ then -- end of input
-- case -----------------------------------------------------------
local op = strfind("=><~", self.ch, 1, 1) -- relational ops
if self.ch ~= '=' then -- single-char ops
else -- double-char ops
-- 'c' here is the previously grabbed char (declaration lost)
return "TK_OP", c..'='
if strfind(self.ch, "%s") then -- whitespace
return "TK_SPACE", self.obuff
elseif strfind(self.ch, "%d") then -- number
return "TK_NUMBER", self:read_numeral()
elseif strfind(self.ch, "[%a_]") then -- identifier
-- identifier or reserved word
self:readloop("[%w_]")
if self.str2tok[self.buff] then -- reserved word
return "TK_KEYWORD", self.buff
return "TK_NAME", self.buff
else -- control/symbol
if strfind(self.ch, "%c") then
self:error("invalid control char", string.format("char(%d)", self.c))
return "TK_OP", c -- single-chars
-- endcase --------------------------------------------------------
-----------------------------------------------------------------------
-- 'original' lex function, behaves *exactly* like original llex.c
-- * currently unused by LuaSrcDiet
-----------------------------------------------------------------------
-- NOTE(review): the function header (presumably 'function llex:llex()'
-- with a repeat/until skip loop) and the closing 'end's were lost in
-- this copy. Tokens below are kept exactly as found.
local _ltok, _lorig, _lval
_ltok, _lorig, _lval = self:lex()
-- filter out the whitespace-class tokens LuaSrcDiet added, so only
-- "real" llex.c tokens escape to the caller
if _ltok ~= "TK_COMMENT" and _ltok ~= "TK_LCOMMENT"
and _ltok ~= "TK_EOL" and _ltok ~= "TK_SPACE" then
return _ltok, _lorig, _lval
--[[-------------------------------------------------------------------
-- Global variables and data structures
-- * We aren't using lval[] for now, except for TK_LCOMMENT processing,
-- perhaps for heavy-duty optimization, like constant optimization...
-----------------------------------------------------------------------
--]]-------------------------------------------------------------------

stats_c = nil -- number of tokens of a given type
stats_l = nil -- bytes occupied by tokens of a given type
ltok = nil -- source list of tokens
lorig = nil -- source list of original snippets
lval = nil -- source list of actual token values
ntokens = 0 -- number of tokens processed from file

-----------------------------------------------------------------------
-- "classes" of tokens; the last 4 aren't standard in llex.c
-- * arrangement/count significant!!! hardcoded for stats display
-----------------------------------------------------------------------
-- NOTE(review): the 'ttypes = {' opener and closing '}' were lost in
-- this copy; restored (the list is consumed via ipairs(ttypes)).
ttypes = {
  "TK_KEYWORD", "TK_NAME", "TK_NUMBER", "TK_STRING", "TK_OP",
  "TK_EOS", "TK_COMMENT", "TK_LCOMMENT", "TK_EOL", "TK_SPACE",
}
-----------------------------------------------------------------------
-- reads source file and create token array + fill in statistics
-----------------------------------------------------------------------
-- NOTE(review): this copy is truncated -- the lines creating the
-- stats_c/stats_l/ltok/lorig/lval tables, the 'if not INF then' guard,
-- the token-recording statements inside the loop and the closing
-- 'end's are missing. Tokens are kept exactly as found; reconstruct
-- against the upstream LuaSrcDiet source before running.
function LoadFile(filename)
-- NOTE(review): 'and' looks wrong here -- with 'and', a truthy
-- non-string filename slips through; 'or' appears intended. Confirm.
if not filename and type(filename) ~= "string" then
error("invalid filename specified")
for _, i in ipairs(ttypes) do -- init counters
stats_c[i] = 0; stats_l[i] = 0
---------------------------------------------------------------------
local INF = io.open(filename, "rb")
-- (the 'if not INF then' guard for this error was lost)
error("cannot open \""..filename.."\" for reading")
llex:setinput(INF, filename)
local _ltok, _lorig, _lval
while _ltok ~= "TK_EOS" do
_ltok, _lorig, _lval = llex:lex()
stats_c[_ltok] = stats_c[_ltok] + 1
stats_l[_ltok] = stats_l[_ltok] + string.len(_lorig)
-- INF closed by llex
-----------------------------------------------------------------------
-- returns token tables containing valid tokens only (for verification)
-- * stok/sorig are parallel arrays of token classes and snippets,
--   stokens their count; whitespace-class tokens are filtered out and
--   the compacted arrays plus the new count are returned
-- (only the closing 'end's were lost in this copy; restored)
-----------------------------------------------------------------------
function GetRealTokens(stok, sorig, stokens)
  local rtok, rorig, rtokens = {}, {}, 0
  for i = 1, stokens do
    local _stok = stok[i]
    local _sorig = sorig[i]
    if _stok ~= "TK_COMMENT" and _stok ~= "TK_LCOMMENT"
       and _stok ~= "TK_EOL" and _stok ~= "TK_SPACE" then
      rtokens = rtokens + 1
      rtok[rtokens] = _stok
      rorig[rtokens] = _sorig
    end
  end
  return rtok, rorig, rtokens
end
-----------------------------------------------------------------------
-- display only source token statistics (for --read-only option)
-----------------------------------------------------------------------
-- NOTE(review): this copy is truncated -- the 'for _, i in
-- ipairs(ttypes)' loop header binding 'i', the tok_c/tok_l
-- accumulation lines, and closing parentheses/'end's are missing.
-- Tokens are kept exactly as found.
function DispSrcStats(filename)
local underline = "--------------------------------\n"
io.stdout:write("Statistics for: "..filename.."\n\n"
..string.format("%-14s%8s%10s\n", "Elements", "Count", "Bytes")
-- totals over all classes vs. totals over "real" tokens only
local total_c, total_l, tok_c, tok_l = 0, 0, 0, 0
local c, l = stats_c[i], stats_l[i]
total_c = total_c + c
total_l = total_l + l
io.stdout:write(string.format("%-14s%8d%10d\n", i, c, l))
if i == "TK_EOS" then io.stdout:write(underline) end
io.stdout:write(underline
..string.format("%-14s%8d%10d\n", "Total Elements", total_c, total_l)
..string.format("%-14s%8d%10d\n", "Total Tokens", tok_c, tok_l)
-----------------------------------------------------------------------
-- display source and destination stats (enabled by default)
-- * src_c/src_l and dest_c/dest_l are per-class count/byte tables for
--   the input and output files respectively
-----------------------------------------------------------------------
-- NOTE(review): this copy is truncated -- the 'for _, i in
-- ipairs(ttypes)' loop header, the 'if i ~= "TK_EOS"' guard around the
-- *tok_* accumulators, and closing parentheses/'end's are missing.
-- Tokens are kept exactly as found.
function DispAllStats(srcfile, src_c, src_l, destfile, dest_c, dest_l)
local underline = "--------------------------------------------------\n"
-- s*/d* = source/destination; *tot* = all classes, *tok* = real tokens
local stot_c, stot_l, stok_c, stok_l = 0, 0, 0, 0
local dtot_c, dtot_l, dtok_c, dtok_l = 0, 0, 0, 0
io.stdout:write("Statistics for: "..srcfile.." -> "..destfile.."\n\n"
..string.format("%-14s%8s%10s%8s%10s\n", "Lexical", "Input", "Input", "Output", "Output")
..string.format("%-14s%8s%10s%8s%10s\n", "Elements", "Count", "Bytes", "Count", "Bytes")
local s_c, s_l = src_c[i], src_l[i]
local d_c, d_l = dest_c[i], dest_l[i]
stot_c = stot_c + s_c
stot_l = stot_l + s_l
dtot_c = dtot_c + d_c
dtot_l = dtot_l + d_l
stok_c = stok_c + s_c
stok_l = stok_l + s_l
dtok_c = dtok_c + d_c
dtok_l = dtok_l + d_l
io.stdout:write(string.format("%-14s%8d%10d%8d%10d\n", i, s_c, s_l, d_c, d_l))
if i == "TK_EOS" then io.stdout:write(underline) end
io.stdout:write(underline
..string.format("%-14s%8d%10d%8d%10d\n", "Total Elements", stot_c, stot_l, dtot_c, dtot_l)
..string.format("%-14s%8d%10d%8d%10d\n", "Total Tokens", stok_c, stok_l, dtok_c, dtok_l)
-----------------------------------------------------------------------
-- token processing function
-----------------------------------------------------------------------
-- NOTE(review): this copy of ProcessToken is heavily truncated -- the
-- pass-1 loop header, many branch bodies, the index increments, the
-- output-file write/close and most closing 'end's are missing. Code
-- tokens are kept exactly as found; only comments were added.
-- Reconstruct against the upstream LuaSrcDiet source before running.
function ProcessToken(srcfile, destfile)
-- (guard condition lost; presumably 'if ntokens == 0 then')
error("no tokens to process")
local dtok = {} -- processed list of tokens
local dorig = {} -- processed list of original snippets
local dtokens = 0 -- number of tokens generated
local stok, sorig, stokens = -- src tokens for verification
GetRealTokens(ltok, lorig, ntokens)
---------------------------------------------------------------------
-- saves specified token to the destination token list
---------------------------------------------------------------------
local function savetok(src)
dtokens = dtokens + 1
dtok[dtokens] = ltok[src]
dorig[dtokens] = lorig[src]
---------------------------------------------------------------------
-- check if token at location is whitespace-equivalent
-- (the line binding 'tok' from index 'i' was lost)
---------------------------------------------------------------------
local function iswhitespace(i)
if tok == "TK_SPACE" or tok == "TK_EOL"
or tok == "TK_COMMENT" or tok == "TK_LCOMMENT" then
---------------------------------------------------------------------
-- compare two tokens and returns whitespace if needed in between
-- * note that some comparisons won't occur in Lua code; we assume
-- no knowledge of Lua syntax, only knowledge of lexical analysis
---------------------------------------------------------------------
local function whitesp(previ, nexti)
local p = ltok[previ]
local n = ltok[nexti]
-- if next token is a whitespace, remove current whitespace token
-- NOTE(review): 'n' is a token NAME but iswhitespace() appears to
-- expect an INDEX -- looks like this should be iswhitespace(nexti);
-- confirm against upstream.
if iswhitespace(n) then return "" end
-- otherwise we are comparing non-whitespace tokens, so we use
-- the following optimization rules...
-------------------------------------------------------------------
-- (the 'if p == ...' opener of this first branch was lost)
if n == "TK_NUMBER" then
if string.sub(lorig[nexti], 1, 1) == "." then return " " end
-------------------------------------------------------------------
elseif p == "TK_KEYWORD" or p == "TK_NAME" then
if n == "TK_KEYWORD" or n == "TK_NAME" then
elseif n == "TK_NUMBER" then
if string.sub(lorig[nexti], 1, 1) == "." then return "" end
-------------------------------------------------------------------
elseif p == "TK_STRING" then
-------------------------------------------------------------------
elseif p == "TK_NUMBER" then
if n == "TK_NUMBER" then
elseif n == "TK_KEYWORD" or n == "TK_NAME" then
-- "123e" style juxtaposition would re-lex as an exponent
local c = string.sub(lorig[nexti], 1, 1)
if string.lower(c) == "e" then return " " end
-------------------------------------------------------------------
else -- should never arrive here
error("token comparison failed")
---------------------------------------------------------------------
-- main processing loop (pass 1)
-- (the loop header binding 'tok' per position was lost)
---------------------------------------------------------------------
local i = 1 -- token position
local linestart = true -- true at the start of a line
local tok = "" -- current token
local prev = 0 -- index of previous non-whitespace tok
-------------------------------------------------------------------
if tok == "TK_SPACE" then
-- delete leading whitespace
-- remove in-between whitespace if possible
lorig[i] = whitesp(prev, i + 1)
-------------------------------------------------------------------
elseif tok == "TK_NAME" or tok == "TK_KEYWORD" or tok == "TK_OP"
or tok == "TK_STRING" or tok == "TK_NUMBER" then
-- these are all unchanged
-------------------------------------------------------------------
elseif tok == "TK_EOL" then
if config.KEEP_LINES then
-- otherwise it's an empty line, drop it
-------------------------------------------------------------------
elseif tok == "TK_COMMENT" then
-- must keep shbang for correctness, force a TK_EOL too
if i == 1 and string.sub(lorig[i], 1, 1) == "#" then
-- don't change linestart; the now empty line can be consumed
-------------------------------------------------------------------
elseif tok == "TK_LCOMMENT" then
if config.KEEP_LINES then
-- preserve newlines inside long comments
-- ('eols' declaration lost; presumably a local above this branch)
if lval[i] > 0 then eols = string.rep("\n", lval[i]) end
if iswhitespace(i + 1) then
lorig[i] = eols or ""
lorig[i] = eols or " "
-------------------------------------------------------------------
elseif tok == "TK_EOS" then
-------------------------------------------------------------------
error("unidentified token encountered")
---------------------------------------------------------------------
-- aggressive end-of-line removal pass (pass 2)
---------------------------------------------------------------------
if config.ZAP_EOLS then
-- redo source table by removing deleted bits
for i = 1, dtokens do
local orig = dorig[i]
if orig ~= "" or tok == "TK_EOS" then
ntokens = ntokens + 1
lorig[ntokens] = orig
-- try to remove end-of-lines by comparing token pairs
while tok ~= "TK_EOS" do
if tok == "TK_EOL" and prev ~= "TK_COMMENT" then
-- TK_COMMENT to trap shbang case
if whitesp(i - 1, i + 1) == " " then -- can't delete
---------------------------------------------------------------------
-- write output file (the OUTF write/close lines were lost)
---------------------------------------------------------------------
local dest = table.concat(dorig) -- join up source
local OUTF = io.open(destfile, "wb")
-- (the 'if not OUTF then' guard for this error was lost)
error("cannot open \""..destfile.."\" for writing")
---------------------------------------------------------------------
-- post processing: verification and reporting
---------------------------------------------------------------------
src_stats_c = stats_c
src_stats_l = stats_l
LoadFile(destfile) -- reload to verify output okay
dtok, dorig, dtokens = -- dest tokens for verification
GetRealTokens(ltok, lorig, ntokens)
-- WARNING the following WON'T WORK when an optimization method
-- changes the real token stream in any way
if stokens ~= dtokens then
error("token count incorrect")
for i = 1, stokens do
if stok[i] ~= dtok[i] or sorig[i] ~= dorig[i] then
error("token verification by comparison failed")
if not config.QUIET then
DispAllStats(srcfile, src_stats_c, src_stats_l, destfile, stats_c, stats_l)
-----------------------------------------------------------------------
-- dump token (diagnostic feature)
-----------------------------------------------------------------------
-- NOTE(review): this copy is truncated -- the LoadFile(srcfile) call,
-- the TK_OP branch with its print, the print for TK_EOS/TK_EOL, and
-- the closing 'end's are missing. Tokens are kept exactly as found.
function DumpTokens(srcfile)
-- %q-quote a value for unambiguous display
local function Esc(v) return string.format("%q", v) end
for i = 1, ntokens do
-- shadow the global lists with this token's class/snippet/value
local ltok, lorig, lval = ltok[i], lorig[i], lval[i]
-- display only necessary information
if ltok == "TK_KEYWORD" or ltok == "TK_NAME" or
ltok == "TK_NUMBER" or ltok == "TK_STRING" or
elseif ltok == "TK_COMMENT" or ltok == "TK_LCOMMENT" or
ltok == "TK_SPACE" then
print(ltok, Esc(lorig))
elseif ltok == "TK_EOS" or ltok == "TK_EOL" then
error("unknown token type encountered")
-----------------------------------------------------------------------
-- perform per-file handling
-----------------------------------------------------------------------
-- NOTE(review): this copy is truncated -- the branch preceding
-- 'elseif config.READ_ONLY' (apparently the --dump handling) and the
-- closing 'end's are missing. Also note 'destfile' is assigned without
-- 'local' -- presumably a global leak; confirm against upstream.
function DoFiles(files)
for i, srcfile in ipairs(files) do
-------------------------------------------------------------------
-- find and replace extension for filenames
-------------------------------------------------------------------
local extb, exte = string.find(srcfile, "%.[^%.%\\%/]*$")
local basename, extension = srcfile, ""
if extb and extb > 1 then
basename = string.sub(srcfile, 1, extb - 1)
extension = string.sub(srcfile, extb, exte)
-- -o overrides the derived name; otherwise insert config.SUFFIX
destfile = config.OUTPUT_FILE or basename..config.SUFFIX..extension
if srcfile == destfile then
error("output filename identical to input filename")
-------------------------------------------------------------------
-- perform requested operations
-------------------------------------------------------------------
elseif config.READ_ONLY then
DispSrcStats(srcfile)
ProcessToken(srcfile, destfile)
--[[-------------------------------------------------------------------
-- Command-line interface
-----------------------------------------------------------------------
--]]-------------------------------------------------------------------
---------------------------------------------------------------
-- (section title line lost; this is the argument parser, most
-- likely the body of a lost 'function main()' header)
---------------------------------------------------------------
-- NOTE(review): this copy is truncated -- the assignments for
-- --quiet/--maximum/--dump, the 'i = i + 1' loop steps, the
-- DoFiles(files) call and the closing 'end's are missing.
-- Tokens are kept exactly as found.
if table.getn(arg) == 0 then
print(title..usage) return
local files, i = {}, 1
while i <= table.getn(arg) do
-- a = current argument, b = its value (for options taking one)
local a, b = arg[i], arg[i + 1]
if string.sub(a, 1, 1) == "-" then -- handle options here
if a == "-h" or a == "--help" then
print(title) print(usage) return
elseif a == "--quiet" then
elseif a == "--read-only" then
config.READ_ONLY = true
elseif a == "--keep-lines" then
config.KEEP_LINES = true
elseif a == "--maximum" then
elseif a == "--dump" then
elseif a == "-o" then
if not b then error("-o option needs a file name") end
config.OUTPUT_FILE = b
elseif a == "--" then
break -- ignore rest of args
error("unrecognized option "..a)
table.insert(files, a) -- potential filename
---------------------------------------------------------------
-- set flags for maximum reduction
config.KEEP_LINES = false
config.ZAP_EOLS = true
if table.getn(files) > 0 then
if table.getn(files) > 1 then
if config.OUTPUT_FILE then
error("with -o, only one source file can be specified")
print("LuaSrcDiet: nothing to do!")
-----------------------------------------------------------------------
-- program entry point
-----------------------------------------------------------------------
-- run main() protected so errors become a friendly message
local OK, msg = pcall(main)
-- NOTE(review): the 'if not OK then' guard (and presumably a print of
-- 'msg') was lost in this copy -- as found, the hint below would print
-- unconditionally; confirm against upstream.
print("* Run with option -h or --help for usage information")