#!/usr/bin/env lua
--[[-------------------------------------------------------------------

  LuaSrcDiet
  Compresses Lua source code by removing unnecessary characters.

  Copyright (c) 2005 Kein-Hong Man <khman@users.sf.net>
  The COPYRIGHT file describes the conditions under which this
  software may be distributed (basically a Lua 5-style license.)

  http://luaforge.net/projects/luasrcdiet/
  (TODO) http://www.geocities.com/keinhong/luasrcdiet.html
  See the ChangeLog for more information.

-----------------------------------------------------------------------
-- * See the README file and script comments for notes and caveats.
-----------------------------------------------------------------------
--]]

--[[-------------------------------------------------------------------
-- description and help texts
--]]-------------------------------------------------------------------

title = [[
LuaSrcDiet: Puts your Lua 5 source code on a diet
Version 0.9.1 (20050816) Copyright (c) 2005 Kein-Hong Man
The COPYRIGHT file describes the conditions under which this
software may be distributed (basically a Lua 5-style license.)
]]

USAGE = [[
usage: %s [options] [filenames]

options:
  -h, --help        prints usage information
  -o <file>         specify file name to write output
  --quiet           do not display statistics
  --read-only       read file and print token stats
  --keep-lines      preserve line numbering
  --maximum         maximize reduction of source
  --dump            dump raw tokens from lexer
  --                stop handling arguments

example:
  >%s myscript.lua -o myscript_.lua
]]

-- for embedding, we won't set arg[0]
local usage, exec
if arg[0] then exec = "lua LuaSrcDiet.lua" else exec = "LuaSrcDiet" end
usage = string.format(USAGE, exec, exec)

-- user options
config = {}
config.SUFFIX = "_"

--[[-------------------------------------------------------------------
-- llex is a port of the Lua 5.0.2 lexer (llex.*) to Lua, with the
-- token output modified and the code simplified for LuaSrcDiet.
-----------------------------------------------------------------------
-- Instead of returning a number, llex:lex() returns strings, like
-- "TK_EOS". The other values returned are the original snippet of
-- source and the "value" of the lexed token, if applicable.
-----------------------------------------------------------------------
-- * Prep the lexer with llex:setinput(); llex will close the file handle.
-- * For LuaSrcDiet, llex has been changed:
--   TK_* returns classes of tokens, made less specific:
--   "TK_OP" -> operators and punctuation, "TK_KEYWORD" -> keywords,
--   "TK_EOL" -> end-of-lines, "TK_SPACE" -> whitespace,
--   "TK_COMMENT" -> comments, "TK_LCOMMENT" -> block comments
-----------------------------------------------------------------------
--]]
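-- A minimal usage sketch (illustrative only, not part of the original
-- script; "test.lua" is a hypothetical file name):
--   local INF = io.open("test.lua", "rb")
--   llex:setinput(INF, "test.lua")
--   repeat
--     local class, orig, val = llex:lex()
--     print(class, string.format("%q", orig))
--   until class == "TK_EOS"
-- llex closes the file handle itself once the stream is exhausted;
-- LoadFile() below drives the lexer in essentially this way.
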
llex = {}

-----------------------------------------------------------------------
-- llex initialization stuff
-----------------------------------------------------------------------

llex.EOZ = -1           -- end of stream marker
llex.keywords =         -- Lua 5 keywords
  "and break do else elseif end false for function if in local \
nil not or repeat return then true until while "

llex.str2tok = {}       -- for matching keywords
for v in string.gfind(llex.keywords, "[^%s]+") do
  llex.str2tok[v] = true
end

--[[-------------------------------------------------------------------
-- Support functions for the Lua lexer (mainly error handling)
-- * REMOVED functions luaX_error, luaX_errorline, luaX_token2str,
--   luaX_syntaxerror, either unused or simplified.
-----------------------------------------------------------------------
--]]

function llex:checklimit(val, limit, msg)
  if val > limit then
    msg = string.format("too many %s (limit=%d)", msg, limit)
    -- luaX_syntaxerror merged here; removed token reference
    error(string.format("%s:%d: %s", self.source, self.line, msg))
  end
end

function llex:error(s, token)
  -- luaX_errorline merged here
  error(string.format("%s:%d: %s near '%s'", self.source, self.line, s, token))
end

function llex:lexerror(s, token)
  if token then self:error(s, token) else self:error(s, self.buff) end
end

--[[-------------------------------------------------------------------
-- Principal input/output stream functions: nextc, save
-- * self.c and self.ch are identical; self.ch is the string version
-- * the lexer has a token buffer, buff, intended for the lexed value,
--   and another buffer, obuff, for the original characters -- not a
--   very efficient method, but we want both, just in case
-----------------------------------------------------------------------
--]]
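-- For example (an illustrative note, not part of the original source):
-- when lexing the string literal '\65\66', buff accumulates the decoded
-- value AB while obuff keeps the original escapes \65\66, so later
-- passes can pick either the token's value or its exact source text.
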
-----------------------------------------------------------------------
-- returns the next character as a number
-----------------------------------------------------------------------
function llex:nextc()
  if self.ipos > self.ilen then
    if self.z then                      -- read from specified stream
      self.ibuf = self.z:read("*l")
      if self.ibuf == nil then          -- close stream
        self.z:close()
        self.c = self.EOZ; self.ch = ""
        self.z = nil
        return
      else                              -- preprocess source line
        self.ibuf = self.ibuf.."\n"
        self.ipos = 1
        self.ilen = string.len(self.ibuf)
        -- then grab the first char (below)
      end
    else                                -- end of string chunk
      self.c = self.EOZ; self.ch = ""
      return
    end
  end
  self.c = string.byte(self.ibuf, self.ipos) -- return a character
  self.ch = string.char(self.c)
  self.ipos = self.ipos + 1
end

-----------------------------------------------------------------------
-- ADDED initialize token buffers
-----------------------------------------------------------------------
function llex:initbuff()
  self.buff = ""
  self.obuff = ""
end

-----------------------------------------------------------------------
-- saves the given character into the buffer; c must be a string
-----------------------------------------------------------------------
function llex:save(c)
  self.buff = self.buff..c
end

-----------------------------------------------------------------------
-- ADDED saves the original character into the buffer
-----------------------------------------------------------------------
function llex:osave(c)
  self.obuff = self.obuff..c
end

-----------------------------------------------------------------------
-- saves the current character and grabs the next character
-----------------------------------------------------------------------
function llex:save_and_next()
  self:save(self.ch)
  self:osave(self.ch)
  self:nextc()
end

-----------------------------------------------------------------------
-- moves on to the next line, updating the line number count
-----------------------------------------------------------------------
function llex:inclinenumber()
  self:nextc()  -- skip EOL
  self.line = self.line + 1
  -- number of lines is limited to MAXINT
  self:checklimit(self.line, 2147483645, "lines in a chunk")
end

--[[-------------------------------------------------------------------
-- Initialize the lexer to a particular stream (handle) or string
-----------------------------------------------------------------------
--]]

-----------------------------------------------------------------------
-- input stream initialization (file handle)
-----------------------------------------------------------------------
function llex:setinput(z, source)
  if z then
    self.ilen = 0                       -- length
    self.z = z                          -- input stream
  end
  self.ipos = 1                         -- position
  self.line = 1
  self.lastline = 1
  self.source = source
  if not self.source then               -- default source name
    self.source = "main"
  end
  self:nextc()                          -- read first char
  -- shbang handling moved to llex()
end

-----------------------------------------------------------------------
-- input stream initialization (string)
-----------------------------------------------------------------------
function llex:setstring(chunk, source)
  self.ibuf = chunk
  self.ilen = string.len(self.ibuf)     -- length
  self:setinput(nil, source)
end

--[[-------------------------------------------------------------------
-- Main Lua lexer functions
-----------------------------------------------------------------------
--]]

-----------------------------------------------------------------------
-- grab a class of characters
-----------------------------------------------------------------------
function llex:readloop(pat)
  while string.find(self.ch, pat) do
    self:save_and_next()
  end
end

-----------------------------------------------------------------------
-- grab characters until end-of-line
-----------------------------------------------------------------------
function llex:readtoeol()
  while self.ch ~= '\n' and self.c ~= self.EOZ do
    self:save_and_next()
  end
end

251 function llex:read_numeral(comma)
252 self:initbuff()
253 if comma then
254 self.buff = '.'; self.obuff = '.'
256 self:readloop("%d")
257 if self.ch == '.' then
258 self:save_and_next()
259 if self.ch == '.' then
260 self:save_and_next()
261 self:lexerror("ambiguous syntax (decimal point x string concatenation)")
264 self:readloop("%d")
265 if self.ch == 'e' or self.ch == 'E' then
266 self:save_and_next() -- read 'E'
267 if self.ch == '+' or self.ch == '-' then
268 self:save_and_next() -- optional exponent sign
270 self:readloop("%d")
272 local value = tonumber(self.buff)
273 if not value then
274 self:lexerror("malformed number")
276 return self.obuff, value
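-- Illustrative examples (not part of the original source): for the
-- input text "3.14e2", read_numeral() would return ("3.14e2", 314);
-- called with comma set (a leading '.' already consumed, as in ".5"),
-- it would return (".5", 0.5).
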
-----------------------------------------------------------------------
-- read a long string or long comment
-----------------------------------------------------------------------
function llex:read_long_string(comment)
  local cont = 0                        -- nesting
  local eols = 0
  if comment then
    self.buff = "--["
  else
    self.buff = "["                     -- save first '['
  end
  self.obuff = self.buff
  self:save_and_next()                  -- pass the second '['
  if self.ch == '\n' then               -- string starts with a newline?
    eols = eols + 1
    self:osave('\n')
    self:inclinenumber()                -- skip it
  end
  while true do
    -- case -----------------------------------------------------------
    if self.c == self.EOZ then          -- EOZ
      if comment then
        self:lexerror("unfinished long comment", "<eof>")
      else
        self:lexerror("unfinished long string", "<eof>")
      end
    -- case -----------------------------------------------------------
    elseif self.ch == '[' then
      self:save_and_next()
      if self.ch == '[' then
        cont = cont + 1
        self:save_and_next()
      end
    -- case -----------------------------------------------------------
    elseif self.ch == ']' then
      self:save_and_next()
      if self.ch == ']' then
        if cont == 0 then break end
        cont = cont - 1
        self:save_and_next()
      end
    -- case -----------------------------------------------------------
    elseif self.ch == '\n' then
      self:save('\n')
      eols = eols + 1
      self:osave('\n')
      self:inclinenumber()
    -- case -----------------------------------------------------------
    else
      self:save_and_next()
    -- endcase --------------------------------------------------------
    end
  end--while
  self:save_and_next()                  -- skip the second ']'
  if comment then
    return self.obuff, eols
  end
  return self.obuff, string.sub(self.buff, 3, -3)
end
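-- Illustrative examples (not part of the original source): for the
-- long string [[hello]] this returns ("[[hello]]", "hello"); for a
-- long comment (comment argument set) it returns the original text
-- plus the number of newlines seen inside, which the processing pass
-- uses to rebuild line counts under --keep-lines.
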
-----------------------------------------------------------------------
-- read a string
-----------------------------------------------------------------------
function llex:read_string(del)
  self:initbuff()
  self:save_and_next()
  while self.ch ~= del do
    -- case -----------------------------------------------------------
    if self.c == self.EOZ then
      self:lexerror("unfinished string", "<eof>")
    -- case -----------------------------------------------------------
    elseif self.ch == '\n' then
      self:lexerror("unfinished string")
    -- case -----------------------------------------------------------
    elseif self.ch == '\\' then
      self:osave('\\')
      self:nextc()                      -- do not save the '\'
      if self.c ~= self.EOZ then        -- will raise an error next loop
        local i = string.find("\nabfnrtv", self.ch, 1, 1)
        if i then
          -- standard escapes
          self:save(string.sub("\n\a\b\f\n\r\t\v", i, i))
          self:osave(self.ch)
          if i == 1 then
            self:inclinenumber()
          else
            self:nextc()
          end
        elseif string.find(self.ch, "%d") == nil then
          -- escaped punctuation
          self:save_and_next()          -- handles \\, \", \', and \?
        else
          -- \xxx sequence
          local c = 0
          i = 0
          repeat
            c = 10 * c + self.ch        -- (coerced)
            self:osave(self.ch)
            self:nextc()
            i = i + 1
          until (i >= 3 or not string.find(self.ch, "%d"))
          if c > 255 then               -- UCHAR_MAX
            self:lexerror("escape sequence too large")
          end
          self:save(string.char(c))
        end
      end
    -- case -----------------------------------------------------------
    else
      self:save_and_next()
    -- endcase --------------------------------------------------------
    end
  end -- endwhile
  self:save_and_next()                  -- skip delimiter
  return self.obuff, string.sub(self.buff, 2, -2)
end
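-- Illustrative example (not part of the original source): for the
-- source text "a\65" this returns the original snippet (quotes and
-- escape included) as obuff, and the decoded value aA, since escapes
-- are expanded into buff but kept verbatim in obuff.
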
--[[-------------------------------------------------------------------
-- Lexer feeder function for the parser
-- * As we are not actually parsing the token stream, we return a token
--   class, the original snippet, and the token's value (for strings and
--   numbers). Most tokens just pass through LuaSrcDiet processing...
-----------------------------------------------------------------------
--]]
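-- Illustrative example (not part of the original source): feeding the
-- line 'local x = 1 -- init' (followed by a newline) through llex:lex()
-- yields roughly this stream of (class, snippet) pairs:
--   TK_KEYWORD "local", TK_SPACE " ", TK_NAME "x", TK_SPACE " ",
--   TK_OP "=", TK_SPACE " ", TK_NUMBER "1", TK_SPACE " ",
--   TK_COMMENT "-- init", TK_EOL "\n", TK_EOS ""
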
-----------------------------------------------------------------------
-- lex function enhanced to return the snippets required for processing
-- * basically adds: TK_COMMENT, TK_LCOMMENT, TK_EOL, TK_SPACE
-----------------------------------------------------------------------
function llex:lex()
  local strfind = string.find
  while true do
    local c = self.c
    -- case -----------------------------------------------------------
    if self.line == 1 and self.ipos == 2  -- shbang handling
       and self.ch == '#' then            -- skip first line
      self:initbuff()
      self:readtoeol()
      return "TK_COMMENT", self.obuff
    end
    -- case -----------------------------------------------------------
    if self.ch == '\n' then               -- end of line
      self:inclinenumber()
      return "TK_EOL", '\n'
    -- case -----------------------------------------------------------
    elseif self.ch == '-' then            -- comment
      self:nextc()
      if self.ch ~= '-' then              -- '-' operator
        return "TK_OP", '-'
      end
      -- else it is a comment, '--' or '--[['
      self:nextc()
      if self.ch == '[' then
        self:nextc()
        if self.ch == '[' then            -- block comment
          return "TK_LCOMMENT", self:read_long_string(1) -- long comment
        else                              -- short comment
          self.buff = ""
          self.obuff = "--["
          self:readtoeol()
          return "TK_COMMENT", self.obuff
        end
      else                                -- short comment
        self.buff = ""
        self.obuff = "--"
        self:readtoeol()
        return "TK_COMMENT", self.obuff
      end
    -- case -----------------------------------------------------------
    elseif self.ch == '[' then            -- literal string
      self:nextc()
      if self.ch ~= '[' then
        return "TK_OP", '['
      else
        return "TK_STRING", self:read_long_string()
      end
    -- case -----------------------------------------------------------
    elseif self.ch == "\"" or self.ch == "\'" then -- strings
      return "TK_STRING", self:read_string(self.ch)
    -- case -----------------------------------------------------------
    elseif self.ch == '.' then            -- dot, concat,
      self:nextc()                        -- or number
      if self.ch == '.' then
        self:nextc()
        if self.ch == '.' then
          self:nextc()
          return "TK_OP", '...'
        else
          return "TK_OP", '..'
        end
      elseif strfind(self.ch, "%d") == nil then
        return "TK_OP", '.'
      else
        return "TK_NUMBER", self:read_numeral(1)
      end
    -- case -----------------------------------------------------------
    elseif self.c == self.EOZ then        -- end of input
      return "TK_EOS", ''
    -- case -----------------------------------------------------------
    else
      local op = strfind("=><~", self.ch, 1, 1) -- relational ops
      local c = self.ch
      if op then
        self:nextc()
        if self.ch ~= '=' then            -- single-char ops
          return "TK_OP", c
        else                              -- double-char ops
          self:nextc()
          return "TK_OP", c..'='
        end
      else
        if strfind(self.ch, "%s") then    -- whitespace
          self:initbuff()
          self:readloop("%s")
          return "TK_SPACE", self.obuff
        elseif strfind(self.ch, "%d") then -- number
          return "TK_NUMBER", self:read_numeral()
        elseif strfind(self.ch, "[%a_]") then -- identifier
          -- identifier or reserved word
          self:initbuff()
          self:readloop("[%w_]")
          if self.str2tok[self.buff] then -- reserved word
            return "TK_KEYWORD", self.buff
          end
          return "TK_NAME", self.buff
        else                              -- control/symbol
          if strfind(self.ch, "%c") then
            self:error("invalid control char", string.format("char(%d)", self.c))
          end
          self:nextc()
          return "TK_OP", c               -- single-chars
        end
      end
    -- endcase --------------------------------------------------------
    end--if self.ch
  end--while
end

-----------------------------------------------------------------------
-- 'original' lex function, behaves *exactly* like the original llex.c
-- * currently unused by LuaSrcDiet
-----------------------------------------------------------------------
function llex:olex()
  local _ltok, _lorig, _lval
  while true do
    _ltok, _lorig, _lval = self:lex()
    if _ltok ~= "TK_COMMENT" and _ltok ~= "TK_LCOMMENT"
       and _ltok ~= "TK_EOL" and _ltok ~= "TK_SPACE" then
      return _ltok, _lorig, _lval
    end
  end
end

--[[-------------------------------------------------------------------
-- Major functions
-- * We aren't using lval[] for now, except for TK_LCOMMENT processing;
--   perhaps later for heavy-duty optimization, like constant optimization...
-----------------------------------------------------------------------
--]]

stats_c = nil   -- number of tokens of a given type
stats_l = nil   -- bytes occupied by tokens of a given type
ltok = nil      -- source list of tokens
lorig = nil     -- source list of original snippets
lval = nil      -- source list of actual token values
ntokens = 0     -- number of tokens processed from file

-----------------------------------------------------------------------
-- "classes" of tokens; the last 4 aren't standard in llex.c
-- * arrangement/count is significant!!! hardcoded for the stats display
-----------------------------------------------------------------------
ttypes = {
  "TK_KEYWORD", "TK_NAME", "TK_NUMBER", "TK_STRING", "TK_OP",
  "TK_EOS", "TK_COMMENT", "TK_LCOMMENT", "TK_EOL", "TK_SPACE",
}

-----------------------------------------------------------------------
-- reads the source file, creates the token arrays and fills in statistics
-----------------------------------------------------------------------
function LoadFile(filename)
  if not filename and type(filename) ~= "string" then
    error("invalid filename specified")
  end
  stats_c = {}
  stats_l = {}
  ltok = {}
  lorig = {}
  lval = {}
  ntokens = 0
  for _, i in ipairs(ttypes) do         -- init counters
    stats_c[i] = 0; stats_l[i] = 0
  end
  ---------------------------------------------------------------------
  local INF = io.open(filename, "rb")
  if not INF then
    error("cannot open \""..filename.."\" for reading")
  end
  llex:setinput(INF, filename)
  local _ltok, _lorig, _lval
  local i = 0
  while _ltok ~= "TK_EOS" do
    _ltok, _lorig, _lval = llex:lex()
    i = i + 1
    ltok[i] = _ltok
    lorig[i] = _lorig
    lval[i] = _lval
    stats_c[_ltok] = stats_c[_ltok] + 1
    stats_l[_ltok] = stats_l[_ltok] + string.len(_lorig)
  end
  ntokens = i
  -- INF is closed by llex
end

-----------------------------------------------------------------------
-- returns token tables containing valid tokens only (for verification)
-----------------------------------------------------------------------
function GetRealTokens(stok, sorig, stokens)
  local rtok, rorig, rtokens = {}, {}, 0
  for i = 1, stokens do
    local _stok = stok[i]
    local _sorig = sorig[i]
    if _stok ~= "TK_COMMENT" and _stok ~= "TK_LCOMMENT"
       and _stok ~= "TK_EOL" and _stok ~= "TK_SPACE" then
      rtokens = rtokens + 1
      rtok[rtokens] = _stok
      rorig[rtokens] = _sorig
    end
  end
  return rtok, rorig, rtokens
end

-----------------------------------------------------------------------
-- displays source token statistics only (for the --read-only option)
-----------------------------------------------------------------------
function DispSrcStats(filename)
  local underline = "--------------------------------\n"
  LoadFile(filename)
  print(title)
  io.stdout:write("Statistics for: "..filename.."\n\n"
    ..string.format("%-14s%8s%10s\n", "Elements", "Count", "Bytes")
    ..underline)
  local total_c, total_l, tok_c, tok_l = 0, 0, 0, 0
  for j = 1, 10 do
    local i = ttypes[j]
    local c, l = stats_c[i], stats_l[i]
    total_c = total_c + c
    total_l = total_l + l
    if j <= 6 then
      tok_c = tok_c + c
      tok_l = tok_l + l
    end
    io.stdout:write(string.format("%-14s%8d%10d\n", i, c, l))
    if i == "TK_EOS" then io.stdout:write(underline) end
  end
  io.stdout:write(underline
    ..string.format("%-14s%8d%10d\n", "Total Elements", total_c, total_l)
    ..underline
    ..string.format("%-14s%8d%10d\n", "Total Tokens", tok_c, tok_l)
    ..underline.."\n")
end

-----------------------------------------------------------------------
-- displays source and destination stats (enabled by default)
-----------------------------------------------------------------------
function DispAllStats(srcfile, src_c, src_l, destfile, dest_c, dest_l)
  local underline = "--------------------------------------------------\n"
  print(title)
  local stot_c, stot_l, stok_c, stok_l = 0, 0, 0, 0
  local dtot_c, dtot_l, dtok_c, dtok_l = 0, 0, 0, 0
  io.stdout:write("Statistics for: "..srcfile.." -> "..destfile.."\n\n"
    ..string.format("%-14s%8s%10s%8s%10s\n", "Lexical", "Input", "Input", "Output", "Output")
    ..string.format("%-14s%8s%10s%8s%10s\n", "Elements", "Count", "Bytes", "Count", "Bytes")
    ..underline)
  for j = 1, 10 do
    local i = ttypes[j]
    local s_c, s_l = src_c[i], src_l[i]
    local d_c, d_l = dest_c[i], dest_l[i]
    stot_c = stot_c + s_c
    stot_l = stot_l + s_l
    dtot_c = dtot_c + d_c
    dtot_l = dtot_l + d_l
    if j <= 6 then
      stok_c = stok_c + s_c
      stok_l = stok_l + s_l
      dtok_c = dtok_c + d_c
      dtok_l = dtok_l + d_l
    end
    io.stdout:write(string.format("%-14s%8d%10d%8d%10d\n", i, s_c, s_l, d_c, d_l))
    if i == "TK_EOS" then io.stdout:write(underline) end
  end
  io.stdout:write(underline
    ..string.format("%-14s%8d%10d%8d%10d\n", "Total Elements", stot_c, stot_l, dtot_c, dtot_l)
    ..underline
    ..string.format("%-14s%8d%10d%8d%10d\n", "Total Tokens", stok_c, stok_l, dtok_c, dtok_l)
    ..underline.."\n")
end

-----------------------------------------------------------------------
-- token processing function
-----------------------------------------------------------------------
function ProcessToken(srcfile, destfile)
  LoadFile(srcfile)
  if ntokens < 1 then
    error("no tokens to process")
  end
  local dtok = {}                       -- processed list of tokens
  local dorig = {}                      -- processed list of original snippets
  local dtokens = 0                     -- number of tokens generated
  local stok, sorig, stokens =          -- src tokens for verification
    GetRealTokens(ltok, lorig, ntokens)
  ---------------------------------------------------------------------
  -- saves the specified token to the destination token list
  ---------------------------------------------------------------------
  local function savetok(src)
    dtokens = dtokens + 1
    dtok[dtokens] = ltok[src]
    dorig[dtokens] = lorig[src]
  end
  ---------------------------------------------------------------------
  -- checks if the token at a location is whitespace-equivalent
  ---------------------------------------------------------------------
  local function iswhitespace(i)
    local tok = ltok[i]
    if tok == "TK_SPACE" or tok == "TK_EOL"
       or tok == "TK_COMMENT" or tok == "TK_LCOMMENT" then
      return true
    end
  end
  ---------------------------------------------------------------------
  -- compares two tokens and returns the whitespace needed in between,
  -- if any (illustrative pairs are listed right after this function)
  -- * note that some comparisons won't occur in Lua code; we assume
  --   no knowledge of Lua syntax, only knowledge of lexical analysis
  ---------------------------------------------------------------------
  local function whitesp(previ, nexti)
    local p = ltok[previ]
    local n = ltok[nexti]
    -- if the next token is whitespace, remove the current whitespace token
    if iswhitespace(nexti) then return "" end
    -- otherwise we are comparing non-whitespace tokens, so we use
    -- the following optimization rules...
    -------------------------------------------------------------------
    if p == "TK_OP" then
      if n == "TK_NUMBER" then
        -- e.g. ". .123"
        if string.sub(lorig[nexti], 1, 1) == "." then return " " end
      end
      return ""
    -------------------------------------------------------------------
    elseif p == "TK_KEYWORD" or p == "TK_NAME" then
      if n == "TK_KEYWORD" or n == "TK_NAME" then
        return " "
      elseif n == "TK_NUMBER" then
        -- e.g. "foo.123"
        if string.sub(lorig[nexti], 1, 1) == "." then return "" end
        return " "
      end
      return ""
    -------------------------------------------------------------------
    elseif p == "TK_STRING" then
      return ""
    -------------------------------------------------------------------
    elseif p == "TK_NUMBER" then
      if n == "TK_NUMBER" then
        return " "
      elseif n == "TK_KEYWORD" or n == "TK_NAME" then
        -- e.g. "123 e4"
        local c = string.sub(lorig[nexti], 1, 1)
        if string.lower(c) == "e" then return " " end
      end
      return ""
    -------------------------------------------------------------------
    else -- should never arrive here
      error("token comparison failed")
    end
  end
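  -- Illustrative whitesp() pairs (not part of the original source),
  -- showing the separator chosen for a few (previous, next) tokens:
  --   ("local", "x")  -> " "   keyword/name followed by keyword/name
  --   ("x", "=")      -> ""    name followed by an operator
  --   ("2", "e4")     -> " "   "2e4" would otherwise merge into one number
  --   ("..", ".5")    -> " "   operator followed by a number starting with '.'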
  ---------------------------------------------------------------------
  -- main processing loop (pass 1)
  ---------------------------------------------------------------------
  local i = 1               -- token position
  local linestart = true    -- true at the start of a line
  local tok = ""            -- current token
  local prev = 0            -- index of the previous non-whitespace token
  while true do
    tok = ltok[i]
    -------------------------------------------------------------------
    if tok == "TK_SPACE" then
      if linestart then
        -- delete leading whitespace
        lorig[i] = ""
      else
        -- remove in-between whitespace if possible
        lorig[i] = whitesp(prev, i + 1)
      end
      savetok(i)
    -------------------------------------------------------------------
    elseif tok == "TK_NAME" or tok == "TK_KEYWORD" or tok == "TK_OP"
           or tok == "TK_STRING" or tok == "TK_NUMBER" then
      -- these are all unchanged
      prev = i
      savetok(i)
      linestart = false
    -------------------------------------------------------------------
    elseif tok == "TK_EOL" then
      if linestart then
        if config.KEEP_LINES then
          savetok(i)
          linestart = true
        end
        -- otherwise it's an empty line, drop it
      else
        savetok(i)
        linestart = true
      end
    -------------------------------------------------------------------
    elseif tok == "TK_COMMENT" then
      -- must keep the shbang for correctness; this forces a TK_EOL too
      if i == 1 and string.sub(lorig[i], 1, 1) == "#" then
        savetok(i)
        linestart = false
      end
      -- don't change linestart; the now-empty line can be consumed
    -------------------------------------------------------------------
    elseif tok == "TK_LCOMMENT" then
      local eols = nil
      if config.KEEP_LINES then
        -- preserve newlines inside long comments
        if lval[i] > 0 then eols = string.rep("\n", lval[i]) end
      end
      if iswhitespace(i + 1) then
        lorig[i] = eols or ""
      else
        lorig[i] = eols or " "
      end
      savetok(i)
    -------------------------------------------------------------------
    elseif tok == "TK_EOS" then
      savetok(i)
      break
    -------------------------------------------------------------------
    else
      error("unidentified token encountered")
    end--if tok
    i = i + 1
  end--while
  ---------------------------------------------------------------------
  -- aggressive end-of-line removal pass (pass 2)
  ---------------------------------------------------------------------
  if config.ZAP_EOLS then
    ltok, lorig = {}, {}
    ntokens = 0
    -- redo the source tables, removing deleted bits
    for i = 1, dtokens do
      local tok = dtok[i]
      local orig = dorig[i]
      if orig ~= "" or tok == "TK_EOS" then
        ntokens = ntokens + 1
        ltok[ntokens] = tok
        lorig[ntokens] = orig
      end
    end
    -- try to remove end-of-lines by comparing token pairs
    dtok, dorig = {}, {}
    dtokens = 0
    i = 1
    tok, prev = "", ""
    while tok ~= "TK_EOS" do
      tok = ltok[i]
      if tok == "TK_EOL" and prev ~= "TK_COMMENT" then
        -- the TK_COMMENT check traps the shbang case
        if whitesp(i - 1, i + 1) == " " then -- can't delete
          savetok(i)
        end
      else
        prev = tok
        savetok(i)
      end
      i = i + 1
    end--while
  end
  ---------------------------------------------------------------------
  -- write the output file
  ---------------------------------------------------------------------
  local dest = table.concat(dorig)      -- join up the source
  local OUTF = io.open(destfile, "wb")
  if not OUTF then
    error("cannot open \""..destfile.."\" for writing")
  end
  OUTF:write(dest)
  io.close(OUTF)
  ---------------------------------------------------------------------
  -- post processing: verification and reporting
  ---------------------------------------------------------------------
  src_stats_c = stats_c
  src_stats_l = stats_l
  LoadFile(destfile)                    -- reload to verify the output is okay
  dtok, dorig, dtokens =                -- dest tokens for verification
    GetRealTokens(ltok, lorig, ntokens)
  -- WARNING the following WON'T WORK if an optimization method
  -- changes the real token stream in any way
  if stokens ~= dtokens then
    error("token count incorrect")
  end
  for i = 1, stokens do
    if stok[i] ~= dtok[i] or sorig[i] ~= dorig[i] then
      error("token verification by comparison failed")
    end
  end
  if not config.QUIET then
    DispAllStats(srcfile, src_stats_c, src_stats_l, destfile, stats_c, stats_l)
  end
end

-----------------------------------------------------------------------
-- dump tokens (diagnostic feature)
-----------------------------------------------------------------------
function DumpTokens(srcfile)
  local function Esc(v) return string.format("%q", v) end
  LoadFile(srcfile)
  for i = 1, ntokens do
    local ltok, lorig, lval = ltok[i], lorig[i], lval[i]
    -- display only the necessary information
    if ltok == "TK_KEYWORD" or ltok == "TK_NAME" or
       ltok == "TK_NUMBER" or ltok == "TK_STRING" or
       ltok == "TK_OP" then
      print(ltok, lorig)
    elseif ltok == "TK_COMMENT" or ltok == "TK_LCOMMENT" or
           ltok == "TK_SPACE" then
      print(ltok, Esc(lorig))
    elseif ltok == "TK_EOS" or ltok == "TK_EOL" then
      print(ltok)
    else
      error("unknown token type encountered")
    end
  end
end
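-- Illustrative output (not part of the original source): for the line
-- 'local x = 1', the --dump option prints roughly:
--   TK_KEYWORD   local
--   TK_SPACE     " "
--   TK_NAME      x
--   TK_SPACE     " "
--   TK_OP        =
--   TK_SPACE     " "
--   TK_NUMBER    1
--   TK_EOL
--   TK_EOS
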
-----------------------------------------------------------------------
-- perform per-file handling
-----------------------------------------------------------------------
function DoFiles(files)
  for i, srcfile in ipairs(files) do
    local destfile
    -------------------------------------------------------------------
    -- find and replace the extension of the filename
    -------------------------------------------------------------------
    local extb, exte = string.find(srcfile, "%.[^%.%\\%/]*$")
    local basename, extension = srcfile, ""
    if extb and extb > 1 then
      basename = string.sub(srcfile, 1, extb - 1)
      extension = string.sub(srcfile, extb, exte)
    end
    destfile = config.OUTPUT_FILE or basename..config.SUFFIX..extension
    if srcfile == destfile then
      error("output filename identical to input filename")
    end
    -------------------------------------------------------------------
    -- perform the requested operations
    -------------------------------------------------------------------
    if config.DUMP then
      DumpTokens(srcfile)
    elseif config.READ_ONLY then
      DispSrcStats(srcfile)
    else
      ProcessToken(srcfile, destfile)
    end
  end--for
end

--[[-------------------------------------------------------------------
-- Command-line interface
-----------------------------------------------------------------------
--]]

function main()
  ---------------------------------------------------------------------
  -- handle arguments
  ---------------------------------------------------------------------
  if table.getn(arg) == 0 then
    print(title..usage) return
  end
  local files, i = {}, 1
  while i <= table.getn(arg) do
    local a, b = arg[i], arg[i + 1]
    if string.sub(a, 1, 1) == "-" then  -- handle options here
      if a == "-h" or a == "--help" then
        print(title) print(usage) return
      elseif a == "--quiet" then
        config.QUIET = true
      elseif a == "--read-only" then
        config.READ_ONLY = true
      elseif a == "--keep-lines" then
        config.KEEP_LINES = true
      elseif a == "--maximum" then
        config.MAX = true
      elseif a == "--dump" then
        config.DUMP = true
      elseif a == "-o" then
        if not b then error("-o option needs a file name") end
        config.OUTPUT_FILE = b
        i = i + 1
      elseif a == "--" then
        break -- ignore the rest of the arguments
      else
        error("unrecognized option "..a)
      end
    else
      table.insert(files, a)            -- potential filename
    end
    i = i + 1
  end--while
  ---------------------------------------------------------------------
  if config.MAX then
    -- set flags for maximum reduction
    config.KEEP_LINES = false
    config.ZAP_EOLS = true
  end
  if table.getn(files) > 0 then
    if table.getn(files) > 1 then
      if config.OUTPUT_FILE then
        error("with -o, only one source file can be specified")
      end
    end
    DoFiles(files)
  else
    print("LuaSrcDiet: nothing to do!")
  end
end

-----------------------------------------------------------------------
-- program entry point
-----------------------------------------------------------------------
if not TEST then
  local OK, msg = pcall(main)
  if not OK then
    print("* Run with option -h or --help for usage information")
    print(msg)
  end
end
-- end of script