tools/distribution/distribution.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 # vim: set noexpandtab sw=4 ts=4:
   4 # --
   5 # A recent battle with vim and a Go program finally settled this for me.
   6 # Tabs for indent, spaces for formatting. If you change your shiftwidth and
   7 # tabstop to different values and your code looks ugly, say aloud: tabs
   8 # for indent, spaces for formatting.
   9
  10 """
  11 Generate Graphs Directly in the (ASCII- or Unicode-based) Terminal
  12
  13 If you find yourself typing:
  14   [long | list | of | commands | sort | uniq -c | sort -rn]
  15
  16 Replace:
  17   [| sort | uniq -c | sort -rn]
  18
  19 With:
  20   [| distribution]
  21
  22 Then bask in the glory of your new-found data visualization. There are other
  23 use cases as well.
  24 """
  25
  26 import math,os,re,sys,time
  27
  28 class Histogram(object):
  29         """
  30         Takes the tokenDict built in the InputReader class and goes through it,
  31         printing a histogram for each of the highest height entries
  32         """
  33         def __init__(self):
  34                 pass
  35
  36         def histogram_bar(self, s, histWidth, maxVal, barVal):
  37                 # given a value and max, return string for histogram bar of the proper
  38                 # number of characters, including unicode partial-width characters
  39                 returnBar = ''
  40
  41                 # first case is partial-width chars
  42                 if s.charWidth < 1:
  43                         zeroChar = s.graphChars[-1]
  44                 elif len(s.histogramChar) > 1 and s.unicodeMode == False:
  45                         zeroChar = s.histogramChar[0]
  46                         oneChar = s.histogramChar[1]
  47                 else:
  48                         zeroChar = s.histogramChar
  49                         oneChar = s.histogramChar
  50
  51                 # write out the full-width integer portion of the histogram
  52                 if s.logarithmic:
  53                         maxLog = math.log(maxVal)
  54                         if barVal > 0:
  55                                 barLog = math.log(barVal)
  56                         else:
  57                                 barLog = 0
  58                         intWidth = int(barLog / maxLog * histWidth)
  59                         remainderWidth = (barLog / maxLog * histWidth) - intWidth
  60                 else:
  61                         intWidth = int(barVal * 1.0 / maxVal * histWidth)
  62                         remainderWidth = (barVal * 1.0 / maxVal * histWidth) - intWidth
  63
  64                 # write the zeroeth character intWidth times...
  65                 returnBar += zeroChar * intWidth
  66
  67                 # we always have at least one remaining char for histogram - if
  68                 # we have full-width chars, then just print it, otherwise do a
  69                 # calculation of how much remainder we need to print
  70                 #
  71                 # FIXME: The remainder partial char printed does not take into
  72                 # account logarithmic scale (can humans notice?).
  73                 if s.charWidth == 1:
  74                         returnBar += oneChar
  75                 elif s.charWidth < 1:
  76                         # this is high-resolution, so figure out what remainder we
  77                         # have to represent
  78                         if remainderWidth > s.charWidth:
  79                                 whichChar = int(remainderWidth / s.charWidth)
  80                                 returnBar += s.graphChars[whichChar]
  81
  82                 return returnBar
  83
  84         def write_hist(self, s, tokenDict):
  85                 maxTokenLen = 0
  86                 outputDict = {}
  87
  88                 numItems = 0
  89                 maxVal = 0
  90                 s.totalValues = int(s.totalValues)
  91
  92                 # given a dict, create a comparison tuple that sorts first by the value of a key,
  93                 # then by the key itself in case of a tie. this allows us to create deterministic sorts
  94                 # when we have multiple entries in our histogram with the same frequency.
  95                 def value_key_compare(dict):
  96                         return lambda key: (dict.get(key), key)
  97
  98                 for k in sorted(tokenDict, key=value_key_compare(tokenDict), reverse=True):
  99                         # can't remember what feature "if k:" adds - i think there's an
 100                         # off-by-one death the script sometimes suffers without it.
 101                         if k:
 102                                 outputDict[k] = tokenDict[k]
 103                                 if len(str(k)) > maxTokenLen: maxTokenLen = len(str(k))
 104                                 if outputDict[k] > maxVal: maxVal = outputDict[k]
 105                                 numItems += 1
 106                                 if numItems >= s.height:
 107                                         break
 108
 109                 s.endTime = int(time.time() * 1000)
 110                 totalMillis = s.endTime - s.startTime
 111                 if s.verbose == True:
 112                         sys.stderr.write("tokens/lines examined: {:,d}".format(s.totalObjects) + "\n")
 113                         sys.stderr.write(" tokens/lines matched: {:,d}".format(s.totalValues) + "\n")
 114                         sys.stderr.write("       histogram keys: {:,d}".format(len(tokenDict)) + "\n")
 115                         sys.stderr.write("              runtime: {:,.2f}ms".format(totalMillis) + "\n")
 116
 117                 # the first entry will determine these values
 118                 maxValueWidth = 0
 119                 maxPctWidth = 0
 120                 sortedOutput = sorted(outputDict, key=value_key_compare(outputDict), reverse=True)
 121                 for i in range(0, len(sortedOutput)):
 122                         k = sortedOutput[i]
 123                         # can't remember what feature "if k:" adds - i think there's an
 124                         # off-by-one death the script sometimes suffers without it.
 125                         if k:
 126                                 if maxValueWidth == 0:
 127                                         testString = "%s" % outputDict[k]
 128                                         maxValueWidth = len(testString)
 129                                         testString = "(%2.2f%%)" % (outputDict[k] * 1.0 / s.totalValues * 100)
 130                                         maxPctWidth = len(testString)
 131
 132                                         # we always output a single histogram char at the end, so
 133                                         # we output one less than actual number here
 134                                         histWidth = s.width - (maxTokenLen+1) - (maxValueWidth+1) - (maxPctWidth+1) - 1
 135
 136                                         # output a header
 137                                         sys.stderr.write("Key".rjust(maxTokenLen) + "|")
 138                                         sys.stderr.write("Ct".ljust(maxValueWidth) + " ")
 139                                         sys.stderr.write("(Pct)".ljust(maxPctWidth) + " ")
 140                                         sys.stderr.write("Histogram")
 141
 142                                         # get ready for the output, but sorting gets hosed if we print the
 143                                         # colour code before the key, so put it on the line before
 144                                         sys.stderr.write(s.keyColour)
 145                                         sys.stderr.write("\n")
 146
 147                                 sys.stdout.write(str(k).rjust(maxTokenLen))
 148                                 sys.stdout.write(s.regularColour)
 149                                 sys.stdout.write("|")
 150                                 sys.stdout.write(s.ctColour)
 151
 152                                 outVal = "%s" % outputDict[k]
 153                                 sys.stdout.write(outVal.rjust(maxValueWidth) + " ")
 154
 155                                 pct = "(%2.2f%%)" % (outputDict[k] * 1.0 / s.totalValues * 100)
 156                                 sys.stdout.write(s.pctColour)
 157                                 sys.stdout.write(pct.rjust(maxPctWidth) + " ")
 158
 159                                 sys.stdout.write(s.graphColour)
 160                                 sys.stdout.write(self.histogram_bar(s, histWidth, maxVal, outputDict[k]))
 161
 162                                 if i == len(sortedOutput) - 1:
 163                                         # put the terminal back into a normal-colour mode on last entry
 164                                         sys.stdout.write(s.regularColour)
 165                                 else:
 166                                         # we do these antics of printing $keyColour on the line before
 167                                         # the key so that piping output to sort will work
 168                                         sys.stdout.write(s.keyColour)
 169                                 sys.stdout.write("\n")
 170
 171 class InputReader(object):
 172         """
 173         Reads stdin, parses it into a dictionary of key and value is number
 174         of appearances of that key in the input - this will also prune the
 175         token frequency dict on after a certain number of insertions to
 176         prevent OOME on large datasets
 177         """
 178         def __init__(self):
 179                 self.tokenDict = {}
 180
 181         def prune_keys(self, s):
 182                 newDict = {}
 183                 numKeysTransferred = 0
 184                 for k in sorted(self.tokenDict, key=self.tokenDict.get, reverse=True):
 185                         if k:
 186                                 newDict[k] = self.tokenDict[k]
 187                                 numKeysTransferred += 1
 188                                 if numKeysTransferred > s.maxKeys:
 189                                         break
 190                 self.tokenDict = newDict
 191                 s.numPrunes += 1
 192
 193         def tokenize_input(self, s):
 194                 # how to split the input... typically we split on whitespace or
 195                 # word boundaries, but the user can specify any regexp
 196                 if   s.tokenize == 'white': s.tokenize = r'\s+'
 197                 elif s.tokenize == 'word': s.tokenize = r'\W'
 198
 199                 # how to match (filter) the input... typically we want either
 200                 # all-alpha or all-numeric, but again, user can specify
 201                 if   s.matchRegexp == 'word':   s.matchRegexp = r'^[A-Z,a-z]+$'
 202                 elif s.matchRegexp == 'num':    s.matchRegexp = r'^\d+$'
 203                 elif s.matchRegexp == 'number': s.matchRegexp = r'^\d+$'
 204
 205                 # docs say these are cached, but i got about 2x speed boost
 206                 # from doing the compile
 207                 pt = re.compile(s.tokenize)
 208                 pm = re.compile(s.matchRegexp)
 209
 210                 nextStat = time.time() + s.statInterval
 211
 212                 pruneObjects = 0
 213                 for line in sys.stdin:
 214                         line = line.rstrip('\n')
 215                         if s.tokenize:
 216                                 for token in pt.split(line):
 217                                         # user desires to break line into tokens...
 218                                         s.totalObjects += 1
 219                                         if pm.match(token):
 220                                                 s.totalValues += 1
 221                                                 pruneObjects += 1
 222                                                 if token in self.tokenDict:
 223                                                         self.tokenDict[token] += 1
 224                                                 else:
 225                                                         self.tokenDict[token] = 1
 226                         else:
 227                                 # user just wants every line to be a token
 228                                 s.totalObjects += 1
 229                                 if pm.match(line):
 230                                         s.totalValues += 1
 231                                         pruneObjects += 1
 232                                         if line in self.tokenDict:
 233                                                 self.tokenDict[line] += 1
 234                                         else:
 235                                                 self.tokenDict[line] = 1
 236
 237                         # prune the hash if it gets too large
 238                         if pruneObjects >= s.keyPruneInterval:
 239                                 self.prune_keys(s)
 240                                 pruneObjects = 0
 241
 242                         if s.verbose and time.time() > nextStat:
 243                                 sys.stderr.write("tokens/lines examined: {:,d} ; hash prunes: {:,d}...".format(s.totalObjects, s.numPrunes) + chr(13))
 244                                 nextStat = time.time() + s.statInterval
 245
 246         def read_pretallied_tokens(self, s):
 247                 # the input is already just a series of keys with the frequency of the
 248                 # keys precomputed, as in "du -sb" - vk means the number is first, key
 249                 # second. kv means key first, number second
 250                 vk = re.compile(r'^\s*(\d+)\s+(.+)$')
 251                 kv = re.compile(r'^(.+?)\s+(\d+)$')
 252                 if s.graphValues == 'vk':
 253                         for line in sys.stdin:
 254                                 m = vk.match(line)
 255                                 try:
 256                                         self.tokenDict[m.group(2)] = int(m.group(1))
 257                                         s.totalValues += int(m.group(1))
 258                                         s.totalObjects += 1
 259                                 except:
 260                                         sys.stderr.write(" E Input malformed+discarded (perhaps pass -g=kv?): %s\n" % line)
 261                 elif s.graphValues == 'kv':
 262                         for line in sys.stdin:
 263                                 m = kv.match(line)
 264                                 try:
 265                                         self.tokenDict[m.group(1)] = int(m.group(2))
 266                                         s.totalValues += int(m.group(2))
 267                                         s.totalObjects += 1
 268                                 except:
 269                                         sys.stderr.write(" E Input malformed+discarded (perhaps pass -g=vk?): %s\n" % line)
 270
 271         def read_numerics(self, s, h):
 272                 # in this special mode, we print out the histogram here instead
 273                 # of later - because it's a far simpler histogram without all the
 274                 # totals, percentages, etc of the real histogram. we're just
 275                 # showing a graph of a series of numbers
 276                 lastVal = 0
 277                 maxVal = 0
 278                 maxWidth = 0
 279                 sumVal = 0
 280                 outList = []
 281                 for line in sys.stdin:
 282                         try:
 283                                 line = float(line.rstrip())
 284                         except:
 285                                 line = lastVal
 286
 287                         graphVal = 0
 288                         if s.numOnly == 'mon':
 289                                 if s.totalObjects > 0:
 290                                         graphVal = line - lastVal
 291                                 lastVal = line
 292                         else:
 293                                 graphVal = line
 294
 295                         if graphVal > maxVal:
 296                                 maxVal = graphVal
 297                                 maxWidth = len(str(graphVal))
 298
 299                         sumVal += int(graphVal)
 300
 301                         if s.totalObjects > 0:
 302                                 outList.append(graphVal)
 303                         s.totalObjects += 1
 304
 305                 # simple graphical output
 306                 for k in outList:
 307                         sys.stdout.write(s.keyColour)
 308                         sys.stdout.write(str(int(k)).rjust(maxWidth))
 309                         pct = "(%2.2f%%)" % (float(k) / float(sumVal) * 100)
 310                         sys.stdout.write(s.pctColour)
 311                         sys.stdout.write(pct.rjust(9) + " ")
 312                         sys.stdout.write(s.graphColour)
 313                         sys.stdout.write(h.histogram_bar(s, s.width - 11 - maxWidth, maxVal, k) + "\n")
 314                         sys.stdout.write(s.regularColour)
 315
 316
 317 class Settings(object):
 318         def __init__(self):
 319                 self.totalMillis = 0
 320                 self.startTime = int(time.time() * 1000)
 321                 self.endTime = 0
 322                 self.widthArg = 0
 323                 self.heightArg = 0
 324                 self.width = 80
 325                 self.height = 15
 326                 self.histogramChar = '-'
 327                 self.colourisedOutput = False
 328                 self.logarithmic = False
 329                 self.numOnly = 'XXX'
 330                 self.verbose = False
 331                 self.graphValues = ''
 332                 self.size = ''
 333                 self.tokenize = ''
 334                 # by default, everything matches (nothing is stripped out)
 335                 self.matchRegexp = '.'
 336                 # how often to give status if verbose
 337                 self.statInterval = 1.0
 338                 self.numPrunes = 0
 339                 # for colourised output
 340                 self.colourPalette = '0,0,32,35,34'
 341                 self.regularColour = ""
 342                 self.keyColour = ""
 343                 self.ctColour = ""
 344                 self.pctColour = ""
 345                 self.graphColour = ""
 346                 # for stats
 347                 self.totalObjects = 0
 348                 self.totalValues = 0
 349                 # every keyPruneInterval keys, prune the hash to maxKeys top keys
 350                 self.keyPruneInterval = 1500000
 351                 self.maxKeys = 5000
 352                 # for advanced graphing
 353                 self.unicodeMode = False
 354                 self.charWidth = 1
 355                 self.graphChars = []
 356                 self.partialBlocks =    ["▏", "▎", "▍", "▌", "▋", "▊", "▉", "█"] # char=pb
 357                 self.partialLines =     ["╸", "╾", "━"] # char=hl
 358
 359                 # rcfile grabbing/parsing if specified
 360                 if len(sys.argv) > 1 and '--rcfile' in sys.argv[1]:
 361                         rcFile = sys.argv[1].split('=')[1]
 362                         rcFile = os.path.expanduser(rcFile)
 363                 else:
 364                         rcFile = os.environ.get('HOME') + '/.distributionrc'
 365
 366                 # parse opts from the rcFile if it exists
 367                 try:
 368                         rcfileOptList = open(rcFile).readlines()
 369                         for rcOpt in rcfileOptList:
 370                                 rcOpt = rcOpt.rstrip()
 371                                 rcOpt = rcOpt.split('#')[0]
 372                                 if rcOpt != '':
 373                                         sys.argv.insert(0, rcOpt)
 374                 except:
 375                         # don't die or in fact do anything if rcfile doesn't exist
 376                         pass
 377
 378                 # manual argument parsing easier than getopts IMO
 379                 for arg in sys.argv:
 380                         if arg in ('-h', '--help'):
 381                                 doUsage(self)
 382                                 sys.exit(0)
 383                         elif arg in ("-c", "--color", "--colour"):
 384                                 self.colourisedOutput = True
 385                         elif arg in ("-g", "--graph"):
 386                                 # can pass --graph without option, will default to value/key ordering
 387                                 # since Unix prefers that for piping-to-sort reasons
 388                                 self.graphValues = 'vk'
 389                         elif arg in ("-l", "--logarithmic"):
 390                                 self.logarithmic = True
 391                         elif arg in ("-n", "--numonly"):
 392                                 self.numOnly = 'abs'
 393                         elif arg in ("-v", "--verbose"):
 394                                 self.verbose = True
 395                         else:
 396                                 argList = arg.split('=', 1)
 397                                 if argList[0] in ("-w", "--width"):
 398                                         self.widthArg = int(argList[1])
 399                                 elif argList[0] in ("-h", "--height"):
 400                                         self.heightArg = int(argList[1])
 401                                 elif argList[0] in ("-k", "--keys"):
 402                                         self.maxKeys = int(argList[1])
 403                                 elif argList[0] in ("-c", "--char"):
 404                                         self.histogramChar = argList[1]
 405                                 elif argList[0] in ("-g", "--graph"):
 406                                         self.graphValues = argList[1]
 407                                 elif argList[0] in ("-n", "--numonly"):
 408                                         self.numOnly = argList[1]
 409                                 elif argList[0] in ("-p", "--palette"):
 410                                         self.colourPalette = argList[1]
 411                                         self.colourisedOutput = True
 412                                 elif argList[0] in ("-s", "--size"):
 413                                         self.size = argList[1]
 414                                 elif argList[0] in ("-t", "--tokenize"):
 415                                         self.tokenize = argList[1]
 416                                 elif argList[0] in ("-m", "--match"):
 417                                         self.matchRegexp = argList[1]
 418
 419                 # first, size, which might be further overridden by width/height later
 420                 if self.size in ("full", "fl", "f"):
 421                         # tput will tell us the term width/height even if input is stdin
 422                         self.width, self.height = os.popen('echo "`tput cols` `tput lines`"', 'r').read().split()
 423                         # convert to numerics from string
 424                         self.width = int(self.width)
 425                         self.height = int(self.height) - 3
 426                         # need room for the verbosity output
 427                         if self.verbose == True: self.height -= 4
 428                         # in case tput went all bad, ensure some minimum size
 429                         if self.width < 40: self.width = 40
 430                         if self.height < 10: self.height = 10
 431                 elif self.size in ("small", "sm", "s"):
 432                         self.width  = 60
 433                         self.height = 10
 434                 elif self.size in ("medium", "med", "m"):
 435                         self.width  = 100
 436                         self.height = 20
 437                 elif self.size in ("large", "lg", "l"):
 438                         self.width  = 140
 439                         self.height = 35
 440
 441                 # synonyms "monotonically-increasing": derivative, difference, delta, increasing
 442                 # so all "d" "i" and "m" words will be graphing those differences
 443                 if self.numOnly[0] in ('d', 'i', 'm'): self.numOnly = 'mon'
 444                 # synonyms "actual values": absolute, actual, number, normal, noop,
 445                 # so all "a" and "n" words will graph straight up numbers
 446                 if self.numOnly[0] in ('a', 'n'): self.numOnly = 'abs'
 447
 448                 # override variables if they were explicitly given
 449                 if self.widthArg  != 0: self.width  = self.widthArg
 450                 if self.heightArg != 0: self.height = self.heightArg
 451
 452                 # maxKeys should be at least a few thousand greater than height to reduce odds
 453                 # of throwing away high-count values that appear sparingly in the data
 454                 if self.maxKeys < self.height + 3000:
 455                         self.maxKeys = self.height + 3000
 456                         if self.verbose: sys.stderr.write("Updated maxKeys to %d (height + 3000)\n" % self.maxKeys)
 457
 458                 # colour palette
 459                 if self.colourisedOutput == True:
 460                         cl = self.colourPalette.split(',')
 461                         # ANSI color code is ESC+[+NN+m where ESC=chr(27), [ and m are
 462                         # the literal characters, and NN is a two-digit number, typically
 463                         # from 31 to 37 - why is this knowledge still useful in 2014?
 464                         cl = [chr(27) + '[' + e + 'm' for e in cl]
 465                         (self.regularColour, self.keyColour, self.ctColour, self.pctColour, self.graphColour) = cl
 466
 467                 # some useful ASCII-->utf-8 substitutions
 468                 if   self.histogramChar == "ba": self.unicodeMode = True; self.histogramChar = "▬"
 469                 elif self.histogramChar == "bl": self.unicodeMode = True; self.histogramChar = "Ξ"
 470                 elif self.histogramChar == "em": self.unicodeMode = True; self.histogramChar = "—"
 471                 elif self.histogramChar == "me": self.unicodeMode = True; self.histogramChar = "⋯"
 472                 elif self.histogramChar == "di": self.unicodeMode = True; self.histogramChar = "♦"
 473                 elif self.histogramChar == "dt": self.unicodeMode = True; self.histogramChar = "•"
 474                 elif self.histogramChar == "sq": self.unicodeMode = True; self.histogramChar = "□"
 475
 476                 # sub-full character width graphing systems
 477                 if self.histogramChar == "pb":
 478                         self.charWidth = 0.125;
 479                         self.graphChars = self.partialBlocks
 480                 elif self.histogramChar == "pl":
 481                         self.charWidth = 0.3334;
 482                         self.graphChars = self.partialLines
 483
 484                 # detect whether the user has passed a multibyte unicode character directly as the histogram char
 485                 if ord(self.histogramChar[0]) >= 128:
 486                         self.unicodeMode = True
 487
 488 def doUsage(s):
 489         print("")
 490         print("usage: <commandWithOutput> | %s" % (scriptName))
 491         print("         [--rcfile=<rcFile>]")
 492         print("         [--size={sm|med|lg|full} | --width=<width> --height=<height>]")
 493         print("         [--color] [--palette=r,k,c,p,g]")
 494         print("         [--tokenize=<tokenChar>]")
 495         print("         [--graph[=[kv|vk]] [--numonly[=derivative,diff|abs,absolute,actual]]")
 496         print("         [--char=<barChars>|<substitutionString>]")
 497         print("         [--help] [--verbose]")
 498         print("  --keys=K       every %d values added, prune hash to K keys (default 5000)" % (s.keyPruneInterval))
 499         print("  --char=C       character(s) to use for histogram character, some substitutions follow:")
 500         print("        pl       Use 1/3-width unicode partial lines to simulate 3x actual terminal width")
 501         print("        pb       Use 1/8-width unicode partial blocks to simulate 8x actual terminal width")
 502         print("        ba       (▬) Bar")
 503         print("        bl       (Ξ) Building")
 504         print("        em       (—) Emdash")
 505         print("        me       (⋯) Mid-Elipses")
 506         print("        di       (♦) Diamond")
 507         print("        dt       (•) Dot")
 508         print("        sq       (□) Square")
 509         print("  --color        colourise the output")
 510         print("  --graph[=G]    input is already key/value pairs. vk is default:")
 511         print("        kv       input is ordered key then value")
 512         print("        vk       input is ordered value then key")
 513         print("  --height=N     height of histogram, headers non-inclusive, overrides --size")
 514         print("  --help         get help")
 515         print("  --logarithmic  logarithmic graph")
 516         print("  --match=RE     only match lines (or tokens) that match this regexp, some substitutions follow:")
 517         print("        word     ^[A-Z,a-z]+\$ - tokens/lines must be entirely alphabetic")
 518         print("        num      ^\\d+\$        - tokens/lines must be entirely numeric")
 519         print("  --numonly[=N]  input is numerics, simply graph values without labels")
 520         print("        actual   input is just values (default - abs, absolute are synonymous to actual)")
 521         print("        diff     input monotonically-increasing, graph differences (of 2nd and later values)")
 522         print("  --palette=P    comma-separated list of ANSI colour values for portions of the output")
 523         print("                 in this order: regular, key, count, percent, graph. implies --color.")
 524         print("  --rcfile=F     use this rcfile instead of ~/.distributionrc - must be first argument!")
 525         print("  --size=S       size of histogram, can abbreviate to single character, overridden by --width/--height")
 526         print("        small    40x10")
 527         print("        medium   80x20")
 528         print("        large    120x30")
 529         print("        full     terminal width x terminal height (approximately)")
 530         print("  --tokenize=RE  split input on regexp RE and make histogram of all resulting tokens")
 531         print("        word     [^\\w] - split on non-word characters like colons, brackets, commas, etc")
 532         print("        white    \\s    - split on whitespace")
 533         print("  --width=N      width of the histogram report, N characters, overrides --size")
 534         print("  --verbose      be verbose")
 535         print("")
 536         print("You can use single-characters options, like so: -h=25 -w=20 -v. You must still include the =")
 537         print("")
 538         print("Samples:")
 539         print("  du -sb /etc/* | %s --palette=0,37,34,33,32 --graph" % (scriptName))
 540         print("  du -sk /etc/* | awk '{print $2\" \"$1}' | %s --graph=kv" % (scriptName))
 541         print("  zcat /var/log/syslog*gz | %s --char=o --tokenize=white" % (scriptName))
 542         print("  zcat /var/log/syslog*gz | awk '{print \$5}'  | %s -t=word -m-word -h=15 -c=/" % (scriptName))
 543         print("  zcat /var/log/syslog*gz | cut -c 1-9        | %s -width=60 -height=10 -char=em" % (scriptName))
 544         print("  find /etc -type f       | cut -c 6-         | %s -tokenize=/ -w=90 -h=35 -c=dt" % (scriptName))
 545         print("  cat /usr/share/dict/words | awk '{print length(\$1)}' | %s -c=* -w=50 -h=10 | sort -n" % (scriptName))
 546         print("")
 547
 548 # simple argument parsing and call top-level routines
 549 def main(argv):
 550         # instantiate our classes
 551         s = Settings()
 552         i = InputReader()
 553         h = Histogram()
 554
 555         if s.graphValues:
 556                 # user passed g=vk or g=kv
 557                 i.read_pretallied_tokens(s)
 558         elif s.numOnly != 'XXX':
 559                 # s.numOnly was specified by the user
 560                 i.read_numerics(s, h)
 561                 # read_numerics will have output a graph already, so exit
 562                 sys.exit(0)
 563         else:
 564                 # this is the original behaviour of distribution
 565                 i.tokenize_input(s)
 566
 567         h.write_hist(s, i.tokenDict)
 568
# Remember the name the script was invoked as; doUsage() interpolates it into
# the help text. It is captured here, at import time, because constructing
# Settings later inserts rcfile options at the front of sys.argv, after which
# sys.argv[0] may no longer be the script name.
scriptName = sys.argv[0]
if __name__ == "__main__":
        main(sys.argv[1:])