regularity_check.py

   1 #!/usr/bin/env python3
   2 # encoding: utf-8
   3
   4 """Check the regularity of a keyboard layout for a reference textfile."""
   5
   6 from optparse import OptionParser
   7
### config

#: Length (in characters) of the segments into which we split the text. Currently arbitrary (~two times a twitter message)
segment_length = 270

#: The output filename for the per-segment results. Can be overwritten with the -o parameter.
output = "res.txt"
#: The output filename for the per-word results. Can be overwritten with the -w parameter.
output_words = "res-words.txt"

#: The file with the example text. Can be overwritten with the -t parameter.
textfile = "beispieltext-prosa.txt"

#: Echo the results on the console?
verbose = False

#: The default layout to check. Can be overwritten with the -l or -n parameter.
#: NOTE(review): the previous wording called this the "base for mutations" and referred to
#: "prerandomize = 0"; nothing in the code visible here mutates layouts or defines
#: prerandomize, so that text was presumably copied from another script.
LAYOUT = """xvlcw khgfqß´
uiaeo snrtdy
üöäpz bm,.j"""
  27
### predefined layouts

# Each layout is a three-line string. It can be selected by its variable name
# with the -n parameter and is then converted with string_to_layout().

# Identical to the default LAYOUT above.
Neo2 = """xvlcw khgfqß´
uiaeo snrtdy
üöäpz bm,.j"""

Qwertz = """qwert zuiopü+
asdfg hjklöä
yxcvb nm,.-"""

NordTast = """äuobp kglmfx´
aietc hdnrsß
.,üöq yzwvj"""

Andreas100504 = """jäo.ü khclfv´
teaiu gdnrsß
xqö,y bpmwz"""

Vrijbuiter = """joä,ü khclfv´
taeiu gdnrsß
xöq.y bpmwz"""

fiae = """xuc.ö vdsljq´
fiaeo mtrnhk
,üzäy bgßwp"""
  53
  54 ### Parse console arguments
  55
  56 parser = OptionParser(usage = "script to check the regularity of the layout for a reference textfile", version = "0.1")
  57 parser.add_option("-l", "--layout", type="string", dest="layout", default=LAYOUT, help="the layout to use")
  58 parser.add_option("-n", "--layout_name", type="string", dest="layout_name", default=None, help="the predefined layout to use, given by name (Neo, Qwertz, …)")
  59 parser.add_option("-o", "--output", type="string", dest="output", default=output, help="the file to use for the output")
  60 parser.add_option("-w", "--words-output", type="string", dest="output_words", default=output_words, help="the file to use for the output of the word statistics")
  61 parser.add_option("-t", "--textfile", type="string", dest="textfile", default=textfile, help="the file with the reference text")
  62 parser.add_option("-v", "--verbose", action="store_true", default=False, help="echo the results on the console")
  63
  64 (options, args) = parser.parse_args()
  65
  66 if options.layout_name is not None:
  67     try:
  68         options.layout = eval(options.layout_name)
  69     except NameError:
  70         print("the layout", options.layout_name, "is not predefined. Please use --layout to give it as string.")
  71         exit()
  72
### run

# check_neo is imported only here, after the command line has been parsed above.
# NOTE(review): presumably deliberate (import cost or side effects of check_neo) —
# confirm before moving this import to the top of the file.
# read_file is not used in the code visible here.
from check_neo import string_to_layout, total_cost, get_all_data, read_file

#: The layout to check, converted from its string form by string_to_layout().
layout = string_to_layout(options.layout)
  78
  79 def check(layout=layout, verbose=False, data=None):
  80     """Get the value for a layout using a given string as reference text."""
  81     letters, number_of_letters, repeats, number_of_bigrams, trigrams, number_of_trigrams = get_all_data(data=data)
  82
  83     total, frep_num, cost, frep_top_bottom, disbalance, no_handswitches, line_change_same_hand = total_cost(letters=letters, repeats=repeats, layout=layout, trigrams=trigrams)[:7]
  84     # total, cost_w, frep_num_w, frep_num_top_bottom_w, neighboring_fings_w, fing_disbalance_w, no_handswitches_w, badly_positioned_w, line_change_same_hand_w, no_switch_after_unbalancing_w = total_cost(letters=letters, repeats=repeats, layout=layout, trigrams=trigrams, return_weighted=True)[:10]
  85     return total / number_of_letters
  86
  87 def std(numbers):
  88     """Calculate the standard deviation from a set of numbers.
  89
  90     This simple calculation is only valid for more than 100 numbers or so. That means I use it in the invalid area. But since it’s just an arbitrary metric, that doesn’t hurt.
  91
  92     >>> std([1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1]*10)
  93     1.607945243653783
  94     """
  95     length = float(len(numbers))
  96     mean = sum(numbers)/max(1, length)
  97     var = 0
  98     for i in numbers:
  99         var += (i - mean)**2
 100     var /= max(1, (length - 1))
 101     from math import sqrt
 102     return sqrt(var)
 103
 104
 105 # processing and output (interleaved to be able to read really big files incrementally)
 106 f = open(options.textfile, "r")
 107 # clear the output file
 108 fout = open(options.output, "w")
 109 fout.write("")
 110 fout.close()
 111
 112 res = []
 113 d = f.read(segment_length)
 114 while d:
 115     cost = check(layout=layout, data=d)
 116     d = f.read(segment_length)
 117     if options.verbose:
 118         print(cost)
 119     with open(options.output, "a") as fout:
 120         fout.write(str(cost) + "\n")
 121     res.append(cost)
 122
 123 f.close()
 124 fout.close()
 125
 126 # same for words
 127 with open(options.textfile, "r") as f:
 128     data = f.read()
 129
 130 f = open(options.textfile, "r")
 131 # clear the output file
 132 fout = open(options.output_words, "w")
 133 fout.write("")
 134 fout.close()
 135
 136 res_words = []
 137 d = f.read(100*segment_length)
 138 while d:
 139     res_tmp = []
 140     for word in d.split():
 141         if word:
 142             cost = check(layout=layout, data=word)
 143             res_tmp.append(cost)
 144             if options.verbose:
 145                 print(cost)
 146     with open(options.output_words, "a") as fout:
 147         fout.writelines([str(cost) + "\n" for cost in res_tmp])
 148     res_words.extend(res_tmp)
 149     d = f.read(100*segment_length)
 150
 151
 152 f.close()
 153 fout.close()
 154
 155 print("mean value and standard deviation of the layout cost:")
 156 print("snippets of", segment_length, "letters:", sum(res)/len(res), "±", std(res))
 157 print("words:", sum(res_words)/len(res_words), "±", std(res))