Start hacking on a C(++) implementation of the optimizer
[evolve-layout.git] / regularity_check.py
blobe62ac8fac4190092ea8852dad741e134b1a46f15
1 #!/usr/bin/env python3
2 # encoding: utf-8
4 """Check the regularity of a keyboard layout for a reference textfile."""
6 from optparse import OptionParser
### config

#: Number of characters per text snippet that is rated on its own.
#: The value is arbitrary (roughly two times a twitter message).
segment_length = 270

#: Default output file for the snippet costs (see the -o option).
output = "res.txt"
#: Default output file for the per-word costs (see the -w option).
output_words = "res-words.txt"

#: Default file with the reference text (see the -t option).
textfile = "beispieltext-prosa.txt"

#: Echo the results on the console? (see the -v option)
verbose = False

#: The layout that is checked when none is given on the command line.
#: One row of keys per line.
LAYOUT = (
    "xvlcw khgfqß´\n"
    "uiaeo snrtdy\n"
    "üöäpz bm,.j"
)

### predefined layouts (selectable by name with --layout_name)

Neo2 = (
    "xvlcw khgfqß´\n"
    "uiaeo snrtdy\n"
    "üöäpz bm,.j"
)

Qwertz = (
    "qwert zuiopü+\n"
    "asdfg hjklöä\n"
    "yxcvb nm,.-"
)

NordTast = (
    "äuobp kglmfx´\n"
    "aietc hdnrsß\n"
    ".,üöq yzwvj"
)

Andreas100504 = (
    "jäo.ü khclfv´\n"
    "teaiu gdnrsß\n"
    "xqö,y bpmwz"
)

Vrijbuiter = (
    "joä,ü khclfv´\n"
    "taeiu gdnrsß\n"
    "xöq.y bpmwz"
)

fiae = (
    "xuc.ö vdsljq´\n"
    "fiaeo mtrnhk\n"
    ",üzäy bgßwp"
)
### Parse console arguments

parser = OptionParser(usage="script to check the regularity of the layout for a reference textfile", version="0.1")
parser.add_option("-l", "--layout", type="string", dest="layout", default=LAYOUT, help="the layout to use")
parser.add_option("-n", "--layout_name", type="string", dest="layout_name", default=None, help="the predefined layout to use, given by name (Neo2, Qwertz, …)")
parser.add_option("-o", "--output", type="string", dest="output", default=output, help="the file to use for the output")
parser.add_option("-w", "--words-output", type="string", dest="output_words", default=output_words, help="the file to use for the output of the word statistics")
parser.add_option("-t", "--textfile", type="string", dest="textfile", default=textfile, help="the file with the reference text")
# The default comes from the config section, like for the other options.
parser.add_option("-v", "--verbose", action="store_true", default=verbose, help="echo the results on the console")

(options, args) = parser.parse_args()

if options.layout_name is not None:
    # SECURITY: this used to be eval(options.layout_name), which executes
    # arbitrary code given on the command line and only handled NameError.
    # A plain lookup among the module-level names resolves the same layout
    # names (Neo2, Qwertz, …) without evaluating anything.
    predefined_layout = globals().get(options.layout_name)
    if not isinstance(predefined_layout, str):
        # SystemExit with a message writes it to stderr and exits with
        # status 1, so callers can detect the failure.
        raise SystemExit("the layout {} is not predefined. Please use --layout to give it as string.".format(options.layout_name))
    # A layout given by name takes precedence over --layout.
    options.layout = predefined_layout

### run

from check_neo import string_to_layout, total_cost, get_all_data, read_file

#: The layout to check, converted from its string form by check_neo.
layout = string_to_layout(options.layout)
def check(layout=layout, verbose=False, data=None):
    """Rate a layout against a reference text and return its cost per letter.

    The statistics of the text come from ``get_all_data`` and the cost from
    ``total_cost``; only the first value of the cost result (the total) is
    used, divided by the number of letters reported for the text.

    :param layout: the layout to rate; defaults to the layout chosen on the
        command line.
    :param verbose: accepted for interface compatibility; not used here.
    :param data: the reference text, passed on to ``get_all_data``.
        NOTE(review): what ``get_all_data`` does for ``data=None`` is not
        visible in this file.
    :return: total cost divided by the number of letters.
    """
    text_statistics = get_all_data(data=data)
    # Bigram and trigram counts are part of the result but not needed here.
    letters, number_of_letters, repeats, _bigram_count, trigrams, _trigram_count = text_statistics

    cost_parts = total_cost(letters=letters, repeats=repeats, layout=layout, trigrams=trigrams)
    # The first seven entries are unpacked; only the total is used.
    total, _frep_num, _cost, _frep_top_bottom, _disbalance, _no_handswitches, _line_change_same_hand = cost_parts[:7]

    return total / number_of_letters
def std(numbers):
    """Return the standard deviation of a sequence of numbers.

    The squared deviations from the mean are summed up and divided by
    ``len(numbers) - 1``. Both divisors are clamped to at least 1, so an
    empty or single-element sequence gives 0.0 instead of an error.

    This simple estimate is only meant for about 100 values or more. It is
    also used below that range here, which is accepted because the result
    only serves as an arbitrary metric.

    >>> std([1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1]*10)
    1.607945243653783
    """
    from math import sqrt

    count = float(len(numbers))
    average = sum(numbers) / max(1, count)

    squared_deviations = 0
    for value in numbers:
        squared_deviations += (value - average) ** 2

    variance = squared_deviations / max(1, (count - 1))
    return sqrt(variance)
# processing and output (interleaved to be able to read really big files incrementally)


def _read_chunks(handle, size):
    """Yield successive chunks of at most `size` characters until the end of the file."""
    return iter(lambda: handle.read(size), "")


def _word_batches(handle, chunk_size):
    """Yield one list of complete words per chunk read from `handle`.

    A chunk can end in the middle of a word. That last fragment is held back
    and prepended to the next chunk, so every word is yielded exactly once
    and in one piece.
    """
    pending = ""
    for chunk in _read_chunks(handle, chunk_size):
        chunk = pending + chunk
        words = chunk.split()
        # No trailing whitespace means the last word may continue in the next chunk.
        pending = words.pop() if words and not chunk[-1].isspace() else ""
        yield words
    if pending:
        yield [pending]


def _write_costs(fout, costs):
    """Write one cost per line and flush, so partial results are on disk during long runs."""
    fout.writelines(str(cost) + "\n" for cost in costs)
    fout.flush()


# cost of each snippet of segment_length characters
res = []
# Opening the output with "w" also clears results from a previous run.
with open(options.textfile, "r") as f, open(options.output, "w") as fout:
    for segment in _read_chunks(f, segment_length):
        cost = check(layout=layout, data=segment)
        if options.verbose:
            print(cost)
        _write_costs(fout, [cost])
        res.append(cost)

# same for words
res_words = []
with open(options.textfile, "r") as f, open(options.output_words, "w") as fout:
    for words in _word_batches(f, 100*segment_length):
        res_tmp = []
        for word in words:
            cost = check(layout=layout, data=word)
            res_tmp.append(cost)
            if options.verbose:
                print(cost)
        _write_costs(fout, res_tmp)
        res_words.extend(res_tmp)

# max(1, …) keeps the summary from dividing by zero for an empty text file,
# following the same convention as std().
print("mean value and standard deviation of the layout cost:")
print("snippets of", segment_length, "letters:", sum(res)/max(1, len(res)), "±", std(res))
# The deviation of the word costs has to be calculated from res_words, not res.
print("words:", sum(res_words)/max(1, len(res_words)), "±", std(res_words))