4 """Check the regularity of a keyboard layout for a reference textfile."""
6 from optparse
import OptionParser
10 #: Length of the segments into which we split the text. Currently arbitrary (~two times a twitter message)
13 #: The output filename. Can be overwritten with the -o parameter.
15 output_words
= "res-words.txt"
17 #: The file with the example text.
18 textfile
= "beispieltext-prosa.txt"
20 #: Echo the results on the console?
23 #: The layout to use as base for mutations. If you want a given starting layout, also set prerandomize = 0.
24 LAYOUT
= """xvlcw khgfqß´
28 ### predefined layouts
30 Neo2
= """xvlcw khgfqß´
34 Qwertz
= """qwert zuiopü+
38 NordTast
= """äuobp kglmfx´
42 Andreas100504
= """jäo.ü khclfv´
46 Vrijbuiter
= """joä,ü khclfv´
50 fiae
= """xuc.ö vdsljq´
54 ### Parse console arguments
# Build the command line interface. Every default is taken from the
# module-level setting of the same purpose (LAYOUT, output, output_words, textfile).
parser = OptionParser(
    usage="script to check the regularity of the layout for a reference textfile",
    version="0.1",
)
parser.add_option(
    "-l", "--layout",
    type="string",
    dest="layout",
    default=LAYOUT,
    help="the layout to use",
)
parser.add_option(
    "-n", "--layout_name",
    type="string",
    dest="layout_name",
    default=None,
    help="the predefined layout to use, given by name (Neo, Qwertz, …)",
)
parser.add_option(
    "-o", "--output",
    type="string",
    dest="output",
    default=output,
    help="the file to use for the output",
)
parser.add_option(
    "-w", "--words-output",
    type="string",
    dest="output_words",
    default=output_words,
    help="the file to use for the output of the word statistics",
)
parser.add_option(
    "-t", "--textfile",
    type="string",
    dest="textfile",
    default=textfile,
    help="the file with the reference text",
)
parser.add_option(
    "-v", "--verbose",
    action="store_true",
    default=False,
    help="echo the results on the console",
)

# options carries the parsed option values; args receives the leftover
# positional arguments.
options, args = parser.parse_args()
66 if options
.layout_name
is not None:
68 options
.layout
= eval(options
.layout_name
)
70 print("the layout", options
.layout_name
, "is not predefined. Please use --layout to give it as string.")
75 from check_neo
import string_to_layout
, total_cost
, get_all_data
, read_file
77 layout
= string_to_layout(options
.layout
)
def check(layout=layout, verbose=False, data=None):
    """Return the total cost of a layout per letter of the reference text.

    The text statistics come from ``get_all_data(data=data)`` and the cost
    from ``total_cost``; the total cost is divided by the number of letters
    reported by ``get_all_data``.

    :param layout: the layout to rate; defaults to the module-level layout
        as it was when this function was defined.
    :param verbose: accepted for compatibility, not used by this function.
    :param data: the reference text, passed on unchanged to ``get_all_data``.
    :return: ``total / number_of_letters``.
    """
    # Only some of the statistics are needed below; the rest are unpacked
    # to keep the expected result shape explicit.
    (letter_stats, letter_count, repeat_stats,
     _bigram_count, trigram_stats, _trigram_count) = get_all_data(data=data)

    cost_parts = total_cost(
        letters=letter_stats,
        repeats=repeat_stats,
        layout=layout,
        trigrams=trigram_stats,
    )[:7]
    # Only the first value (the total) is used; the other six are ignored.
    (total, _frep_num, _cost, _frep_top_bottom,
     _disbalance, _no_handswitches, _line_change_same_hand) = cost_parts

    return total / letter_count
88 """Calculate the standard deviation from a set of numbers.
90 This simple calculation is only valid for more than 100 numbers or so. That means I use it in the invalid area. But since it’s just an arbitrary metric, that doesn’t hurt.
92 >>> std([1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1]*10)
95 length
= float(len(numbers
))
96 mean
= sum(numbers
)/max(1, length
)
100 var
/= max(1, (length
- 1))
101 from math
import sqrt
105 # processing and output (interleaved to be able to read really big files incrementally)
106 f
= open(options
.textfile
, "r")
107 # clear the output file
108 fout
= open(options
.output
, "w")
113 d
= f
.read(segment_length
)
115 cost
= check(layout
=layout
, data
=d
)
116 d
= f
.read(segment_length
)
119 with
open(options
.output
, "a") as fout
:
120 fout
.write(str(cost
) + "\n")
127 with
open(options
.textfile
, "r") as f
:
130 f
= open(options
.textfile
, "r")
131 # clear the output file
132 fout
= open(options
.output_words
, "w")
137 d
= f
.read(100*segment_length
)
140 for word
in d
.split():
142 cost
= check(layout
=layout
, data
=word
)
146 with
open(options
.output_words
, "a") as fout
:
147 fout
.writelines([str(cost
) + "\n" for cost
in res_tmp
])
148 res_words
.extend(res_tmp
)
149 d
= f
.read(100*segment_length
)
# Report the mean cost and its standard deviation for both result series.
print("mean value and standard deviation of the layout cost:")
print("snippets of", segment_length, "letters:", sum(res) / len(res), "±", std(res))
# The deviation shown next to the word mean has to be computed from the
# word costs themselves (res_words), not from the snippet costs (res).
print("words:", sum(res_words) / len(res_words), "±", std(res_words))