output actual characters instead of codepoints
[evolve-layout.git] / layout_cost.py
blobdb17adecd62cea386cc68c7ea383de102f2d78d0
1 #!/usr/bin/env python3
2 # encoding: utf-8
4 """Calculate the cost of a layout."""
6 from layout_base import *
8 from ngrams import get_all_data, letters_in_file_precalculated, trigrams_in_file_precalculated, trigrams_in_file, split_uppercase_trigrams, repeats_in_file_precalculated, repeats_in_file_sorted, unique_sort, letters_in_file, split_uppercase_letters, repeats_in_file, split_uppercase_repeats
11 ### Cost Functions
13 def key_position_cost_from_file(data=None, letters=None, layout=NEO_LAYOUT, cost_per_key=COST_PER_KEY):
14 """Count the total cost due to key positions.
16 >>> data = read_file("testfile")
17 >>> key_position_cost_from_file(data, cost_per_key=TEST_COST_PER_KEY)
18 150
19 >>> print(data[:3])
20 uia
21 >>> key_position_cost_from_file(data, cost_per_key=TEST_COST_PER_KEY)
22 150
23 >>> key_position_cost_from_file(data[:3], cost_per_key=TEST_COST_PER_KEY)
25 >>> lay = switch_keys(["ax"], layout=NEO_LAYOUT)
26 >>> key_position_cost_from_file(data[:3], cost_per_key=TEST_COST_PER_KEY, layout=lay)
28 >>> data = "UIaĥK\\n"
29 >>> key_position_cost_from_file(data, cost_per_key=TEST_COST_PER_KEY, layout=lay)
31 """
32 if data is not None:
33 letters = letters_in_file(data)
34 elif letters is None:
35 raise Exception("Need either letters or data")
36 letters = split_uppercase_letters(letters, layout=layout)
37 cost = 0
38 for num, letter in letters:
39 pos = find_key(letter, layout=layout)
40 if pos is None: # not found
41 cost += num * COST_PER_KEY_NOT_FOUND
42 # shift, M3 and M4
43 elif COST_LAYER_ADDITION[pos[2]:]:
44 cost += num * (cost_per_key[pos[0]][pos[1]] + COST_LAYER_ADDITION[pos[2]])
45 else:
46 # layer has no addition cost ⇒ undefined layer (higher than layer 6!). Just take the base key…
47 cost += num * cost_per_key[pos[0]][pos[1]]
48 return cost
50 def finger_repeats_from_file(data=None, repeats=None, count_same_key=False, layout=NEO_LAYOUT):
51 """Get a list of two char strings from the file, which repeat the same finger.
53 >>> data = read_file("testfile")
54 >>> finger_repeats_from_file(data)
55 [(1, 'Mittel_R', 'rg'), (1, 'Zeige_L', 'eo'), (1, 'Klein_R', 'd\\n')]
56 >>> finger_repeats_from_file(data, count_same_key=True)
57 [(2, 'Mittel_L', 'aa'), (1, 'Mittel_R', 'rg'), (1, 'Zeige_L', 'eo'), (1, 'Klein_R', 'd\\n'), (1, 'Mittel_L', 'aa'), (1, 'Mittel_L', 'aa')]
58 """
59 if data is not None:
60 repeats = repeats_in_file(data)
61 elif repeats is None:
62 raise Exception("Need either repeats or data")
64 repeats = split_uppercase_repeats(repeats, layout=layout)
66 finger_repeats = []
67 for number, pair in repeats:
68 key1 = pair[0]
69 key2 = pair[1]
70 finger1 = key_to_finger(key1, layout=layout)
71 finger2 = key_to_finger(key2, layout=layout)
73 if finger1 and finger2 and finger1 == finger2:
74 finger_repeats.append((number, finger1, key1+key2))
75 if not count_same_key:
76 finger_repeats = [r for r in finger_repeats if not r[2][0] == r[2][1]]
77 return finger_repeats
79 def finger_repeats_top_and_bottom(finger_repeats, layout):
80 """Check which of the finger repeats go from the top to the bottom row or vice versa."""
81 top_down_repeats = []
82 for number, finger, letters in finger_repeats:
83 pos0 = find_key(letters[0], layout)
84 pos1 = find_key(letters[1], layout)
85 # count it as top down, if the finger has to move over more than one col.
86 if pos0 and pos1 and abs(pos0[0] - pos1[0]) > 1:
87 top_down_repeats.append((number, finger, letters))
88 return top_down_repeats
90 def neighboring_fingers(data=None, repeats=None, layout=NEO_LAYOUT):
91 """Return the number of times we have to use fingers next to each other.
93 >>> data = read_file("testfile")
94 >>> neighboring_fingers(data)
96 """
97 if data is not None:
98 repeats = repeats_in_file(data)
99 elif repeats is None:
100 raise Exception("Need either repeats or data")
102 repeats = split_uppercase_repeats(repeats, layout=layout)
104 fingtups = ((num, (key_to_finger(rep[0]), key_to_finger(rep[1]))) for num, rep in repeats if key_to_finger(rep[0]) and key_to_finger(rep[1]))
105 neighcosts = (num*FINGER_SWITCH_COST[fings[0]][fings[1]] for num, fings in fingtups if fings[1] in FINGER_SWITCH_COST[fings[0]])
106 return sum(neighcosts)
109 def no_handswitch_after_unbalancing_key(data=None, repeats=None, layout=NEO_LAYOUT):
110 """Check how often we have no handswitching after an unbalancing key, weighted by the severity of the unbalancing. This also helps avoiding a handswitch directly after an uppercase key (because shift severly unbalances und with the handswitch we’d effectively have no handswitch after the shift (kind of a shift collision, too).
112 >>> data = read_file("testfile")
113 >>> no_handswitch_after_unbalancing_key(data)
115 >>> reps = [(3, "Ab")]
116 >>> no_handswitch_after_unbalancing_key(repeats=reps)
118 >>> no_handswitch_after_unbalancing_key(repeats=reps, layout=QWERTZ_LAYOUT)
120 >>> reps = [(3, "Ga")]
121 >>> no_handswitch_after_unbalancing_key(repeats=reps, layout=QWERTZ_LAYOUT)
124 if data is not None:
125 repeats = repeats_in_file(data)
126 elif repeats is None:
127 raise Exception("Need either repeats or data")
129 repeats = split_uppercase_repeats(repeats, layout=layout)
131 no_switch = 0
132 for number, pair in repeats:
133 key1 = pair[0]
134 key2 = pair[1]
135 pos1 = find_key(key1, layout=layout)
136 pos2 = find_key(key2, layout=layout)
137 if pos1 and pos2 and pos1 in UNBALANCING_POSITIONS:
138 # check if we”re on the same hand
139 finger1 = key_to_finger(key1, layout=layout)
140 finger2 = key_to_finger(key2, layout=layout)
141 if finger1 and finger2 and finger1[-1] == finger2[-1]:
142 no_switch += UNBALANCING_POSITIONS.get(pos1, 0)*number
143 return no_switch
145 def line_changes(data=None, repeats=None, layout=NEO_LAYOUT):
146 """Get the number of (line changes divided by the horizontal distance) squared: (rows²/dist)².
148 Don’t care about the hand (left index low and right high is still not nice).
150 >>> data = read_file("testfile")
151 >>> line_changes(data)
152 16.29
154 if data is not None:
155 repeats = repeats_in_file(data)
156 elif repeats is None:
157 raise Exception("Need either repeats or data")
159 repeats = split_uppercase_repeats(repeats, layout=layout)
161 line_changes = 0
162 for number, pair in repeats:
163 key1 = pair[0]
164 key2 = pair[1]
165 pos1 = find_key(key1, layout=layout)
166 pos2 = find_key(key2, layout=layout)
167 if pos1 and pos2:
168 if not WEIGHT_COUNT_ROW_CHANGES_BETWEEN_HANDS:
169 # check if we”re on the same hand
170 finger1 = key_to_finger(key1, layout=layout)
171 finger2 = key_to_finger(key2, layout=layout)
172 if finger1 and finger2 and finger1[-1] != finger2[-1]:
173 continue # the keys are on different hands, so we don’t count them as row change.
174 # row 3 is shifted 1 key to the right → fix that.
175 if pos1[0] == 3:
176 pos1 = pos1[0], pos1[1] -1, pos1[2]
177 if pos2[0] == 3:
178 pos2 = pos2[0], pos2[1] -1, pos2[2]
179 num_rows = abs(pos1[0] - pos2[0])
180 finger_distance = abs(pos1[1] - pos2[1])
181 if num_rows:
182 cost = num_rows**2 / max(0.25, finger_distance)
183 line_changes += cost**2 * number
184 return line_changes
186 def load_per_finger(letters, layout=NEO_LAYOUT, print_load_per_finger=False):
187 """Calculate the number of times each finger is being used.
189 >>> letters = [(1, "u"), (5, "i"), (10, "2"), (3, " ")]
190 >>> load_per_finger(letters)
191 {'': 10, 'Klein_L': 1, 'Ring_L': 5, 'Daumen_L': 3}
193 letters = split_uppercase_letters(letters, layout)
194 fingers = {}
195 for num, key in letters:
196 finger = key_to_finger(key, layout=layout)
197 if finger in fingers:
198 fingers[finger] += num
199 else: fingers[finger] = num
200 # Debug: Print the load per finger
201 if print_load_per_finger:
202 from pprint import pprint
203 pprint(fingers)
204 return fingers
206 def load_per_hand(letters=None, finger_load=None, layout=NEO_LAYOUT):
207 """Calculate the load per hand.
209 >>> letters = [(1, "u"), (5, "i"), (10, "2"), (3, " "), (2, "g")]
210 >>> load_per_hand(letters)
211 [9, 2]
212 >>> finger_load = {'': 10, 'Klein_L': 1, 'Ring_L': 5, 'Daumen_L': 3, 'Mittel_R': 2}
213 >>> load_per_hand(finger_load = finger_load)
214 [9, 2]
217 if finger_load is None and letters is not None:
218 finger_load = load_per_finger(letters, layout=layout)
219 elif letters is None and finger_load is None:
220 raise Exception("Need at least letters or precalculated finger_load")
221 hand_load = [sum([finger_load[f] for f in finger_load if f.endswith(hand)]) for hand in ("L", "R")]
222 return hand_load
225 def std(numbers):
226 """Calculate the standard deviation from a set of numbers.
228 This simple calculation is only valid for more than 100 numbers or so. That means I use it in the invalid area. But since it’s just an arbitrary metric, that doesn’t hurt.
230 >>> std([1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1]*10)
231 1.607945243653783
233 length = float(len(numbers))
234 mean = sum(numbers)/max(1, length)
235 var = 0
236 for i in numbers:
237 var += (i - mean)**2
238 var /= max(1, (length - 1))
239 from math import sqrt
240 return sqrt(var)
242 def finger_balance(letters, layout=NEO_LAYOUT, intended_balance=WEIGHT_INTENDED_FINGER_LOAD_LEFT_PINKY_TO_RIGHT_PINKY):
243 """Calculate a cost based on the balance between the fingers (using the standard deviation).
245 Optimum: All fingers get used exactly the same number of times.
247 We ignore unmapped keys ('').
249 #: the usage of each finger: {finger1: num, finger2: num, …}
250 fingers = load_per_finger(letters, layout)
251 # make sure, all fingers are in the list (for very short texts)
252 for fing in FINGER_NAMES:
253 if not fing in fingers and not fing[:6] == "Daumen":
254 fingers[fing] = 0
255 # remove the unmapped keys
256 if "" in fingers:
257 del fingers[""]
258 for finger in fingers:
259 idx = FINGER_NAMES.index(finger)
260 multiplier = intended_balance[idx]
261 fingers[finger] /= multiplier
262 disbalance = std(fingers.values())
263 return disbalance
265 def _no_handswitching(trigrams, key_hand_table, key_pos_horizontal_table, WEIGHT_NO_HANDSWITCH_AFTER_DIRECTION_CHANGE, WEIGHT_NO_HANDSWITCH_WITHOUT_DIRECTION_CHANGE):
266 """Do the hard work for no_handswitching without any call to outer functions."""
267 no_switch = 0
268 for num, trig in trigrams:
269 # if we have a shift in it, we also have a handswitch.
270 if not trig[0] in key_hand_table or not trig[1] in key_hand_table or not trig[2] in key_hand_table:
271 continue
272 hand0 = key_hand_table.get(trig[0], None)
273 hand1 = key_hand_table.get(trig[1], None)
274 hand2 = key_hand_table.get(trig[2], None)
275 if hand0 == hand1 and hand1 == hand2:
276 pos0 = key_pos_horizontal_table[trig[0]]
277 pos1 = key_pos_horizontal_table[trig[1]]
278 pos2 = key_pos_horizontal_table[trig[2]]
279 if pos0 > pos1 and pos1 < pos2 or pos0 < pos1 and pos1 > pos2:
280 num *= WEIGHT_NO_HANDSWITCH_AFTER_DIRECTION_CHANGE
281 else:
282 num *= WEIGHT_NO_HANDSWITCH_WITHOUT_DIRECTION_CHANGE
283 no_switch += num
284 return no_switch
287 def no_handswitching(trigrams, layout=NEO_LAYOUT):
288 """Add a penalty when the hands aren’t switched at least once in every three letters. Doesn’t take any uppercase trigrams into account.
290 If there also is a direction change in the trigram, the number of times it occurs gets multiplied by WEIGHT_NO_HANDSWITCH_AFTER_DIRECTION_CHANGE.
292 (TODO? WEIGHT_TRIGRAM_FINGER_REPEAT_WITHOUT_KEY_REPEAT)
294 TODO: Include the shifts again and split per keyboard. If we did it now, the layout would get optimized for switching after every uppercase letter (as any trigram with a shift and two letters on the same hand would be counted as half a trigram without handswitching). The effect is that it ignores about 7-9% of the trigrams.
296 >>> trigs = [(1, "nrt"), (5, "ige"), (3, "udi")]
297 >>> no_handswitching(trigs, layout=NEO_LAYOUT)
300 # optimization: we precalculate the fingers for all relevent keys (the ones which are being mutated).
301 key_hand_table = {}
302 for key in abc_full:
303 #without "⇧⇗ " -> too many false positives when we include the shifts. This also gets rid of anything with uppercase letters in it.
304 finger = key_to_finger(key, layout=layout)
305 if finger and not finger[:6] == "Daumen":
306 key_hand_table[key] = finger[-1]
308 key_pos_horizontal_table = {}
309 for key in abc_full:
310 #without "⇧⇗ " -> too many false positives when we include the shifts. This also gets rid of anything with uppercase letters in it.
311 pos = find_key(key, layout=layout)
312 if pos is not None:
313 key_pos_horizontal_table[key] = pos[1]
315 return _no_handswitching(trigrams, key_hand_table, key_pos_horizontal_table, WEIGHT_NO_HANDSWITCH_AFTER_DIRECTION_CHANGE, WEIGHT_NO_HANDSWITCH_WITHOUT_DIRECTION_CHANGE)
318 def badly_positioned_shortcut_keys(layout=NEO_LAYOUT, keys="xcvz"):
319 """Check, if x, c, v and z are on the left hand and well positioned (much used shortcuts)."""
320 badly_positioned = []
321 for key in keys:
322 pos = find_key(key, layout=layout)
323 # well means not yet left stretch, in row 3, col 5 is also OK.
324 if not pos[1] < 5 or (pos[0] == 3 and pos[1] > 5):
325 badly_positioned.append(1)
326 return sum(badly_positioned)
329 def total_cost(data=None, letters=None, repeats=None, layout=NEO_LAYOUT, cost_per_key=COST_PER_KEY, trigrams=None, intended_balance=WEIGHT_INTENDED_FINGER_LOAD_LEFT_PINKY_TO_RIGHT_PINKY, return_weighted=False):
330 """Compute a total cost from all costs we have available, wheighted.
332 TODO: reenable the doctests, after the parameters have settled, or pass ALL parameters through the functions.
334 @param return_weighted: Set to true to get the weighted values instead of the real values.
336 >>> data = read_file("testfile")
337 >>> #total_cost(data, cost_per_key=TEST_COST_PER_KEY, intended_balance=TEST_WEIGHT_INTENDED_FINGER_LOAD_LEFT_PINKY_TO_RIGHT_PINKY)
339 (209.4, 3, 150, 0, 3.3380918415851206, 3, 7)
341 # the raw costs
342 if data is not None:
343 finger_repeats = finger_repeats_from_file(data, layout=layout)
344 position_cost = key_position_cost_from_file(data, layout=layout, cost_per_key=cost_per_key)
345 letters = letters_in_file(data)
346 repeats = repeats_in_file(data)
347 trigrams = trigrams_in_file(data)
348 elif letters is None or repeats is None:
349 raise Exception("Need either repeats und letters or data")
350 else:
351 finger_repeats = finger_repeats_from_file(repeats=repeats, layout=layout)
352 position_cost = key_position_cost_from_file(letters=letters, layout=layout, cost_per_key=cost_per_key)
354 no_handswitches = no_handswitching(trigrams, layout=layout)
356 frep_num = sum([num for num, fing, rep in finger_repeats])
357 finger_repeats_top_bottom = finger_repeats_top_and_bottom(finger_repeats, layout=layout)
358 frep_num_top_bottom = sum([num for num, fing, rep in finger_repeats_top_bottom])
360 # the number of times neighboring fingers are used – weighted by the ease of transition for the respective fingers
361 neighboring_fings = neighboring_fingers(repeats=repeats, layout=layout)
363 # the number of changes between lines on the same hand.
364 line_change_same_hand = line_changes(repeats=repeats, layout=layout)
366 # how often the hand wasn’t switched after an unbalancing key, weighted by the severity of the unbalancing.
367 no_switch_after_unbalancing = no_handswitch_after_unbalancing_key(repeats=repeats, layout=layout)
369 # the balance between fingers
370 disbalance = finger_balance(letters, layout=layout, intended_balance=intended_balance)
371 number_of_letters = sum([i for i, s in letters])
373 # the position of the keys xcvz - penalty if they are not among the first 5 keys, counted from left, horizontally.
374 badly_positioned = badly_positioned_shortcut_keys(layout=layout)
376 # the load distribution on the hands: [left keystrokes, right keystrokes]
377 hand_load = load_per_hand(letters, layout=layout)
378 # the disbalance between the hands. Keystrokes of the left / total strokes - 0.5. From 0 to 0.5, ignoring the direction.
379 hand_disbalance = abs(hand_load[0]/max(1, sum(hand_load)) - 0.5)
381 # add all together and weight them
382 total = WEIGHT_POSITION * position_cost
383 total += WEIGHT_FINGER_REPEATS * frep_num # not 0.5, since there may be 2 times as many 2-tuples as letters, but the repeats are calculated on the in-between, and these are single.
384 total += WEIGHT_FINGER_REPEATS_TOP_BOTTOM * frep_num_top_bottom
385 total += WEIGHT_FINGER_SWITCH * neighboring_fings
386 total += WEIGHT_FINGER_DISBALANCE * disbalance # needs a minimum number of letters to be useful.
387 total += WEIGHT_TOO_LITTLE_HANDSWITCHING * no_handswitches
388 total += WEIGHT_XCVZ_ON_BAD_POSITION * number_of_letters * badly_positioned
389 total += WEIGHT_BIGRAM_ROW_CHANGE_PER_ROW * line_change_same_hand
390 total += WEIGHT_NO_HANDSWITCH_AFTER_UNBALANCING_KEY * no_switch_after_unbalancing
391 total += WEIGHT_HAND_DISBALANCE * hand_disbalance * number_of_letters
393 if not return_weighted:
394 return total, frep_num, position_cost, frep_num_top_bottom, disbalance, no_handswitches, line_change_same_hand, hand_load
395 else:
396 return total, WEIGHT_POSITION * position_cost, WEIGHT_FINGER_REPEATS * frep_num , WEIGHT_FINGER_REPEATS_TOP_BOTTOM * frep_num_top_bottom, WEIGHT_FINGER_SWITCH * neighboring_fings, WEIGHT_FINGER_DISBALANCE * disbalance, WEIGHT_TOO_LITTLE_HANDSWITCHING * no_handswitches, WEIGHT_XCVZ_ON_BAD_POSITION * number_of_letters * badly_positioned, WEIGHT_BIGRAM_ROW_CHANGE_PER_ROW * line_change_same_hand, WEIGHT_NO_HANDSWITCH_AFTER_UNBALANCING_KEY * no_switch_after_unbalancing, WEIGHT_HAND_DISBALANCE * hand_disbalance * number_of_letters