check_neo.py

   1 #!/usr/bin/env python3
   2 # encoding: utf-8
   3
   4 """Optimize keyboard layouts evolutionally (with mutations).
   5
   6 """
   7
   8 # python 2.6 compatibility via 3to2
   9 from __future__ import print_function
  10
  11 __usage__ = """Usage:
  12
  13 - check_neo.py --help (display this text)
  14
  15 - check_neo.py [-q] [-v] [-o <file>]
  16   compare the Neo layout with others, using the included datafiles(*gramme.txt).
  17   -q only shows the results for the Neo layout.
  18   -v shows several additional metrics which are included in the total cost.
  19   -o writes the output to a file instead of printing it.
  20
  21 - check_neo.py --file <file> [--switch <lx,wq>] [-q] [-v]
  22   run the script on the file.
  23   --switch switches letters on the neo keyboard (lx,wq switches l for x and w for q).
  24   -q removes the qwertz comparision.
  25   -v adds the list of finger repeats.
  26
  27 - check_neo.py [-v] [--file <file>] --layout-string "khßwv ä.uozj
  28   dnclr aitesb
  29   fpgmx ,üöyq"
  30   check the layout given by a layout string.
  31   -v gives more statistical info on the layout
  32   --file <file> uses a file as corpus for checking the layout.
  33
  34
  35 - check_neo.py --evolve <iterations> [--prerandomize <num_switches>] [-q] [-v] [--controlled-evolution] [--controlled-tail]
  36   randomly permutate keys on the Neo keyboard to see if a better layout emerges.
  37   --controlled-evolution tells it to use the horribly slow and deterministic code which always chooses the best possible change in each step.
  38   --controlled-tail makes it first do <iterations> random mutations and then a controlled evolution, until it can’t go any further. controlled_tail and controlled-evolution are exclusive. When both are used, the tail wins.
  39   --prerandomize tells it to do num_switches random switches before beginning the evolution. Use >100000 to get a mostly random keyboard layout as starting point.
  40
  41 - check_neo.py --best-random-layout <num of random layouts to try> [--prerandomize <num_switches>] [-q]
  42   --prerandomize selects the number of random switches to do to get a random keyboard.
  43
  44 - ./check_neo.py --check "[[('^'),('1'),('2'),('3'),('4'),('5'),('6'),('7'),('8'),('9'),('0'),('-'),('\\`'),('←')], # Zahlenreihe (0)
  45 [('⇥'),('x'),('v'),('l'),('c'),('w'),('k'),('h'),('g'),('f'),('q'),('ß'),('´'),()], # Reihe 1
  46 [('⇩'),('u'),('i'),('a'),('e'),('o'),('s'),('n'),('r'),('t'),('d'),('y'),('⇘'),('\\n')], # Reihe 2
  47 [('⇧'),(),('ü'),('ö'),('ä'),('p'),('z'),('b'),('m'),(','),('.'),('j'),('⇗')],        # Reihe 3
  48 [(), (), (), (' '), (), (), (), ()] # Reihe 4 mit Leertaste
  49 ]" [-q]
  50   check the layout passed on the commandline (mind the shell escapes!)
  51
  52 - check_neo.py [-v] [-q] --check-string "öckäy zhmlß,
  53 atieo dsnru.
  54 xpfüq bgvwj"
  55   check a layout string for layer 1.
  56
  57 - check_neo.py --test (run doctests)
  58
  59 Note: If --prerandomize is set to 1000000 or more, it just does a real shuffle instead of prerandomizing.
  60
  61 """
  62
  63 from design import __design__
  64
  65 __doc__ += __usage__ + __design__
  66
  67 __version__ = "0.1.2"
  68
  69 __copyright__ = """2010 © Arne Babenhauserheide
  70
  71 License: GPLv3 or later
  72 """
  73
  74 from sys import argv
# forced fileoutput instead of printing
# If "-o <file>" is given on the command line, remember the target file in the
# module-level FILE (read by result()) and drop both tokens from this module's
# own copy of the argument list. Only the local name `argv` is rebound here.
# NOTE(review): "-o" as the very last argument raises IndexError at argv[idx+1].
if "-o" in argv:
    idx = argv.index("-o")
    FILE = argv[idx+1]
    argv = argv[:idx] + argv[idx+2:]
else:
    # no output file requested: result() falls back to printing via info()
    FILE = None
  82
  83 def info(*args, **kwds):
  84     return print(*args, **kwds)
  85
  86 def result(*args, **kwds):
  87     if FILE is not None:
  88         with open(FILE, "a", encoding='utf-8') as f:
  89             for i in args:
  90                 f.write(str(i) + " ")
  91             f.write("\n")
  92     else:
  93         info(*args, **kwds)
  94
  95 from layout_cost import *
  96 from math import log10, log
  97
  98 # TODO: Split the different ways of evolution into evolve.py. Requirement: Don’t give any output.
  99
 100 ### Evolution
 101
 102 def switch_keys(keypairs, layout=NEO_LAYOUT):
 103     """Switch keys in the layout, so we don't have to fiddle with actual layout files.
 104
 105     >>> lay = switch_keys([], layout = NEO_LAYOUT)
 106     >>> lay == NEO_LAYOUT
 107     True
 108     >>> lay = switch_keys(["lx", "wq"], layout = NEO_LAYOUT)
 109     >>> get_key((1, 1, 0), layout=lay)
 110     'l'
 111     >>> get_key((1, 3, 0), layout=lay)
 112     'x'
 113     >>> get_key((1, 5, 0), layout=lay)
 114     'q'
 115     >>> get_key((1, 10, 0), layout=lay)
 116     'w'
 117     >>> get_key((1, 1, 1), layout=lay)
 118     'L'
 119     >>> get_key((1, 3, 1), layout=lay)
 120     'X'
 121     >>> get_key((1, 5, 1), layout=lay)
 122     'Q'
 123     >>> get_key((1, 10, 1), layout=lay)
 124     'W'
 125     >>> find_key("l", layout=lay) == (1, 1, 0)
 126     True
 127     >>> find_key("L", layout=lay) == (1, 1, 1)
 128     True
 129     >>> NEO_LAYOUT_lxwq == lay[:5]
 130     True
 131     >>> lay = switch_keys(["lx"], layout = NEO_LAYOUT)
 132     >>> NEO_LAYOUT_lx == lay[:5]
 133     True
 134     >>> a = find_key("a", layout=lay)
 135     >>> lay = switch_keys(["ab"], layout=lay)
 136     >>> a == find_key("b", layout=lay)
 137     True
 138     >>> dot = find_key(".", layout=NEO_LAYOUT)
 139     >>> d = find_key("d", layout=NEO_LAYOUT)
 140     >>> lay = switch_keys([".d"], layout=NEO_LAYOUT)
 141     >>> d == find_key(".", layout=lay)
 142     True
 143     >>> dot == find_key("d", layout=lay)
 144     True
 145     """
 146     lay = deepcopy(layout)
 147     from pprint import pprint
 148     #pprint(lay)
 149     for pair in keypairs:
 150             pos0 = find_key(pair[0], layout=lay)
 151             pos1 = find_key(pair[1], layout=lay)
 152
 153             if pair[1].upper() == pair[1]:
 154                 tmp0 = (pair[1], ) + tuple(lay[pos0[0]][pos0[1]][1:])
 155             else:
 156                 tmp0 = (pair[1], pair[1].upper()) + tuple(lay[pos0[0]][pos0[1]][2:])
 157
 158             if pair[0].upper() == pair[0]:
 159                 tmp1 = (pair[0], ) + tuple(lay[pos1[0]][pos1[1]][1:])
 160             else:
 161                 tmp1 = (pair[0], pair[0].upper()) + tuple(lay[pos1[0]][pos1[1]][2:])
 162
 163             lay[pos0[0]][pos0[1]] = tmp0
 164             lay[pos1[0]][pos1[1]] = tmp1
 165             update_letter_to_key_cache_multiple(pair+pair.upper(), layout=lay)
 166             prev = pair
 167         #except:
 168         #    pprint(lay)
 169         #    print(prev, pair, pos0, pos1, tmp0, tmp1)
 170         #    exit()
 171
 172     return lay
 173
 174 def randomize_keyboard(abc, num_switches, layout=NEO_LAYOUT):
 175         """Do num_switches random keyswitches on the layout and
 176         @return: the randomized layout."""
 177         keypairs = []
 178         num_letters = len(abc)
 179         # for very high number of switches just do use shuffle.
 180         if num_switches >= 1000:
 181             from random import shuffle
 182             abc_list = list(abc)
 183             abc_shuffled = list(abc)
 184             shuffle(abc_shuffled)
 185             for i in range(num_letters):
 186                 orig = abc_list[i]
 187                 new = abc_shuffled[i]
 188                 if orig != new and not orig+new in keypairs and not new+orig in keypairs:
 189                     new_in_list = abc_list.index(new)
 190                     abc_list[new_in_list] = orig
 191                     keypairs.append(orig+new)
 192             lay = switch_keys(keypairs, layout=deepcopy(layout))
 193             return lay, keypairs
 194
 195         # incomplete shuffling (only find the given number of switches), slower because we need to avoid dupliates the hard way.
 196         from random import choice
 197         max_unique_tries = 1000
 198         for i in range(num_switches):
 199             key1 = choice(abc)
 200             key2 = choice(abc)
 201             # get unique keypairs, the not nice but very easy to understand way.
 202             tries = 0
 203             while (key2 == key1 or key1+key2 in keypairs or key2+key1 in keypairs) and (num_switches <= num_letters or tries < max_unique_tries):
 204                 key1 = choice(abc)
 205                 key2 = choice(abc)
 206                 if num_switches > num_letters:
 207                     tries += log(len(keypairs)+1, 2) + 1
 208             keypairs.append(key1+key2)
 209         lay = switch_keys(keypairs, layout=deepcopy(layout))
 210         return lay, keypairs
 211
 212 def find_the_best_random_keyboard(letters, repeats, trigrams, num_tries, num_switches=1000, layout=NEO_LAYOUT, abc=abc, quiet=False):
 213         """Create num_tries random keyboards (starting from the layout and doing num_switches random keyswitches), compare them and only keep the best (by total_cost)."""
 214         lay, keypairs = randomize_keyboard(abc, num_switches, layout)
 215         cost = total_cost(letters=letters, repeats=repeats, layout=lay, trigrams=trigrams)[0]
 216         if not quiet:
 217             info("cost of the first random layout:", cost)
 218         for i in range(max(0, num_tries-1)):
 219             if not quiet:
 220                 info("-", i, "/", num_tries)
 221             lay_tmp, keypairs = randomize_keyboard(abc, num_switches, layout)
 222             cost_tmp = total_cost(letters=letters, repeats=repeats, layout=lay_tmp, trigrams=trigrams)[0]
 223             if cost_tmp < cost:
 224                 lay = lay_tmp
 225                 cost = cost_tmp
 226                 if not quiet:
 227                     info("better:", cost)
 228         return lay, cost
 229
 230 def random_evolution_step(letters, repeats, trigrams, num_switches, layout, abc, cost, quiet):
 231         """Do one random switch. Keep it, if it is beneficial."""
 232         lay, keypairs = randomize_keyboard(abc, num_switches, layout)
 233         new_cost, frep, pos_cost = total_cost(letters=letters, repeats=repeats, layout=lay, trigrams=trigrams)[:3]
 234         if new_cost < cost:
 235             return lay, new_cost, cost - new_cost, keypairs, frep, pos_cost
 236         else:
 237             return layout, cost, 0, keypairs, frep, pos_cost
 238
 239 def controlled_evolution_step(letters, repeats, trigrams, num_switches, layout, abc, cost, quiet, cost_per_key=COST_PER_KEY):
 240     """Do the most beneficial change. Keep it, if the new layout is better than the old.
 241
 242     TODO: reenable the doctests, after the parameters have settled, or pass ALL parameters through the functions.
 243
 244     >>> data = read_file("testfile")
 245     >>> repeats = repeats_in_file(data)
 246     >>> letters = letters_in_file(data)
 247     >>> trigrams = trigrams_in_file(data)
 248     >>> #controlled_evolution_step(letters, repeats, trigrams, 1, NEO_LAYOUT, "reo", 190, quiet=False, cost_per_key=TEST_COST_PER_KEY)
 249
 250     # checked switch ('rr',) 201.4
 251     # checked switch ('re',) 181.4
 252     # checked switch ('ro',) 184.4
 253     # checked switch ('ee',) 201.4
 254     # checked switch ('eo',) 204.4
 255     # checked switch ('oo',) 201.4
 256     0.00019 finger repetition: 1e-06 position cost: 0.00015
 257     [['^', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '-', '`', ()], [(), 'x', 'v', 'l', 'c', 'w', 'k', 'h', 'g', 'f', 'q', 'ß', '´', ()], ['⇩', 'u', 'i', 'a', 'r', 'o', 's', 'n', 'e', 't', 'd', 'y', '⇘', '\\n'], ['⇧', (), 'ü', 'ö', 'ä', 'p', 'z', 'b', 'm', ',', '.', 'j', '⇗'], [(), (), (), ' ', (), (), (), ()]]
 258     ([['^', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '-', '`', ()], [(), 'x', 'v', 'l', 'c', 'w', 'k', 'h', 'g', 'f', 'q', 'ß', '´', ()], ['⇩', 'u', 'i', 'a', 'r', 'o', 's', 'n', 'e', 't', 'd', 'y', '⇘', '\\n'], ['⇧', (), 'ü', 'ö', 'ä', 'p', 'z', 'b', 'm', ',', '.', 'j', '⇗'], [(), (), (), ' ', (), (), (), ()]], 181.4, 8.599999999999994)
 259     >>> #controlled_evolution_step(letters, repeats, trigrams, 1, NEO_LAYOUT, "reo", 25, False, cost_per_key=TEST_COST_PER_KEY)
 260
 261     # checked switch ('rr',) 201.4
 262     # checked switch ('re',) 181.4
 263     # checked switch ('ro',) 184.4
 264     # checked switch ('ee',) 201.4
 265     # checked switch ('eo',) 204.4
 266     # checked switch ('oo',) 201.4
 267     worse ('oo',) ([['^', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '-', '`', ()], [(), 'x', 'v', 'l', 'c', 'w', 'k', 'h', 'g', 'f', 'q', 'ß', '´', ()], ['⇩', 'u', 'i', 'a', 'e', 'o', 's', 'n', 'r', 't', 'd', 'y', '⇘', '\\n'], ['⇧', (), 'ü', 'ö', 'ä', 'p', 'z', 'b', 'm', ',', '.', 'j', '⇗'], [(), (), (), ' ', (), (), (), ()]], 25, 0)
 268     """
 269     from random import choice
 270     # First create one long list of possible switches
 271     keypairs = []
 272     best_pairs = []
 273     for key1 in abc:
 274         for key2 in abc[abc.index(key1)+1:]:
 275             keypairs.append(key1+key2)
 276
 277     # then combine it into possible switch tuples (O(N²))
 278     switches = []
 279     for i in range(num_switches):
 280         switches.append([]) # layers
 281     for pair1 in keypairs:
 282         # pair 1 list
 283         for i in range(len(keypairs) ** min(1, num_switches - 1)): # ** (num_switches - 1)):
 284             switches[0].append(pair1) # [[1, 1, 1]]
 285         for i in range(min(1, num_switches - 1)): # num_switches - 1): # TODO: Make it work for num > 2.
 286             #for j in range(len(keypairs) ** max(0, (num_switches - 2))):
 287                 for pair_x in keypairs: #[keypairs.index(pair1)+1:]:
 288                     # add additional possible pairs.
 289                     switches[i+1].append(pair_x) # [[1, 1, 1], [1, 2, 3]]
 290     switches = list(zip(*switches[:2]))
 291
 292     # results for 1 step: [(cost, frep, pos_cost, layout), ...]
 293     step_results = []
 294     for keypairs in switches:
 295         lay = switch_keys(keypairs, layout=deepcopy(layout))
 296         new_cost, frep, pos_cost = total_cost(letters=letters, repeats=repeats, layout=lay, cost_per_key=cost_per_key, trigrams=trigrams)[:3]
 297         step_results.append((new_cost, frep, pos_cost, deepcopy(keypairs), lay))
 298         if not quiet:
 299             info("# checked switch", keypairs, new_cost)
 300
 301     if min(step_results)[0] < cost:
 302         best = min(step_results)
 303         lay, new_cost, best_pairs = best[-1], best[0], best[-2]
 304         new_cost, frep, pos_cost = total_cost(letters=letters, repeats=repeats, layout=lay, cost_per_key=cost_per_key, trigrams=trigrams)[:3]
 305         return lay, new_cost, cost - new_cost, best_pairs, frep, pos_cost
 306     else:
 307         return layout, cost, 0, keypairs, frep, pos_cost
 308
 309 def evolve(letters, repeats, trigrams, layout=NEO_LAYOUT, iterations=3000, abc=abc, quiet=False, controlled=False, controlled_tail=False, anneal=0, anneal_step=100):
 310     """Repeatedly switch a layout randomly and do the same with the new layout,
 311     if it provides a better total score. Can't be tested easily => Check the source.
 312
 313     To only mutate a subset of keys, just pass them as
 314     @param abc: the keys to permutate over.
 315     @param controlled: Do a slow controlled run, where all possible steps are checked and only the best is chosen?
 316     @param anneal: start by switching 1 + int(anneal) keypairs, reduce by 1 after anneal_step iterations.
 317     """
 318     cost = total_cost(letters=letters, repeats=repeats, layout=layout, trigrams=trigrams)[0]
 319     consecutive_fails = 0
 320     # take anneal_step steps for the first anneal level, too
 321     if anneal:
 322         anneal += 1
 323         anneal -= 1/anneal_step
 324
 325     for i in range(iterations):
 326         if not controlled:
 327             # increase the size of the changes when the system seems to become stable (1000 consecutive fails: ~ 2*24*23 = every combination tried) to avoid deterministic purely local minima.
 328             if anneal > 0:
 329                 step = int(anneal + 1)
 330                 anneal -= 1/anneal_step
 331             else:
 332                 step = int(log10(consecutive_fails + 1) / 3 + 1)
 333             lay, cost, better, keypairs, frep, pos_cost = random_evolution_step(letters, repeats, trigrams, step, layout, abc, cost, quiet)
 334         else:
 335             step = int(consecutive_fails / 2 + 1)
 336             # only do the best possible step instead => damn expensive. For a single switch about 10 min per run.
 337             lay, cost, better, keypairs, frep, pos_cost = controlled_evolution_step(letters, repeats, trigrams, step, layout, abc, cost, quiet)
 338         if better:
 339             consecutive_fails = 0
 340             # save the good mutation
 341             layout = lay
 342             if not quiet:
 343                 info(cost / 1000000, keypairs, "finger repetition:", frep / 1000000, "position cost:", pos_cost / 1000000)
 344                 info(format_layer_1_string(lay))
 345         else:
 346             consecutive_fails += 1
 347             if not quiet:
 348                 info("worse", keypairs, end = " ")
 349         if not quiet:
 350             info("- " + str(i) + " / " + str(iterations))
 351
 352     if controlled_tail:
 353         # second round: do controlled evolution steps, as long as they result in better layouts (do a full controlled optimization of the result).
 354         if not quiet:
 355             info("controlled evolution, until there’s no more to improve")
 356         better = True
 357         steps = 0
 358         while better:
 359             # only do the best possible step instead => damn expensive. For a single switch about 10 min per run.
 360             lay, cost, better, keypairs, frep, pos_cost = controlled_evolution_step(letters, repeats, trigrams, 1, layout=layout, abc=abc, cost=cost, quiet=quiet)
 361             if better:
 362                 # save the good mutation - yes, this could go at the start of the loop, but that wouldn’t be as clear.
 363                 layout = lay
 364             if not quiet:
 365                 info("-", steps, "/ ?", keypairs)
 366                 info(format_layer_1_string(lay))
 367     return layout, cost
 368
 369
 370 def combine_genetically(layout1, layout2):
 371     """Combine two layouts genetically (randomly)."""
 372     from random import randint
 373     switchlist = []
 374     for letter in abc:
 375         if randint(0, 1) == 1:
 376             pos = find_key(letter, layout=layout1)
 377             replacement = get_key(pos, layout=layout2)
 378             switchlist.append(letter+replacement)
 379     res = deepcopy(switch_keys(switchlist, layout=layout1))
 380     return res
 381
 382
 383 ### UI ###
 384
 385 def format_keyboard_layout(layout):
 386     """Format a keyboard layout to look like a real keyboard."""
 387     neo = """
 388 ┌───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬──────┐
 389 │ ^ │ 1 │ 2 │ 3 │ 4 │ 5 │ 6 │ 7 │ 8 │ 9 │ 0 │ - │ ` │ Back │
 390 ├───┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬────┤
 391 │Tab  │ x │ v │ l │ c │ w │ k │ h │ g │ f │ q │ ß │ ´ │ Ret│
 392 ├─────┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┐   │
 393 │M3    │ u │ i │ a │ e │ o │ s │ n │ r │ t │ d │ y │ M3│   │
 394 ├────┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴───┴───┤
 395 │Ums │ M4│ ü │ ö │ ä │ p │ z │ b │ m │ , │ . │ j │  Umsch  │
 396 ├────┼───┴┬──┴─┬─┴───┴───┴───┴───┴───┴─┬─┴──┬┴───┼────┬────┤
 397 │Strg│ Fe │ Al │      Leerzeichen      │ M4 │ Fe │ Me │Strg│
 398 └────┴────┴────┴───────────────────────┴────┴────┴────┴────┘
 399
 400     """
 401     lay = "┌───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬───┬──────┐\n"
 402     lay +="│ "
 403     lay += " │ ".join([l[0] for l in layout[0]])
 404     lay += "    │\n"
 405     lay += "├───┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬────┤\n"
 406     lay += "│   "
 407     lay += " │ ".join([l[0] for l in layout[1][:-1]])
 408     lay += " │ Ret│\n"
 409     lay += "├─────┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┬──┴┐   │\n"
 410     lay += "│    "
 411     if layout[2][-2]:
 412         lay += " │ ".join([l[0] for l in layout[2][:-1]])
 413     else:
 414         lay += " │ ".join([l[0] for l in layout[2][:-2]])
 415         lay += " │  "
 416     lay += " │   │\n"
 417     lay += "├────┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴─┬─┴───┴───┤\n"
 418     if layout[3][1]:
 419         lay += "│  "
 420         lay += " │ ".join([l[0] for l in layout[3]])
 421     else:
 422         lay +="│  ⇧ │ M4│ "
 423         lay += " │ ".join([l[0] for l in layout[3][2:]])
 424     lay += "       │\n"
 425     lay += """├────┼───┴┬──┴─┬─┴───┴───┴───┴───┴───┴─┬─┴──┬┴───┼────┬────┤
 426 │Strg│ Fe │ Alt│      Leerzeichen      │ M4 │ Fe │ Me │Strg│
 427 └────┴────┴────┴───────────────────────┴────┴────┴────┴────┘"""
 428     return lay
 429
 430
 431 def short_number(s, letters=8):
 432     """shorten a number to the given number of letters"""
 433     if not "e" in str(s):
 434         return str(s)[:letters]
 435     else:
 436         idx = min(letters-4, str(s).index("e"))
 437         idx = max(0, idx)
 438         return str(s)[:idx] + str(s)[-4:]
 439
 440 def csv_data(layout, letters=None, repeats=None, number_of_letters=None, number_of_bigrams=None, trigrams=None, number_of_trigrams=None, data=None):
 441     """Return a list with data for a csv-line for the layout.
 442
 443     @return cost/word, position_cost, fingerrepeat, finger_disbalance, frep_top_bottom, handswitching, rows², shortcut_keys, unbalancing, patterns"""
 444     letters, number_of_letters, repeats, number_of_bigrams, trigrams, number_of_trigrams = get_all_data(
 445         data=data,
 446         letters=letters, number_of_letters=number_of_letters,
 447         repeats=repeats, number_of_bigrams=number_of_bigrams,
 448         trigrams=trigrams, number_of_trigrams=number_of_trigrams
 449         )
 450
 451     # weighted
 452     total, cost_w, frep_num_w, frep_num_top_bottom_w, neighboring_fings_w, fing_disbalance_w, no_handswitches_w, badly_positioned_w, line_change_same_hand_w, no_switch_after_unbalancing_w = total_cost(letters=letters, repeats=repeats, layout=layout, trigrams=trigrams, return_weighted=True)[:10]
 453
 454     line = []
 455
 456     line.append(total/max(1, number_of_letters)/100)
 457     line.append(cost_w/1000000000)
 458     line.append(frep_num_w/1000000000)
 459     line.append(fing_disbalance_w/1000000000)
 460     line.append(frep_num_top_bottom_w/1000000000)
 461     line.append(no_handswitches_w/1000000000)
 462     line.append(line_change_same_hand_w/1000000000)
 463     line.append(badly_positioned_w/1000000000)
 464     line.append(no_switch_after_unbalancing_w/1000000000)
 465     line.append(neighboring_fings_w/1000000000)
 466     return line
 467
 468
 469 def print_layout_with_statistics(layout, letters=None, repeats=None, number_of_letters=None, number_of_bigrams=None, print_layout=True, trigrams=None, number_of_trigrams=None, verbose=False, data=None, shorten_numbers=False):
 470     """Print a layout along with statistics."""
 471     letters, number_of_letters, repeats, number_of_bigrams, trigrams, number_of_trigrams = get_all_data(
 472         data=data,
 473         letters=letters, number_of_letters=number_of_letters,
 474         repeats=repeats, number_of_bigrams=number_of_bigrams,
 475         trigrams=trigrams, number_of_trigrams=number_of_trigrams
 476         )
 477
 478     res = ""
 479     def c(*args):
 480         """concatenate the args to a string similar to how print() does it, just simpler."""
 481         return " ".join((str(i) for i in args)) + "\n"
 482
 483     if print_layout:
 484         res += c(format_layer_1_string(layout))
 485         res += c(format_keyboard_layout(layout))
 486         #from pprint import pprint
 487         #pprint(layout[:5])
 488
 489     # unweighted
 490     total, frep_num, cost, frep_top_bottom, disbalance, no_handswitches, line_change_same_hand, hand_load = total_cost(letters=letters, repeats=repeats, layout=layout, trigrams=trigrams)[:8]
 491     # weighted
 492     total, cost_w, frep_num_w, frep_num_top_bottom_w, neighboring_fings_w, fing_disbalance_w, no_handswitches_w, badly_positioned_w, line_change_same_hand_w, no_switch_after_unbalancing_w, hand_disbalance_w = total_cost(letters=letters, repeats=repeats, layout=layout, trigrams=trigrams, return_weighted=True)[:11]
 493
 494     if shorten_numbers:
 495         sn = short_number
 496     else:
 497         sn = str
 498
 499     res += c("#", sn(total/max(1, number_of_letters)/100), "x100 total penalty per letter")
 500     res += c("#", sn(total / 10000000000), "x10 billion total penalty compared to notime-noeffort")
 501     res += c("#", sn(cost / max(1, number_of_letters)), "mean key position cost in file 1gramme.txt", "(", str(cost_w/1000000000), ")")
 502     res += c("#", sn(100 * frep_num / max(1, number_of_bigrams)), "% finger repeats in file 2gramme.txt", "(", str(frep_num_w/1000000000), ")")
 503     if verbose:
 504         res += c("#", sn(disbalance / 1000000), "million keystrokes disbalance of the fingers", "(", str(fing_disbalance_w/1000000000), ")")
 505         res += c("#", sn(100 * frep_top_bottom / max(1, number_of_bigrams)), "% finger repeats top to bottom or vice versa", "(", str(frep_num_top_bottom_w/1000000000), ")")
 506         res += c("#", sn(100 * no_handswitches / max(1, number_of_trigrams)), "% of trigrams have no handswitching (after direction change counted x", WEIGHT_NO_HANDSWITCH_AFTER_DIRECTION_CHANGE, ")", "(", str(no_handswitches_w/1000000000), ")")
 507         res += c("#", sn(line_change_same_hand / 1000000000), "billion (rows²/dist)² to cross", "(", str(line_change_same_hand_w/1000000000), ")")
 508         res += c("#", sn(abs(hand_load[0]/max(1, sum(hand_load)) - 0.5)), "hand disbalance. Left:", hand_load[0]/max(1, sum(hand_load)), "%, Right:", hand_load[1]/max(1, sum(hand_load)), "% (", str(hand_disbalance_w/1000000000), ")")
 509         res += c("#", sn(badly_positioned_w/1000000000), "badly positioned shortcut keys (weighted).")
 510         res += c("#", sn(no_switch_after_unbalancing_w/1000000000), "no handswitching after unbalancing key (weighted).")
 511         res += c("#", sn(neighboring_fings_w/1000000000), "movement pattern cost (weighted).")
 512     result(res)
 513
 514
 515 def find_a_qwertzy_layout(steps, prerandomize, quiet, verbose):
 516     """Find a layout with values similar to qwertz."""
 517     info("# Qwertzing Layout")
 518     #data = read_file("/tmp/sskreszta")
 519     data1 = read_file("1gramme.txt")
 520     letters = letters_in_file_precalculated(data1)
 521     #letters = letters_in_file(data)
 522     datalen1 = sum([i for i, s in letters])
 523
 524     data2 = read_file("2gramme.txt")
 525     repeats = repeats_in_file_precalculated(data2)
 526     #repeats = repeats_in_file(data)
 527     datalen2 = sum([i for i, s in repeats])
 528
 529     data3 = read_file("3gramme.txt")
 530     trigrams = trigrams_in_file_precalculated(data3)
 531     number_of_trigrams = sum([i for i, s in trigrams])
 532
 533     if prerandomize:
 534         if not quiet:
 535             info("doing", prerandomize, "randomization switches.")
 536         lay, keypairs = randomize_keyboard(abc, num_switches=prerandomize, layout=NEO_LAYOUT)
 537     else: lay = NEO_LAYOUT
 538
 539     qvals = total_cost(letters=letters, repeats=repeats, layout=QWERTZ_LAYOUT, trigrams=trigrams, return_weighted=True)
 540
 541     qhand_load = load_per_hand(letters, layout=QWERTZ_LAYOUT)
 542
 543     def compare_with_qwertz(lay, base=QWERTZ_LAYOUT):
 544         """compare the layout with qwertz."""
 545         vals = total_cost(letters=letters, repeats=repeats, layout=lay, trigrams=trigrams, return_weighted=True)
 546         hand_load = load_per_hand(letters, layout=lay)
 547         diff = 0
 548         to_compare = zip(vals, qvals)
 549         for l,q in to_compare:
 550             diff += (l - q)**2
 551         return diff
 552
 553     diff = compare_with_qwertz(lay)
 554
 555     for i in range(steps):
 556         lay = deepcopy(lay)
 557         l, keypairs = randomize_keyboard(abc, num_switches=prerandomize, layout=lay)
 558         d = compare_with_qwertz(l)
 559         if d < diff:
 560             info("# qwertzer")
 561             info(format_layer_1_string(l))
 562             lay = deepcopy(l)
 563             diff = d
 564
 565     print_layout_with_statistics(lay, letters=letters, repeats=repeats, number_of_letters=datalen1, number_of_bigrams=datalen2, trigrams=trigrams, number_of_trigrams=number_of_trigrams, verbose=verbose)
 566
 567
 568 def evolve_a_layout(steps, prerandomize, controlled, quiet, verbose, controlled_tail, starting_layout=NEO_LAYOUT, data=None, anneal=0, anneal_step=100):
 569     """Evolve a layout by selecting the fittest of random mutations step by step."""
 570     letters, datalen1, repeats, datalen2, trigrams, number_of_trigrams = get_all_data(data=data)
 571
 572     if prerandomize:
 573         if not quiet:
 574             info("doing", prerandomize, "prerandomization switches.")
 575         lay, keypairs = randomize_keyboard(abc, num_switches=prerandomize, layout=starting_layout)
 576     else: lay = starting_layout
 577
 578     lay, cost = evolve(letters, repeats, trigrams, layout=lay, iterations=steps, quiet=quiet, controlled=controlled, controlled_tail = controlled_tail, anneal=anneal, anneal_step=anneal_step)
 579
 580     result("\n# Evolved Layout")
 581     print_layout_with_statistics(lay, letters=letters, repeats=repeats, number_of_letters=datalen1, number_of_bigrams=datalen2, trigrams=trigrams, number_of_trigrams=number_of_trigrams, verbose=verbose)
 582
 583
 584 def evolution_challenge(layout=NEO_LAYOUT, challengers=100, rounds=10, iterations=20, abc=abc, prerandomize=10000, quiet=False, controlled=False, data=None):
 585      """Run a challenge between many randomized layouts, then combine the best pseudo-genetically (random) and add them to the challenge."""
 586      # Data for evaluating layouts.
 587      letters, datalen1, repeats, datalen2, trigrams, number_of_trigrams = get_all_data(data=data)
 588
 589      from pprint import pprint
 590
 591      #: the maximum number of genetic combination tries to get a unique layout (no clone)
 592      max_unique_tries = 200
 593
 594      layouts = [] # [(cost, lay), …]
 595      if not quiet:
 596          info("# create the", challengers, "starting layouts")
 597      for i in range(challengers):
 598
 599          info("#", i, "of", challengers)
 600          lay, keypairs = randomize_keyboard(abc, num_switches=prerandomize, layout=layout)
 601          lay, cost = evolve(letters, repeats, trigrams, layout=lay, iterations=iterations, quiet=True)
 602          layouts.append((cost, lay))
 603
 604      # run the challenge
 605      for round in range(rounds):
 606          # sort and throw out the worst
 607          layouts.sort()
 608          if not quiet:
 609              info("\n# round", round)
 610              info("# top five")
 611              for cost, lay in layouts[:5]:
 612                  print_layout_with_statistics(lay, letters=letters, repeats=repeats, number_of_letters=datalen1, number_of_bigrams=datalen2, trigrams=trigrams, number_of_trigrams=number_of_trigrams)
 613          info("\n# killing the worst", int(challengers * 3/4)-1, "layouts")
 614          layouts = deepcopy(layouts[:int(challengers / 4)+1])
 615
 616          # combine the best and worst to get new ones.
 617          info("\n# breeding new layouts")
 618          for i in range(int(challengers/8)):
 619             info(i, "of", int(challengers/4-1), "from weak and strong")
 620             new = deepcopy(combine_genetically(layouts[i][1], layouts[-i - 1][1]))
 621             # evolve, then append
 622             new, cost = deepcopy(evolve(letters, repeats, trigrams, layout=new, iterations=iterations, quiet=True))
 623             # make sure we have no clones :)
 624             tries = 0
 625             while (cost, new) in layouts and tries < max_unique_tries:
 626                 new = deepcopy(combine_genetically(layouts[i][1], layouts[-i - 1][1]))
 627                 new, cost = deepcopy(evolve(letters, repeats, trigrams, layout=new, iterations=iterations, quiet=True))
 628                 tries += 1
 629             layouts.append((cost, new))
 630
 631         # also combine the best one with the upper half
 632          for i in range(max(0, int(challengers/8))):
 633             info(i+int(challengers/8), "of", int(challengers/4-1), "from the strongest with the top half")
 634             new = deepcopy(combine_genetically(layouts[0][1], layouts[i+1][1]))
 635             new, cost = evolve(letters, repeats, trigrams, layout=new, iterations=iterations, quiet=True)
 636             # make sure we have no clones :)
 637             tries = 0
 638             while (cost, new) in layouts and tries < max_unique_tries:
 639                 new = deepcopy(combine_genetically(layouts[0][1], layouts[i+1][1]))
 640                 new, cost = evolve(letters, repeats, trigrams, layout=new, iterations=iterations, quiet=True)
 641                 tries += 1
 642             layouts.append((cost, new))
 643
 644          # and new random ones
 645          info("\n# and fill up the ranks with random layouts")
 646          for i in range(challengers - len(layouts)):
 647              info(i, "of", int(challengers/2))
 648              lay, keypairs = deepcopy(randomize_keyboard(abc, num_switches=prerandomize, layout=layout))
 649              lay, cost = evolve(letters, repeats, trigrams, layout=lay, iterations=iterations, quiet=True)
 650              # make sure we have no clones :)
 651              tries = 0
 652              while (cost, lay) in layouts and tries < max_unique_tries:
 653                  lay, keypairs = deepcopy(randomize_keyboard(abc, num_switches=prerandomize, layout=layout))
 654                  lay, cost = evolve(letters, repeats, trigrams, layout=lay, iterations=iterations, quiet=True)
 655                  tries += 1
 656              layouts.append((cost, lay))
 657
 658      info("# Top 3")
 659      layouts.sort()
 660
 661      for num, name in [(0, "\n# gold"), (1, "\n# silver"), (2, "\n# bronze")][:len(layouts)]:
 662          cost, lay = layouts[num]
 663          info(name)
 664          print_layout_with_statistics(lay, letters, repeats, datalen1, datalen2, trigrams=trigrams, number_of_trigrams=number_of_trigrams)
 665
 666 def best_random_layout(number, prerandomize, quiet=False, data=None, layout=NEO_LAYOUT):
 667     """Select the best of a number of randomly created layouts."""
 668     info("Selecting the best from", number, "random layouts.")
 669     letters, datalen1, repeats, datalen2, trigrams, number_of_trigrams = get_all_data(data=data)
 670
 671     if prerandomize:
 672         lay, cost = find_the_best_random_keyboard(letters, repeats, trigrams, num_tries=number, num_switches=prerandomize, layout=layout, abc=abc, quiet=quiet)
 673     else:
 674         lay, cost = find_the_best_random_keyboard(letters, repeats, trigrams, num_tries=number, layout=layout, abc=abc, quiet=quiet)
 675
 676     info("\nBest of the random layouts")
 677     print_layout_with_statistics(lay, letters=letters, repeats=repeats, number_of_letters=datalen1, number_of_bigrams=datalen2, trigrams=trigrams, number_of_trigrams=number_of_trigrams)
 678
 679
 680 def compare_a_layout(quiet, verbose, data=None, layout=NEO_LAYOUT):
 681     """Check the performance of the neo layout, optionally scoring it against Qwertz."""
 682     if layout == NEO_LAYOUT:
 683         info("Neo")
 684     letters, datalen1, repeats, datalen2, trigrams, number_of_trigrams = get_all_data(data=data)
 685
 686     print_layout_with_statistics(layout, letters=letters, repeats=repeats, number_of_letters=datalen1, number_of_bigrams=datalen2, print_layout=not quiet, trigrams=trigrams, number_of_trigrams=number_of_trigrams, verbose=verbose, shorten_numbers=True)
 687
 688     if not quiet:
 689         info("\nQwertz for comparision")
 690         print_layout_with_statistics(QWERTZ_LAYOUT, letters=letters, repeats=repeats, number_of_letters=datalen1, number_of_bigrams=datalen2, trigrams=trigrams, number_of_trigrams=number_of_trigrams, verbose=verbose, shorten_numbers=True)
 691         info("\nAnd the Nordtast Layout")
 692         print_layout_with_statistics(NORDTAST_LAYOUT, letters=letters, repeats=repeats, number_of_letters=datalen1, number_of_bigrams=datalen2, trigrams=trigrams, number_of_trigrams=number_of_trigrams, verbose=verbose, shorten_numbers=True)
 693         info("\nAnd Dvorak")
 694         print_layout_with_statistics(DVORAK_LAYOUT, letters=letters, repeats=repeats, number_of_letters=datalen1, number_of_bigrams=datalen2, trigrams=trigrams, number_of_trigrams=number_of_trigrams, verbose=verbose, shorten_numbers=True)
 695         info("\nAnd Colemak")
 696         print_layout_with_statistics(COLEMAK_LAYOUT, letters=letters, repeats=repeats, number_of_letters=datalen1, number_of_bigrams=datalen2, trigrams=trigrams, number_of_trigrams=number_of_trigrams, verbose=verbose, shorten_numbers=True)
 697
# Alias kept for compatibility: the name check_the_neo_layout still resolves
# to compare_a_layout (the command line entry point of this file calls it).
check_the_neo_layout = compare_a_layout
 700
 701 def check_a_layout_from_shell(layout, quiet, verbose, data=None):
 702     """Check a layout we get passed as shell argument."""
 703     print_layout_with_statistics(layout, print_layout=not quiet, verbose=verbose, data=data, shorten_numbers=True)
 704
 705
 706 def check_a_layout_string_from_shell(layout_string, quiet, verbose, base_layout=NEO_LAYOUT, data=None):
 707     """Check a string passed via shell and formatted as
 708
 709     öckäy zhmlß,´
 710     atieo dsnru.
 711     xpfüq bgvwj
 712
 713     or
 714
 715     qwert zuiopü+
 716     asdfg hjklöä
 717     <yxcvb nm,.-
 718     """
 719     layout = string_to_layout(layout_string, base_layout)
 720     print_layout_with_statistics(layout, print_layout=not quiet, verbose=verbose, data=data, shorten_numbers=True)
 721
 722 ### Self-Test
 723
 724 if __name__ == "__main__":
 725     from sys import argv
 726
 727     if "--test" in argv:
 728         from doctest import testmod
 729         testmod()
 730         exit()
 731
 732     from optparse import OptionParser
 733
 734     parser = OptionParser(description="Check and evolve keyboard layouts. Actions can’t be combined, the other options and arguments can. Without action, it checks layouts.")
 735     # actions
 736     parser.add_option("--best-random-layout", dest="best_random_layout", type="int", default=0,
 737                       help="(action) create the given number of random layouts and show the best one", metavar="number")
 738     parser.add_option("--challenge", dest="challenge_rounds", type="int", default=0,
 739                       help="(action) do an evolution challenge for the given number of rounds. Slow", metavar="rounds")
 740     parser.add_option("--check", dest="check",
 741                       help="(action)check a layout from shell. ignores --base*", metavar="layout")
 742     parser.add_option("--check-string", dest="check_string",
 743                       help="(action) check a layout_string from shell", metavar="layout_string")
 744     parser.add_option("--evolve", dest="evolve", type="int", default=0,
 745                       help="(action) do the given number of random mutation steps", metavar="number")
 746
 747     # options
 748     parser.add_option("--base", dest="base", default=None,
 749                       help="take the given layout as base", metavar="layout")
 750     parser.add_option("--base-name", dest="base_name", default=None,
 751                       help="take the named layout as base. I.e.: NEO_LAYOUT or QWERTZ_LAYOUT", metavar="layout_name")
 752     parser.add_option("--base-string", dest="base_string", default=None,
 753                       help="take the given layout as base for layer 1. Compatible with --base and --base-name", metavar="layout")
 754     parser.add_option("--challenge-evolution-steps", dest="challenge_evolution_steps", type="int", default=3,
 755                       help="the number of individual evolution steps to take between evolution challenge rounds", metavar="number")
 756     parser.add_option("--challengers", dest="challengers", type="int", default=16,
 757                       help="the number of challengers for an evolution challenge", metavar="number")
 758     parser.add_option("-f", "--file", dest="file",
 759                       help="get the ngram data from the given textfile", metavar="textfile")
 760     parser.add_option("--prerandomize", dest="prerandomize", type="int", default=1000,
 761                       help="do the given number of randomization steps", metavar="number")
 762     parser.add_option("--anneal", dest="anneal", type="int", default=0,
 763                       help="start with number additional keyswitches per iteration and slowly reduce them (simulated annealing)", metavar="number")
 764     parser.add_option("--anneal-step", dest="anneal_step", type="int", default=100,
 765                       help="the number of steps after which to reduce the annealing switches by 1", metavar="number")
 766
 767
 768     # arguments
 769     parser.add_option("--controlled",
 770                       action="store_true", dest="controlled_evolution", default=False,
 771                       help="check all possible mutations at each step and only take the best")
 772     parser.add_option("--controlled-tail",
 773                       action="store_true", dest="controlled_tail", default=False,
 774                       help="do a controlled evolution after the random evolution steps")
 775     parser.add_option("-q", "--quiet",
 776                       action="store_true", dest="quiet", default=False,
 777                       help="don’t print progress messages to stdout")
 778     parser.add_option("-v", "--verbose",
 779                       action="store_true", dest="verbose", default=False,
 780                       help="print more detailed layout statistics")
 781
 782     (options, args) = parser.parse_args()
 783
 784     # post process options
 785     if options.base:
 786         options.base = eval(options.base)
 787     elif options.base_name:
 788         options.base = eval(options.base_name)
 789     if not options.base:
 790         options.base = NEO_LAYOUT
 791     if options.base_string:
 792         # base + base-string: base for the surroundings,
 793         # base-string for the base layer.
 794         options.base = string_to_layout(options.base_string, NEO_LAYOUT)
 795
 796     if options.file:
 797         options.data = read_file(options.file)
 798     else:
 799         options.data = None
 800     if options.check:
 801         options.check = eval(options.check)
 802
 803     # act
 804
 805     if options.check:
 806         check_a_layout_from_shell(options.check, quiet=options.quiet, verbose=options.verbose, data=options.data)
 807
 808     elif options.check_string:
 809         check_a_layout_string_from_shell(options.check_string, quiet=options.quiet, verbose=options.verbose, data=options.data, base_layout=options.base)
 810
 811     elif options.evolve:
 812         evolve_a_layout(steps=options.evolve, prerandomize=options.prerandomize, quiet=options.quiet, controlled=options.controlled_evolution, verbose=options.verbose, controlled_tail=options.controlled_tail, data=options.data, starting_layout=options.base, anneal=options.anneal, anneal_step=options.anneal_step)
 813
 814     elif options.best_random_layout:
 815         best_random_layout(number=options.best_random_layout, prerandomize=options.prerandomize, quiet=options.quiet, data=options.data, layout=options.base)
 816
 817     elif options.challenge_rounds:
 818             evolution_challenge(rounds=options.challenge_rounds,
 819                                 iterations=options.challenge_evolution_steps,
 820                                 challengers=options.challengers,
 821                                 prerandomize=options.prerandomize,
 822                                 data=options.data,
 823                                 layout=options.base,
 824                                 controlled=options.controlled_evolution)
 825
 826     else:
 827         check_the_neo_layout(quiet=options.quiet, verbose=options.verbose, data=options.data, layout=options.base)
 828