From e11fe7ab79fe5ae69147a83754b0c9ced08e8cc6 Mon Sep 17 00:00:00 2001 From: Vojtech Horky Date: Tue, 7 Jun 2016 13:36:56 +0200 Subject: [PATCH] Screen-to-text fixes in VM test script ImageMagick (again) updated the internal algorithm used to compute image signatures. That broke the OCR as the hashes were used to identify individual characters. This commit introduced a new mechanism that relies on determining color of individual pixels when converting to text. Hopefully this mechanism will be more stable :-). The OCR patterns are now in separate file to simplify updating (or adding new characters). Also a new switch --train-ocr was added for such task. --- ocr.sed | 97 +++++++++++++++++++++++++++++++ test-in-vm.sh | 182 +++++++++++++++++++++++++++------------------------------- train-ocr.txt | 7 +++ 3 files changed, 190 insertions(+), 96 deletions(-) create mode 100644 ocr.sed create mode 100644 train-ocr.txt diff --git a/ocr.sed b/ocr.sed new file mode 100644 index 0000000..3f1ea9b --- /dev/null +++ b/ocr.sed @@ -0,0 +1,97 @@ +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF: : +s:FFFFFFFFFFFFFFFFFFF00FFFFF000FFFF0000FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFF000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:1: +s:FFFFFFFFFFFFFFFFF00000FF00FFF00FFFFFF00FFFFF00FFFFF00FFFFF00FFFFF00FFFFF00FFFFFF00FFF00F0000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:2: +s:FFFFFFFFFFFFFFFFF00000FF00FFF00FFFFFF00FFFFFF00FFF0000FFFFFFF00FFFFFF00FFFFFF00F00FFF00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:3: +s:FFFFFFFFFFFFFFFFFFFF00FFFFF000FFFF0000FFF00F00FF00FF00FF0000000FFFFF00FFFFFF00FFFFFF00FFFFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:4: +s:FFFFFFFFFFFFFFFF0000000F00FFFFFF00FFFFFF00FFFFFF000000FFFFFFF00FFFFFF00FFFFFF00F00FFF00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:5: +s:FFFFFFFFFFFFFFFFFF000FFFF00FFFFF00FFFFFF00FFFFFF000000FF00FFF00F00FFF00F00FFF00F00FFF00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:6: +s:FFFFFFFFFFFFFFFF0000000F00FFF00FFFFFF00FFFFFF00FFFFF00FFFFF00FFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:7: +s:FFFFFFFFFFFFFFFFF00000FF00FFF00F00FFF00F00FFF00FF00000FF00FFF00F00FFF00F00FFF00F00FFF00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:8: +s:FFFFFFFFFFFFFFFFF00000FF00FFF00F00FFF00F00FFF00FF000000FFFFFF00FFFFFF00FFFFFF00FFFFF00FFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:9: +s:FFFFFFFFFFFFFFFFFF000FFFF00F00FF00FFF00F00FFF00F00F0F00F00F0F00F00FFF00F00FFF00FF00F00FFFF000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:0: +s:FFFFFFFFFFFFFFFFFFF00FFFFF0000FFFF0000FFFF0000FFFFF00FFFFFF00FFFFFF00FFFFFFFFFFFFFF00FFFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:!: +s:FFFFFFFFFFFFFFFFFFFFFFFFF00000FF00FFF00F00FFF00F00F0000F00F0000F00F0000F00F000FF00FFFFFFF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:@: +s:FFFFFFFFFFFFFFFFFFFFFFFFF00F00FFF00F00FF0000000FF00F00FFF00F00FFF00F00FF0000000FF00F00FFF00F00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:#: +s:FFF00FFFFFF00FFFF00000FF00FFF00F00FFFF0F00FFFFFFF00000FFFFFFF00FFFFFF00F0FFFF00F00FFF00FF00000FFFFF00FFFFFF00FFFFFFFFFFFFFFFFFFF:$: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FFFF0F00FFF00FFFFF00FFFFF00FFFFF00FFFFF00FFFFF00FFF00F0FFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:%: +s:FFF0FFFFFF000FFFF00F00FF00FFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:^: +s:FFFFFFFFFFFFFFFFFF000FFFF00F00FFF00F00FFFF000FFFF000F00F00F000FF00FF00FF00FF00FF00FF00FFF000F00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:\&: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FF00FFF0000FF00000000FF0000FFF00FF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:\*: +s:FFFFFFFFFFFFFFFFFFFF00FFFFF00FFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFFF00FFFFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:(: +s:FFFFFFFFFFFFFFFFFF00FFFFFFF00FFFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFF00FFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:): +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF0000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:-: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF:_: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FFFFFF00FFFF000000FFFF00FFFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:+: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000000FFFFFFFFFFFFFFFFFF000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:=: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FFFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:\.: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FFFFF00FFFFF00FFFFF00FFFFF00FFFFFFF00FFFFFFF00FFFFFFF00FFFFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:<: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FFFFFF00FFFFFF00FFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFF:,: +s:FFFFFFFFFFFFFFFFFFFFFFFFF00FFFFFFF00FFFFFFF00FFFFFFF00FFFFFFF00FFFFF00FFFFF00FFFFF00FFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:>: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF0FFFFFF00FFFFF00FFFFF00FFFFF00FFFFF00FFFFF00FFFFFF0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:\/: +s:FFFFFFFFFFFFFFFFF00000FF00FFF00F00FFF00FFFFF00FFFFF00FFFFFF00FFFFFF00FFFFFFFFFFFFFF00FFFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:?: +s:FFFFFFFFFFFFFFFFFFFFFFFF0FFFFFFF00FFFFFF000FFFFFF000FFFFFF000FFFFFF000FFFFFF000FFFFFF00FFFFFFF0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:\\: +s:FFFFFFFFFFFFFFFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFFFFFFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:|: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FFFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FFFFFF00FFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:;: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FFFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FFFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:\:: +s:FFFFFFFFFFFFFFFFF000F00F00F000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:~: +s:FFFFFFFFFFFFFFFFFF0000FFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:\[: +s:FFFFFFFFFFFFFFFFFF0000FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:\]: +s:FFFFFFFFFFFFFFFFFFFF000FFFF00FFFFFF00FFFFFF00FFFF000FFFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFFF000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:{: +s:FFFFFFFFFFFFFFFFF000FFFFFFF00FFFFFF00FFFFFF00FFFFFFF000FFFF00FFFFFF00FFFFFF00FFFFFF00FFFF000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:}: +s:FFFFFFFFF00FF00FF00FF00FF00FF00FFF0FF0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:": +s:FFFFFFFFFF00FFFFFF00FFFFFF00FFFFFF0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:': +s:FF00FFFFFF00FFFFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:`: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF0000FFFFFFF00FFF00000FF00FF00FF00FF00FF00FF00FFF000F00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:a: +s:FFFFFFFFFFFFFFFF000FFFFFF00FFFFFF00FFFFFF0000FFFF00F00FFF00FF00FF00FF00FF00FF00FF00FF00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:b: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000FF00FFF00F00FFFFFF00FFFFFF00FFFFFF00FFF00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:c: +s:FFFFFFFFFFFFFFFFFFF000FFFFFF00FFFFFF00FFFF0000FFF00F00FF00FF00FF00FF00FF00FF00FF00FF00FFF000F00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:d: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000FF00FFF00F0000000F00FFFFFF00FFFFFF00FFF00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:e: +s:FFFFFFFFFFFFFFFFFF000FFFF00F00FFF00FF0FFF00FFFFF0000FFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:f: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000F00F00FF00FF00FF00FF00FF00FF00FF00FF00FF00FFF00000FFFFFF00FF00FF00FFF0000FFFFFFFFFFF:g: +s:FFFFFFFFFFFFFFFF000FFFFFF00FFFFFF00FFFFFF00F00FFF000F00FF00FF00FF00FF00FF00FF00FF00FF00F000FF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:h: +s:FFFFFFFFFFFFFFFFFFF00FFFFFF00FFFFFFFFFFFFF000FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:i: +s:FFFFFFFFFFFFFFFFFFFFF00FFFFFF00FFFFFFFFFFFFF000FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FF00FF00FF00FF00FFF0000FFFFFFFFFF:j: +s:FFFFFFFFFFFFFFFF000FFFFFF00FFFFFF00FFFFFF00FF00FF00F00FFF0000FFFF0000FFFF00F00FFF00FF00F000FF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:k: +s:FFFFFFFFFFFFFFFFFF000FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:l: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000F00FF0000000F00F0F00F00F0F00F00F0F00F00F0F00F00FFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:m: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00F000FFF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:n: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000FF00FFF00F00FFF00F00FFF00F00FFF00F00FFF00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:o: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00F000FFF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00000FFF00FFFFFF00FFFFF0000FFFFFFFFFFFF:p: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF000F00F00FF00FF00FF00FF00FF00FF00FF00FF00FF00FFF00000FFFFFF00FFFFFF00FFFFF0000FFFFFFFFF:q: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00F000FFF000F00FF00FF00FF00FFFFFF00FFFFFF00FFFFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:r: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000FF00FFF00FF00FFFFFFF000FFFFFFF00FF00FFF00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:s: +s:FFFFFFFFFFFFFFFFFFF0FFFFFF00FFFFFF00FFFF000000FFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00F00FFFF000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:t: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FFF000F00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:u: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FFF0000FFFFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:v: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FFF00F00FFF00F00F0F00F00F0F00F00F0F00F0000000FF00F00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:w: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FFF00FF00F00FFFF000FFFFF000FFFFF000FFFF00F00FF00FFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:x: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00FFF00F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00FF000000FFFFFF00FFFFF00FF00000FFFFFFFFFFF:y: +s:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF0000000F00FF00FFFFF00FFFFF00FFFFF00FFFFF00FFF00F0000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:z: +s:FFFFFFFFFFFFFFFFFFF0FFFFFF000FFFF00F00FF00FFF00F00FFF00F0000000F00FFF00F00FFF00F00FFF00F00FFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:A: +s:FFFFFFFFFFFFFFFF000000FFF00FF00FF00FF00FF00FF00FF00000FFF00FF00FF00FF00FF00FF00FF00FF00F000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:B: +s:FFFFFFFFFFFFFFFFFF0000FFF00FF00F00FFFF0F00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFF0FF00FF00FFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:C: +s:FFFFFFFFFFFFFFFF00000FFFF00F00FFF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00F00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:D: +s:FFFFFFFFFFFFFFFF0000000FF00FF00FF00FFF0FF00F0FFFF0000FFFF00F0FFFF00FFFFFF00FFF0FF00FF00F0000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:E: +s:FFFFFFFFFFFFFFFF0000000FF00FF00FF00FFF0FF00F0FFFF0000FFFF00F0FFFF00FFFFFF00FFFFFF00FFFFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F: +s:FFFFFFFFFFFFFFFFFF0000FFF00FF00F00FFFF0F00FFFFFF00FFFFFF00F0000F00FFF00F00FFF00FF00FF00FFF000F0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:G: +s:FFFFFFFFFFFFFFFF00FFF00F00FFF00F00FFF00F00FFF00F0000000F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:H: +s:FFFFFFFFFFFFFFFFFF0000FFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:I: +s:FFFFFFFFFFFFFFFFFFF0000FFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FF00FF00FF00FF00FF00FF00FFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:J: +s:FFFFFFFFFFFFFFFF000FF00FF00FF00FF00FF00FF00F00FFF0000FFFF0000FFFF00F00FFF00FF00FF00FF00F000FF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:K: +s:FFFFFFFFFFFFFFFF0000FFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFF0FF00FF00F0000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:L: +s:FFFFFFFFFFFFFFFF00FFF00F000F000F0000000F0000000F00F0F00F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:M: +s:FFFFFFFFFFFFFFFF00FFF00F000FF00F0000F00F0000000F00F0000F00FF000F00FFF00F00FFF00F00FFF00F00FFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:N: +s:FFFFFFFFFFFFFFFFF00000FF00FFF00F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:O: +s:FFFFFFFFFFFFFFFF000000FFF00FF00FF00FF00FF00FF00FF00000FFF00FFFFFF00FFFFFF00FFFFFF00FFFFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:P: +s:FFFFFFFFFFFFFFFFF00000FF00FFF00F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00F00F0F00F00F0000FF00000FFFFFF00FFFFFF000FFFFFFFFFFFFFFFFF:Q: +s:FFFFFFFFFFFFFFFF000000FFF00FF00FF00FF00FF00FF00FF00000FFF00F00FFF00FF00FF00FF00FF00FF00F000FF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:R: +s:FFFFFFFFFFFFFFFFF00000FF00FFF00F00FFF00FF00FFFFFFF000FFFFFFF00FFFFFFF00F00FFF00F00FFF00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:S: +s:FFFFFFFFFFFFFFFFF000000FF000000FF0F00F0FFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:T: +s:FFFFFFFFFFFFFFFF00FFF00F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00FF00000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:U: +s:FFFFFFFFFFFFFFFF00FFF00F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00F00FFF00FF00F00FFFF000FFFFFF0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:V: +s:FFFFFFFFFFFFFFFF00FFF00F00FFF00F00FFF00F00FFF00F00F0F00F00F0F00F00F0F00F0000000F000F000FF00F00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:W: +s:FFFFFFFFFFFFFFFF00FFF00F00FFF00FF00F00FFF00000FFFF000FFFFF000FFFF00000FFF00F00FF00FFF00F00FFF00FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:X: +s:FFFFFFFFFFFFFFFFF00FF00FF00FF00FF00FF00FF00FF00FFF0000FFFFF00FFFFFF00FFFFFF00FFFFFF00FFFFF0000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:Y: +s:FFFFFFFFFFFFFFFF0000000F00FFF00F0FFFF00FFFFF00FFFFF00FFFFF00FFFFF00FFFFF00FFFF0F00FFF00F0000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:Z: +s:00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:_: +s:................................................................................................................................:?: diff --git a/test-in-vm.sh b/test-in-vm.sh index 16c9621..932099f 100755 --- a/test-in-vm.sh +++ b/test-in-vm.sh @@ -326,103 +326,15 @@ xx_cls() { sleep 1 } +xx_do_ocr_prepare() { + convert "$1" -crop "8x16" +repage +adjoin txt:- \ + | sed -e 's|[0-9]*,[0-9]*: ([^)]*)[ ]*#\([0-9A-Fa-f]\{6\}\).*|\1|' -e 's:^#.*:@:' -e 's#000000#0#g' -e 's#FFFFFF#F#' \ + | sed -e ':a' -e 'N;s#\n##;s#^@##;/@$/{s#@$##p;d}' -e 't a' +} + + xx_do_ocr() { - convert "$1" -crop "8x16" +repage +adjoin -format "%#" -write info:- null: \ - | fold -w 64 \ - | cut -c 1-4 \ - | sed \ - -e "s:fe6c:a:g" \ - -e "s:3565:b:g" \ - -e "s:e670:c:g" \ - -e "s:858f:d:g" \ - -e "s:71e0:e:g" \ - -e "s:18c4:f:g" \ - -e "s:1ea6:g:g" \ - -e "s:2df4:h:g" \ - -e "s:1434:i:g" \ - -e "s:1c2b:j:g" \ - -e "s:5041:k:g" \ - -e "s:5f89:l:g" \ - -e "s:ddfb:m:g" \ - -e "s:d1a3:n:g" \ - -e "s:b396:o:g" \ - -e "s:7f04:p:g" \ - -e "s:091a:q:g" \ - -e "s:ec55:r:g" \ - -e "s:0547:s:g" \ - -e "s:085b:t:g" \ - -e "s:e86d:u:g" \ - -e "s:b632:v:g" \ - -e "s:1057:w:g" \ - -e "s:0a86:x:g" \ - -e "s:a7f9:y:g" \ - -e "s:f1c4:z:g" \ - -e "s:c402:A:g" \ - -e "s:32dc:B:g" \ - -e "s:4fa4:C:g" \ - -e "s:7e23:D:g" \ - -e "s:6289:E:g" \ - -e "s:6b69:F:g" \ - -e "s:62f2:G:g" \ - -e "s:f0df:H:g" \ - -e "s:93ed:I:g" \ - -e "s:076f:J:g" \ - -e "s:d58f:K:g" \ - -e "s:4665:L:g" \ - -e "s:a1e2:M:g" \ - -e "s:a7f3:N:g" \ - -e "s:80cd:O:g" \ - -e "s:6810:P:g" \ - -e "s:f1d7:Q:g" \ - -e "s:ba8e:R:g" \ - -e "s:b102:S:g" \ - -e "s:6718:T:g" \ - -e "s:4a03:U:g" \ - -e "s:4762:V:g" \ - -e "s:53a7:W:g" \ - -e "s:bcf4:X:g" \ - -e "s:fa75:Y:g" \ - -e "s:a4d8:Z:g" \ - -e "s:b10a:0:g" \ - -e "s:16dd:1:g" \ - -e "s:c5b0:2:g" \ - -e "s:1dc0:3:g" \ - -e "s:9f6b:4:g" \ - -e "s:306e:5:g" \ - -e "s:08b8:6:g" \ - -e "s:f173:7:g" \ - -e "s:4926:8:g" \ - -e "s:3db4:9:g" \ - -e "s:be18:/:g" \ - -e "s:51f5:\\\\:g" \ - -e "s:b187:#:g" \ - -e "s:c29c:|:g" \ - -e "s:467f:_:g" \ - -e "s:013e:-:g" \ - -e "s:4f24:.:g" \ - -e "s:659f:,:g" \ - -e "s:3a40:(:g" \ - -e "s:913d:?:g" \ - -e "s:7c6c:!:g" \ - -e "s:55af:):g" \ - -e "s:100d:[:g" \ - -e "s:fa0c:]:g" \ - -e "s:db86:{:g" \ - -e "s:8935:}:g" \ - -e "s:22bd:\&:g" \ - -e "s:7ff2:*:g" \ - -e "s:f5b0:\$:g" \ - -e "s:bd43:\':g" \ - -e "s:3810:\":g" \ - -e "s:5bd7:%:g" \ - -e "s:810e:@:g" \ - -e "s:3050:<:g" \ - -e "s:9d79:>:g" \ - -e "s#ded3#:#g" \ - -e "s:4708:;:g" \ - -e "s:a292: :g" \ - -e "s:a1a4:_:g" \ - -e "s:....:?:g" + xx_do_ocr_prepare "$1" | sed -f ocr.sed } xx_do_screenshot() { @@ -548,6 +460,7 @@ EOF_USAGE XX_DEBUG_ECHO=: XX_ACTIVITY_ECHO=: +XX_TRAIN_OCR=false XX_HEADLESS=false XX_USE_KVM=true XX_TEMP="$PWD/tmp-vm/" @@ -568,6 +481,13 @@ while [ $# -gt 0 ]; do --headless) XX_HEADLESS=true ;; + --train-ocr) + XX_TRAIN_OCR=true + ;; + --train-ocr=*) + XX_TRAIN_OCR=true + XX_TRAIN_OCR_FILE=`echo "$1" | cut '-d=' -f 2-` + ;; --debug) XX_DEBUG_ECHO=echo ;; @@ -620,6 +540,76 @@ if $XX_AUTODETECT_HELENOS; then fi +if $XX_TRAIN_OCR; then + if [ -z "$XX_TRAIN_OCR_FILE" ]; then + echo "Usage notes for --train-ocr command-line switch." + echo + echo "Prepare HelenOS image.iso with train-ocr.txt in its root." + echo + echo "Run this script with the ISO and with --train-ocr=out.sed" + echo "where out.sed is output file with OCR commands." + echo + echo "If the script finishes with no error you can replace the" + echo "old OCR scheme (ocr.sed) with the new one in out.sed." + echo + exit + fi + + # Set to false to debug the script below without running QEMU + # all the time + if true; then + xx_start_machine name=ocr vterm=false + xx_echo "Will display the training set on the VM screen" + xx_echo "(this will take a while)." + sleep 20 + xx_do_type ocr "cat train-ocr.txt" + sleep 1 + xx_do_qemu_command ocr "sendkey ret" + sleep 5 + xx_do_screenshot ocr "$XX_TEMP/ocr-full.ppm" "$XX_TEMP/ocr-term.png" + xx_stop_machine + fi + + xx_echo "Doing OCR..." + xx_do_ocr_prepare "$XX_TEMP/ocr-term.png" \ + | ( + prev1_line="" + prev2_line="" + prev3_line="" + counter=0 + while read current_line; do + if [ $counter -gt 5 ]; then + while read current_line; do + if ! [ "$current_line" = "$prev1_line" -o "$current_line" = "$prev2_line" ]; then + break + fi + done + alphabet=`sed -n 's#^\(.\)\(.\)\(\1\2\)\{4,\}##p' train-ocr.txt` + alphabet_size=`echo "$alphabet" | wc -c` + for i in `seq 1 $(( $alphabet_size - 1))`; do + symbol="`echo \"$alphabet\" | fold -w 1 | sed -n \"${i}p\" | sed 's#[*\.\&\\:\/\[]\|\]#\\\\&#g'`" + echo "s:$current_line:$symbol:" + read current_line + done + echo "$current_line" | tr 'F' '0' | sed 's#.*#s:&:_:#' + echo "$current_line" | tr '0F' '.' | sed 's#.*#s:&:?:#' + break + fi + if [ "$current_line" = "$prev2_line" -a "$prev1_line" = "$prev3_line" -a "$prev1_line" != "$prev2_line" ]; then + counter=$(( $counter + 1 )) + else + counter=0 + fi + prev3_line="$prev2_line" + prev2_line="$prev1_line" + prev1_line="$current_line" + done >"$XX_TRAIN_OCR_FILE" + ) + xx_echo "New OCR scheme is in $XX_TRAIN_OCR_FILE." + exit +fi + + XX_RESULT_SUMMARY="$XX_TEMP/summary.$$.txt" date '+# Execution started on %Y-%m-%d %H:%M.' >"$XX_RESULT_SUMMARY" diff --git a/train-ocr.txt b/train-ocr.txt new file mode 100644 index 0000000..ffd5d81 --- /dev/null +++ b/train-ocr.txt @@ -0,0 +1,7 @@ +This is training file for pseudo-OCR in test-in-vm.sh script. +Copy it to uspace/overlay before running + + ./test-in-vm.sh --train-ocr + +Do not modify the text below this line! +0101010101010101010101 1234567890!@#$%^&*()-_+=.<,>/?\|;:~[]{}"'`abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ -- 2.11.4.GIT