sc-train: Only load wordlist on first count, specified by --wordlist
[vspell.git] / utils / train
blobc5737193efad9afc68644c4df404adafb5a3bd53
#!/bin/bash
#
# Run one iteration of the training pipeline: replay the corpus through
# sc-train, turn the result into word n-grams, then build an ARPA language
# model with the CMU-Cambridge toolkit (invoked with -n 2).
#
# Usage: train PREFIX ITERATION
#
#   PREFIX     path prefix shared by every input and output file
#   ITERATION  non-negative integer. For any ITERATION other than "0",
#              PREFIX.arpa.(ITERATION-1) is passed to sc-train.
#
# Inputs handed to the tools:
#   PREFIX.scz.gz, PREFIX.vocab, PREFIX.ccs
# Files written:
#   PREFIX.scITERATION.gz, PREFIX.wngram.ITERATION.gz,
#   PREFIX.idngram.ITERATION, PREFIX.arpa.ITERATION,
#   PREFIX.log.sc-train.ITERATION, PREFIX.log.idngram.ITERATION,
#   PREFIX.log.lm.ITERATION
#
# ./sc-train and ./sc2wngram are resolved relative to the current directory,
# so the script must be started from the directory that contains them.
#
# Exit status: 2 on bad arguments, otherwise the status of the first stage
# that failed (0 when every stage succeeded).

# A pipeline must report failure when any of its stages fails, not only the
# last one (which is usually gzip and almost always succeeds).
set -o pipefail

# Location of the CMU-Cambridge toolkit binaries, relative to the current
# directory.
readonly CMU_TOOLKIT_DIR=../../CMU-Cam_Toolkit_v2/src

# Print a one-line usage summary to stderr.
usage() {
  printf 'Usage: %s PREFIX ITERATION\n' "${0##*/}" >&2
}

#######################################
# Run the four pipeline stages for one iteration.
# Arguments: $1 - output/input path prefix
#            $2 - iteration number (decimal digits only)
# Outputs:   progress messages on stdout; timings on stderr
# Returns:   2 on invalid arguments, otherwise the status of the first
#            failing stage, 0 on success
#######################################
main() {
  # Arguments beyond the second are ignored, as they always were.
  if (( $# < 2 )) || [[ -z "$1" ]] || ! [[ "$2" =~ ^[0-9]+$ ]]; then
    usage
    return 2
  fi

  local -r out=$1 iter=$2
  # Always start empty so a stray variable from the caller's environment can
  # never end up on the sc-train command line.
  local prev_arpa=''
  local work_dir

  if [[ "$iter" != "0" ]]; then
    # 10# forces base 10 so that values such as "08" are not parsed as octal.
    prev_arpa="${out}.arpa.$(( 10#$iter - 1 ))"
  fi

  # Scratch directory for sort and wngram2idngram: next to the output files.
  work_dir=$(dirname -- "$out") || return

  echo "sc-train --replay"
  # ${prev_arpa:+...} expands to nothing at all on iteration 0, so sc-train
  # then receives --replay without a file argument.
  time gzip -d < "${out}.scz.gz" \
    | ./sc-train --replay ${prev_arpa:+"$prev_arpa"} \
        2> "${out}.log.sc-train.${iter}" \
    | gzip > "${out}.sc${iter}.gz" \
    || return

  echo sc2wngram
  # LC_ALL (not LANG) is the only setting that cannot be overridden by the
  # caller's locale variables, guaranteeing a byte-wise sort.
  time gzip -d < "${out}.sc${iter}.gz" \
    | LC_ALL=C sort -S64M -T "$work_dir" \
    | ./sc2wngram 4 \
    | gzip > "${out}.wngram.${iter}.gz" \
    || return

  echo wngram2idngram
  time gzip -d < "${out}.wngram.${iter}.gz" \
    | "${CMU_TOOLKIT_DIR}/wngram2idngram" -n 2 -temp "$work_dir" \
        -vocab "${out}.vocab" \
        > "${out}.idngram.${iter}" 2> "${out}.log.idngram.${iter}" \
    || return

  echo idngram2lm
  "${CMU_TOOLKIT_DIR}/idngram2lm" -four_byte_counts -n 2 \
    -vocab "${out}.vocab" -idngram "${out}.idngram.${iter}" \
    -arpa "${out}.arpa.${iter}" -context "${out}.ccs" -witten_bell \
    2> "${out}.log.lm.${iter}"
}

main "$@"