3 # assumes a UTF-8 locale.
4 # remove non-ASCII characters from the output of pdftotext -layout standard.pdf
48 # pdftotext layout fixes
50 # floats are sometimes split by a stray space after the decimal point (e.g. "1. 5"); rejoin them
51 s/\([0-9]\)\. \([0-9]\)/\1.\2/g
52 ' | LC_ALL
=C
tr -c '\n-~' '?' |
awk '
62 # TODO: shift page numbers
66 # if (sub(/viii$/,"ix",x) ||
67 # sub(/iii$/,"iv",x) ||
80 # if (p !~ /[0-9]/ && $0 ~ /INTERNATIONAL STANDARD/)
82 # print "\n[page " inc(p) "]"
83 print "\n[page " p "]"