3 # Assumes a UTF-8 locale.
4 # Removes non-ASCII characters from the output of: pdftotext -layout standard.pdf
54 # pdftotext layout fixes
56 # floats are sometimes broken by a stray space after the decimal point (e.g. 1. 5); rejoin them
57 s/\([0-9]\)\. \([0-9]\)/\1.\2/g
58 ' | LC_ALL
=C
tr -c '\n-~' '?' |
awk '
68 # TODO: shift page numbers
72 # if (sub(/viii$/,"ix",x) ||
73 # sub(/iii$/,"iv",x) ||
86 # if (p !~ /[0-9]/ && $0 ~ /INTERNATIONAL STANDARD/)
88 # print "\n[page " inc(p) "]"
89 print "\n[page " p "]"