repo.or.cz
/
c-standard.git
/
blob
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
log
|
graphiclog1
|
graphiclog2
|
commit
|
commitdiff
|
tree
|
refs
|
edit
|
fork
blame
|
history
|
raw
|
HEAD
pdftotext
[c-standard.git]
/
translit.sh
blob
0bf85a05a3c524d549f0c71c11702cdb5c4510fd
#!/bin/sh

# translit.sh - ASCII-only cleanup of `pdftotext -layout standard.pdf` output.
#
# Reads the extracted text on stdin and writes the cleaned text to stdout.
# Assumes the input is UTF-8 (the substitutions below match UTF-8 characters).
#
# Pipeline stages:
#   1. sed: mark page breaks (form feeds) as "(newpage)", transliterate known
#           non-ASCII characters to ASCII spellings, undo some layout
#           artifacts of pdftotext.
#   2. tr:  replace every remaining byte that is not a newline or printable
#           ASCII (space through tilde) with "?".
#   3. awk: turn each "(newpage)" marker into a "[page N]" line, where N is
#           taken from the line that preceded the marker.

translit() {
  # "\f" inside a sed regex is an extension, so a literal form feed is
  # generated with printf and spliced into the first expression instead.
  sed \
    -e "s/$(printf '\f')/(newpage)/g" \
    -e '
# utf8 fixes
s/ﬁ/fi/g
s/ﬂ/fl/g
s/§/!S/g
s/©/(C)/g
s/—/--/g
s/−/-/g
s/∗/*/g
s/ˆ/^/g
s/〈/</g
s/〉/>/g
s/⎡/[^/g
s/⎤/^]/g
s/⎣/[_/g
s/⎦/_]/g
s/⎢/[ /g
s/⎥/ ]/g
s/⎧/{/g
s/⎨/{/g
s/⎩/{/g
s/±/(+-)/g
s/≤/<=/g
s/≥/>=/g
s/≠/!=/g
s/Σ/(Sum)/g
s/√/sqrt:/g
s/π/pi/g
s/∞/(inf)/g
s/ƒ/fl./g
s/∫/(integral)/g
s/Γ/(Gamma)/g
s/×/x/g
s/•/o/g
s/⎯/-/g
s/↑/(uparrow)/g
s/↓/(downarrow)/g
s/↔/(<->)/g
s/→/(->)/g
s/‘/'\''/g
s/’/'\''/g
# pdftotext layout fixes
s/_ _/__/g
# join "6. 5" into "6.5"; the dot is escaped so that "10 20" is left alone
s/\([0-9]\)\. \([0-9]\)/\1.\2/g
' |
    LC_ALL=C tr -c '\n -~' '?' |
    awk '
# Output runs one line behind the input: "last" holds the previous line so
# that, when a page break is seen, the line before it can be used as the
# source of the page number instead of being printed.
# The final input line is never printed (there is no END rule).
BEGIN {
	getline
	last = $0
	# side selects which field of the preceding line is the page number:
	# 0 -> last field, 1 -> first field. It flips on every page break
	# (presumably because the number alternates between the two edges
	# of the footer on facing pages).
	side = 0
}
/^\(newpage\)/ {
	n = split(last, a)
	if (side)
		p = a[1]
	else
		p = a[n]
	side = !side
	print "[page " p "]"
	# Discard the marker line and the line after it; the second line
	# read here becomes the new pending line.
	getline
	getline
	last = $0
	next
}
{
	print last
	last = $0
}
'
}

translit