* swapped glyphs for Eng U+014A from Sami Eng to African Eng, the latter being more...
[dejavu.git] / dejavu-fonts / unicover.pl
blob1a7d1ec6540af0579a58665454df52ea4435e659
1 #!/usr/bin/perl -w
3 # $Id$
5 # unicode coverage analyzator
6 # (c)2004,2005 Stepan Roh (PUBLIC DOMAIN)
7 # usage: ./unicover.pl unicode_data_file blocks_file sfd_file1 label1 (sfd_file2 label2...)
8 # unicode data file can be downloaded from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
9 # blocks file can be downloaded from http://www.unicode.org/Public/UNIDATA/Blocks.txt
11 sub parse_blocks_file($);
12 sub parse_unicode_data_file($);
13 sub parse_sfd_file($);
14 sub inc_coverage($$);
15 sub print_coverage();
16 sub disable_char($);
18 $debug = 0;
20 if ($debug) {
21 use Data::Dumper;
23 $Data::Dumper::Indent = 1;
24 $Data::Dumper::Sortkeys = 1;
25 $Data::Dumper::Purity = 1;
28 # map (start dec => ( 'name' => block name, 'end' => end dec, 'coverage' => ( sfd_file => coverage ), 'disabled_count' => number of disabled glyphs )
29 %blocks = ();
30 %chars = ();
32 sub parse_blocks_file($) {
33 my ($blocks_file) = @_;
35 open (F, $blocks_file) || die "Unable to open $blocks_file : $!\n";
36 while (<F>) {
37 next if (/^\s*(#|$)/);
38 my ($start, $end, $name) = ($_ =~ /^(.*?)\.\.(.*?);\s*(.*?)\s*$/);
39 $start = hex ($start);
40 $end = hex ($end);
41 $blocks{$start}{'name'} = $name;
42 $blocks{$start}{'end'} = $end;
44 close (F);
47 sub disable_char($) {
48 my ($dec_enc) = @_;
50 foreach $block_start (keys %blocks) {
51 my ($block_end) = $blocks{$block_start}{'end'};
52 if (($dec_enc >= $block_start) && ($dec_enc <= $block_end)) {
53 $blocks{$block_start}{'disabled_count'}++;
54 last;
59 sub disable_char_range($$) {
60 my ($range_start, $range_end) = @_;
62 my $cur_enc = $range_start;
63 while ($cur_enc <= $range_end) {
64 my $cur_block_start = -1;
65 foreach $block_start (keys %blocks) {
66 my ($block_end) = $blocks{$block_start}{'end'};
67 if (($cur_enc >= $block_start) && ($cur_enc <= $block_end)) {
68 $cur_block_start = $block_start;
69 last;
72 return if ($cur_block_start == -1);
73 while (($cur_enc <= $range_end) && ($cur_enc <= $blocks{$cur_block_start}{'end'})) {
74 $blocks{$cur_block_start}{'disabled_count'}++;
75 $cur_enc++;
80 sub parse_unicode_data_file($) {
81 my ($ud_file) = @_;
83 open (F, $ud_file) || die "Unable to open $ud_file : $!\n";
84 my $prev_enc = -1;
85 while (<F>) {
86 next if (/^\s*(#|$)/);
87 my ($enc, $name) = split (/;/);
88 $enc = hex ($enc);
89 if ($prev_enc + 1 < $enc) {
90 disable_char_range ($prev_enc + 1, $enc - 1);
92 disable_char ($enc) if ($name =~ /^</);
93 $chars{$enc} = 1 if ($name !~ /^</);
94 $prev_enc = $enc;
96 # find last possible character
97 $last_enc = $prev_enc;
98 foreach $block_start (keys %blocks) {
99 my ($block_end) = $blocks{$block_start}{'end'};
100 $last_enc = $block_end if ($block_end > $last_enc);
102 if ($prev_enc + 1 <= $last_enc) {
103 disable_char_range ($prev_enc + 1, $last_enc);
105 close (F);
108 sub inc_coverage($$) {
109 my ($sfd_file, $dec_enc) = @_;
111 foreach $block_start (keys %blocks) {
112 my ($block_end) = $blocks{$block_start}{'end'};
113 if (($dec_enc >= $block_start) && ($dec_enc <= $block_end)) {
114 if (exists $chars{$dec_enc}) {
115 $blocks{$block_start}{'coverage'}{$sfd_file}++;
117 last;
122 sub parse_sfd_file($) {
123 my ($sfd_file) = @_;
125 open (F, $sfd_file) || die "Unable to open $sfd_file : $!\n";
126 my $curchar = '';
127 my $curenc = '';
128 my $empty = 0;
129 while (<F>) {
130 if (/^StartChar:\s*(\S+)\s*$/) {
131 $curchar = $1;
132 $curenc = '';
133 $empty = 0;
134 } elsif (/^Colour:/) {
135 # XXX this is quick'n'dirty hack to detect non-empty glyphs
136 $empty = 1;
137 } elsif (/^Encoding:\s*\d+\s*(\d+)\s*\d+\s*$/) {
138 $curenc = $1;
139 } elsif ($curenc && !$empty && /^EndChar\s*/) {
140 inc_coverage ($sfd_file, $curenc);
143 close (F);
146 # TODO: formats would be better
147 sub print_coverage() {
148 print <<END;
149 This is the Unicode coverage file for DejaVu fonts
150 (\$Id\$)
152 Control and similar characters are discounted from totals.
155 print " ";
156 foreach $sfd_file (@sfd_files) {
157 my $label = $sfd_files{$sfd_file};
158 printf "%-19s", $label;
160 print "\n";
161 foreach $block_start (sort { $a <=> $b } keys %blocks) {
162 my ($block_end) = $blocks{$block_start}{'end'};
163 my ($name) = $blocks{$block_start}{'name'};
164 my ($disabled) = $blocks{$block_start}{'disabled_count'};
165 $disabled = 0 if (!defined $disabled);
166 my ($length) = $block_end - $block_start + 1 - $disabled;
167 printf "U+%04x %-40s", $block_start, $name;
168 foreach $sfd_file (@sfd_files) {
169 my ($coverage) = $blocks{$block_start}{'coverage'}{$sfd_file};
170 $coverage = 0 if (!defined $coverage);
171 my ($percent) = ($length != 0) ? ($coverage/$length * 100) : 0;
172 if ($percent > 0) {
173 printf " %3d%%", $percent;
174 } else {
175 print " ";
177 printf " %-13s", "($coverage/$length)";
179 print "\n";
183 if (@ARGV < 3) {
184 print STDERR "usage: unicode_data_file blocks_file sfd_file1 label1 (sfd_file2 label2...)\n";
185 exit 1;
188 $unicode_data_file = shift @ARGV;
189 $blocks_file = shift @ARGV;
190 @sfd_files = ();
191 %sfd_files = ();
192 while (@ARGV) {
193 $sfd_file = shift @ARGV;
194 $label = shift @ARGV;
195 push (@sfd_files, $sfd_file);
196 $sfd_files{$sfd_file} = $label;
199 parse_blocks_file($blocks_file);
200 parse_unicode_data_file($unicode_data_file);
201 foreach $sfd_file (@sfd_files) {
202 parse_sfd_file($sfd_file);
204 print_coverage();
206 if ($debug) {
207 print STDERR Data::Dumper->Dump([\%blocks], ['*blocks']);