output asked headers in the order they were asked; avoid header name spoofing by...
[hband-tools.git] / user-tools / unicodestyle
blob48526304baf6674608075e93fc57803cb1c03320
1 #!/usr/bin/env perl
3 use utf8;
4 use Getopt::Long;
5 use Data::Dumper;
6 use Encode qw/decode encode/;
7 use constant { LOWER => 0, UPPER => 1, COMMON_SURROGATE => 0xd835, };
8 no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';
9 binmode STDIN, ':utf8';
10 binmode STDOUT, ':utf8';
12 $msg_not_support_combine = "Combining styles %s and %s is not supported.\n";
15 %Transformer = (
16 double => [0xdcf1, 0xdcf7],
17 script => [0xdc89, 0xdc8f],
18 code => [0xde29, 0xde2f],
19 fraktur => [0xdcbd, 0xdcc3],
20 bold => [0xdd8d, 0xdd93],
21 italic => [0xddc1, 0xddc7],
22 bold_italic => [0xddf5, 0xddfb],
23 smallcaps => {
24 ignorecase => 1,
25 transtable => {qw/
26 B 0x299 G 0x262 H 0x29C I 0x26A L 0x29F N 0x274 R 0x280 Y 0x28F
27 0x276 0x276 0x153 0x276
28 A 0x1D00 C 0x1D04 D 0x1D05 E 0x1D07 J 0x1D0A K 0x1D0B M 0x1D0D O 0x1D0F P 0x1D18 T 0x1D1B U 0x1D1C V 0x1D20 W 0x1D21 Z 0x1D22
29 0xC6 0x1D01 0xE6 0x1D01
30 F 0xA730 S 0xA731
31 Q 0xA7AF
32 /},
34 circled => {
35 transrange => [
36 {input_start => 'A', input_end => 'Z', offset => 0x24B6},
37 {input_start => 'a', input_end => 'z', offset => 0x24D0},
38 {input_start => '1', input_end => '9', offset => 0x2460},
39 {input_start => '0', input_end => '0', offset => 0x24EA},
42 circled_negative => {
43 transrange => [
44 {input_start => 'A', input_end => 'Z', offset => 0x1F150},
45 {input_start => '1', input_end => '9', offset => 0x2776},
46 {input_start => '0', input_end => '0', offset => 0x24FF},
49 squared => {
50 transrange => [
51 {input_start => 'A', input_end => 'Z', offset => 0x1F130},
54 negative_squared => {
55 transrange => [
56 {input_start => 'A', input_end => 'Z', offset => 0x1F170},
59 parenthesized => {
60 transrange => [
61 {input_start => 'A', input_end => 'Z', offset => 0x1F110},
62 {input_start => 'a', input_end => 'z', offset => 0x249C},
63 {input_start => '1', input_end => '9', offset => 0x2474},
69 %Appender = (
70 underline => ['̲', sub{ $_[0] !~ /[gjpqyQ,;_]/; }],
71 strikethrough => ['̶', sub{1;}],
75 sub surrogate_pair
77 my $hi = shift;
78 my $lo = shift;
79 return 0x10000 + ($hi - 0xD800) * 0x400 + ($lo - 0xDC00);
82 sub hexord
84 sprintf "0x%X", ord $_[0];
87 sub uniqsort
89 my %h = map {$_=>1} @_;
90 return sort keys %h;
93 sub is_applicable_transformer
95 my ($chr, $trnkey) = @_;
96 my $trn = $Transformer{$trnkey};
98 if(ref $trn eq 'ARRAY')
100 return 1 if $chr =~ /[A-Za-z]/;
101 return '';
103 if(exists $trn->{transtable})
105 return 1 if hexord($chr) ~~ [keys $trn->{transtable}];
106 return 1 if $chr ~~ [keys $trn->{transtable}];
107 return 1 if $trn->{ignorecase} and uc $chr ~~ [keys $trn->{transtable}];
109 if(exists $trn->{transrange})
111 my $ord = ord $chr;
112 for my $r (@{$trn->{transrange}})
114 return 1 if $ord >= ord $r->{input_start} and $ord <= ord $r->{input_end}
117 return '';
120 sub is_applicable
122 $_[0] =~ /[[:print:]]/;
128 $trans_style = undef;
129 @appenders = ();
132 for my $style (@ARGV)
134 if(exists $Appender{$style})
136 push @appenders, $style;
137 next;
140 if(defined $trans_style)
142 my $combined = join '_', sort split(/_/, $trans_style), $style;
144 if(exists $Transformer{$combined})
146 $trans_style = $combined;
148 else
150 die sprintf $msg_not_support_combine, $trans_style, $style;
153 else
155 $trans_style = $style;
159 unless(exists $Transformer{$trans_style} or @appenders)
161 die "Unknown style or style can not be used alone: $trans_style\n".
162 "Styles: ".(join ' ', uniqsort split /_/, join '_', keys %Transformer, keys %Appender)."\n";
167 while(<STDIN>)
169 for my $char_in (split //)
171 my $char_out = $char_in;
173 if(is_applicable($char_in))
175 if(defined $trans_style and is_applicable_transformer($char_in, $trans_style))
177 if(ref $Transformer{$trans_style} eq 'ARRAY')
179 my $case = $char_in =~ /[a-z]/ ? LOWER : UPPER;
180 $char_out = chr surrogate_pair(COMMON_SURROGATE, $Transformer{$trans_style}->[$case] + ord $char_in);
182 else
184 if(exists $Transformer{$trans_style}->{transrange})
186 for my $range (@{$Transformer{$trans_style}->{transrange}})
188 my ($char_in_ord, $input_start_ord, $input_end_ord) = map {ord $_} $char_in, $range->{input_start}, $range->{input_end};
189 if($char_in_ord >= $input_start_ord and $char_in_ord <= $input_end_ord)
191 $char_out = chr($range->{offset} + ($char_in_ord - $input_start_ord));
192 goto translated;
196 if(exists $Transformer{$trans_style}->{transtable})
198 my @lookupchars = ($char_in, hexord($char_in));
199 push @lookupchars, uc $char_in, hexord(uc $char_in) if $Transformer{$trans_style}->{ignorecase};
200 for my $char_out_hex (map {$Transformer{$trans_style}->{transtable}->{$_}} @lookupchars)
202 if(defined $char_out_hex)
204 $char_out = chr hex $char_out_hex;
205 goto translated;
209 $char_out = $char_in;
210 translated:
214 for my $style (@appenders)
216 if($Appender{$style}->[1]->($char_in))
218 $char_out .= $Appender{$style}->[0];
223 print $char_out;
227 __END__
229 =pod
231 =head1 NAME
233 unicodestyle - Add font styles to input text using Unicode
235 =cut
239 # TODO
240 # remove style
241 # detach diacritics
242 # numbers