clipboard on new cygwin
[sunny256-utils.git] / h2u
blobb08e3fbfc20965f52705b3b6b6a9f3604ec53c37
1 #!/usr/bin/env perl
3 #=======================================================================
4 # h2u
5 # File ID: e93feb18-5d3a-11df-bda7-90e6ba3022ac
7 # Converts from numeric entities in HTML/SGML (&#x263A; and &#9786;) to
8 # UTF-8.
10 # Character set: UTF-8
11 # ©opyleft 2001– Øyvind A. Holm <sunny@sunbase.org>
12 # License: GNU General Public License version 2 or later, see end of
13 # file for legal stuff.
14 #=======================================================================
16 use strict;
17 use warnings;
18 use Getopt::Long;
# Unbuffered stdout, so converted text appears as soon as it is printed.
local $| = 1;

# Command-line option values; every switch defaults to "off".
our %Opt = (

    'help' => 0,
    'invalid' => 0,
    'latin1' => 0,
    'quiet' => 0,
    'verbose' => 0,
    'version' => 0,

);

# Program name without any leading directory part, used in messages.
our $progname = $0;
$progname =~ s/^.*\/(.*?)$/$1/;
our $VERSION = '0.1.0';

# 'bundling' lets single-letter switches be combined, e.g. -il or -vv.
Getopt::Long::Configure('bundling');
GetOptions(

    'help|h' => \$Opt{'help'},
    'invalid|i' => \$Opt{'invalid'},
    'latin1|l' => \$Opt{'latin1'},
    'quiet|q+' => \$Opt{'quiet'},
    'verbose|v+' => \$Opt{'verbose'},
    'version' => \$Opt{'version'},

) || die("$progname: Option error. Use -h for help.\n");

# Each -q cancels one -v; the net level may end up negative.
$Opt{'verbose'} -= $Opt{'quiet'};
$Opt{'help'} && usage(0);
if ($Opt{'version'}) {
    print_version();
    exit(0);
}

exit(main());
sub main {
    # Convert numeric entities on every input line and print the result {{{
    #
    # Input comes from the files named on the command line, or from
    # stdin when none are given (the <> operator). Returns the exit
    # status for the program, which is always 0 here.
    my $Retval = 0;

    while (<>) {
        # With --latin1, re-encode raw bytes 0x80-0xFF first. This has
        # to run before the entity substitutions, otherwise the UTF-8
        # bytes they produce would be encoded a second time.
        $Opt{'latin1'} && s/([\x80-\xFF])/widechar(ord($1))/ge;
        # Decimal entities: &#9786;
        s/&#(\d{1,10});/widechar($1)/ge;
        # Hexadecimal entities, case-insensitive: &#x263A; or &#X263a;
        s/&#x([0-9a-f]{1,8});/widechar(hex($1))/gei;
        print;
    }

    return $Retval;
    # }}}
} # main()
sub widechar {
    # Return the UTF-8 byte sequence for a code point {{{
    #
    # $Val is the code point, either a number or a string of decimal
    # digits (leading zeros are fine, e.g. "0065534" from "&#0065534;").
    #
    # Unless $Opt{'invalid'} is set, the surrogate range U+D800 through
    # U+DFFF and the non-characters U+FFFE and U+FFFF are replaced with
    # U+FFFD. Values of 0x80000000 and above always become U+FFFD.
    #
    # The five- and six-byte forms follow the original UTF-8 definition
    # (RFC 2279); RFC 3629 limits UTF-8 to U+10FFFF and four bytes.
    my $Val = shift;

    if ($Val < 0x80) {
        return sprintf("%c", $Val);
    } elsif ($Val < 0x800) {
        return sprintf("%c%c", 0xC0 | ($Val >> 6),
                               0x80 | ($Val & 0x3F));
    } elsif ($Val < 0x10000) {
        unless ($Opt{'invalid'}) {
            # Numeric comparison on purpose: with string "eq", a value
            # such as "0065534" would not match 65534 and the
            # non-character would slip through unreplaced.
            if (($Val >= 0xD800 && $Val <= 0xDFFF)
                || ($Val == 0xFFFE) || ($Val == 0xFFFF)) {
                $Val = 0xFFFD;
            }
        }
        return sprintf("%c%c%c", 0xE0 | ($Val >> 12),
                                 0x80 | (($Val >> 6) & 0x3F),
                                 0x80 | ($Val & 0x3F));
    } elsif ($Val < 0x200000) {
        return sprintf("%c%c%c%c", 0xF0 | ($Val >> 18),
                                   0x80 | (($Val >> 12) & 0x3F),
                                   0x80 | (($Val >> 6) & 0x3F),
                                   0x80 | ($Val & 0x3F));
    } elsif ($Val < 0x4000000) {
        return sprintf("%c%c%c%c%c", 0xF8 | ($Val >> 24),
                                     0x80 | (($Val >> 18) & 0x3F),
                                     0x80 | (($Val >> 12) & 0x3F),
                                     0x80 | (($Val >> 6) & 0x3F),
                                     0x80 | ( $Val & 0x3F));
    } elsif ($Val < 0x80000000) {
        return sprintf("%c%c%c%c%c%c", 0xFC | ($Val >> 30),
                                       0x80 | (($Val >> 24) & 0x3F),
                                       0x80 | (($Val >> 18) & 0x3F),
                                       0x80 | (($Val >> 12) & 0x3F),
                                       0x80 | (($Val >> 6) & 0x3F),
                                       0x80 | ( $Val & 0x3F));
    } else {
        # Too large to encode at all; emit the replacement character.
        return widechar(0xFFFD);
    }
    # }}}
} # widechar()
sub print_version {
    # Write the program name and version number to stdout {{{
    #
    # The line has the form "<progname> <VERSION>" and ends with a
    # newline. Returns nothing.
    my $banner = $progname . ' ' . $VERSION . "\n";
    print($banner);
    return;
    # }}}
} # print_version()
sub usage {
    # Send the help message to stdout and terminate the program {{{
    #
    # $Retval is the exit status handed to exit(); this sub never
    # returns. When the net verbosity level is non-zero, a blank line
    # and the version line are printed before the help text.
    my $Retval = shift;

    if ($Opt{'verbose'}) {
        print("\n");
        print_version();
    }
    # Interpolating heredoc: $progname is expanded in the Usage line.
    print(<<"END");

Converts from numeric entities in HTML/SGML (&#x263A; and &#9786;) to
UTF-8.

Usage: $progname [options] [file [files [...]]]

Options:

  -h, --help
    Show this help.
  -i, --invalid
    Allow invalid character range U+D800 through U+DFFF, U+FFFE and
    U+FFFF.
  -l, --latin1
    Also convert Latin-1 characters.
  -q, --quiet
    Be more quiet. Can be repeated to increase silence.
  -v, --verbose
    Increase level of verbosity. Can be repeated.
  --version
    Print version information.

END
    exit($Retval);
    # }}}
} # usage()
sub msg {
    # Print a status message to stderr based on verbosity level {{{
    #
    # $verbose_level is the minimum net verbosity ($Opt{'verbose'},
    # i.e. -v count minus -q count) at which the message is shown.
    # $Txt is the message text; it is prefixed with the program name
    # and terminated with a newline. Returns nothing.
    my ($verbose_level, $Txt) = @_;

    if ($Opt{'verbose'} >= $verbose_level) {
        print(STDERR "$progname: $Txt\n");
    }
    return;
    # }}}
} # msg()
168 __END__
170 # This program is free software; you can redistribute it and/or modify
171 # it under the terms of the GNU General Public License as published by
172 # the Free Software Foundation; either version 2 of the License, or (at
173 # your option) any later version.
175 # This program is distributed in the hope that it will be useful, but
176 # WITHOUT ANY WARRANTY; without even the implied warranty of
177 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
178 # See the GNU General Public License for more details.
180 # You should have received a copy of the GNU General Public License
181 # along with this program.
182 # If not, see L<http://www.gnu.org/licenses/>.
184 # vim: set fenc=UTF-8 ft=perl fdm=marker ts=4 sw=4 sts=4 et fo+=w :