3 #=======================================================================
5 # File ID: e93feb18-5d3a-11df-bda7-90e6ba3022ac
7 # Converts from numeric entities in HTML/SGML (&#9786; and &#x263A;) to
10 # Character set: UTF-8
11 # ©opyleft 2001– Øyvind A. Holm <sunny@sunbase.org>
12 # License: GNU General Public License version 2 or later, see end of
13 # file for legal stuff.
14 #=======================================================================
# Reduce $progname to its last path component by dropping everything up to
# and including the final "/".
34 $progname =~ s/^.*\/(.*?)$/$1/;
35 our $VERSION = '0.1.0';
# "bundling" lets single-letter switches be combined on the command line
# (for example -qv).
37 Getopt
::Long
::Configure
('bundling');
# Option table: each spec stores its parsed value in the matching %Opt slot.
# The "+" suffix on quiet/verbose turns them into counters that grow with
# every occurrence of the switch.
# NOTE(review): the opening line of the call receiving this list is outside
# this excerpt -- presumably GetOptions(); confirm.
40 'help|h' => \
$Opt{'help'},
41 'invalid|i' => \
$Opt{'invalid'},
42 'latin1|l' => \
$Opt{'latin1'},
43 'quiet|q+' => \
$Opt{'quiet'},
44 'verbose|v+' => \
$Opt{'verbose'},
45 'version' => \
$Opt{'version'},
# A false result from the call closed here aborts with a hint to use -h.
47 ) || die("$progname: Option error. Use -h for help.\n");
# Each -q cancels one -v, leaving a single net verbosity level in
# $Opt{'verbose'}.
49 $Opt{'verbose'} -= $Opt{'quiet'};
# When help was requested, usage() is called with 0 (presumably the exit
# status -- confirm against usage()).
50 $Opt{'help'} && usage
(0);
51 if ($Opt{'version'}) {
# All three substitutions act on $_ and replace every match with the string
# returned by widechar().
# Only with --latin1: re-encode each single character in the 0x80-0xFF range,
# passing its ordinal value. This runs before the entity passes.
63 $Opt{'latin1'} && s/([\x80-\xFF])/widechar(ord($1))/ge;
# Decimal entities: "&#" + 1 to 10 digits + ";". The captured digit string is
# passed on unchanged, so it may carry leading zeros.
64 s/&#(\d{1,10});/widechar($1)/ge;
# Hexadecimal entities: "&#x" + 1 to 8 hex digits + ";", matched
# case-insensitively and converted to a number with hex().
65 s/&#x([0-9a-f]{1,8});/widechar(hex($1))/gei;
# Visible part of the code point -> byte sequence encoder. $Val is the code
# point; the result is the sprintf()-built string of 1 to 6 characters, each
# holding one byte value of the encoded form.
# NOTE(review): the sub header and the condition guarding this first return
# are outside this excerpt -- presumably the single-byte case; confirm.
77 return sprintf("%c", $Val);
# Two-byte form for values below 0x800: 110xxxxx 10xxxxxx.
78 } elsif ($Val < 0x800) {
79 return sprintf("%c%c", 0xC0 | ($Val >> 6),
80 0x80 | ($Val & 0x3F));
# Three-byte form for values below 0x10000: 1110xxxx 10xxxxxx 10xxxxxx.
81 } elsif ($Val < 0x10000) {
# Unless --invalid was given, single out the surrogate range U+D800-U+DFFF
# and the values U+FFFE and U+FFFF.
82 unless ($Opt{'invalid'}) {
# The comparisons must be numeric (==), not string (eq): $Val can be the raw
# digit string captured from a decimal entity, and with a leading zero
# (e.g. "&#065534;") a string comparison against 0xFFFE would never match,
# letting the value slip through this check.
83 if (($Val >= 0xD800 && $Val <= 0xDFFF) || ($Val == 0xFFFE) || ($Val == 0xFFFF)) {
87 return sprintf("%c%c%c", 0xE0 | ($Val >> 12),
88 0x80 | (($Val >> 6) & 0x3F),
89 0x80 | ($Val & 0x3F));
# Four-byte form for values below 0x200000: 11110xxx followed by three
# continuation bytes.
90 } elsif ($Val < 0x200000) {
91 return sprintf("%c%c%c%c", 0xF0 | ($Val >> 18),
92 0x80 | (($Val >> 12) & 0x3F),
93 0x80 | (($Val >> 6) & 0x3F),
94 0x80 | ($Val & 0x3F));
# Five-byte form for values below 0x4000000: 111110xx followed by four
# continuation bytes.
95 } elsif ($Val < 0x4000000) {
96 return sprintf("%c%c%c%c%c", 0xF8 | ($Val >> 24),
97 0x80 | (($Val >> 18) & 0x3F),
98 0x80 | (($Val >> 12) & 0x3F),
99 0x80 | (($Val >> 6) & 0x3F),
100 0x80 | ( $Val & 0x3F));
# Six-byte form for values below 0x80000000: 1111110x followed by five
# continuation bytes.
101 } elsif ($Val < 0x80000000) {
102 return sprintf("%c%c%c%c%c%c", 0xFC | ($Val >> 30),
103 0x80 | (($Val >> 24) & 0x3F),
104 0x80 | (($Val >> 18) & 0x3F),
105 0x80 | (($Val >> 12) & 0x3F),
106 0x80 | (($Val >> 6) & 0x3F),
107 0x80 | ( $Val & 0x3F));
# Fallback: return the encoding of U+FFFD instead of the requested value.
# NOTE(review): the line introducing this branch is outside this excerpt --
# presumably the final "else" for values of 0x80000000 and above; confirm.
109 return widechar
(0xFFFD);
115 # Print program version {{{
116 print("$progname $VERSION\n");
122 # Send the help message to stdout {{{
125 if ($Opt{'verbose'}) {
131 Converts from numeric entities in HTML/SGML (☺ and ☺) to
134 Usage: $progname [options] [file [files [...]]]
141 Allow invalid character range U+D800 through U+DFFF, U+FFFE and
144 Also convert Latin-1 characters.
146 Be more quiet. Can be repeated to increase silence.
148 Increase level of verbosity. Can be repeated.
150 Print version information.
158 # Print a status message to stderr based on verbosity level {{{
# Arguments: the minimum verbosity level at which the message is shown,
# followed by the message text.
159 my ($verbose_level, $Txt) = @_;
# Emit only when the current verbosity is at least the requested level; the
# line written to STDERR is prefixed with the program name.
161 if ($Opt{'verbose'} >= $verbose_level) {
162 print(STDERR
"$progname: $Txt\n");
170 # This program is free software; you can redistribute it and/or modify
171 # it under the terms of the GNU General Public License as published by
172 # the Free Software Foundation; either version 2 of the License, or (at
173 # your option) any later version.
175 # This program is distributed in the hope that it will be useful, but
176 # WITHOUT ANY WARRANTY; without even the implied warranty of
177 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
178 # See the GNU General Public License for more details.
180 # You should have received a copy of the GNU General Public License
181 # along with this program.
182 # If not, see L<http://www.gnu.org/licenses/>.
184 # vim: set fenc=UTF-8 ft=perl fdm=marker ts=4 sw=4 sts=4 et fo+=w :