New commit in Git/spar/
[sunny256-utils.git] / uj
blobc7396a9c3a66c2874d3dd087f2334d64324d881b
#!/usr/bin/env perl

# uj
# File ID: f8d8d376-5d46-11df-8ae0-90e6ba3022ac
# Repeats editing of files that contain illegal UTF-8.
#
# Every file named on the command line is opened in $EDITOR. When the editor
# exits, each file is checked with file_legal(); files that still contain
# illegal UTF-8 are handed to the editor again, until none are left.

use strict;
use warnings;

# Files to open in the next editor run; starts as the command-line arguments.
my @err_list = @ARGV;
# Files that failed the check after the most recent editor run.
my @new_list = ();
# Shared flag, set by file_legal() and decode_char() when an illegal
# sequence has been seen.
my $error_found = 0;
my $Editor = "";

if (defined($ENV{EDITOR})) {
    $Editor = $ENV{EDITOR};
} else {
    warn("\$EDITOR er ikke definert, setter den til \"vim\"\n");
    $Editor = "vim";
}

while (scalar(@err_list)) {
    if (scalar(@new_list)) {
        # Not the first round: tell the user which files are still bad and
        # give a chance to abort before the editor is started again.
        print("Ulovlige sekvenser ute og går. Gjelder ", join(" ", @new_list), "\nTrykk ENTER eller CTRL-C...");
        getc(STDIN);
    }

    # $EDITOR may itself contain options, so it is still expanded by the
    # shell. The file names, however, are passed as positional parameters
    # ("$@") instead of being pasted into the command string, so names with
    # spaces or shell metacharacters reach the editor unchanged and are never
    # interpreted by the shell.
    my $Status = system("/bin/sh", "-c", "$Editor \"\$@\"", "uj", @err_list);
    if ($Status == -1) {
        warn("$Editor: $!\n");
    }

    @new_list = ();
    for my $File (@err_list) {
        if (!file_legal($File)) {
            push(@new_list, $File);
        }
    }
    @err_list = @new_list;
}

sub file_legal {
    # {{{
    # Check whether a file consists solely of legal UTF-8.
    #
    # Parameter: the name of the file to check.
    # Returns:   true if no illegal sequence was found, false otherwise.
    #            A file that cannot be opened produces a warning and is
    #            reported as legal, so the caller does not keep retrying it.
    #
    # Uses the file-level $error_found flag, which decode_char() sets when a
    # structurally complete sequence turns out to be illegal. The flag is
    # reset to 0 both on entry and before returning.
    my $File = shift;

    $error_found = 0;
    # Three-argument open: the file name can never be taken for an open mode
    # or a pipe. ":raw" makes sure bytes are read, whatever default layers
    # are in effect, since the patterns below work on raw bytes.
    if (open(my $Fh, '<:raw', $File)) {
        # defined() keeps a final line consisting of "0" from ending the scan.
        while (!$error_found && defined(my $Line = <$Fh>)) {
            # Strip every structurally complete multi-byte sequence, longest
            # form first, letting decode_char() judge the decoded value.
            # Each sequence is replaced by an ASCII placeholder rather than
            # deleted, so the bytes on either side of it cannot join up into
            # a new sequence that a later, shorter pass would accept.
            $Line =~ s{([\xFC-\xFD][\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF])}{ decode_char($1); '?' }ge;
            $Line =~ s{([\xF8-\xFB][\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF])}{ decode_char($1); '?' }ge;
            $Line =~ s{([\xF0-\xF7][\x80-\xBF][\x80-\xBF][\x80-\xBF])}{ decode_char($1); '?' }ge;
            $Line =~ s{([\xE0-\xEF][\x80-\xBF][\x80-\xBF])}{ decode_char($1); '?' }ge;
            $Line =~ s{([\xC0-\xDF][\x80-\xBF])}{ decode_char($1); '?' }ge;
            # Any high byte still present is a stray lead or continuation
            # byte, or 0xFE/0xFF, which never occur in UTF-8.
            if ($Line =~ /[\x80-\xFF]/) {
                $error_found = 1;
            }
        }
        close($Fh);
    } else {
        warn("$File: $!\n");
    }
    my $Retval = !$error_found;
    $error_found = 0;
    return($Retval);
    # }}}
} # file_legal()

sub decode_char {
    # {{{
    # Decode one multi-byte UTF-8 sequence and flag it if it is illegal.
    #
    # Parameter: a byte string starting with a 2- to 6-byte sequence (lead
    #            byte 0xC0-0xFD followed by continuation bytes 0x80-0xBF).
    # Returns:   always the empty string; the verdict is delivered through
    #            the file-level $error_found flag, which is set to 1 when the
    #            sequence is an overlong encoding, a UTF-16 surrogate
    #            (U+D800-U+DFFF), or U+FFFE / U+FFFF. The flag is never
    #            cleared here.
    #
    # NOTE(review): lead bytes 0xF5-0xFD (values above U+10FFFF, 5- and
    # 6-byte forms) are accepted, as in the original UTF-8 definition;
    # RFC 3629 no longer allows them. Confirm whether that is intended.
    my $Msg = shift;
    # Stays undefined unless a well-formed, non-overlong sequence is decoded,
    # so the range test at the end never compares a non-number.
    my $Val;

    if ($Msg =~ /^([\xC0-\xDF])([\x80-\xBF])/) {
        if ($Msg =~ /^[\xC0-\xC1]/) {
            # Overlong: the value would fit in a single byte.
            $error_found = 1;
        } else {
            $Val = ((ord($1) & 0x1F) << 6) | (ord($2) & 0x3F);
        }
    } elsif ($Msg =~ /^([\xE0-\xEF])([\x80-\xBF])([\x80-\xBF])/) {
        if ($Msg =~ /^\xE0[\x80-\x9F]/) {
            # Overlong: the value would fit in two bytes.
            $error_found = 1;
        } else {
            $Val = ((ord($1) & 0x0F) << 12) |
                   ((ord($2) & 0x3F) <<  6) |
                   ( ord($3) & 0x3F);
        }
    } elsif ($Msg =~ /^([\xF0-\xF7])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])/) {
        if ($Msg =~ /^\xF0[\x80-\x8F]/) {
            # Overlong: the value would fit in three bytes.
            $error_found = 1;
        } else {
            $Val = ((ord($1) & 0x07) << 18) |
                   ((ord($2) & 0x3F) << 12) |
                   ((ord($3) & 0x3F) <<  6) |
                   ( ord($4) & 0x3F);
        }
    } elsif ($Msg =~ /^([\xF8-\xFB])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])/) {
        if ($Msg =~ /^\xF8[\x80-\x87]/) {
            # Overlong: the value would fit in four bytes.
            $error_found = 1;
        } else {
            $Val = ((ord($1) & 0x03) << 24) |
                   ((ord($2) & 0x3F) << 18) |
                   ((ord($3) & 0x3F) << 12) |
                   ((ord($4) & 0x3F) <<  6) |
                   ( ord($5) & 0x3F);
        }
    } elsif ($Msg =~ /^([\xFC-\xFD])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])/) {
        if ($Msg =~ /^\xFC[\x80-\x83]/) {
            # Overlong: the value would fit in five bytes.
            $error_found = 1;
        } else {
            $Val = ((ord($1) & 0x01) << 30) |
                   ((ord($2) & 0x3F) << 24) |
                   ((ord($3) & 0x3F) << 18) |
                   ((ord($4) & 0x3F) << 12) |
                   ((ord($5) & 0x3F) <<  6) |
                   ( ord($6) & 0x3F);
        }
    }

    # Surrogates and the two non-characters U+FFFE / U+FFFF are illegal even
    # when correctly encoded.
    if (defined($Val)
        && (($Val >= 0xD800 && $Val <= 0xDFFF) || $Val == 0xFFFE || $Val == 0xFFFF)) {
        $error_found = 1;
    }
    return('');
    # }}}
} # decode_char()

127 __END__
129 # End of file uj