4 # File ID: f8d8d376-5d46-11df-8ae0-90e6ba3022ac
5 # Gjentar editering av filer som inneholder ulovlig UTF-8
# Fragment of the top-level driver. This listing omits several original lines
# (closing braces, else branches, the command execution), so only the visible
# statements are described below.
#
# Use the editor named in the environment when $EDITOR is set.
15 if (defined($ENV{EDITOR
})) {
16 $Editor = $ENV{EDITOR
};
# Warning text (Norwegian): '$EDITOR is not defined, setting it to "vim"'.
# The fallback assignment itself is not visible here -- presumably adjacent.
18 warn("\$EDITOR er ikke definert, setter den til \"vim\"\n");
# Keep going for as long as the list of files to edit is non-empty.
22 while (scalar(@err_list)) {
# When @new_list is non-empty, print a prompt naming those files. Message
# (Norwegian): "Illegal sequences on the loose. Applies to <files>" followed by
# "Press ENTER or CTRL-C...". The code that waits for input is not visible.
23 if (scalar(@new_list)) {
24 print("Ulovlige sekvenser ute og går. Gjelder ", join(" ", @new_list), "\nTrykk ENTER eller CTRL-C...");
# Editor command line: the editor name and every file name joined by spaces.
# NOTE(review): how $Cmd is executed is not visible. If it is handed to a shell
# as a single string, file names containing spaces or shell metacharacters will
# be misparsed -- confirm, and consider the list form of system().
27 my $Cmd = join(" ", $Editor, @err_list);
# Disabled debug output ("Kjører" = "Running", "Starter med" = "Starting with").
28 # print("Kjører \"$Cmd\"\n");
31 # print("Starter med ", join(" ", @err_list), "\n");
# Check every file in the current list with file_legal().
32 for my $File (@err_list) {
33 if (file_legal
($File)) {
34 # print("OK: $File\n");
36 # print("FEIL: $File\n");
# NOTE(review): the branch structure around this push is not visible. Judging
# by the disabled "FEIL" (= "ERROR") debug print just above, it presumably runs
# when file_legal() returned false -- confirm. Where @new_list is emptied
# between passes is also not visible.
37 push(@new_list, $File);
# The files collected in @new_list become the work list for the next pass.
41 @err_list = @new_list;
# Interior fragment of file_legal() (name taken from the disabled debug print
# below). The sub header, the declarations of $File and $error_found, and the
# return statement are not visible in this listing.
#
# Visible logic: read the file line by line, replace every byte run that has
# the shape of a UTF-8 multi-byte sequence with the result of decode_char(),
# and flag an error if any high-bit byte is left over afterwards.
48 # print("file_legal(\"$File\")\n");
# NOTE(review): two-argument open on a bareword handle. A file name that starts
# with '>' or '|', or ends with '|', is interpreted as a mode or command. The
# three-argument form with a lexical handle avoids that.
50 if (open(FP
, $File)) {
# Stop reading as soon as an error has been flagged.
51 while (!$error_found && ($_ = <FP
>)) {
# Longest forms first (6, 5, 4, 3, then 2 bytes), so a long sequence is not
# consumed piecemeal by a shorter pattern. Each match is replaced by whatever
# decode_char() returns; that return value is not visible in this listing.
52 s/([\xFC-\xFD][\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF])/decode_char($1)/ge;
53 s/([\xF8-\xFB][\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF])/decode_char($1)/ge;
54 s/([\xF0-\xF7][\x80-\xBF][\x80-\xBF][\x80-\xBF])/decode_char($1)/ge;
55 s/([\xE0-\xEF][\x80-\xBF][\x80-\xBF])/decode_char($1)/ge;
56 s/([\xC0-\xDF][\x80-\xBF])/decode_char($1)/ge;
# Any byte in 0x80-0xFD still present was not part of a complete sequence.
# NOTE(review): 0xFE and 0xFF are outside this class and are not matched by any
# pattern above, so a line whose only bad bytes are 0xFE/0xFF is not flagged by
# the visible code. Those bytes never occur in valid UTF-8 -- confirm whether
# the class should extend to \xFF.
57 /[\x80-\xFD]/ && ($error_found = 1);
# True when no error was flagged. Presumably this is the value returned to the
# caller; the return statement is not visible.
63 my $Retval = !$error_found;
# Interior fragment of decode_char() (name taken from the disabled debug print
# near the end). The sub header, the declarations of $Msg and $Val, the bodies
# of the overlong-form branches, the last line of each multi-line expression
# and the return statement are not visible in this listing.
#
# Visible logic: classify $Msg by its lead byte as a 2- to 6-byte sequence,
# test for an overlong encoding, otherwise assemble the code point in $Val from
# the payload bits of each byte, then range-check the result.
#
# NOTE(review): the 5- and 6-byte forms and the lead bytes F5-F7 are accepted
# here. RFC 3629 limits UTF-8 to four bytes and U+10FFFF; whether such input
# should be rejected is a policy decision -- confirm.
#
# 2-byte form: lead byte C0-DF plus one continuation byte.
73 if ($Msg =~ /^([\xC0-\xDF])([\x80-\xBF])/) {
# Lead bytes C0 and C1 can only encode values below 0x80 (overlong form).
# $1/$2 from the match above stay valid when this capture-less match fails.
74 if ($Msg =~ /^[\xC0-\xC1]/) {
# 5 payload bits from the lead byte, 6 from the continuation byte.
77 $Val = ((ord($1) & 0x1F) << 6) | (ord($2) & 0x3F);
# 3-byte form: lead byte E0-EF plus two continuation bytes.
79 } elsif ($Msg =~ /^([\xE0-\xEF])([\x80-\xBF])([\x80-\xBF])/) {
# E0 followed by 80-9F encodes a value below 0x800 (overlong form).
80 if ($Msg =~ /^\xE0[\x80-\x9F]/) {
# 4 payload bits from the lead byte; the final operand is not shown.
83 $Val = ((ord($1) & 0x0F) << 12) |
84 ((ord($2) & 0x3F) << 6) |
# 4-byte form: lead byte F0-F7 plus three continuation bytes.
87 } elsif ($Msg =~ /^([\xF0-\xF7])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])/) {
# F0 followed by 80-8F encodes a value below 0x10000 (overlong form).
88 if ($Msg =~ /^\xF0[\x80-\x8F]/) {
# 3 payload bits from the lead byte; the final operand is not shown.
91 $Val = ((ord($1) & 0x07) << 18) |
92 ((ord($2) & 0x3F) << 12) |
93 ((ord($3) & 0x3F) << 6) |
# 5-byte form: lead byte F8-FB plus four continuation bytes.
96 } elsif ($Msg =~ /^([\xF8-\xFB])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])/) {
# F8 followed by 80-87 encodes a value below 0x200000 (overlong form).
97 if ($Msg =~ /^\xF8[\x80-\x87]/) {
# 2 payload bits from the lead byte; the final operand is not shown.
100 $Val = ((ord($1) & 0x03) << 24) |
101 ((ord($2) & 0x3F) << 18) |
102 ((ord($3) & 0x3F) << 12) |
103 ((ord($4) & 0x3F) << 6) |
# 6-byte form: lead byte FC-FD plus five continuation bytes.
106 } elsif ($Msg =~ /^([\xFC-\xFD])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])/) {
# FC followed by 80-83 encodes a value below 0x4000000 (overlong form).
107 if ($Msg =~ /^\xFC[\x80-\x83]/) {
# 1 payload bit from the lead byte; the final operand is not shown.
110 $Val = ((ord($1) & 0x01) << 30) |
111 ((ord($2) & 0x3F) << 24) |
112 ((ord($3) & 0x3F) << 18) |
113 ((ord($4) & 0x3F) << 12) |
114 ((ord($5) & 0x3F) << 6) |
118 # printf("Val = 0x%X\n", $Val);
# Surrogates (D800-DFFF) and the values FFFE/FFFF get special treatment; the
# branch body is not visible, presumably it flags an error.
# NOTE(review): 'eq' compares as strings. With an integer $Val both sides
# stringify to the same decimal text, so it works, but '==' is the conventional
# numeric comparison.
119 if (($Val >= 0xD800 && $Val <= 0xDFFF) || ($Val eq 0xFFFE) || ($Val eq 0xFFFF)) {
# Disabled debug output (Norwegian: "error_found set to ... in decode_char()")
# and a disabled return of the error flag.
122 # print("error_found satt til \"$error_found\" i decode_char()\n");
123 # return ($error_found);