4 use CGI
::Carp
qw(fatalsToBrowser);
6 if (param
('separator') eq 'UseMod 0.92' or param
('separator') eq 'UseMod 1.00') {
8 } elsif (param
('separator') eq 'UseMod 1.00 with $NewFS set') {
9 $FS = "\x1e\xff\xfe\x1e";
16 # override $FS if you want!
18 print header
() . start_html
('Upgrading Files'), p
;
19 print q{Upgrade version: $Id: upgrade-files.pl,v 1.16 2010/11/06 11:51:18 as Exp $}, "\n";
20 if (not param('dir')) {
21 print start_form, p, '$DataDir: ', textfield('dir', '/tmp/oddmuse'),
22 p, radio_group('separator', ['Oddmuse', 'UseMod 0.92', 'UseMod 1.00',
23 'UseMod 1.00 with $NewFS set']),
24 p, checkbox('convert', 'checked', 'on', 'Convert Latin-1 to UTF-8'),
25 p, submit('Ok'), "\n", end_form;
26 } elsif (param('dir') and not param('sure')) {
27 print start_form, hidden('sure', 'yes'), hidden('dir', param('dir')),
28 hidden('separator', param('separator')), hidden('convert', param('convert')),
29 p, '$DataDir: ', param('dir'),
30 p, 'separator used when reading pages: ',
31 join(', ', map { sprintf('0x%x', ord($_)) } split (//, $FS)),
32 p, 'separator used when writing pages: ',
33 join(', ', map { sprintf('0x%x', ord($_)) } split (//, $NewFS)),
34 p, 'Convert Latin-1 to UTF-8: ', param('convert') ? 'Yes' : 'No',
35 p, submit('Confirm'), "\n", end_form;
37 rewrite(param('dir'));
46 my @files = glob("$directory/page/*/*.db");
48 print "$directory does not seem to be a data directory.\n";
52 foreach my $file (@files) {
53 print "Reading page $file...\n";
54 my %page = split(/$FS1/, read_file($file), -1);
55 %section = split(/$FS2/, $page{text_default}, -1);
56 %text = split(/$FS3/, $section{data}, -1);
57 $file =~ s/\.db$/.pg/ or die "Invalid page name\n";
58 print "Writing $file...\n";
59 write_page_file($file);
62 @files = glob("$directory/referer/*/*.rb");
64 foreach my $file (@files) {
65 print "Reading refer $file...\n";
66 my $data = read_file($file);
67 $data =~ s/$FS1/$NewFS/g;
68 $file =~ s/\.rb$/.rf/ or die "Invalid page name\n";
69 print "Writing $file...\n";
70 write_file($file, $data);
73 @files = glob("$directory/keep/*/*.kp");
74 foreach my $file (@files) {
76 print "Reading keep $file...\n";
77 my $data = read_file($file);
78 my @list = split(/$FS1/, $data);
80 $out =~ s/\.kp$// or die "Invalid keep name\n";
81 # We introduce a new variable $dir, here, instead of using $out,
82 # because $out will be part of the filename later on, and the
83 # filename will be converted in write_file. To convert $out to
84 # utf8 would double-encode the directory part of the filename.
85 my $dir = param('convert') ? utf8($out) : $out;
86 print "Creating $out...\n";
87 mkdir($dir) or die "Cannot create directory $dir\n" unless -d $dir;
88 foreach my $keep (@list) {
90 %section = split(/$FS2/, $keep, -1);
91 %text = split(/$FS3/, $section{data}, -1);
92 my $current = "$out/$section{'revision'}.kp";
93 print "Writing $current...\n";
94 write_keep_file($current);
98 @files = glob("$directory/*rclog");
100 foreach my $file (@files) {
101 print "Reading $file...\n";
102 my $data = read_file($file);
103 @rc = split(/\n/, $data);
105 my ($ts, $pagename, $summary, $minor, $host, $kind, $extraTemp)
107 my %extra = split(/$FS2/, $extraTemp, -1);
108 foreach ('name', 'revision', 'languages', 'cluster') {
109 $extra{$_} = '' unless $extra{$_};
111 $extra{languages} =~ s/$FS1/,/g;
112 $_ = join($NewFS, $ts, $pagename, $minor, $summary, $host,
113 $extra{name}, $extra{revision}, $extra{languages}, $extra{cluster});
115 $data = join("\n", @rc) . "\n";
116 $file =~ s/log$/.log/;
117 print "Writing $file...\n";
118 write_file($file, $data);
127 local $/ = undef; # Read complete files
128 open(F, "<$filename") or die "can't read $filename: $!";
135 my ($filename, $data) = @_;
136 if (param('convert')) {
137 $filename = utf8($filename);
140 open(F, ">$filename") or die "can't write $filename: $!";
148 my ($block, $flag) = split(/$FS2/, $_);
149 my @blocks = split(/$FS3/, $block);
150 my @flags = split(/$FS3/, $flag);
151 return 'blocks: ' . escape_newlines(join($NewFS, @blocks)) . "\n"
152 . 'flags: ' . escape_newlines(join($NewFS, @flags)) . "\n";
155 sub escape_newlines {
157 $_ =~ s/\n/\n\t/g if $_;
161 # Skip the info encoded in the filename (page name). We need the info
162 # stored in the rclog (summary, ip, host, username) for the history
163 # page. Don't trust the modification dates of the files themselves,
164 # which is why we have the timestamp in the file, too. We need the
165 # timestamp when expiring old keep files. We need all the info in the
166 # page file that will eventually end up in the keep file.
169 my $data = 'ts: ' . $section{ts} . "\n" if $section{ts};
170 $data .= 'keep-ts: ' . $section{keepts} . "\n" if $section{keepts};
171 $data .= 'revision: ' . $section{revision} . "\n" if $section{revision};
172 $data .= 'summary: ' . $section{summary} . "\n" if $section{summary};
173 $data .= 'summary: ' . $text{summary} . "\n" if $text{summary} and not $section{summary};
174 $data .= 'username: ' . $section{username} . "\n" if $section{username};
175 $data .= 'ip: ' . $section{ip} . "\n" if $section{ip};
176 $data .= 'host: ' . $section{host} . "\n" if $section{host};
177 $data .= 'minor: ' . $text{minor} . "\n" if $text{minor};
178 # $data .= 'oldmajor: ' . $page{cache_oldmajor} . "\n" if $page{cache_oldmajor};
179 $data .= 'text: ' . escape_newlines($text{text}) . "\n";
183 sub write_page_file {
185 my $data = basic_data();
186 $data .= cache($page{cache_blocks});
187 $data .= 'diff-major: ' . escape_newlines($page{cache_diff_default_major}) . "\n"
188 if $page{cache_diff_default_major};
189 $data .= 'diff-minor: ' . escape_newlines($page{cache_diff_default_minor}) . "\n"
190 if $page{cache_diff_default_minor};
191 write_file($file, $data);
194 sub write_keep_file {
196 my $data = basic_data();
197 write_file($file, $data);
201 # This Latin-1 to UTF-8 conversion was written by Skalman on the
202 # Oddmuse Wiki. He says: I added a quick, dirty and completely
203 # unreadable hack to convert all characters above 0x7F:
205 # s/([\x80-\xff])/chr(0xc0+(ord($1)>>6)).chr(ord($1)&0b00111111|0b10000000)/ge;
207 # Reading the UTF-8 and Unicode FAQ, I convert every character to
208 # (binary) 110xxxxx 10xxxxxx where the 'x' marks the bits of the
209 # original ISO-8859-1 character. That is: take the two most
210 # significant bits of the caracter and add them to 0xC0 (first byte),
211 # then replace the first two bits with 10 (second byte).
215 s/([\x80-\xff])/chr(0xc0+(ord($1)>>6)).chr(ord($1)&0b00111111|0b10000000)/ge;