3 # Copyright (C) 2005, 2007 Alex Schroeder <alex@emacswiki.org>
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
# Scan $data for "key: value" records and store each one in %result.
# A value is matched non-greedily across newlines (/s) and ends at the next
# newline that is followed by something other than a space or tab, or at the
# end of the data, so lines starting with a space or tab continue the value.
# NOTE(review): the enclosing sub header, the declaration of %result and the
# closing braces are not visible in this extract -- confirm against the full file.
21 while ($data =~ /(\S+?): (.*?)(?=\n[^ \t]|\Z)/sg) {
22 my ($key, $value) = ($1, $2);
# Strip the tab that follows each embedded newline, keeping the newline itself.
23 $value =~ s/\n\t/\n/g;
24 $result{$key} = $value;
# Export loop. Arguments: a page-name regexp (the filter is skipped when it is
# false), the page directory to read from and the raw directory to write to.
# NOTE(review): several lines of this routine are not visible in this extract
# (its sub header, the assignments of $page and $data, the else branch and the
# closing braces) -- confirm the details below against the full file.
30 my ($regexp, $PageDir, $RawDir) = @_;
32 local $/ = undef; # Read complete files
# Two glob patterns: the second one picks up page files whose names start with a dot.
33 foreach my $file (glob("$PageDir/*/*.pg $PageDir/*/.*.pg")) {
# Only paths ending in .pg are handled; the capture is the file name without the extension.
34 next unless $file =~ m
|/.*/(.+)\
.pg
$|;
# Skip pages whose name does not match the regexp; /o compiles the pattern only once.
# Presumably $page holds the name captured above -- TODO confirm.
36 next if $regexp && $page !~ m
|$regexp|o
;
# Create the output directory, dying on failure.
# NOTE(review): the statement is cut off here (no terminating semicolon is visible);
# presumably a guard follows in the full file -- confirm.
37 mkdir($RawDir) or die "Cannot create $RawDir directory: $!"
# Open the page file through the bareword handle F (two-argument open).
39 open(F
, $file) or die "Cannot read $page file: $!";
# Element 9 of stat is the modification time; $ts is undefined when the raw file cannot be stat'ed.
42 my $ts = (stat("$RawDir/$page"))[9];
43 my %result = ParseData
($data);
# The raw file counts as up to date when its mtime equals the page's "ts" field.
44 if ($ts && $ts == $result{ts
}) {
45 print "skipping $page because it is up to date\n" if $verbose;
# Presumably the else branch: write the page's "text" field to the raw file.
47 print "writing $page because $ts != $result{ts}\n" if $verbose;
48 open(F
,"> $RawDir/$page") or die "Cannot write $page raw file: $!";
49 print F
$result{text
};
# Set both access and modification time to the page's "ts" so that the
# mtime comparison above can recognise an unchanged page on a later run.
51 utime $result{ts
}, $result{ts
}, "$RawDir/$page"; # touch file
# Parse the command line: --regexp takes a string value that is stored in $regexp.
# NOTE(review): the remaining option specifications and the closing parenthesis
# are not visible in this extract; GetOptions is presumably Getopt::Long's -- confirm.
60 GetOptions
("regexp=s" => \
$regexp,
67 Usage
: $0 [--regexp REGEXP
] [--page DIR
] [--dir DIR
]
69 Writes the raw wiki text into plain text files
.
71 --regexp selects a subset of pages whose names match the regular
72 expression
. Note that spaces have been translated to underscores
.
74 --page designates the page directory
. By
default this is
'page' in the
75 current directory
. If you run this script
in your data directory
,
76 the
default should be fine
.
78 --dir designates an output directory
. By
default this is
'raw' in the
81 Example
: $0 --regexp
'\\.el\$' --dir elisp
84 main
($regexp, $page, $dir); # run the export with the regexp filter, page directory and output directory