3 # $Id: theyoke 34 2007-02-18 18:50:19Z mackers $
4 # http://www.mackers.com/projects/theyoke/
6 # TheYoke is an ultra-simple RSS aggregator designed for use on the UNIX command line.
12 use Digest
::MD5
qw(md5_base64);
13 use Encode
qw(encode_utf8);
# Usage text; it is the die() message when command-line option parsing fails.
18 my($USAGE) = "Usage: $0: [[--debug]|[-d]]+ [--test] [--description] [--link] [--no-title] [--no-feedname] [[--version] [-V]] [[--columns=int] [-c=int]] [--numfeeds=number] [--onlyfeeds=regexp] [--reversetime] [feedurl]...\n";
# Program version: printed for --version and embedded in the $agent string below.
19 my $version = "1.23-baka";
# Per-user configuration directory under $HOME (note the trailing slash, which
# the two paths below rely on). NOTE(review): $ENV{'HOME'} is used unchecked;
# if HOME is unset this becomes "/.theyoke/".
20 my $config_dir = $ENV{'HOME'} . "/.theyoke/";
# Directory of per-feed state files; each file is named after the checksum of
# its feed URL.
21 my $feeds_dir = $config_dir . ".feeds/";
# Text file that is opened for reading to build the list of feed URLs.
22 my $feeds_file = $config_dir . "feeds";
# Identification string for HTTP requests. NOTE(review): presumably passed as
# the LWP::UserAgent agent; the trailing space looks deliberate (LWP appends
# its own library id to an agent string ending in a space) -- confirm.
23 my $agent = "TheYoke/$version (+http://www.mackers.com/projects/theyoke/) ";
# Configure Getopt::Long before parsing:
#   bundling         - single-letter options may be bundled (e.g. -dd)
#   no_ignore_case   - option names are case-sensitive (so -V is not -v)
#   no_auto_abbrev   - long options must be spelled out in full
#   no_getopt_compat - a leading "+" does not introduce an option
#   require_order    - parsing stops at the first non-option argument;
#                      presumably what is left in @ARGV are the feed URLs
29 Getopt
::Long
::Configure
("bundling", "no_ignore_case", "no_auto_abbrev", "no_getopt_compat", "require_order");
# Parse the command line into %OPTIONS and die with the usage text on error.
# 'debug|d+' and 'version|V+' are counters (each repetition increments the
# value; the script tests debug > 1 for extra verbosity). 'columns|c=i' and
# 'numfeeds=i' take integers, 'onlyfeeds=s' takes a string that is later used
# as a regular expression against each feed URL.
# NOTE(review): 'title' is accepted here but is not listed in $USAGE.
30 GetOptions
(\
%OPTIONS, 'debug|d+', 'test', 'description', 'link', 'title', 'no-title', 'no-feedname', 'version|V+', 'columns|c=i', 'numfeeds=i', 'onlyfeeds=s', 'reversetime') || die $USAGE;
34 if ($OPTIONS{'version'}) {
35 print "$0 version $version\n";
39 ### Check for files and dirs. If none, create ###
40 unless (-d
$config_dir || $#ARGV >= 0) {
41 mkdir ($config_dir) || die ("Couldn't create directory $config_dir");
43 unless (-d
$feeds_dir || $#ARGV >= 0) {
44 mkdir ($feeds_dir) || die ("Couldn't create directory $feeds_dir");
46 unless (-f
$feeds_file || $#ARGV >= 0) {
47 if (open(FEEDS
, ">> $feeds_file")) {
48 # TODO print feeds file content comments
55 ### Read feeds file ###
58 my $u1 = URI
->new($_);
59 push(@feed_urls, $u1);
62 if (open(FEEDS
, "< $feeds_file")) {
66 my $u1 = URI
->new($_);
67 push (@feed_urls, $u1);
70 print STDERR
"theyoke: could not open $feeds_file\n";
75 if (scalar(@feed_urls) == 0) {
76 print STDERR
"theyoke: no feeds found. please enter some URLs in $feeds_file (or as command line argument)\n";
79 ### Create new files if necessary
80 foreach my $feed_url (@feed_urls) {
81 $feed_url = $feed_url->as_string;
82 my $file_path = $feeds_dir . &get_checksum
($feed_url);
83 unless (-f
$file_path || $#ARGV >= 0) {
84 print STDERR
"theyoke: adding feed for $feed_url\n";
85 if (open (FEED
, "> $file_path")) {
86 print FEED
"$feed_url\n0\nno_digest_yet\nno_title_yet\nno_etag_yet\n";
89 print STDERR
"theyoke: couldn't write to $feed_url\n";
94 ### Create the user agent ###
95 my $ua = LWP
::UserAgent
->new(
102 ### For each feed file ###
104 my $dont_have_content = 1;
105 print STDERR
"theyoke: Syndicating first $OPTIONS{'numfeeds'} feeds.\n" if ($OPTIONS{'debug'} && defined($OPTIONS{'numfeeds'}));
106 foreach my $feed_url (@feed_urls) {
107 last if (defined($OPTIONS{'numfeeds'}) && $count++ == $OPTIONS{'numfeeds'});
109 if ($OPTIONS{'onlyfeeds'} && $feed_url !~ /$OPTIONS{'onlyfeeds'}/) {
110 print STDERR
"theyoke: Skipping... not in /$OPTIONS{'onlyfeeds'}/\n" if ($OPTIONS{'debug'});
115 my $file_digest = &get_checksum
($feed_url);
116 my $file_digest_path = $feeds_dir . $file_digest;
119 my $previous_content_digest = "no_digest_yet";
120 my $last_title = "no_title_yet";
122 my $etag = "no_etag_yet";
124 if (open(FEED
, "< $file_digest_path")) {
126 my $this_url = <FEED
>;
127 # 2nd line: last modified system time
130 # 3rd line: previous checksum for whole body
131 $previous_content_digest = <FEED
>;
132 chomp($previous_content_digest);
133 # 4th line: previous checksum for last known item
134 $last_title = <FEED
>;
140 unless (($previous_content_digest ne "") && ($last_title ne "") && ($etag ne "")) {
141 print STDERR
"theyoke: $file_digest_path is corrupt or you're using a new version of theyoke. will regenerate next time...\n";
142 unlink $file_digest_path;
146 die ("theyoke: couldn't open $file_digest_path");
150 ### send request to see if not modified
152 print STDERR
"theyoke: Getting \"$feed_url\" - " if ($OPTIONS{'debug'});
153 my $head = HTTP
::Headers
->new;
154 $head->if_modified_since($last_mod);
155 $head->push_header("If-None-Match" => $etag);
156 my $req = HTTP
::Request
->new("GET", $feed_url, $head);
157 print STDERR
$req->as_string if ($OPTIONS{'debug'} > 1);
158 my $resp = $ua->request($req);
160 if ($resp->code == 304) {
161 print STDERR
" got a 304, skipping\n" if ($OPTIONS{'debug'});
164 } elsif ($resp->is_success) {
165 print STDERR
" got " . $resp->code . "\n" if ($OPTIONS{'debug'});
166 $content = $resp->content();
169 print STDERR
"theyoke: \"$feed_url\": got " . $resp->code . ", skipping\n";
174 ### skip if checksums match (i.e. head lied - no new content)
175 my $new_last_title = "";
176 my $new_content_digest = &get_checksum
($content);
177 if ($new_content_digest eq $previous_content_digest) {
178 print STDERR
"theyoke: checksums match, skipping\n" if ($OPTIONS{'debug'});
181 ### new content - parse the rss
183 my $rss = new XML
::RSS
;
186 # XML::RSS seems to always throw a DIE
187 #local $SIG{__DIE__} = sub { print STDERR "theyoke: RSS parser error on \"$feed_url\".\n"; };
189 $rss->parse($content);
192 print STDERR
"theyoke: RSS parser error on \"$feed_url\": $@\n";
196 $rss_title = $rss->channel('title');
198 ### check for no items
199 if (@
{$rss->{'items'}} == 0) {
200 print STDERR
"theyoke: no RSS items found in \"$feed_url\". bad RSS?\n";
204 ### check for no title
205 if ($rss_title eq "") {
206 print STDERR
"theyoke: no channel title found for \"$feed_url\". bad RSS?\n";
210 ### look for new items
211 foreach my $item (@
{$rss->{'items'}}) {
212 my $this_description = $item->{'description'};
213 my $this_title = $item->{'title'};
214 my $this_link = $item->{'link'};
216 if ($this_title ne "") {
217 $wassname = $this_title;
218 } elsif ($this_description ne "") {
219 #$wassname = substr($this_description,0,30) . "...";
220 $wassname = $this_description;
221 } elsif ($this_link ne "") {
222 $wassname = $this_link;
226 my $this_wassname_digest = &get_checksum
($wassname);
227 if ($this_wassname_digest ne $last_title) {
229 my ($columns, $rows) = Term
::Size
::chars
*STDOUT
{IO
};
231 $columns = $OPTIONS{'columns'} if (defined($OPTIONS{'columns'}));
232 $columns = 32768 if ($columns < 10);
234 $wassname = HTML
::FormatText
->format_string($wassname);
235 $wassname =~ s/[\r\n]/ /g;
236 $wassname =~ s/\s+$//g;
240 if (!$OPTIONS{'no-feedname'} && $this_title) {
241 $printy .= "$rss_title: ";
244 if (!$OPTIONS{'no-title'} && $this_title) {
245 $printy .= $wassname;
246 if (length($printy) > $columns-4) {
247 $printy = substr($printy,0,$columns-4) . "...";
252 push(@OUTPUT, $printy . "\n");
255 if ($OPTIONS{'description'} && $this_title) {
256 $this_description = HTML
::FormatText
->format_string($this_description);
257 $this_description =~ s/[\r\n]\s*/\n\t/g;
258 $this_description = "\t$this_description" if (!$OPTIONS{'no-feedname'});
259 push(@OUTPUT, "$this_description\n");
262 if ($OPTIONS{'link'} && $this_title) {
263 $this_link = "\t$this_link" if (!$OPTIONS{'no-feedname'});
264 push(@OUTPUT, "$this_link\n");
266 $dont_have_content = 0;
268 if ($new_last_title eq "") {
269 $new_last_title = $this_wassname_digest;
277 if ($new_content_digest eq "") {
278 print STDERR
"theyoke: empty badness for new_content_digest on $feed_url\n";
283 # check for changed rss file but not changed headings
284 if ($new_last_title eq "") {
285 if ($OPTIONS{'debug'}) {
286 if ($new_content_digest ne $previous_content_digest) {
287 print STDERR
"theyoke: checksums don't match, but ";
289 print STDERR
"theyoke: ";
291 print STDERR
"no new headlines from on $feed_url\n";
293 $new_last_title = $last_title;
297 if ($OPTIONS{'reversetime'}) {
298 print reverse @OUTPUT;
# Save per-feed state only when the feeds came from the feeds file (no feed
# URLs were passed on the command line) and --test is off. The argument count
# is @ARGV in numeric context; the scalar $ARGV is the unrelated "current <>
# input file" variable and is normally undef here, which made the previous
# "$ARGV < 1" test always true.
306 if (@ARGV < 1 && !$OPTIONS{'test'}) {
307 if (open(FEED
, "> $file_digest_path")) {
309 print FEED
$feed_url . "\n";
311 if ($resp->last_modified) {
312 print FEED
$resp->last_modified . "\n";
314 print FEED
$resp->date . "\n";
317 print FEED
$new_content_digest . "\n";
319 print FEED
$new_last_title . "\n";
321 if ($resp->header("ETag")) {
322 print FEED
$resp->header("ETag") . "\n";
324 print FEED
"no_etag\n";
328 die ("Couldn't write to $file_digest_path");
333 $exit_val = 2 if $dont_have_content;
338 my $tent = md5_base64
(encode_utf8
($_[0]));
340 print STDERR
$_[0] . " encoding as $tent\n" if ($OPTIONS{'debug'} > 1);