#!/usr/bin/perl

#
# dotnewsfeed.pl, checks for new articles on dot.kde.org and e-mails them off
# should be fairly easy to adapt to other news websites.
#
# This edition was edited for debianplanet.org; the configuration below
# currently points at the kdedevelopers.org blog feed.
#
# Comments to jr-AT-jriddell.org
#
# Copyleft Jonathan Riddell 2003
# May be copied under the terms of the GNU GPL only
#

use strict;

use XML::RSS::Parser::Lite;
use LWP::Simple;

use Mail::Mailer;

# use warnings;

# Configuration.
#
# Sender and recipient addresses put on every article e-mail.
my $from_email   = 'kdedevelopers-feed@jriddell.org';
my $to_email     = 'kdedevelopers-feed@kde.me.uk';
# Scratch file: the trimmed article HTML is written here and then handed
# to lynx for conversion to plain text.
my $article_file = '/home/jr/newsfeed/kdedevelopers/ARTICLE.html';
# Feed listing the articles to mail out.
my $rdf_url      = 'http://www.kdedevelopers.org/blog/feed';
# 1 prints progress messages on standard output; any other value is silent.
my $debug        = 1;

# Print the arguments, separated by single spaces and followed by a
# newline, on standard output -- but only while $debug is set to 1.
sub debug {
    my $message = join(" ", @_);
    print "$message\n" if $debug == 1;
}

# Download the RDF/RSS feed and collect the URL of every article it lists.

debug("getting rdf");
my $rdf = get($rdf_url);
# LWP::Simple's get() returns undef on failure and gives no reason why.
die "Couldn't get $rdf_url" unless defined $rdf;
debug("got rdf");

my $rssparser = XML::RSS::Parser::Lite->new;
$rssparser->parse($rdf);

# URLs of the articles in the feed, in feed order.
my @articles;

# Ask for the item count once; treat "no count" as an empty feed.
my $item_count = $rssparser->count() || 0;

for my $i (0 .. $item_count - 1) {
    my $url = $rssparser->get($i)->get('url');

    # An item without a URL cannot be downloaded; skip it here rather than
    # letting the download step die later with an empty "Couldn't get".
    next unless defined $url && length $url;

    push @articles, $url;
}

# Load the URLs of the articles that have already been mailed.  They are
# stored one per line in the file PROCESSED-ARTICLES, which is looked up
# relative to the current working directory.
my %processed_articles;

# A missing file simply means nothing has been mailed yet (e.g. the first
# run).  Any other failure is fatal: carrying on with an empty list would
# e-mail every article in the feed all over again.
if (open(my $processed_fh, '<', 'PROCESSED-ARTICLES')) {
    while (my $line = <$processed_fh>) {
        chomp($line);
        $processed_articles{$line} = 1;
    }
    close($processed_fh);
}
elsif (!$!{ENOENT}) {
    die "Couldn't read PROCESSED-ARTICLES: $!";
}

# NOTE(review): @table_cells is never filled or read in the visible code; it
# looks like a leftover from an earlier parser.  The declaration is kept in
# case something outside this view still refers to it.
my @table_cells;

# Mail every feed article that has not been mailed before, then record it
# as done.  Every failure aborts the run before the article is recorded, so
# the article is tried again on the next run.
foreach my $article (@articles) {
    if ($processed_articles{$article}) {
        debug("already done $article");
        next;
    }

    # Download the article page.
    debug("getting $article");
    my $article_html = get($article);
    die "Couldn't get $article" unless defined $article_html;
    debug("got article");

    # Cut the page down to the article body.  These patterns are tied to
    # the site's HTML template.
    #
    # 1. Drop everything up to and including the last "<!-- node: " marker.
    $article_html =~ s/.*<!-- node: (.*)/$1/s;
    # 2. Drop the last table row carrying this exact style, and everything
    #    after it.
    $article_html =~ s/(.*)<tr style="background-color: #EAE9E8;"><td style="background-color: #EAE9E8; text-align: right;" colspan="2">.*/$1/s;
    # 3. No /s here: on the first line that contains "-->", drop the text up
    #    to and including that line's last "-->" (presumably the rest of
    #    the "node" comment opened in step 1 -- confirm against the site).
    $article_html =~ s/.*-->(.*)/$1/;

    # Write the trimmed HTML to the scratch file for lynx to render.
    open(my $article_fh, '>', $article_file)
        or die("could not open $article_file: $!");
    print {$article_fh} $article_html;
    close($article_fh)
        or die("could not write $article_file: $!");

    # Render the HTML as plain text.  The list form of open runs lynx
    # directly, without a shell, so the file name is never re-parsed.
    open(my $lynx_fh, '-|', 'lynx', '-dump', $article_file)
        or die("could not run lynx: $!");
    my $article_text = do { local $/; <$lynx_fh> };
    close($lynx_fh)
        or die("lynx -dump $article_file failed (exit status $?)");
    $article_text = '' unless defined $article_text;

    # The subject is the line following the first newline of the lynx
    # output.  If there is no such line, or it is blank, fall back to the
    # article URL instead of reusing a stale $1 or sending an empty subject.
    my ($first_line) = $article_text =~ /\n(.*)\n/;
    my $headline = (defined $first_line && $first_line =~ /\S/)
        ? $first_line
        : $article;

    # Put the article URL at the top of the message body.
    $article_text = "$article\n\n $article_text";

    debug("headline: $headline\n\n");
    debug($article_text);

    # Send the e-mail.
    my $mailer = Mail::Mailer->new;

    $mailer->open({From    => $from_email,
                   To      => $to_email,
                   Subject => $headline})
        or die "Can't open: $!\n";

    print $mailer $article_text;

    # A failed close means the message was not handed over completely; stop
    # here so the article is not recorded as mailed.
    $mailer->close
        or die "Couldn't send $article: $!\n";

    # Record the article as done, on disk and in memory.  The in-memory
    # entry stops a URL that the feed lists twice from being mailed twice
    # in the same run.
    open(my $done_fh, '>>', 'PROCESSED-ARTICLES')
        or die "Couldn't open PROCESSED-ARTICLES for appending: $!";
    print {$done_fh} "$article\n";
    close($done_fh)
        or die "Couldn't write PROCESSED-ARTICLES: $!";
    $processed_articles{$article} = 1;

    debug("sleeping for 10");
    sleep 10;
}


