#!/usr/pkg/bin/perl

use warnings;
use strict;

package fetcher;

sub new {
	my($class) = @_;
	my($self) = {
		count => 0
	};
	init($self);
	return $self;
}

sub init($) {
	my($self) = @_;
	eval {
		fetcher::async::init($self);
		bless $self,'fetcher::async';
	} or eval {
		fetcher::sync::init($self);
		bless $self,'fetcher::sync';
	};
}

sub fetch {
	my($self,$url,$filter) = @_;
	return ($self->fetchall($filter,$url))[0];
}

sub progress {
	my($self,$mess) = @_;
	++$self->{count};
	if (-t STDERR) {
		if (defined $mess) {
			printf STDERR "\r%s\n",$mess;
		} else {
			printf STDERR "\r%d",$self->{count};
		}
		STDERR->flush;
	}
}

sub done {
	my($self) = @_;
	if ($self->{count}) {
		if (-t STDERR) {
			print STDERR "\n";
			STDERR->flush;
		}
	}
	$self->{count} = 0;
}

sub DESTROY {
	my($self) = @_;
	$self->done;
}

package fetcher::async;
use base 'fetcher';

sub init {
	my($self) = @_;

	require URI;
	require HTTP::Async;
	require HTTP::Request;

	my($uri,$slots);
	if (exists $ENV{'http_proxy'}) {
		$uri = new URI($ENV{'http_proxy'});
	}
	$slots = 8;
	$self->{ua} = new HTTP::Async(
		proxy_host => $uri ? $uri->host : undef,
		proxy_port => $uri ? $uri->port : undef,
		timeout => 10,
		slots => $slots
	);
}

sub fetchall {
	my($self) = shift;
	my($filter) = shift;
	my(%c,$res,$req,$win,$todo,%ids,$id,$num);

	$win = 0;
	$todo = scalar(@_);
	$num = 0;

	foreach (@_) {
		if ($win < $self->{ua}->slots) {
			$req = new HTTP::Request(GET => $_);
			$id = $self->{ua}->add($req);
			$ids{$id} = ++$num;
			--$todo;
			++$win;
		}

		next if $todo > 0 && $win < $self->{ua}->slots;

		($res,$id) = $self->{ua}->wait_for_next_response;
		next unless $res;
		do {
			--$win;
			if ($res->is_success) {
				if ($filter) {
					$c{$id} = $filter->(
						$res->request->uri,
						$res->content);
				} else {
					$c{$id} = $res->content;
				}
				$self->progress;
			} else {
				$c{$id} = "";
				$self->progress($res->request->uri."\n".$res->status_line);
			}

			($res,$id) = $todo > 0
				? $self->{ua}->next_response
				: $self->{ua}->wait_for_next_response;
		} while ($res);
	}

	return
		map { $c{$_} }
		sort { $ids{$a} <=> $ids{$b} }
		keys %c;
}

package fetcher::sync;
use base 'fetcher';

sub init {
	my($self) = @_;

	require LWP;

	$self->{ua} = new LWP::UserAgent(
		env_proxy => 1,
		keep_alive => 20,
		timeout => 3
	);
}

sub fetchall {
	my($self) = shift;
	my($filter) = shift;
	my(@list,$res);
	foreach (@_) {
		my $res = $self->{ua}->get($_);
		if ($res->is_success) {
			if ($filter) {
				push @list,$filter->(
					$res->request->uri,
					$res->content);
			} else {
				push @list,$res->content;
			}
			$self->progress;
		} else {
			push @list,"";
			$self->progress($res->request->uri."\n".$res->status_line);
		}
	}
	return @list;
}

####################################################################

package main;

sub findlinks($$$) {
	my($fa,$url,$n) = @_;
	my($html,$prev,@links);

	@links = ();

	$html = $fa->fetch("${url}index.html");
	push @links,$html =~ m#href=".*?/(\d+/\d+/\d+/msg\d+\.html)"#g;

	while (@links < $n) {
		($prev) = $html =~ m#href=".*?/(\d+/\d+/date\d+\.html)".*Prev#mi;
		last unless defined $prev;
		$html = $fa->fetch("$url$prev");
		push @links,$html =~ m#href=".*?/(\d+/\d+/\d+/msg\d+\.html)"#g;
	}

	@links = @links[0..$n-1] if @links > $n;

	return @links;
}

sub relurl($$) {
	my($u1,$u2) = @_;
	my($u);
	$u1 = new URI($u1);
	$u2 = new URI($u2);
	$u = new_abs URI($u2,$u1)->as_string;
	return $u;
}

sub mailindex($$$$$) {
	my($url,$num,$author,$pmatch,$f) = @_;
	my($fa,@links,@html);
	my($i);

	$fa = new fetcher;
	@links = map { relurl($url,$_) } reverse findlinks($fa, $url, $num);
	@html = $fa->fetchall(sub {
		my $out = $f->(@_,$author,$pmatch);
		return '' unless defined $out;
		return "$_[0]\n$out";
	}, @links);
	$fa->done;

	die unless @links == @html;

	for ($i=0; $i<@html; ++$i) {
		print $html[$i] if defined $html[$i];
		STDOUT->flush;
	}
}

####################################################################

use HTML::Entities;
use URI;

sub filter($$$$$) {
	my($h,$s,$author,$pmatch,$root) = @_;
	my($a,$d,$m);
	my($add,$mod,$del,$dif);
	my($imp,$ven,$rel,$log,%tag);
#	my(@dif);
	my($out);
	
	$s =~ s#.*?<pre>##s;
	$s =~ s#</pre>.*##s;
	$s =~ s#\r\n#\n#sg;
	$s = HTML::Entities::decode($s);

	($a) = $s =~ /^Committed By:\s*(.*?)\s*$/m;
	($d) = $s =~ /^Date:\s*(.*?)\s*$/m;
	($m) = $s =~ /\nLog Message:[^\n]*\n[\n\s]*(.*)[\n\s]+\nTo generate a diff/s
	or ($m) = $s =~ /\nLog Message:[^\n]*\n[\n\s]*(.*)[\n\s]+\nStatus:/s;

	return unless defined $d && defined $a;
	$m = "" unless defined $m;

	return unless $a =~ /^$author$/oi;

	$out .= "$h\n";

	$m =~ s/\n[\n\s]*?\n/\n/sg;

	($ven) = $s =~ /^Vendor Tag:\s*(.*?)\s*$/m;
	($rel) = $s =~ /^Release Tags:\s*(.*?)\s*$/m;
	$ven = "" unless defined $ven;
	$rel = "" unless defined $rel;

	($imp) = $s =~ /^Update of\s+\/cvsroot\/(.*?)\s*$/m;
	$imp = defined $imp ? "Importing: $imp ($ven,$rel)\n" : "";

	($log) = $s =~ /\nStatus:.*?\n\s*\n\s*.*?\n\s*\n\s*(.*?\n)\n/s;
	if (defined $log) {
		%tag = ( 'U'=>0, 'M'=>0, 'C'=>0, 'D'=>0 );
		$tag{$1}++ while $log =~ /^(\S)/mg;
		$log = sprintf("   %d new, %d updated, %d conflicting, %d deleted\n",
			$tag{'U'}, $tag{'M'} + $tag{'C'}, $tag{'C'}, $tag{'D'});
	}
	$log = "" unless defined $log;

	($add) = $s =~ /\nAdded Files:.*?\n(.*?\n)[\S\n]/s;
	($mod) = $s =~ /\nModified Files:.*?\n(.*?\n)[\S\n]/s;
	($del) = $s =~ /\nRemoved Files:.*?\n(.*?\n)[\S\n]/s;
	($dif) = $s =~ /\nTo generate a diff.*?\n(.*\n)[\S\n]/s;

	$add =~ s/^\s*/   + /mg if defined $add;
	$mod =~ s/^\s*/   | /mg if defined $mod;
	$del =~ s/^\s*/   - /mg if defined $del;

	$add = "" unless defined $add;
	$mod = "" unless defined $mod;
	$del = "" unless defined $del;
	$dif = "" unless defined $dif;

	return unless "$add$mod$del$log$m" =~ /$pmatch/oi;

	$m =~ s/^/ /mg;

	$out = "$d $a\n$imp$add$mod$del$log$m\n";

#	$dif =~ s/\\\n//sg;
#
#	my @dif =
#		map { s/^cvs\s+rdiff\s+-u\s+//; $_; }
#		grep { /^cvs\s+rdiff\s+-u\s+/ }
#		split /\n/,$dif;
#
#	foreach (@dif) {
#		$out .= qx{cvs 2>/dev/null -d $root rdiff -u $_};
#	}

	$out .= "=========================================\n\n\n";

	return $out;
}

my($url, $cvsroot);

if ($0 =~ /pkgsrc/) {
	$url = 'http://mail-index.netbsd.org/pkgsrc-changes/';
} else {
	$url = 'http://mail-index.netbsd.org/source-changes/';
}

$cvsroot = 'anoncvs@anoncvs.netbsd.org:/cvsroot';

mailindex(
	$url,
	$ARGV[0] || 10,   # num
	$ARGV[1] || '.*', # author
	$ARGV[2] || '.',  # match
	sub { filter($_[0],$_[1],$_[2],$_[3],$cvsroot) }
);
