#!/usr/bin/perl

=head1 NAME

Umegaya - Umegaya is a MEtadata GAtherer using YAml

=head1 SYNOPSIS

curl http://localhost/umegaya/B<package>/B<key>

/usr/lib/cgi-bin/umegaya package=B<package> key=B<key>

curl http://localhost/umegaya/table/B<key>

/usr/lib/cgi-bin/umegaya table=1 key=B<key>

curl http://localhost/umegaya/yaml/B<key>

/usr/lib/cgi-bin/umegaya yaml=1 key=B<key>

=head1 DESCRIPTION

B<Umegaya> is a gatherer of meta information about the software packaged in
Debian. This metadata is accumlated by the package maintainer in the version
control system containing the Debian souce package, in a file called
C<debian/upstream>, and it is collected by the Umegaya web service each time it
is accessed.  The Umegaya web service then aggregates information about all the
packages it knows, and outputs as tables or YAML records that can be loaded in
central information hubs like the Ultimate Debian Database.

Optionally, Umegaya will keep a local copy if the files it retrieves from the
VCS, if a path is indicated in the C<store_files> configuration variable.

=head1 SPECIAL KEYS

YAML-ALL, YAML-REFRESH-DATE, YAML-URL

=head1 FILES

/etc/umegaya/umegaya.conf

=head1 SEE ALSO

http://wiki.debian.org/UpstreamMetadata

umegaya-adm(1)

=head1 SOURCE

http://umegaya.branchable.com/

=head1 LIMITATIONS

Only source packages stored in Subversion and Git are supported for the moment.

The CGI interface of Umegaya is currently read-only.  But PUT and DELETE
operations are considered.

=cut

use strict;
use warnings;

use 5.10.0;

use BerkeleyDB;
use CGI;
use LWP::Simple qw(get);
use YAML::XS qw(Load Dump DumpFile);
use YAML::AppConfig;

my $conf = YAML::AppConfig->new(file => "/etc/umegaya/umegaya.conf");
my $lib         = $conf->get("db_dir");
my $local_comment    = $conf->get("local_comment");
my $berkeleydb  = $lib . $conf->get("db_name");
my $delay       = $conf->get("delay");
my $debug       = $conf->get("debug");
my $store_files = $conf->get("store_files");
my $debian_control   = $conf->get("debian_control")  ;
my $debian_copyright = $conf->get("debian_copyright");

my %stored;
umask 002;
tie (%stored, 'BerkeleyDB::Hash', -Filename =>  $berkeleydb, -Flags => DB_CREATE)
    or die "Cannot open file $berkeleydb: $! $BerkeleyDB::Error\n" ;

my $cgi = CGI->new;


my $package = $cgi->param("package") // 0 ;
my $help    = $cgi->param("help")    // 0 ;
my $table   = $cgi->param("table")   // 0 ;
my $yaml    = $cgi->param("yaml")    // 0 ;
my $key     = $cgi->param("key")     // 0 ;
   $debug   = $cgi->param("debug")   // $debug ;

say <<"__DEBUG__" if $debug;
Package: $package
Table: $table
YAML: $yaml
Key: $key
__DEBUG__

# Default homepage.
help() if ($help);
help() unless ($table or $yaml or $package);

sub help {
    my $help_text = qx(perldoc -o html umegaya);
    $help_text =~ s/<!-- end doc -->/<!-- end doc -->\n$local_comment/ if $local_comment;
    print $cgi->header('text/html');
    print $help_text;
    exit;
}

print $cgi->header(-type=>'text/plain', -charset=>'utf-8');

say "Debug mode on; somebody is probably working on this site…" if $debug;

# Output for http://localhost/umegaya/table/key URLs.
if ($table | $yaml) {
    foreach (keys (%stored)) {
        next unless /:\Q$key\E$/i;
        my $package = (split (/:/))[0];
        say join ("\t", $package, $key, $stored{$_}) if $table;
        say Dump ([$package, $key, $stored{$_}]) if $yaml;
    }
    exit;
}

# The URL of the 'upstream' file is never refreshed.
unless (defined ($stored{"$package:YAML-URL"})) {
    $stored{"$package:YAML-URL"} = qx(debcheckout -d $package | grep url | cut -f2) || "NA";
    chomp $stored{"$package:YAML-URL"};
}

# Refrain to refresh if the last update is still fresh, to avoid loading Alioth.
if (defined ($stored{"$package:YAML-REFRESH-DATE"})) {
    say "Not updated since:", time - $stored{"$package:YAML-REFRESH-DATE"} if $debug;
    $stored{"$package:YAML-REFRESH-DATE"} = refresh_from_url($stored{"$package:YAML-URL"}) if time - $stored{"$package:YAML-REFRESH-DATE"} > $delay;
} else {
    say "$package is not yet in the database, trying to pull its metadata…" if $debug;
    $stored{"$package:YAML-REFRESH-DATE"} = refresh_from_url($stored{"$package:YAML-URL"})
}

# Output for http://localhost/umegaya/package/key URL.
say "Looking for key $key of package ${package}." if $debug;
foreach (keys (%stored)) {
    next unless /\Q${package}:$key\E$/i;
    say "$_ is the key for $key and $package." if $debug;
    say $stored{"$_"};
    exit;
}
say "No $key key for package ${package}." if $debug;

# Reconstructs an URL to the a target file in the Debian directory.
# For Git repositories on Alioth, it relies on the Gitweb interface.
sub guess_file_url {
    my $file_url = shift;
    my $file_to_download = shift;
    if ($file_url =~ /^svn/) {
        $file_url .= '/' unless $file_url =~ m(/$);
        $file_url .= "debian/$file_to_download";
    }
    $file_url =~ s|^git://git.debian.org/g?i?t?/?(.*)|http://git.debian.org/?p=$1;a=blob_plain;f=debian/$file_to_download;hb=HEAD| if $file_url =~ /^git/;
    say "Will retreive $file_url for debian/$file_to_download." if $debug;
    return $file_url;
}

# Refresh and return a time stamp; store files if asked.
sub refresh_from_url {
    my $package_url = shift;
    say "Refreshing data from $package_url" if $debug;
    my $debian_upstream_url = guess_file_url($package_url, "upstream");
    my $package_metadata_yaml;
    $package_metadata_yaml = svn_export($debian_upstream_url) if $debian_upstream_url =~ /^svn/;
    $package_metadata_yaml = get($debian_upstream_url ) if $debian_upstream_url =~ /^http/;
    if ( $store_files ) {
	$package =~ /^(.)/;
	my $pool = $1;
        open(my $debian_upstream, ">", "$store_files/$pool/${package}.upstream")
            or die "Can not write $store_files/$pool/${package}.upstream: $!";
        print $debian_upstream $package_metadata_yaml;
        close($debian_upstream);
        if ( $debian_control ) {
            my $debian_control_url = guess_file_url($package_url, "control");
            system(qq(svn cat "$debian_control_url" > $store_files/$pool/${package}.control)) if $debian_control_url =~ /^svn/;
            system(qq(GET "$debian_control_url" > $store_files/$pool/${package}.control)) if $debian_control_url =~ /^http/;
        }
        if ( $debian_copyright ) {
            my $debian_copyright_url = guess_file_url($package_url, "copyright");
            system(qq(svn cat "$debian_copyright_url" > $store_files/$pool/${package}.copyright)) if $debian_copyright_url =~ /^svn/;
            system(qq(GET "$debian_copyright_url" > $store_files/$pool/${package}.copyright)) if $debian_copyright_url =~ /^http/;
        }
    }
    my $package_metadata;
    eval { $package_metadata = Load ($package_metadata_yaml)};
        say "Could not parse the YAML file for $package\n" and die if $@;
    # Drops all previous keys.
    foreach (keys (%stored)) {
        next unless /^$package:/;
        next if /:YAML-URL$/;
        say "Deleting $_" if $debug;
        undef $stored{$_};
    }
    # Loads the new keys.
    foreach (keys (%$package_metadata)) {
        my $key = $package_metadata->{$_};
        my $keytype = ref($key);
        say "Key type is $keytype." if $debug;
        # Compound keywords are equivalent to nested mappings:
        #
        # Foo-Bar: baz
        #
        # is the same as:
        #
        # Foo:
        #  Bar: baz
        #
        if ( $keytype eq "HASH" ) {
            my $left = $_;
            foreach ( keys (%$key)) {
                say "Setting ${package}:${left}-$_ to $key->{$_}" if $debug;
                $stored{"${package}:${left}-$_"} = $key->{$_};
            }
        } elsif ( $keytype eq "ARRAY" ) {
            say "Setting ${package}:$_ to $key" if $debug;
            $stored{"${package}:$_"} = $key;
        } else {
            say "Setting ${package}:$_ to $key" if $debug;
            $stored{"${package}:$_"} = $key;
        }
    }
    # Keeps a backup of the 'upstream' file.
    $stored{"${package}:YAML-ALL"} = $package_metadata_yaml;
    return time;
}

# For the moment only SVN is supported.
sub svn_export {
    my $url = shift;
    return qx(svn cat $url);
}
