#!/usr/bin/env perl
use v5.26;
use experimental qw(signatures);

use strict;
use warnings;

use Carp qw(croak);
use CPAN::DistnameInfo;
use File::Basename qw(basename);
use File::Spec::Functions qw(catfile);
use HTTP::Tiny;
use JSON ();
use PerlIO::gzip;
use YAML::Tiny;
use File::Temp;

use subs qw(config message);

=head1 NAME

util/generate - create the data for lib/CPAN/Audit/DB.pm

=head1 SYNOPSIS

	# usual operation, outputs to lib/CPAN/Audit/DB.pm
	# gets data from cpan-security-advisory/cpansa/*.yml
	% perl util/generate

	# usual operation, outputs to lib/CPAN/Audit/DB.pm
	# gets data from other_source/*.yml
	% perl util/generate other_source/*.yml

	# suppress progress messages
	% perl util/generate -q
	% perl util/generate --quiet

	# output somewhere else
	% perl util/generate -o some_other_file
	% perl util/generate --output-file some_other_file

	# output to stdout (- is a special file name)
	% perl util/generate -o -

	# output JSON instead of a Perl module (probably want to specify output)
	% perl util/generate --json -o -

=head1 DESCRIPTION

This program chews through the CPAN security advisory reports and
makes the L<CPAN::Audit::DB> module.

=head1 AUTHOR

Original author: Viacheslav Tykhanovskyi (C<vti@cpan.org>)

Maintained by: brian d foy (C<bdfoy@cpan.org>)

=head1 LICENSE

L<CPAN::Audit> is dual-licensed under the GPL or the Artistic License.
See the included F<LICENSE> file for details.

=cut

run(@ARGV) unless caller;

sub _message ( $fh, $message ) {
	$message =~ s/\v+\z//;
	say {$fh} $message
}

sub info ( $message ) {
	_message *STDOUT, $message;
}

sub debug ( $message ) {
	return unless config()->{debug};
	_message *STDERR, $message;
}

sub dumper {
	state $rc = require Data::Dumper;
	Data::Dumper->new([@_])->Indent(1)->Sortkeys(1)->Terse(1)->Useqq(1)->Dump
}

sub process_options ( @args ) {
	state $rc = require Getopt::Long;

	my %results = (
		debug       => 0,
		gpg_key     => $ENV{CPAN_AUDIT_GPG_KEY_FINGERPRINT},
		output_file => default_file(),
		);
	*config = sub () { \%results };

	my %opts = (
		'debug|d'          => \ $results{debug},
		'gpg-key|g=s'      => \ $results{gpg_key},
		'json|j'           => \ $results{json},
		'module-version'   => \ $results{version},
		'output-file|o=s'  => \ $results{output_file},
		'perl-module|pm|p' => \ $results{perl_module},
		'quiet|q'          => \ $results{quiet},
	);

	my $p = Getopt::Long::Parser->new();
	$p->configure( qw(no_ignore_case) );
	$p->getoptionsfromarray( \@args, %opts );


	debug dumper( \%results );

	$results{version} //= default_version();

	\@args;
}

sub default_file () {
	state $file = catfile(qw(lib CPAN Audit DB.pm));
	$file;
}

sub default_version () {
	my $opts = config();
	my( $year, $month, $day ) = (localtime)[5,4,3];

	my $date = sprintf '%4d%02d%02d', $year + 1900, $month + 1, $day;
	my( $previous_date, $previous_serial ) = get_previous_date_serial( $opts->{output_file} );
	debug "PREVIOUS DATE: $previous_date PREVIOUS SERIAL: $previous_serial";

	my $serial = sprintf '%03d', $previous_date == $date ? $previous_serial + 1 : 1;
	debug "NEW SERIAL: $serial";
	debug "NEW DATE: $date";

	my $version = join '.', $date, $serial;
	debug "NEW VERSION: $version";
	return $version;
}

sub get_previous_date_serial ( $file ) {
	open my $fh, '<:encoding(UTF-8)', $file or croak( "Could not read <$file>: $!" );
	while( <$fh> ) {
		next unless /VERSION\s*=\s*'(\d{8})\.(\d{3})'/;
		return ( $1, $2 );
	}
	return;
}

sub run ( @args ) {
	my( $leftover_args ) = process_options( @args );
	my $opts = config();

	*message = $opts->{quiet} ? sub ($m) {} : sub ($m) { _message *STDOUT, $m };

	my $files = get_file_list( $leftover_args );
	die "Usage: <files>\n" unless @$files;

	my $out_fh = do {
		message "Output file will be <$opts->{output_file}>";
		if( $opts->{output_file} eq '-' ) { \*STDOUT }
		elsif( $opts->{output_file} ) {
			open my $fh, '>:encoding(UTF-8)', $opts->{output_file}
				or die "Could not open <$opts->{output_file}>: $!\n";
			$fh;
		}
		else { \*STDOUT }
	};

	my $db = process_files( $files );

	my $string = do {
		if( $opts->{perl_module} ) { stringify_data($db) }
		elsif( $opts->{json} )	   { JSON::encode_json($db) }
		else					   { stringify_data($db) }
	};

	my $target = defined $opts->{output_file} && $opts->{output_file} ne '-' ? $opts->{output_file} : 'STDOUT';
    message "writing to $target";
    print { $out_fh } $string;

	output_gpg_signature( $string );
}

sub get_file_list ( $args ) {
	message "Updating submodules";
	system 'git', 'submodule', 'update', '--remote';
	unless( @$args and -e 'cpan-security-advisory/cpansa' ) {
		debug 'No arguments given fo: looking in cpan-security-advisory/cpansa';
		@$args = glob( 'cpan-security-advisory/cpansa/*.yml' );
	}

	my @files = ($^O eq 'MSWin32') ? map { glob } @$args : @$args;

	\@files;
}

sub output_gpg_signature ( $string ) {
	my $opts = config();

	return if $opts->{output_file} eq '-';
	return unless defined $opts->{gpg_key};

	my $gpg_file = $opts->{output_file} . '.gpg';

	state $rc = require Encode;
	my $octets = Encode::encode("UTF-8", $string);

	my @command = ( 'gpg', '--yes', '-o', $gpg_file, '-sb', '--armor', '-u', $opts->{gpg_key} );
	debug "COMMAND is @command";

	open my $gpg_fh, '|-', @command;

	print { $gpg_fh } $octets;

	close $gpg_fh or croak "Problem making GPG signature: $!";

	return 1;
}

sub process_files ( $files ) {
	my %db;
	foreach my $file ( $files->@* ) {
		message "Reading $file";
		my $yaml = YAML::Tiny->read($file)->[0];

		my %dists_in_file;
		my $n = 0;

		foreach my $record ( $yaml->@* ) {
			$n++;
			my $id = $record->{id};
			warn "Missing distribution key in record $n ($id)\n" unless exists $record->{distribution};
			warn "Undefined distribution key in record $n ($id)\n" unless defined $record->{distribution};
			warn "Empty distribution key in record $n ($id)\n" unless length $record->{distribution};

			$dists_in_file{  $record->{distribution} }++;
			}

		if( keys %dists_in_file == 0 ) {
			warn "There were no distributions declared in the records in <$file>\n";
			next FILE;
		}
		elsif( keys %dists_in_file > 1 ) {
			my @dists = map { length ? $_ : '<not defined or empty>' } keys %dists_in_file;
			warn "There were more than one distribution declared in the records in <$file>: @dists\n";
		}

		my( $dist ) = keys %dists_in_file;
		$db{dists}{$dist}{advisories} = $yaml;
	}

	provides( \%db );

	foreach my $dist ( sort keys $db{dists}->%* ) {
		my @releases = all_releases($dist);
		if (!@releases) {
			warn "no releases found on CPAN for '$dist'\n";
			next;
		}
		message "Processing $dist";

		# try to fetch the latest release, according to MetaCPAN.
		# if there is no 'latest' tag, grab the last item (because
		# the list is ordered by date).
		my ($main_module, @versions);
		foreach my $release (@releases) {
			push @versions, { date => $release->{date}, version => $release->{version} };
			if ($release->{status} eq 'latest') {
				$main_module = $release->{main_module};
			}
		}
		$main_module = $releases[-1]->{main_module} unless $main_module;

		$db{dists}{$dist}{versions} = \@versions;
		$db{dists}{$dist}{main_module} = $main_module;
	}

	# XXX: need to investigate why this shows up as utf8
	$db{dists}{'perl'}{main_module} = 'perl';

	\%db;
}

sub stringify_data ( $db ) {
	my $opts = config();

	state $rc = require Data::Dumper;

	no warnings 'once';
	local $Data::Dumper::Sortkeys = 1;
	my $dump = Data::Dumper::Dumper( $db );
	$dump =~ s{^\$VAR1\s*=\s*}{};
	$dump =~ s{}{};

	my $submodule_dir = 'cpan-security-advisory';

	my( $commit ) = split /\s+/, join '',
		grep { /\Q$submodule_dir/ }
		`git submodule status` =~ s/\A\s+//r;

	my $string = <<~"EOF";
		# created by $0 at @{[ scalar localtime]}
		# $submodule_dir $commit
		#
		package CPAN::Audit::DB;

		use strict;
		use warnings;

		our \$VERSION = '$opts->{version}';

		sub db {
			$dump
		}

		1;
		EOF
}

sub provides {
	my ($db) = @_;

	my $ua = HTTP::Tiny->new;

	my $tmpdir = File::Temp::tempdir();
	my $details_file = catfile($tmpdir, '02packages.details.txt.gz');
	message "Downloading 02packages.details.txt.gz (this may take awhile)";
	$ua->mirror( 'http://www.cpan.org/modules/02packages.details.txt.gz',
		$details_file );
	message "Downloaded 02packages.details.txt.gz";
	message "Digesting 02packages.details.txt.gz (this may take awhile)";

	open my $fh, '<:gzip', $details_file
	  or die "Can't open '$details_file': $!";

	while ( defined( my $line = <$fh> ) ) {
		chomp $line;

		last if $line eq '';
	}

	while ( defined( my $line = <$fh> ) ) {
		my ( $module, $version, $pathname ) = split /\s+/, $line;
		next unless $module && $pathname;

		my $dist_info = CPAN::DistnameInfo->new($pathname);
		next unless $dist_info;

		my $author = $dist_info->cpanid;
		my $dist   = $dist_info->dist;
		my $name   = $dist_info->distvname;

		next unless $dist;

		next unless $db->{dists}->{$dist};

		$db->{module2dist}->{$module} = $dist;
	}

	close $fh;
}

sub all_releases {
	my ($distribution) = @_;

	my $response = HTTP::Tiny->new->post(
		'http://fastapi.metacpan.org/v1/release/_search',
		{
			headers => { 'Content-Type' => 'application/json' },
			content => JSON::encode_json(
				{
					size   => 5000,
					fields => [ qw(date version status main_module) ],
					filter => {
						term => { distribution => $distribution }
					},
					sort => ['date'],
				}
			)
		}
	);

	my $content_json = JSON::decode_json( $response->{content} );

	my @results = map  { $_->{fields} } @{ $content_json->{hits}->{hits} };
	return unless @results;

	return @results;
}
