#!/usr/bin/perl

use strict;
use Data::Dumper;
use Log::Log4perl qw(:easy);
use Sys::Hostname;
Log::Log4perl->easy_init($DEBUG);

# File-scoped state. The subroutines further down read these lexicals
# directly, so their names must stay exactly as they are.
my $logger = Log::Log4perl->get_logger();
my $host = hostname;
# Execution environment; get_task_id() branches on this value.
my $grid_type = "sge";

$logger->info("Running on $host");
$logger->debug("Grid type: $grid_type");
# Separator between the fields of a single MW parameter.
my $MW_PARAM_DELIMITER = ":";

# Get the task ID that we are. There are going to be several instances of this
# script running. We need to know which one we are. This is actually different
# for every execution environment (grid), so we have a subroutine to hide that.
my $task_id = get_task_id();

# Dies unless there are at least three arguments and the second one is a
# positive integer.
check_arguments();
# The first argument is the executable to invoke.
my $executable = shift(@ARGV);
$logger->info("The executable is $executable.");
# The second argument is the blocksize, for the window of tasks to do.
my $block_size = shift(@ARGV);
# Report the block size itself (this line used to print the executable).
$logger->info("The block size is $block_size.");
# The remaining arguments are the MW parameters.
my @mw_params = @ARGV;
$logger->debug("Number of MW parameters: " . scalar(@mw_params));

# Now that we have the MW parameters, let's just check that they
# make sense.
validate_mw_params(\@mw_params);
# If we are here, then the MW parameters look fine.
$logger->debug("MW parameters validated correctly.");

# The task ID was fetched first (get_task_id() also removes it from the
# environment) but is only checked here, after argument validation.
if (! defined $task_id || $task_id <= 0) {
    $logger->logdie("Unable to determine this worker's ID.");
}
$logger->info("Task id for this worker: $task_id");

# An array of arrays: one inner array of arguments per MW parameter.
my @arg_groups = ();
foreach my $param (@mw_params) {
    $logger->debug(qq|Processing parameter "$param"|);
    my $arg_ref = get_arguments_for_param($task_id, $block_size, $param);
    # Continue building our array of arrays.
    push (@arg_groups, $arg_ref);
}
# execute() runs the invocations and terminates the script via exit().
execute($executable, \@arg_groups);

#############################################################################

# Run the executable once per argument position, then exit with a summary
# status.
#
# Parameters:
#   $executable    - the program to invoke (passed to system() in list form).
#   $arg_group_ref - reference to an array of array references. Invocation
#                    number i receives element i of every inner array, in
#                    the order the inner arrays appear.
#
# Does not return: always ends the script via exit(), with 0 when no
# invocation failed and 1 otherwise.
sub execute {
    $logger->debug("In execute.");
    my ($executable, $arg_group_ref) = @_;
    my ($success, $failed) = (0,0);
    my $count = 0;

    my $group_size = scalar @$arg_group_ref;
    $logger->debug("The number of arguments each invocation of $executable will have: $group_size");

    if ( $group_size > 0 ) {

        # An invocation needs one argument from every group, so only the
        # positions that all groups can fill are runnable. Using the first
        # group's length alone would pass undef arguments whenever a later
        # group is shorter.
        my $arg_length = scalar(@{ $arg_group_ref->[0] });
        foreach my $group (@$arg_group_ref) {
            my $length = scalar(@$group);
            if ($length < $arg_length) {
                $logger->debug("An argument group holds only $length entries; limiting the number of invocations to that.");
                $arg_length = $length;
            }
        }

        # This loop is to iterate across the argument arrays
        for (my $arg_index = 0; $arg_index < $arg_length; $arg_index++) {
            $count++;

            my @exec = ($executable);

            for (my $group_index = 0; $group_index < $group_size; $group_index++) {
                my $arg = $arg_group_ref->[$group_index]->[$arg_index];
                push(@exec, $arg);
            }

            $logger->info("Invoking: ", sub { join(" ", @exec) } );

            if ($logger->is_debug()) {
                $logger->debug("Arg list for invokation of $executable:\n", sub{ Dumper(\@exec) } );
                $logger->debug(sub { sprintf("Invoking %s. This is time #%d", $executable, $count) } );
                $logger->debug("==============================================================");
            }

            system(@exec);
            # Capture both values immediately; later calls may overwrite them.
            my ($status, $os_error) = ($?, "$!");

            # $? is -1 when the program could not be started; otherwise the
            # low 7 bits hold the terminating signal (if any) and the high
            # byte holds the exit code.
            my $exit_value;
            if ($status == -1) {
                $exit_value = -1;
                $logger->warn("Unable to launch $executable: $os_error");
            } else {
                $exit_value = $status >> 8;
                my $signal = $status & 127;
                if ($signal) {
                    $logger->warn("$executable was terminated by signal $signal.");
                }
            }

            if ($logger->is_debug()) {
                $logger->debug("==============================================================");
                $logger->info(sub { sprintf("Completed run of %s. Exit value: %d", $executable, $exit_value) });
            }

            # Judge success on the whole wait status: a run killed by a
            # signal has an exit code of 0 in the high byte but did not
            # succeed.
            if ($status == 0) {
                $success++;
            } else {
                $failed++;
            }
        }
    } else {
        # NOTE(review): despite the wording of this message, nothing is
        # invoked in this branch; only the (all zero) totals are reported.
        $logger->warn("No arguments! Just invoking the configured executable with no args.");
    }

    $logger->info(sub { sprintf("Successful: %d. Failed: %d. Total: %d", $success, $failed, $count) });
    my $exit = ($failed == 0) ? 0 : 1;
    $logger->info("$0 has completed execution. Exiting with exit value: $exit");
    exit $exit;
}

# Validate the command line held in @ARGV before anything is shifted off it.
#
# Requires at least three arguments (executable, block size, and one MW
# parameter) and a block size that is a positive integer. Prints the usage
# text and dies through the logger when the count is wrong; dies through the
# logger when the block size is not acceptable. Returns nothing useful.
sub check_arguments {
    if (scalar @ARGV < 3) {
        print_usage();
        $logger->logdie("An invalid number of parameters was specified.");
    }
    # Check for a valid block size
    my $block_size = $ARGV[1];

    # A purely numeric comparison is not enough: Perl numifies "5abc" to 5
    # and accepts "inf", so first make sure the whole string is a plain
    # decimal number.
    my $numeric_form = qr/\A\s*[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?\s*\z/;
    if ($block_size !~ $numeric_form
        || int($block_size) != $block_size
        || $block_size <= 0) {
        $logger->logdie("A non-integer or non-positive block size was specified: $block_size.");
    }
}

# Determine which worker this process is, based on the file-level $grid_type
# (which is lower-cased in place as a side effect).
#
# Returns the task ID: the value of SGE_TASK_ID for "sge", CONDOR_TASK_ID
# plus one for "condor", and an empty string for "torque" (not implemented
# yet). Returns undef when the relevant variable is missing or, for condor,
# is not a non-negative integer. Dies through the logger for any other grid
# type.
sub get_task_id {
    my $task_id;
    $grid_type = lc($grid_type);

    # Delete the environment variables when done with them in case the
    # executable somehow wants to submit jobs and pass the environment.
    # delete() hands back the removed value, so it reads and clears at once.
    if ($grid_type eq "sge") {
        $task_id = delete $ENV{SGE_TASK_ID};
    } elsif ($grid_type eq "condor") {
        # Condor uses 0 based task IDs.
        my $condor_id = delete $ENV{CONDOR_TASK_ID};
        # Only shift a real ID: undef + 1 would evaluate to 1 and make a
        # worker without an ID pose as the first task.
        if (defined $condor_id && $condor_id =~ /\A\s*\d+\s*\z/) {
            $task_id = $condor_id + 1;
        }
    } elsif ($grid_type eq "torque") {
        # TODO: Determine how to get the Task ID in Torque
        $task_id = "";
    } else {
        $logger->logdie("Unsupported grid type: $grid_type");
    }
    return $task_id;
}

# Write the command-line synopsis for this script to standard output,
# followed by two blank lines.
sub print_usage {
    print <<"END_USAGE";
Usage:
$0 <remote_executable> <block_size> <MW_param_1> <MW_param_2> ... <MW_param_N>


END_USAGE
}

# Check that every MW parameter is well formed and points at something usable.
#
# Parameters:
#   $param_ref - reference to an array of MW parameter strings, each made of
#                exactly three fields (type, value, key) joined by
#                $MW_PARAM_DELIMITER.
#
# Dies through the logger on the first parameter that does not have three
# fields, has an unrecognized type, names a directory that does not exist
# (DIR), or names a file that is missing or unreadable (FILE, FASTA).
# Returns nothing useful.
sub validate_mw_params {
    my $param_ref = shift;
    $logger->debug("Received " . scalar(@$param_ref) . " parameters to examine.");

    foreach my $param (@$param_ref) {
        # Split once, on the configured delimiter. \Q...\E keeps the
        # delimiter literal should it ever be a regex metacharacter.
        my @components = split(/\Q$MW_PARAM_DELIMITER\E/, $param);
        if (scalar(@components) != 3) {
            $logger->logdie("Invalid MW parameter encountered: \"$param\"");
        }

        my ($type, $value) = @components;
        $type = uc($type);
        if ($type eq "PARAM") {
            # A PARAM's key is passed along verbatim by
            # get_arguments_for_param(), so there is nothing on disk to
            # check. Rejecting it here would make that type unusable.
        } elsif ($type eq "DIR") {
            # If it's not a directory, throw an exception
            if (! -d $value) {
                $logger->logdie("MW parameter $param specified an invalid directory: \"$value\".");
            }
        } elsif ($type eq "FILE" || $type eq "FASTA") {
            # If it's not a file or it's not readable, throw an exception...
            if (! -f $value || ! -r $value) {
                $logger->logdie("MW parameter $param specified a file that doesn't exist or isn't readable.");
            }
        } else {
            # Unrecognized parameter type: throw an exception.
            $logger->logdie("Unrecognized parameter type of \"$type\".");
        }
    }
}

# Expand one MW parameter into the list of arguments this worker should use.
#
# Parameters:
#   $task_id   - 1-based ID of this worker.
#   $blocksize - number of tasks handled per worker; expected to be a
#                positive integer.
#   $param     - MW parameter string: type, value and key joined by
#                $MW_PARAM_DELIMITER.
#
# Returns a reference to an array of arguments:
#   PARAM - the key repeated $blocksize times;
#   DIR   - whatever get_dir_args() builds for this worker;
#   FILE  - whatever get_file_args() builds for this worker.
# Dies through the logger for any other type.
sub get_arguments_for_param {
    my ($task_id, $blocksize, $param) = @_;
    # \Q...\E keeps the delimiter literal inside the pattern.
    my ($type, $value, $key) = split(/\Q$MW_PARAM_DELIMITER\E/, $param);
    $type = uc($type);

    # This should be dynamic. Dynamically load a module that computes the array
    # at runtime.
    if ($type eq "PARAM") {
        # The list repetition operator builds $blocksize copies of the key.
        return [ ($key) x $blocksize ];
    }
    if ($type eq "DIR") {
        return get_dir_args($value, $key, $task_id, $blocksize);
    }
    if ($type eq "FILE") {
        return get_file_args($value, $key, $task_id, $blocksize);
    }

    # TODO: Possibly, try a dynamic load of a module
    $logger->logdie("Unrecognized parameter type of \"$type\".");
}

# Build this worker's arguments from the files found in a directory.
#
# Parameters:
#   $dir       - directory to scan.
#   $key       - argument template; every literal "$(Name)" in it is
#                replaced by a file name.
#   $task_id   - 1-based ID of this worker.
#   $blocksize - number of files handled per worker.
#
# The plain files in $dir whose names do not start with a dot are sorted,
# and the slice of $blocksize names starting at $blocksize * ($task_id - 1)
# is used. Returns a reference to the resulting array of arguments. Dies
# through the logger when the directory cannot be opened or when the slice
# for this worker starts past the end of the file list.
sub get_dir_args {
    my ($dir, $key, $task_id, $blocksize) = @_;

    my @args = ();

    # Lexical directory handle, so nothing leaks into the package namespace.
    opendir(my $dir_handle, $dir) or $logger->logdie("Cannot open directory $dir: $!");
    my @files = grep { /^[^\.]/ && -f "$dir/$_" } readdir($dir_handle);
    closedir $dir_handle;
    $logger->debug("Finished scanning directory $dir");

    @files = sort(@files);
    $logger->debug("Files scanned: ", sub { Dumper(\@files) } );

    # Use the $blocksize this sub was called with rather than reaching for
    # the file-level $block_size.
    my $offset = $blocksize * ($task_id - 1);
    my $file_count = scalar(@files);

    # A block is only partial when it would run past the end of the list;
    # a block ending exactly on the last file is still a full one.
    if ($offset + $blocksize > $file_count) {
        $logger->info("This worker appears to be the last since it doesn't have a full compliment of tasks.");
    }

    if ($offset >= $file_count) {
        $logger->logdie("Insufficent number of files scanned for this worker.");
    } else {
        # splice() returns fewer than $blocksize names when the list runs out.
        my @files_to_use = splice(@files, $offset, $blocksize);
        $logger->debug("Files to use for this worker: ", sub { Dumper(\@files_to_use) } );
        foreach my $file (@files_to_use) {
            $logger->debug(qq|Using file: "$file"|);
            my $final_arg = $key;
            $final_arg =~ s/\$\(Name\)/$file/g;
            push (@args, $final_arg);
        }
    }
    $logger->info("Total number of files to use: " . scalar(@args));

    return \@args;
}

# Build this worker's arguments from the lines of a file.
#
# Parameters:
#   $file      - file to read, one task per line.
#   $key       - argument template; every literal "$(Name)" in it is
#                replaced by a line (without its trailing newline).
#   $task_id   - 1-based ID of this worker.
#   $blocksize - number of lines handled per worker.
#
# Skips the first $blocksize * ($task_id - 1) lines, then uses up to
# $blocksize of the lines that follow. Returns a reference to the resulting
# array of arguments, which is empty when the file ends before this worker's
# block starts. Dies when the file cannot be opened or closed.
sub get_file_args {
    my ($file, $key, $task_id, $blocksize) = @_;

    my @args = ();

    $logger->info("About to open $file");
    my $lines_to_skip = $blocksize * ($task_id - 1);
    $logger->debug("Number of lines to skip down: $lines_to_skip.");

    # Lexical filehandle; include the OS error so a failure is diagnosable.
    open(my $fh, "<", $file) or die "Unable to open $file for reading: $!";

    # Skip down to where we should start reading, giving up early if the
    # file ends first instead of reading past EOF over and over.
    my $skipped = 0;
    while ($skipped < $lines_to_skip) {
        my $ignored = <$fh>;
        last unless defined $ignored;
        $skipped++;
    }

    # Read the lines that this worker should be processing.
    for (my $valid_count = 0; $valid_count < $blocksize; $valid_count++) {
        my $valid_line = <$fh>;
        last unless defined $valid_line;
        chomp($valid_line);
        $logger->debug(qq|Using line: "$valid_line"|);
        my $final_arg = $key;
        $final_arg =~ s/\$\(Name\)/$valid_line/g;
        push (@args, $final_arg);
    }
    close($fh) or die "Unable to close $file: $!";
    return \@args;
}
