#!/usr/bin/perl

use strict;
use Data::Dumper;
use Grid::Request::HTC;
use Log::Log4perl qw(:easy);
use Config::IniFiles;
use Sys::Hostname;
Log::Log4perl->easy_init($DEBUG);

# Main driver for one worker of a master/worker (MW) grid job: it works out
# which task this process is, expands every MW parameter into the argument
# list for this worker's block, and then runs the executable once per argument
# position.

my $logger = Log::Log4perl->get_logger();
# Determine what host we are running on.
my $host = hostname;
$logger->info("Running on $host");

# Load which grid we are using from the configuration file.
my $config_file = Grid::Request::HTC->config();
# The [section] heading in the configuration file.
my $section = $Grid::Request::HTC::config_section;
# Config::IniFiles->new returns undef when the file cannot be loaded, so fail
# here with a useful message instead of on the method call below.
my $config = Config::IniFiles->new(-file => $config_file)
    or $logger->logdie("Unable to load configuration file $config_file: "
                       . join(" ", @Config::IniFiles::errors));
my $grid_type = $config->val($section, "drm");

$logger->debug("Grid type: $grid_type");
my $MW_PARAM_DELIMITER = ":";

# Get the task ID that we are. There are going to be several instances of this
# script running. We need to know which one we are. This is actually different
# for every execution environment (grid), so we have a subroutine to hide that.
my $task_id = get_task_id();

check_arguments();
# The first argument is the executable to invoke.
my $executable = shift(@ARGV);
$logger->info("The executable is $executable.");
# The second argument is the blocksize, for the window of tasks to do.
my $block_size = shift(@ARGV);
$logger->info("The block size is $block_size.");
# The remaining arguments are the MW parameters.
my @mw_params = @ARGV;
$logger->debug("Number of MW parameters: " . scalar(@mw_params));

# Now that we have the MW parameters, let's just check that they
# make sense.
validate_mw_params(\@mw_params);
# If we are here, then the MW parameters look fine.
$logger->debug("MW parameters validated correctly.");

if (! defined $task_id || $task_id <= 0) {
    $logger->logdie("Unable to determine this worker's ID.");
}
$logger->info("Task id for this worker: $task_id");

# An array of arrays: one array of arguments per MW parameter.
my @arg_groups = ();
foreach my $param (@mw_params) {
    $logger->debug(qq|Processing parameter "$param"|);
    my $arg_ref = get_arguments_for_param($task_id, $block_size, $param);
    # Continue building our array of arrays.
    push (@arg_groups, $arg_ref);
}
# Chop all the arg_groups according to the group with the smallest size.
@arg_groups = limit_arguments(\@arg_groups);
# execute() never returns; it exits with the overall status.
execute($executable, \@arg_groups);

#############################################################################

# Trim every argument group to the length of the shortest group so that all
# groups can be walked in lock step.
#
# Takes a reference to an array of array references. Groups that are already
# at the minimum length are passed through as the same reference; longer
# groups are replaced by a reference to a truncated copy. Returns the list of
# groups in list context and a reference to that list in scalar context.
sub limit_arguments {
    my ($groups_ref) = @_;
    my @groups = @{$groups_ref};

    # Find the length of the shortest group; stays undef for no groups.
    my $min;
    for my $length (map { scalar @{$_} } @groups) {
        $min = $length if !defined $min || $length < $min;
    }
    $logger->debug("Determined that the number of times we can iterate with these arguments is $min.");

    # Rebuild the list, shortening whichever groups exceed the minimum.
    my @limited_groups;
    for my $index (0 .. $#groups) {
        my $group = $groups[$index];
        my $size = @{$group};
        if ($size > $min) {
            my $group_number = $index + 1;
            $logger->debug("Cutting argument group number $group_number down to $min from $size.");
            # Keep only the first $min elements, in a fresh array.
            $group = [ @{$group}[0 .. $min - 1] ];
        }
        push @limited_groups, $group;
    }

    return wantarray ? @limited_groups : \@limited_groups;
}

# Run the executable once for every argument position and exit.
#
# Parameters:
#   $executable    - the program to invoke.
#   $arg_group_ref - reference to an array of array references; invocation
#                    number i receives element i of every group, in group
#                    order. Any "$(Index)" token in an argument is replaced
#                    with the file-level $task_id.
#
# Does not return: exits 0 when no invocation failed, 1 otherwise. An
# invocation counts as successful only when the child's full wait status is
# zero, so a child that could not be started or that was killed by a signal is
# counted as a failure (looking at "$? >> 8" alone would report a signal death
# as exit value 0).
sub execute {
    $logger->debug("In execute.");
    my ($executable, $arg_group_ref) = @_;
    my ($success, $failed) = (0, 0);
    my $count = 0;

    my $group_size = scalar @$arg_group_ref;
    $logger->debug("The number of arguments each invocation of $executable will have: $group_size");

    if ($group_size > 0) {

        # Length of the various argument arrays should all be the same, so we'll just use
        # the length of the first one.
        my $arg_length = scalar(@{ $arg_group_ref->[0] });

        # This loop is to iterate across the argument arrays.
        for my $arg_index (0 .. $arg_length - 1) {
            $count++;

            my @exec = ($executable);

            for my $group (@$arg_group_ref) {
                # Work on a copy so the caller's data is left untouched.
                my $arg = $group->[$arg_index];

                # Replace $(Index) with the task id.
                $arg =~ s/\$\(Index\)/$task_id/g;

                push(@exec, $arg);
            }

            $logger->info("Invoking: ", sub { join(" ", @exec) } );

            if ($logger->is_debug()) {
                $logger->debug("Arg list for invokation of $executable:\n", sub{ Dumper(\@exec) } );
                $logger->debug(sub { sprintf("Invoking %s. This is time #%d", $executable, $count) } );
                $logger->debug("==============================================================");
            }

            # List form of system: no shell is involved.
            system(@exec);

            # Capture both immediately; later calls may overwrite them.
            my $wait_status  = $?;
            my $launch_error = "$!";

            # -1 means the child could not be started at all; otherwise the
            # exit code lives in the high byte of the wait status.
            my $exit_value = ($wait_status == -1) ? -1 : $wait_status >> 8;

            if ($wait_status == -1) {
                $logger->error("Failed to launch $executable: $launch_error");
            } elsif ($wait_status & 127) {
                $logger->error(sprintf("%s was terminated by signal %d.", $executable, $wait_status & 127));
            }

            if ($logger->is_debug()) {
                $logger->debug("==============================================================");
                $logger->info(sub { sprintf("Completed run of %s. Exit value: %d", $executable, $exit_value) });
            }

            if ($wait_status == 0) {
                $success++;
            } else {
                $failed++;
            }
        }
    } else {
        # NOTE: despite the wording of this message, nothing is invoked in
        # this branch; the counters simply stay at zero.
        $logger->warn("No arguments! Just invoking the configured executable with no args.");
    }

    $logger->info(sub { sprintf("Successful: %d. Failed: %d. Total: %d", $success, $failed, $count) });
    my $exit = ($failed == 0) ? 0 : 1;
    $logger->info("$0 has completed execution. Exiting with exit value: $exit");
    exit $exit;
}

# Validate the command line held in @ARGV.
#
# Requires at least three arguments (executable, block size and one MW
# parameter); with fewer, logs a fatal message and calls print_usage(), which
# exits. The second argument must be a positive integer. It has to look like a
# number as a whole, so a value such as "5abc" (which Perl would quietly
# numify to 5) is rejected. Dies through the logger on an invalid block size.
sub check_arguments {
    if (scalar @ARGV < 3) {
        $logger->fatal("An invalid number of parameters was specified.");
        print_usage();
    }

    # Loaded here so the check does not depend on the file's import list.
    require Scalar::Util;

    # Check for a valid block size.
    my $block_size = $ARGV[1];
    if (   !Scalar::Util::looks_like_number($block_size)
        || int($block_size) != $block_size
        || $block_size <= 0) {
        $logger->logdie("A non-integer or non-positive block size was specified: $block_size.");
    }
}

# Work out this worker's 1-based task ID from the grid's environment.
#
# Lower-cases the file-level $grid_type as a side effect and picks the
# environment variable for that grid. Returns the task ID, or a value the
# caller treats as "unknown" (undef or the empty string) when it cannot be
# determined. Dies through the logger for an unsupported grid type.
sub get_task_id {
    my $task_id;
    $grid_type = lc($grid_type);

    # Delete the environment variables when done with them in case the
    # executable somehow wants to submit jobs and pass the environment.
    if ($grid_type eq "sge") {
        $task_id = $ENV{SGE_TASK_ID};
        delete $ENV{SGE_TASK_ID};
    } elsif ($grid_type eq "condor") {
        # Condor uses 0 based task IDs. Only convert a value that is really
        # there and numeric: adding 1 to a missing or garbage value would
        # silently turn this worker into task 1.
        my $condor_id = $ENV{CONDOR_TASK_ID};
        if (defined $condor_id && $condor_id =~ /\A\s*\d+\s*\z/) {
            $task_id = $condor_id + 1;
        }
        delete $ENV{CONDOR_TASK_ID};
    } elsif ($grid_type eq "torque" || $grid_type eq "pbs") {
        # TODO: Determine how to get the Task ID in PBS/Torque
        $task_id = "";
    } else {
        $logger->logdie("Unsupported grid type: $grid_type");
    }
    return $task_id;
}

# Print the command-line synopsis and an example to standard output, then
# exit with status 1. Never returns.
sub print_usage {
    my @usage_text = (
        "Usage:\n\n",
        "$0 <remote_executable> <block_size> <MW_param_1> <MW_param_2> ... <MW_param_N>\n\n\n",
        "Example:\n",
        "$0 <remote_executable> 100 param:--parameter 'file:/path/to/file:\$(Name)'\n\n\n",
    );
    print join("", @usage_text);
    exit 1;
}

# Sanity-check every MW parameter string before any work is done.
#
# Takes a reference to an array of parameter strings, each split on
# $MW_PARAM_DELIMITER. A two-field parameter must be of type PARAM. A
# three-field parameter must be of type DIR (second field is an existing
# directory) or FILE (second field is a readable plain file). The type is
# compared case-insensitively. Any other shape dies through the logger.
sub validate_mw_params {
    my ($params) = @_;
    $logger->debug("Received " . scalar(@$params) . " parameters to examine.");

    for my $param (@{$params}) {
        my @fields = split(/$MW_PARAM_DELIMITER/, $param);
        my $field_count = @fields;

        # Anything other than two or three fields is malformed.
        if ($field_count != 2 && $field_count != 3) {
            $logger->logdie("Invalid MW parameter encountered: \"$param\"");
        }

        my $type  = uc($fields[0]);
        my $value = $fields[1];

        if ($field_count == 2) {
            # Only PARAM may come without a third field.
            $logger->logdie("Only 2 fields for MW parameter found: \"$param\"")
                unless $type eq "PARAM";
            next;
        }

        # Three fields from here on.
        if ($type eq "DIR") {
            $logger->logdie("MW parameter $param specified an invalid directory: \"$value\".")
                unless -d $value;
        } elsif ($type eq "FILE") {
            $logger->logdie("MW parameter $param specified a file that doesn't exist or isn't readable.")
                unless (-f $value && -r $value);
        } elsif ($type eq "PARAM") {
            $logger->logdie("Only 2 fields are allowed for a type of PARAM.");
        } else {
            $logger->logdie("Unrecognized parameter type of \"$type\".");
        }
    }
}

# Expand one MW parameter into the arguments this worker should use.
#
# Parameters:
#   $task_id   - this worker's task ID, passed through to the DIR and FILE
#                helpers.
#   $blocksize - how many arguments a worker handles.
#   $param     - the MW parameter string, "type:value" or "type:value:key",
#                split on $MW_PARAM_DELIMITER. The type is case-insensitive.
#
# Returns a reference to an array of arguments. Dies through the logger for an
# unrecognized type.
sub get_arguments_for_param {
    my ($task_id, $blocksize, $param) = @_;
    my ($type, $value, $key) = split(/$MW_PARAM_DELIMITER/, $param);
    $type = uc($type);

    # This should be dynamic. Dynamically load a module that computes the array
    # at runtime.
    my $arg_ref;
    if ($type eq "PARAM") {
        # PARAM arguments have only 2 fields (param:string) because there is
        # nothing to iterate over, so $key is undef and ignored. The same
        # string is simply repeated once per slot in the block; the repetition
        # operator uses the integer part of the count, and the block size is
        # checked to be a positive integer in check_arguments.
        $arg_ref = [ ($value) x $blocksize ];
    } elsif ($type eq "DIR") {
        $arg_ref = get_dir_args($value, $key, $task_id, $blocksize);
    } elsif ($type eq "FILE") {
        $arg_ref = get_file_args($value, $key, $task_id, $blocksize);
    } else {
        # Possibly, try a dynamic load of a module
        $logger->logdie("Unrecognized parameter type of \"$type\".");
    }

    return $arg_ref;
}

# Build this worker's arguments from the files in a directory.
#
# Parameters:
#   $dir       - directory to scan.
#   $key       - argument template; every "$(Name)" token is replaced with a
#                file name (name only, without the directory).
#   $task_id   - this worker's 1-based task ID.
#   $blocksize - number of files each worker handles.
#
# The plain files in $dir whose names do not start with a dot are sorted, and
# the slice belonging to this worker (offset $blocksize * ($task_id - 1),
# length $blocksize) is turned into arguments. Returns a reference to the
# array of arguments. Dies through the logger when the directory cannot be
# opened or when this worker's offset lies past the last file.
sub get_dir_args {
    my ($dir, $key, $task_id, $blocksize) = @_;

    my @args = ();

    # Lexical handle, so nothing leaks into the global namespace.
    opendir(my $dh, $dir) or $logger->logdie("Cannot open directory $dir: $!");
    my @files = grep { /^[^\.]/ && -f "$dir/$_" } readdir($dh);
    closedir $dh;
    $logger->debug("Finished scanning directory $dir");

    @files = sort(@files);
    $logger->debug("Files scanned: ", sub { Dumper(\@files) } );

    # Use the $blocksize that was passed in, not a file-level variable.
    my $file_count = scalar(@files);
    my $offset = $blocksize * ($task_id - 1);

    # Only a worker whose block runs past the end of the list is short; a
    # block that ends exactly on the last file is a full one.
    if ($offset + $blocksize > $file_count) {
        $logger->info("This worker appears to be the last since it doesn't have a full compliment of tasks.");
    }

    if ($offset >= $file_count) {
        $logger->logdie("Insufficent number of files scanned for this worker.");
    } else {
        # splice returns fewer than $blocksize entries when the list runs out.
        my @files_to_use = splice(@files, $offset, $blocksize);
        $logger->debug("Files to use for this worker: ", sub { Dumper(\@files_to_use) } );
        foreach my $file (@files_to_use) {
            $logger->debug(qq|Using file: "$file"|);
            my $final_arg = $key;
            $final_arg =~ s/\$\(Name\)/$file/g;
            push (@args, $final_arg);
        }
    }
    $logger->info("Total number of files to use: " . scalar(@args));

    return \@args;
}

# Build this worker's arguments from the lines of a file.
#
# Parameters:
#   $file      - file to read, one value per line.
#   $key       - argument template; every "$(Name)" token is replaced with a
#                line from the file (trailing newline removed).
#   $task_id   - this worker's 1-based task ID.
#   $blocksize - number of lines each worker handles.
#
# Skips the first $blocksize * ($task_id - 1) lines, then turns up to
# $blocksize lines into arguments; fewer are produced when the file ends
# early. Returns a reference to the array of arguments. Dies through the
# logger, with the system error, when the file cannot be opened or closed.
sub get_file_args {
    my ($file, $key, $task_id, $blocksize) = @_;

    my @args = ();

    $logger->info("About to open $file");
    my $lines_to_skip = $blocksize * ($task_id - 1);
    $logger->debug("Number of lines to skip down: $lines_to_skip.");

    # Lexical handle, so nothing leaks into the global namespace.
    open(my $fh, "<", $file)
        or $logger->logdie("Unable to open $file for reading: $!");

    # Skip down to where we should start reading, giving up at end of file
    # rather than reading on past it.
    my $skipped = 0;
    while ($skipped < $lines_to_skip) {
        my $discarded = <$fh>;
        last unless defined $discarded;
        $skipped++;
    }

    # Read the lines that this worker should be processing.
    while (scalar(@args) < $blocksize) {
        my $valid_line = <$fh>;
        last unless defined $valid_line;
        chomp($valid_line);
        $logger->debug(qq|Using line: "$valid_line"|);
        my $final_arg = $key;
        $final_arg =~ s/\$\(Name\)/$valid_line/g;
        push (@args, $final_arg);
    }

    close($fh) or $logger->logdie("Unable to close $file: $!");
    return \@args;
}
