#!perl

=head1 NAME

ipgrep - grep files by CIDR

=head1 SYNOPSIS

    ipgrep [-1cfHhLlnVv] [long options...] [patterns] [files]

=head1 DESCRIPTION

This program will print lines from a file that matches IP addresses in the given netblocks.

Many of the options mirror similar options from L<grep>.

=head1 SEE ALSO

L<grep|https://www.gnu.org/software/grep/>

L<grepcidr|https://www.pc-tools.net/unix/grepcidr/>

=head1 SOURCE

The development version is on github at L<https://github.com/robrwo/perl-ipgrep> and may be cloned from
L<git://github.com:robrwo/perl-ipgrep.git>.

=head1 BUGS

Please report any bugs or feature requests on the bugtracker website
L<https://github.com/robrwo/perl-ipgrep/issues>.

When submitting a bug or request, please include a test-file or a
patch to an existing test-file that illustrates the bug or desired
feature.

=head1 AUTHOR

Robert Rothenberg <rrwo@cpan.org>

=head1 COPYRIGHT AND LICENCE

This software is copyright (c) 2023 by Robert Rothenberg.

This is free software, licensed under The Artistic License 2.0 (GPL Compatible).

=cut

use v5.18;
use warnings;

use Getopt::Long::Descriptive;
use IO::File;
use IO::Uncompress::Gunzip;
use List::Util qw( any );
use Net::CIDR qw( cidrvalidate range2cidr );
use NetAddr::IP;
use Sub::Quote qw( qsub );

our $VERSION = 'v0.6.0';

my ( $opt, $usage ) = describe_options(
    '%c %o [patterns] [files]',
    [],
    [ 'count|c'                  => 'print a count of matching lines for each input file' ],
    [ 'files-without-match|L'    => 'print the name of each input file from which no output would normally have been printed' ],
    [ 'files-with-match|l'       => 'print the name of each input file from which output would normally have been printed' ],
    [ 'invert-match|v'           => 'invert the sense of matching, to select non-matching lines' ],
    [ 'line-number|n'            => 'prefix each line of output with the line number within its input file' ],
    [ 'match-first-character|1!' => 'match IP address from the first character of the line only' ],
    [
        'filename' => 'hidden' => {
            one_of => [
                [ 'with-filename|H' => 'print the file name for each match' ],
                [ 'no-filename|h'   => 'suppress the prefixing of file names on output' ],
            ]
        }
    ],
    [ 'label=s'  => 'display input from stdin as coming from label', { default => '(standard input)' } ],
    [ 'file|f=s' => 'obtain CIDRs from a file, one per line' ],
    [ 'only-matching|o' => 'show only nonempty parts of lines that match' ],
    [ 'quiet|silent|q' => 'do not output anything, exit immediately with zero status if any match is found' ],
    [ 'exclude-final-dot!' => 'exclude matches with a final dot' ],
    [ 'ignore-bad-addresses!' => 'ignore bad addresses' ],
    [],
    [ 'version|V' => 'print the version and exit' ],
    [ 'help' => 'print usage and exit' ],

);

if ($opt->version) {
    say "iprep ${VERSION}";
    exit 0;
}

die $usage if $opt->help;

my @patterns;

if ( my $file = $opt->file ) {

    my $fh = IO::File->new($file);
    while ( my $line = $fh->getline ) {
        chomp($line);
        push @patterns, validate_cidr($line);
    }
}
else {
    my $pattern = shift @ARGV                       or die $usage;
    push @patterns, validate_cidr($pattern);
}

my $re = make_regexp($opt);

my @files = @ARGV;
push @files, "-" unless @files;

unless ( $opt->filename ) {
    $opt->{filename}         = @files > 1 ? "with_filename" : "no_filename";
    $opt->{$_}               = 0 for qw( with_filename no_filename );
    $opt->{ $opt->filename } = 1;
}

my $callback;

unless ( $opt->count ) {
    my $cb_pattern;
    if ( $opt->only_matching ) {
        my $prefix = '""';
        $prefix = '$_[0]->input_line_number . ":"' if $opt->line_number;
        $prefix = '$_[1] . ":" . ' . $prefix if $opt->with_filename;
        $cb_pattern = 'defined($_[3][0]) ? join("\n", map { ' . $prefix . ' . $_ } @{$_[3]}) . "\n" : ""';
    }
    else {
        $cb_pattern = '$_[2]';
        $cb_pattern = '$_[0]->input_line_number . ":" . ' . $cb_pattern if $opt->line_number;
        $cb_pattern = '$_[1] . ":" . ' . $cb_pattern if $opt->with_filename;
    }
    $callback = qsub 'print ' . $cb_pattern;
}

my $post;
if ( $opt->files_with_match || $opt->files_without_match ) {
    my $pattern = '$_[0]';
    $post = qsub 'say ' . $pattern;
}
elsif ( $opt->count ) {
    my $pattern = '$_[1]';
    $pattern = '$_[0] . ":" . ' . $pattern if $opt->with_filename;
    $post = qsub 'say ' . $pattern;
}

if ( $opt->quiet ) {
    $callback = $post = sub { };
}

my $matched = 0;

my $match_fn = $opt->only_matching ? sub { [ grep defined, @_ ] } : sub { any { defined($_) } @_ };

for my $file (@files) {

    my $class = $file =~ /\.gz\z/ ? "IO::Uncompress::Gunzip" : "IO::File";
    my $fh    = $class->new($file) or die "$!: $file\n";
    my $label = $file eq "-" ? $opt->label : $file;
    my $count = match_file( $opt, $re, $fh, $label, $callback );
    $matched ||= !!$count;
    exit(0) if $opt->quiet && $matched;
    next if ($opt->files_with_match && !$count) || ($opt->files_without_match && $count);
    $post->( $file, $count ) if $post;

}

exit( $matched ? 0 : 1 );

sub match_file {
    my ( $opt, $re, $fh, $file, $callback ) = @_;
    my $count = 0;
    while ( my $line = <$fh> ) {
        my $matched = $match_fn->( $line =~ m/${re}/g );
        next unless !!$matched ne !!$opt->invert_match;
        $count++;
        last if $opt->files_with_match || $opt->files_without_match;
        $callback->($fh, $file, $line, $matched) if $callback;
    }
    return $count;
}

sub make_regexp {
    my ($opt) = @_;
    my $ips   = join("|", map { NetAddr::IP->new($_)->re } @patterns );
    my $start = $opt->match_first_character ? '^' : '\b';
    $ips .= '(?!\.)' if $opt->exclude_final_dot;
    my $re    = $start . '(' . $ips . ')\b';
    return qr/${re}/;
}

sub validate_cidr {
    my ($arg) = @_;

    if ( my $cidr = cidrvalidate($arg) ) {
        return $cidr;
    }
    elsif ( my @cidrs = eval { range2cidr($arg) } ) {
        return @cidrs;
    }
    elsif ( $opt->ignore_bad_addresses ) {
        return;
    }
    else {
        die "'${arg}' does not look like an IP address, IP address range or network block\n";
    }
}
