#!perl

our $DATE = '2018-11-10'; # DATE
our $VERSION = '0.008'; # VERSION

use 5.010001;
use strict;
use warnings;

use Getopt::Long qw(:config no_ignore_case bundling no_ignore_case gnu_compat no_getopt_compat);
use IPC::System::Options qw(system);
use Term::ANSIColor;
use Time::HiRes 'sleep';

my $PROG = 'block-web-flooders';

my $dbspec = {
    latest_v => 1,
    install => [
        'CREATE TABLE blocked (ip TEXT NOT NULL PRIMARY KEY, ctime INT NOT NULL)',
    ],
};

my %Opts = (
    limit => undef,
    has => [],
    lacks => [],
    has_pattern => [],
    lacks_pattern => [],
    period => 300,
    block_period => 86400,
    whitelist_ip => [],
    # ports => [80, 443],
    spanel_site => undef,
);

sub read_config {
    require Config::IOD::Reader;

    my $iod = Config::IOD::Reader->new();
    for my $dir ("$ENV{HOME}/.config", $ENV{HOME}, "/etc") {
        my $path = "$dir/$PROG.conf";
        next unless -f $path;
        my $hoh = $iod->read_file($path);
        for my $sect (keys %$hoh) {
            next unless $sect eq 'GLOBAL';
            my $h = $hoh->{$sect};
            for my $k (keys %$h) {
                my $v = $h->{$k};
                $Opts{$k} = $v;
            }
        }
    }
    $Opts{whitelist_ip} = [$Opts{whitelist_ip}]
        if defined $Opts{whitelist_ip} && ref $Opts{whitelist_ip} ne 'ARRAY';
}

sub parse_options {
    Getopt::Long::GetOptions(
        'help|h|?' => sub {
            print "Usage: $PROG [options]\n";
            print <<EOT;
Usage: $PROG [options]
Options:
  --has=S       Only consider lines which have string 'S'. Can be specified
                multiple times. If specified multiple times, it means lines
                must match *all* strings to be considered.
  --lacks=S     The opposite of --has. Only consider lines which do not have
                string 'S'. Can be specified multiple times. If specified
                multiple times, it means lines must lack *all* strings to be
                considered.
  --has-pattern=REGEX
                Like --has, but you can specify regex pattern.
  --lacks-pattern=REGEX
                Like --lacks, but you can specify regex pattern.
  --limit=N     Start blocking IP which has requested more than N times
                during the period.
  --period=N    Period to count speed limit, in seconds (default: 300,
                which means 5 minutes).
  --block-period=N
                Period of blocking an IP, in seconds (default: 86400, which
                means 24 hours a.k.a. 1 day).
  --dry-run     Do not actually block with iptables, simulation mode.
  --spanel-site=NAME
                Instead of piping output of tail -f manually, you can use this
                on an Spanel server to automatically locate the HTTP & HTTPS
                log files and switch to the newest files.
For more details, see the documentation (man $PROG).
EOT
            exit 0;
        },
        'version|v' => sub {
            no warnings 'once';
            print "$PROG version ", ($main::VERSION // "dev"), "\n";
            exit 0;
        },
        'whitelist-ip=s' => $Opts{whitelist_ip},
        'has=s' => $Opts{has},
        'has-pattern=s' => $Opts{has_pattern},
        'lacks=s' => $Opts{lacks},
        'lacks-pattern=s' => $Opts{lacks_pattern},
        'limit=i' => \$Opts{limit},
        'period=i' => \$Opts{period},
        'block-period=i' => \$Opts{block_period},
        'dry-run' => \$Opts{dry_run},
        'spanel-site=s' => \$Opts{spanel_site},
    );

    warn "$PROG: No whitelist_ip specified\n" unless @{ $Opts{whitelist_ip} };
    die "$PROG: Please specify --limit\n" unless defined($Opts{limit}) && $Opts{limit} > 0;
    die "$PROG: Please specify positive --period\n" unless $Opts{period} > 0;
    if (defined $Opts{spanel_site}) {
        $Opts{spanel_site} =~ /\A[\w-]+(\.[\w-]+)*\z/ or die "$PROG: Invalid site `$Opts{spanel_site}`\n";
        (-d "/s/$Opts{spanel_site}/syslog")
            or die "$PROG: Site not found `$Opts{spanel_site}`: no /s/$Opts{spanel_site}/syslog\n";
    }
    $Opts{has_pattern}   = [map { qr/$_/ } @{ $Opts{has_pattern} }  ];
    $Opts{lacks_pattern} = [map { qr/$_/ } @{ $Opts{lacks_pattern} }];
}

sub connectdb {
    require DBI;
    require SQL::Schema::Versioned;

    my $dbh = DBI->connect("dbi:SQLite:dbname=/var/run/block-web-flooders.db");

    my $res = SQL::Schema::Versioned::create_or_update_db_schema(
        dbh => $dbh, spec => $dbspec);
    die "Cannot initialize DB: $res->[0] - $res->[1]" unless $res->[0] == 200;

    $dbh;
}

sub run {
    #require Term::Size;
    require Tie::Array::Expire;
    require Time::Duration;

    my $dbh = connectdb();

    #my ($columns, $rows) = Term::Size::chars *STDOUT{IO};

    my $last_check_spanel_log_time;
    my ($spanel_http_log_name, $spanel_https_log_name);

    my $last_update_output_time;
    my $last_unblock_time;
    my %ips; # key = ip address, value = expiring array
    my %whitelisted = map { $_=>1 } @{ $Opts{whitelist_ip} }; # key = ip address
    my %blocked; # key = ip address, value = unix time (time blocked)
    my $num_lines = 0;
    tie my @messages, "Tie::Array::Expire", 15;

    # load blocked ips from db
    my $sth = $dbh->prepare("SELECT * FROM blocked");
    $sth->execute;
    while (my $row = $sth->fetchrow_hashref) { $blocked{ $row->{ip} } = $row->{ctime} }

    my $code_block_or_unblock = sub {
        my ($which, $ip) = @_;
        if ($which eq 'block') {
            return if $blocked{$ip};
        } else {
            return unless $blocked{$ip};
        }
        system({die => 1, dry_run => $Opts{dry_run}},
               "iptables", ($which eq 'block' ? "-A" : "-D"), "INPUT", "-s", $ip,
               "-p", "tcp", "-m", "multiport", "--dports", "80,443",
               "-j", "DROP",
           );
        my $now = time();
        if ($which eq 'block') {
            unshift @messages, "$ip BLOCKED".($Opts{dry_run} ? " (dry-run)" : "");
            $dbh->do("INSERT OR IGNORE INTO blocked (ip,ctime) VALUES (?,?)", {}, $ip, $now);
            $blocked{$ip} = time();
        } else {
            unshift @messages, "$ip unblocked".($Opts{dry_run} ? " (dry-run)" : "");
            $dbh->do("DELETE FROM blocked WHERE ip=?", {}, $ip);
            delete $blocked{$ip};
        }
    };
    my $code_block   = sub { $code_block_or_unblock->('block'  , @_) };
    my $code_unblock = sub { $code_block_or_unblock->('unblock', @_) };

    local *INPUT;
    if (defined $Opts{spanel_site}) {
        require Tie::Handle::TailSwitch;
        my $dir = "/s/$Opts{spanel_site}/syslog";
        tie *INPUT, 'Tie::Handle::TailSwitch', (
            globs => ["$dir/https_access.*.log", "$dir/http_access.*.log"],
        );
    } else {
        *INPUT = \*STDIN;
    }

  LINE:
    while (1) {
        my $line = <INPUT>;
        if (!defined($line) || !length($line)) {
            sleep 0.5;
            next;
        }

        my $now = time();
        $num_lines++;
        chomp $line;
        $line =~ /\A(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s/ or do {
            warn "$PROG: Line '$line': Can't parse IP address, skipped\n";
            next;
        };
        my $ip = $1;
        next if $blocked{$ip};

      OUTPUT:
        {
            last unless !$last_update_output_time ||
                $last_update_output_time <= $now-2;
            print "\e[2J\e[;H"; # clear screen + put cursor at top (0,0)
            printf "Blocked IPs: %s%4d%s | Log lines: %s%6d%s | Running for: %s%s%s\n",
                color('bold'), (scalar keys %blocked), color('reset'),
                color('bold'), $num_lines, color('reset'),
                color('bold'), Time::Duration::concise(Time::Duration::duration($now-$^T, 2)), color('reset');
            $last_update_output_time = $now;
            printf "Top IPs:\n";
            my $i = 0;
            for my $ip (sort { scalar(@{ $ips{$b} }) <=> scalar(@{ $ips{$a} }) } keys %ips) {
                last if $i++ >= 10;
                printf "  %15s (%4d)\n", $ip, scalar(@{ $ips{$ip} });
            }
            printf "Last messages:\n";
            $i = 0;
            for my $msg (@messages) {
                last if $i++ >= 5;
                print "  $msg\n";
            }
        } # OUTPUT

      UNBLOCK:
        {
            last unless !$last_unblock_time ||
                $last_unblock_time <= $now-60;
            for (keys %blocked) {
                next unless $blocked{$_} < $now - $Opts{block_period};
                $code_unblock->($_);
            }
            $last_unblock_time = $now;
        } # UNBLOCK

        for my $has (@{ $Opts{has} }) {
            next LINE unless index($line, $has) >= 0;
        }
        for my $lacks (@{ $Opts{lacks} }) {
            next LINE if index($line, $lacks) >= 0;
        }
        for my $pat (@{ $Opts{has_pattern} }) {
            next LINE unless $line =~ $pat;
        }
        for my $pat (@{ $Opts{lacks_pattern} }) {
            next LINE if $line =~ $pat;
        }

        $ips{$ip} //= do {
            tie my @ary, "Tie::Array::Expire", $Opts{period};
            \@ary;
        };
        push @{ $ips{$ip} }, 1;
        if (@{ $ips{$ip} } > $Opts{limit} && !$whitelisted{$ip}) {
            $code_block->($ip);
            delete $ips{$ip};
        }
    } # loop
}

# MAIN

die "$PROG: Please run me as root\n" if $>;
read_config();
parse_options();
run();

# PODNAME: block-web-flooders
# ABSTRACT: Block IP addresses of web flooders using iptables

__END__

=pod

=encoding UTF-8

=head1 NAME

block-web-flooders - Block IP addresses of web flooders using iptables

=head1 VERSION

This document describes version 0.008 of block-web-flooders (from Perl distribution App-BlockWebFlooders), released on 2018-11-10.

=head1 SYNOPSIS

This script should be run as root/sudo root, because it needs to call the
L<iptables> command to add block rules to the firewall.

First of all, create F</etc/block-web-flooders.conf> that contains something
like this:

 whitelist_ip = 1.2.3.4
 whitelist_ip = ...

Where C<1.2.3.4> is the IP address(es) that you are connecting from (you can see
this from output of L<w> command), to make sure you yourself don't get blocked.
Add more lines/IP address as necessary.

When a flood is happening, try to tail your web access log file:

 # tail -f /s/example.com/syslog/https_access.2017-06-07.log

and see the patterns that you can use to discriminate the requests coming from
the flooder. Since the IP address is usually random/many, you can see from other
patterns e.g. requested URI, user agent. For example, if the suspicious log
lines are something like this:

 93.186.253.79 - - [07/Jun/2017:00:54:23 +0000] "GET /heavy1.php HTTP/1.0" 200 20633 "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"
 51.15.41.74 - - [07/Jun/2017:00:54:25 +0000] "POST /heavy2.php HTTP/1.1" 302 - "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"
 89.38.149.5 - - [07/Jun/2017:00:54:24 +0000] "GET /heavy1.php HTTP/1.0" 200 20633 "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"
 93.186.253.79 - - [07/Jun/2017:00:54:24 +0000] "GET /heavy3.php HTTP/1.0" 200 20524 "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"
 51.15.41.74 - - [07/Jun/2017:00:54:25 +0000] "GET /heavy1.php HTTP/1.0" 200 20633 "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"
 89.38.149.5 - - [07/Jun/2017:00:54:25 +0000] "GET /heavy3.php HTTP/1.0" 200 20524 "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"
 89.38.149.5 - - [07/Jun/2017:00:54:25 +0000] "GET /heavy3.php HTTP/1.0" 200 20524 "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"
 93.186.253.79 - - [07/Jun/2017:00:54:26 +0000] "POST /heavy2.php HTTP/1.1" 302 - "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"
 51.15.41.74 - - [07/Jun/2017:00:54:25 +0000] "GET /heavy1.php HTTP/1.0" 200 20633 "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"
 89.36.213.37 - - [07/Jun/2017:00:54:26 +0000] "GET /heavy3.php HTTP/1.0" 200 20524 "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"
 89.36.213.37 - - [07/Jun/2017:00:54:27 +0000] "POST /heavy2.php HTTP/1.1" 302 - "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"
 89.38.149.5 - - [07/Jun/2017:00:54:26 +0000] "GET /heavy1.php HTTP/1.0" 200 20633 "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"
 89.36.213.37 - - [07/Jun/2017:00:54:26 +0000] "GET /heavy1.php HTTP/1.0" 200 20633 "-" "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.2.0 Version/10.00"

you can add C<--has Presto/2.2.0> and C<--has /heavy> since these quite
accurately selects the flood requests. If you can add strings which pretty
accurately single out the flood requests, you can use a lower threshold speed,
e.g. C<--limit 5> to block IPs which has requested 5 or more in the last 5
minutes. Otherwise, if you do not have any specific C<--has> to single out the
flood, you might need to set a higher limit, e.g. C<--has html --limit 30
--period 60> to block IPs which have requested 30 or more requests in the last
minute, or C<--limit 200 --period 120> to block IPs which have requested 200 or
more requests in the last 2 minutes.

Feed the output of the C<tail> command to this script:

 # tail -f /s/example.com/syslog/https_access.2017-06-07.log | block-web-flooders \
   --has Presto/2.2.0 --has-pattern '/heavy|/baz' --limit 5

or perhaps:

 # tail -f /s/example.com/syslog/https_access.2017-06-07.log | block-web-flooders \
   --limit 200 --period 120

The script will display the top IP addresses and whether an IP is being blocked,
along with some statistics:

 Blocked IPs this session:  12 | Log lines:  198 | Running for: 2m13s
 Top IPs:
   89.36.213.37    (  4)
   89.38.149.5     (  2)
   93.186.253.79   (  2)
   ...
 Last messages:
   51.15.41.74 BLOCKED

While this script is running, you might also want to open something like this in
another terminal (monitor incoming web requests):

 # tail -f /s/example.com/syslog/https_access.2017-06-07.log | grep /heavy

and somethins like this in yet another terminal (monitor system load and number
of web server processes, this depends on the web server you use):

 # watch 'w | head -n1; echo -n "Blocked IPs total: "; iptables -nL INPUT | wc -l; echo -n "Apache processes: "; ps ax | grep apache | wc -l'

If your webserver is still maxed out by requests, you might want to tweak
C<--limit> and C<--period> options and restart the web server.

To see the blocked IP addresses:

 # iptables -nL INPUT

As long as the script runs, IP addresses are blocked by default temporarily for
86400 seconds (or, according to the --block-period command-line option or
block_period configuration). After that block period is exceeded, the IP is
unblocked.

To immediately clear/unblock all the IPs:

 # iptables -F INPUT

=head1 DESCRIPTION

This script helps a sysadmin when there is a flood from multiple IP addresses to
your website. The script works by reading web access log file, considering lines
which match the specified pattern(s), then block the IP address of the requester
if the speed of request from that IP exceeds a limit. The blocking is done using
firewall (L<iptables>), by default:

 # iptables -A INPUT -s <ip-address> -p tcp -m multiport --dports 80,443 -j DROP

To use this script, see the general guide in the Synopsis.

=head1 OPTIONS

=over

=item * --has=S

=item * --has-pattern=REGEX

=item * --lacks=S

=item * --lacks-pattern=REGEX

=item * --limit=N

=item * --period=N

=item * --whitelist-ip=IP

=item * --spanel-site=NAME

=item * --dry-run

=back

=head1 TODO

Option to customize ports.

Parse timestamps from web access logs so it can also parse past lines.

IPv6 support.

Some interactivity, e.g.: reset counters, unblock some IPs, increase/decrease
limit.

=head1 HOMEPAGE

Please visit the project's homepage at L<https://metacpan.org/release/App-BlockWebFlooders>.

=head1 SOURCE

Source repository is at L<https://github.com/perlancar/perl-App-BlockWebFlooders>.

=head1 BUGS

Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-BlockWebFlooders>

When submitting a bug or request, please include a test-file or a
patch to an existing test-file that illustrates the bug or desired
feature.

=head1 AUTHOR

perlancar <perlancar@cpan.org>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2018, 2017 by perlancar@cpan.org.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
