#!/usr/bin/perl

=begin metadata

Name: cmp
Description: compare two files
Author: D Roland Walker, walker@pobox.com
License: perl

=end metadata

=cut


# cmp - compare two files
# VERSION 1.0
#
# Notes:
#
# It's nice to see that, speedwise, this beats the pants off
# GNU and IRIX system cmp in many tasks.  If compiled, this
# would probably be faster in every case.  Go Perl !
#
#
# Compat:
#
# GNU, Solaris, and IRIX cmp seem to give the EOF warning
# whenever -l is given and file sizes are not equal.  That
# doesn't match the BSD docs.  We follow the docs.
#
#
# Todo:
#
# support grouped single-letter opts
# revise how zero-length files are handled ?
#

use strict;

use File::Basename qw(basename);

use constant EX_SUCCESS   => 0;
use constant EX_FAILURE   => 1;
use constant EX_DIFFERENT => 1;
use constant EX_USAGE     => 2;

use constant ST_INO  => 1;
use constant ST_SIZE => 7;

# Name this script was invoked as; used as the prefix of every diagnostic.
my $Program = basename($0);

# Number of bytes requested from each file per read.
my $chunk_size = 10_000;

# Verbosity level: 0 after -s, 2 after -l, 1 when neither option is given.
my $volume = 1;

# Per-file state, one variable for each of the two operands.
my ($file1,    $file2);      # path operands
my ($skip1,    $skip2);      # optional starting byte offsets
my ($buffer1,  $buffer2);    # most recently read chunk of each file
my ($read_in1, $read_in2);   # byte counts returned by the latest reads

# Running totals over the chunks that have already been compared.
my $bytes_read = 0;
my $lines_read = 0;

# Set once a differing byte has been listed; stays undefined until then.
my $saw_difference;
# does not yet support grouped opts
# Consume leading options from @ARGV.  Parsing stops at "--" (the argument
# after it becomes $file1) or at the first argument that does not look like
# an option (which itself becomes $file1).  Grouped single-letter options
# are not supported yet.
ARG: while (@ARGV) {
    my $arg = shift;

    if ($arg =~ /^--$/) {
        $file1 = shift;
        last ARG;
    }

    manual() if $arg =~ /^-\?$/;            # exits

    if ($arg =~ /^-l$/) {
        $volume = 2;
        next ARG;
    }

    if ($arg =~ /^-s$/) {
        $volume = 0;
        next ARG;
    }

    usage() if $arg =~ /^-./;               # exits

    $file1 = $arg;
    last ARG;
}

# What remains must be file2 plus at most two skip offsets.
usage() if @ARGV < 1 || @ARGV > 3;          # exits

$file2 = shift;

# ---------------------------------------------------------------------------
# Validate the remaining operands, apply the cheap shortcuts, and open both
# files positioned at their starting offsets.
#
# The skip operands are parsed first, so that a malformed offset is always
# diagnosed and so that the "same file" shortcut below can take the offsets
# into account.
# ---------------------------------------------------------------------------

# Optional skip offsets: decimal, octal (leading 0) or hexadecimal (leading
# 0x).  The operand is validated strictly and converted with oct() or plain
# numification instead of a string eval, so that input such as "089" is
# rejected rather than silently ignored.
for my $skip_ref (\$skip1, \$skip2) {
    last unless @ARGV;
    my $offset = shift @ARGV;
    usage() unless $offset =~ /\A(?:0x[0-9A-Fa-f]+|0[0-7]*|[1-9][0-9]*)\z/;  # exits
    $$skip_ref = $offset =~ /\A0/ ? oct($offset) : $offset + 0;
}

my @stat1 = stat $file1;
unless (@stat1) {
    warn "$Program: '$file1': $!\n";
    exit EX_FAILURE;
}
my @stat2 = stat $file2;
unless (@stat2) {
    warn "$Program: '$file2': $!\n";
    exit EX_FAILURE;
}

# Directories cannot be compared; refuse them before any shortcut can
# report success for them.
if (-d $file1) {
    warn "$Program: $file1 is a directory\n";
    exit EX_USAGE;
}
if (-d $file2) {
    warn "$Program: $file2 is a directory\n";
    exit EX_USAGE;
}

# Same file shortcut.  Both operands name the same file only when the inode
# AND the device number (element 0 of stat's list) match; an inode of 0 or
# undef is treated as "no usable inode information".  The shortcut is only
# valid when both sides start at the same offset.
my $same_offsets = ($skip1 || 0) == ($skip2 || 0);
if (   $same_offsets
    && $stat1[ST_INO]
    && $stat1[ST_INO] == $stat2[ST_INO]
    && $stat1[0]      == $stat2[0]) {
    exit EX_SUCCESS;
}

# Special handling for zero-length files.  The size reported by stat is only
# trusted when both operands are regular files; other file types (pipes,
# devices, procfs entries) may report 0 and still deliver data, so those are
# left to the read loop.
#
# Can't we say 'differ at byte zero' and so on here?  That might make more
# sense than this behavior.  Also, this should be made consistent with the
# behavior when skip >= filesize.
if (-f $file1 && -f $file2) {
    my $empty1 = $stat1[ST_SIZE] == 0;
    my $empty2 = $stat2[ST_SIZE] == 0;

    exit EX_SUCCESS if $empty1 && $empty2;

    if ($empty1 || $empty2) {
        if ($volume) {
            warn "$Program: EOF on $file1\n" if $empty1;
            warn "$Program: EOF on $file2\n" if $empty2;
        }
        exit EX_DIFFERENT;
    }
}

# Open both files and switch them to binary mode: the comparison is
# byte-wise, so no newline translation or encoding layer may be applied.
unless (open FILE1, '<', $file1) {
    warn "$Program: cannot open '$file1': $!\n";
    exit EX_FAILURE;
}
binmode FILE1;

unless (open FILE2, '<', $file2) {
    warn "$Program: cannot open '$file2': $!\n";
    exit EX_FAILURE;
}
binmode FILE2;

# Position each handle at its starting offset (whence 0 = from the start of
# the file).  A failed seek would otherwise compare the wrong bytes.
if ($skip1) {
    unless (defined sysseek(FILE1, $skip1, 0)) {
        warn "$Program: cannot seek in '$file1': $!\n";
        exit EX_FAILURE;
    }
}
if ($skip2) {
    unless (defined sysseek(FILE2, $skip2, 0)) {
        warn "$Program: cannot seek in '$file2': $!\n";
        exit EX_FAILURE;
    }
}

# ---------------------------------------------------------------------------
# Main comparison loop.
#
# Both files are read in chunks of up to $chunk_size bytes and the chunks
# are compared.  Depending on $volume the first difference is reported and
# the program exits (1), the program exits silently on the first difference
# (0), or every differing byte is listed (2).
#
# Exit status: EX_SUCCESS when no difference was found, EX_DIFFERENT when
# the files differ or one is a prefix of the other, EX_FAILURE on a read
# error.
# ---------------------------------------------------------------------------
READ: while (1) {
    $read_in1 = _read_chunk(\*FILE1, \$buffer1, $chunk_size);
    unless (defined $read_in1) {
        warn "$Program: error reading '$file1': $!\n";
        exit EX_FAILURE;
    }

    $read_in2 = _read_chunk(\*FILE2, \$buffer2, $chunk_size);
    unless (defined $read_in2) {
        warn "$Program: error reading '$file2': $!\n";
        exit EX_FAILURE;
    }

    # Number of bytes available from BOTH files in this chunk; only these
    # can be compared position by position.
    my $common = $read_in1 < $read_in2 ? $read_in1 : $read_in2;

    if ($buffer1 ne $buffer2) {
        for my $offset (0 .. $common - 1) {
            my $byte1 = substr $buffer1, $offset, 1;
            my $byte2 = substr $buffer2, $offset, 1;
            next if $byte1 eq $byte2;

            # -s: the exit status is the only result.
            exit EX_DIFFERENT if $volume == 0;

            my $report_bytes = $bytes_read + 1 + $offset;

            if ($volume == 1) {
                # Line number = newlines seen in earlier chunks, plus those
                # preceding the difference in this chunk, plus one.
                my $report_lines = $lines_read + 1
                                 + (substr($buffer1, 0, $offset) =~ tr/\n//);
                print "$file1 $file2 differ: char $report_bytes, line $report_lines\n";
                exit EX_DIFFERENT;
            }

            # -l: list this difference and keep scanning.
            $saw_difference = 1;
            printf "%6d %3o %3o\n", $report_bytes, ord($byte1), ord($byte2);
        }
    }

    $lines_read += ($buffer1 =~ tr/\n//);
    $bytes_read += $common;

    # One file ended before the other: they differ.  The EOF notice is
    # suppressed under -s and once differences have been listed under -l.
    if ($read_in1 != $read_in2) {
        my $shorter = $read_in1 < $read_in2 ? $file1 : $file2;
        warn "$Program: EOF on $shorter\n" unless $saw_difference or !$volume;
        exit EX_DIFFERENT;
    }

    # Both files reached end-of-file at the same position.
    last READ if $read_in1 == 0;
}

close FILE1;
close FILE2;

exit($saw_difference ? EX_DIFFERENT : EX_SUCCESS);

# Read up to $want bytes from $fh into the scalar referenced by $bufref,
# repeating short reads until the chunk is full or end-of-file is reached.
# Returns the number of bytes stored (0 at end-of-file), or undef when
# sysread fails, in which case $! holds the error.
sub _read_chunk {
    my ($fh, $bufref, $want) = @_;

    $$bufref = '';
    my $total = 0;
    while ($total < $want) {
        my $got = sysread $fh, $$bufref, $want - $total, $total;
        return unless defined $got;         # read error
        last if $got == 0;                  # end-of-file
        $total += $got;
    }
    return $total;
}

# Write the one-line synopsis to standard error and terminate the program
# with the EX_USAGE status.  Does not return.
sub usage {
    my $synopsis = "usage: $Program [-l] [-s] file1 file2 [skip1 [skip2]]\n";
    warn $synopsis;
    exit EX_USAGE;
}

# Show the full documentation through Pod::Usage (verbosity level 2),
# asking pod2usage to exit with the EX_SUCCESS status.  Pod::Usage is only
# loaded when the documentation is actually requested.
sub manual {
    require Pod::Usage;

    my %options = (
        -exitval => EX_SUCCESS,
        -verbose => 2,
    );

    Pod::Usage::pod2usage(\%options);
}

__END__

=head1 NAME

cmp - compare two files

=head1 SYNOPSIS

cmp B<[-l]> B<[-s]> I<file1> I<file2> I<[skip1 [skip2]]>

=head1 DESCRIPTION

I<cmp> compares two files, byte-by-byte.  The result of the comparison
is always given by the exit status, and may be summarized on the
standard output according to the options given on the command line.

If the two compared files are identical, I<cmp> will exit with
a zero exit status.  If the compared files are not identical, I<cmp>
will exit with a status of 1.

If no options are given, I<cmp> will print, on the standard output,
the byte number and line number where the first difference is encountered.

If one file is an initial subsequence of the other, a message will
also be printed on the standard error indicating that EOF was reached
in the shorter of the two files.

I<skip1> and I<skip2> are optional byte offsets into I<file1> and
I<file2>, respectively, that determine where the file comparison
will begin.  Offsets may be given in decimal, octal, or hexadecimal
form.  Indicate octal notation with a leading '0', and hexadecimal
notation with a leading '0x'.

=head2 OPTIONS

=over

=item -s

silent execution; indicate results only by exit status, suppressing
all output and warnings.

=item -l

list differences; return the byte number and the differing byte values
for each difference between the two files.  The byte number is given
in decimal, and the byte values are given in octal.

=item -?

show the documentation

=item -.

show a short usage summary

=back

=head1 ENVIRONMENT

No environment variables affect the execution of I<cmp>.

=head1 BUGS

No known bugs.

=head1 AUTHOR

D Roland Walker I<E<lt>walker@pobox.comE<gt>>.

=head1 COPYRIGHT and LICENSE

This program is copyright (c) D Roland Walker 1999.

This program is free and open software. You may use, modify, distribute,
and sell this program (and any modified variants) in any way you wish,
provided you do not restrict others from doing the same.

