package Algorithm::Diff::Cheap;
# Skip to first "=head" line for documentation.
use strict;
use warnings;
use integer;
use vars qw( $VERSION @EXPORT_OK );
$VERSION = v0.1;

require Exporter;
*import    = \&Exporter::import;
@EXPORT_OK = qw(
    new testrun
);

use constant DBG => $ENV{'DIFFTRACE'};

sub new
{
    my( $us, $seq1, $seq2, %obj) = @_;
    $obj{seq1} = $seq1;
    $obj{seq2} = $seq2;
    $obj{q1} = []; # non-matching elements
    $obj{q2} = [];
    $obj{q} = []; # queued from last match
    $obj{rq1} = []; # saved elements after the last match
    $obj{rq2} = [];
    $obj{Limit} = 0 unless defined $obj{Limit};
    $obj{Blocksize} = 0 unless defined $obj{Blocksize};
	$obj{got_it} = 1;
    my $me = bless \%obj, ref $us || $us;
    return $me;
}

sub Limit
{
    my( $me, $limit )= @_;
    my $old = $me->{Limit};
    $me->{Limit} = $limit if defined $limit;
    $old;
}

sub Blocksize
{
    my( $me, $size )= @_;
    my $old = $me->{Blocksize};
    $me->{Blocksize} = $size if defined $size;
    $old;
}

sub _nr1 { # next real element
    my( $me )= @_;
    if(@{$me->{rq1}}) {
    	my $r = shift @{$me->{rq1}};
	print "# G_t1 $r\n" if DBG;
    	return $r;
    }
    my $proc = $me->{seq1};
    return undef unless defined $proc;
    my $res = $proc->();
    unless(defined $res) {
    	$me->{seq1} = undef;
	print "# Get1_END\n" if DBG;
    } else {
	print "# Get1 $res\n" if DBG;
    }
    $res;
}
sub _nr2 { # next real element
    my( $me )= @_;
    if(@{$me->{rq2}}) {
    	my $r = shift @{$me->{rq2}};
	print "# G_t2 $r\n" if DBG;
    	return $r;
    }
    my $proc = $me->{seq2};
    return undef unless defined $proc;
    my $res = $proc->();
    unless(defined $res) {
    	$me->{seq2} = undef;
	print "# Get2_END\n" if DBG;
    } else {
	print "# Get2 $res\n" if DBG;
    }
    $res;
}

sub _inqueue1 { # Add to the input queue
    my( $me,@q )= @_;
    print "# INQ1 @q\n" if DBG;
    unshift(@{$me->{rq1}},@q);
}
sub _inqueue2 { # Add to the input queue
    my( $me,@q )= @_;
    print "# INQ2 @q\n" if DBG;
    unshift(@{$me->{rq2}},@q);
}

sub _outqueue1 { # Add to the output queue
    my( $me,@q )= @_;
    print "# OUTQ1 @q\n" if DBG;
    unshift(@{$me->{q1}},@q);
}
sub _outqueue2 {
    my( $me,@q )= @_;
    print "# OUTQ2 @q\n" if DBG;
    unshift(@{$me->{q2}},@q);
}
sub _outqueue {
    my( $me,@q )= @_;
    print "# OUTQ @q\n" if DBG;
    unshift(@{$me->{q}},@q);
}

sub _nq1 { # next queued element
    my( $me )= @_;
    return shift @{$me->{q1}} if @{$me->{q1}};
    return undef;
}
sub _nq2 {
    my( $me )= @_;
    return shift @{$me->{q2}} if @{$me->{q2}};
    return undef;
}
sub _nq {
    my( $me )= @_;
    return shift @{$me->{q}} if @{$me->{q}};
    return undef;
}

sub qtails {
    my( $me )= @_;
    my $q1 = 0+@{$me->{q1}};
    my $q2 = 0+@{$me->{q2}};

    while($q1 and $q2) {
    	$q1--; $q2--;
    	if($me->{q1}[$q1] eq $me->{q2}[$q2]) {
	    	unshift(@{$me->{q}}, pop(@{$me->{q1}}));
	    	my $s = pop(@{$me->{q2}});
    		print "# RE_Q $s\n" if DBG;
		} else {
	    	return;
		}
    }
}

sub is_tail($$$$$) {
    my( $me, $q1,$q2,$p1,$p2 )= @_;

    my $tlen = $me->{Blocksize};
	return 0 if $tlen > $p1 or $tlen > $p2;
	while(--$tlen) {
		return 0 if $q1->[$p1-$tlen] ne $q2->[$p2-$tlen];
	}
	return 1;
}

sub Next {
	my( $me )= @_;
	my( $r1,$r2 );
	# First get anything queued for output
	$r1 = $me->_nq1();
	return ("-",$r1) if defined $r1;
	$r2 = $me->_nq2();
	return ("+",$r2) if defined $r2;
	$r1 = $me->_nq();
	return (" ",$r1) if defined $r1;
	
	# now get the next values to compare
	$r1 = $me->_nr1();
	$r2 = $me->_nr2();
	return () if not defined $r1 and not defined $r2;

	# here is the difficult part ;-)
	my(@q,@q1,@q2);
	while(1) {
		# If one list is exhausted (or both), remember what we have so far 
		if(not defined $r1 or not defined $r2) {
			push(@q1,$r1) if defined $r1;
			push(@q2,$r2) if defined $r2;
			if(not defined $r1 and not defined $r2) {
				$me->_outqueue1(@q1); $me->_outqueue2(@q2); $me->_outqueue(@q);
				$me->qtails();
				goto &Next;
			}
			next;
		}
		# Check if the current item is in the list somewhere
		# we find the first match that's possible
		my $i = 0;
		while($i < @q1) {
			if($r1 eq $q2[$i] and $me->is_tail(\@q1,\@q2,0+@q1,$i)) {
				$me->_outqueue1(@q1); $me->_inqueue1(@q,$r1);
				$me->_outqueue2(splice(@q2,0,$i)); $me->_inqueue2(@q2,@q,$r2);
				$me->qtails();
				$me->{got_it} = 1;
				goto &Next;
			}
			if($r2 eq $q1[$i] and $me->is_tail(\@q1,\@q2,$i,0+@q2)) {
				$me->_outqueue2(@q2); $me->_inqueue2(@q,$r2);
				$me->_outqueue1(splice(@q1,0,$i)); $me->_inqueue1(@q1,@q,$r1);
				$me->qtails();
				$me->{got_it} = 1;
				goto &Next;
			}
			$i++;
		}
		if($r1 eq $r2 and ($me->{got_it} or $me->is_tail(\@q1,\@q2,0+@q1,0+@q2))) {
			push(@q,$r1);
			$me->_outqueue1(@q1); $me->_outqueue2(@q2); $me->_outqueue(@q);
			$me->{got_it} = 1;
			$me->qtails();
			goto &Next;
		} else {
			$me->{got_it} = 0;
			push(@q1,@q,$r1);
			push(@q2,@q,$r2);
			@q = ();
		}
	} continue {
		$r1 = $me->_nr1();
		$r2 = $me->_nr2();
	}
}

1;
__END__

=head1 NAME

Algorithm::Diff::Cheap - Compute `stupid' differences between two streams

=head1 SYNOPSIS

    require Algorithm::Diff::Cheap;

    my $diff = Algorithm::Diff::Cheap->new( \&seq1, \&seq2 );

    $diff->Blocksize( 2 );  # Require two contiguous matches
    $diff->Limit( 1000 );  # When to give up if no matches are found

    while( my($key,$line) = $diff->Next()  ) {
        if($key eq " ") {
	    # same
    	}
        if($key eq "-") {
	    # old
    	}
        if($key eq " ") {
	    # new
    	}
    }

=head1 INTRODUCTION

This implementation of "diff" is not very intelligent. It is designed
for collecting isolated differences in a large stream of data that
may not even fit in main memory.

Algorithm::Diff::Cheap makes no assumptions about the contents of the
data in question. It does not attempt to use comparisons other than
equality. It simply searches for the first match it finds and emits the
intervening data.

Algorithm::Diff::Cheap makes no attempt to find longest common subsequences
(the "LCS" problem"). It simply assumes that, as the data is assumed to be
sorted, any match it does find is the longest one by definition.

If your data looks like this:

    x a y x b y x c y x d y

    x a y x e y x f y x b y x c y x d y

then Algorithm::Diff::Cheap will be B<really> unhelpful unless you set the
block size to at least three.

Algorithm::Diff::Cheap has quadratic performance in the number of
differing lines.

=head2 C<new>

    $diff = Algorithm::Diffs::Cheap->new( \&seq1, \&seq2 );

C<new> accepts two procedures which, when called without arguments,
return the next item. Typically they will read their data from a disk
file.

=head2 C<Next>

Calling C<Next> without arguments will return a list: one character which
signals the kind of difference found, and the data which C<&seq1> or
C<&seq2> returned.

The identifying character can be

=over 4

* Space

Match: both C<&seq1> and C<&seq2> returned this item.

* C<->

Only C<&seq1> returned this item.

=back

Returning the empty list signals that both C<&seq1> and C<&seq2> have
returned C<undef>.

=item C<Blocksize>

This method sets the minimum number of lines which must match before
a common subsequence is detected. If your data contains logical entries
spanning multiple items, set this to at least one more than the number
of constant data in your entry. For instance, if your output is XML
that looks like this:

	<foo>
		<bar>KeyName</bar>
		<baz>KeyContent</baz>
	</foo>

the block size should be four if there are multiple keys with the same
name, and three otherwise.

The default is one.

=item C<Limit>

This method sets the maximum lines

The default is to look for a match indefinitely.

=head1 AUTHOR

This code was written by Matthias Urlichs <smurf@noris.net>.

=head1 LICENSE

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl.

=cut
