#!/usr/sww/bin/perl5 -w
$infilename = undef;
$progfilename = undef;
@program = ();
while($#ARGV >= 0) 
{
    $_ = shift @ARGV;
    if (/^-d(ata)?$/o) {
	$infilename = shift @ARGV;
    } elsif (/^-p(rog)?$/o) {
	$progfilename = shift @ARGV;
    } elsif (/^-h$/o) {
	die("Usage: $0 [-d(ata) datafilename] [-p(rog) progfilename] [command ...]\n");
    } else {
	push(@program,$_);
    }
}
$infilename = "-" unless defined $infilename;
open(RESULTS,$infilename) || die("Can't open $infilename for read??\n");
$count = -1;
$expectfields = undef;
$sepregexp = '\s+';
$dieonwrongfields = 0;
$ndata = 0;
$maxfields = 0;
while(<RESULTS>) {
#    print "Hi:$_\n";
    if (/^#/o) {
	if (/^#SEPREGEXP=(.+)$/o) {
	    $sepregexp = $1;
	}
	s/\s*$//o;
	last if /^#END_OF_DATA$/o;
	if (/^#EXPECTFIELDS=(\d+)$/o) {
	    $expectfields = $1;
	}
	if (/^#DIEONWRONGFIELDS$/o) {
	    $dieonwrongfields = 1;
	}
	next;
    }
    next if /^#/o;
    next if /^\s*$/o;
    chop;
    @fields = split(/$sepregexp/);
    $fields = $#fields+1;
    if (defined $expectfields && $fields != $expectfields) {
	$msg = "Expected $expectfields, found $fields in:\n$_\n";
	die($msg) if $dieonwrongfields;
	warn($msg);
	next;
    }
    ++$count;
    $maxfields = $fields if $fields > $maxfields;
    for($i=$maxfields;$i<$fields;++$i) {
	$tmpdata[$i] = [];
    }
    for($i=0;$i<$fields;++$i) {
	$tmpdata[$i]->[$count] = $fields[$i];
#	$data{"datavar_$i"} = [] unless defined $data{"datavar_$i"};
#	$data{"datavar_$i"}[$count] = $fields[$i-1];
    }
    ++$ndata;
}

#print "Found maxfields = $maxfields\n";
for($i=1;$i<=$maxfields;++$i) {
    $data{"datavar_$i"} = $tmpdata[$i-1];
    &goodeval("\$USERDATA::datavar_$i = \$tmpdata[\$i-1];");
}

#print "Found $ndata;$count items\n";
if (defined $progfilename) {
    open(FILE,$progfilename) || die("can't open $progfilename: $!\n");
    while(<FILE>) {
#	print "Hi:$_\n";
	&runprog($_);
    }
    close(FILE);
} else {
    foreach $progline (@program) {
	&runprog($progline);
    }
}

sub runprog {
    return if ($_[0] =~ /^\#/o) || ($_[0] =~ /^\s*$/o);
    $_[0] =~ s/^\s+//o;
    $_[0] =~ s/\s+$//o;
    if ($_[0] =~ /^xgobi\s+(\S.*)$/o) {
	&xgobicmd($1);
    } elsif ($_[0] =~ /^hist\s+(\S(.*\S)?)$/o) {
	local(@parts) = split(/\s+/o,$1);
	local($stacks,$outdev,$outfile) = (20,undef,undef);
	$_ = shift @parts;
	while (defined ($_)) {
	    if (/^-n$/o) {
		$stacks = shift @parts;
	    } elsif (/^-d$/o) {
		$outdev = shift @parts;
	    } elsif (/^-f$/o) {
		$outfile = shift @parts;
	    } elsif (/^\w+$/o) { 
		last;
	    } else {
		$_ = undef;
		last;
	    }
	    $_ = shift @parts;
	}
	if (defined $_ && $#parts < 0)
	{
	    &histogramcmd($_,$stacks,$outdev,$outfile);
	} else {
	    die("Usage: hist [-n <num-stacks>] [-d <device>] [-f <out-file>] var\n");
	}
    } elsif ($_[0] =~ /^exit$/o) {
	exit(0);
    } elsif ($_[0] =~ /^cdf\s+(\S(.*\S)?)$/o) {
	local(@parts) = split(/\s+/o,$1);
	&cdfcmd(@parts);
    } elsif ($_[0] =~ /^show\s+(\S.*)$/o) {
	&showcmd($1);
    } elsif ($_[0] =~ /^skip\s+(\S.*)$/o) {
	&skipcmd($1);
    } elsif ($_[0] =~ /^var\s+(\w+)\s+(\S.*)$/o) {
	&varcmd($1,$2);
    } elsif ($_[0] =~ /^(\w+)\s*:=\s*(\S.*)$/o) {
	&varcmd($1,$2);
    } elsif ($_[0] =~ /^mean\s+(\w+)$/o) {
	$var = $1;
	($mean,$count) = &meancmd($var);
	print "mean of $var = $mean over $count elements\n";
    } elsif ($_[0] =~ /^median\s+(\w+)$/o) {
	$var = $1;
	($median,$count) = &quantilecmd($var,0.5);
	print "median of $var = $median over $count elements\n";
    } elsif ($_[0] =~ /^quantile\s+(\w+)\s+(.+)$/o) {
	$var = $1;
	$quantiles = $2;
	@quantiles = split(/\s+/o,$quantiles);
	@ret = &quantilecmd($var,@quantiles);
	$count = pop(@ret);
	print "quantiles [", join(', ',@quantiles) ,"] of $var = [",
	      join(', ',@ret) , "] over $count elements\n";
    } elsif ($_[0] =~ /^siqr\s+(\w+)$/o) {
	$var = $1;
	($siqr,$quantile25,$median,$quantile75,$count) = &siqrcmd($var);
	print "SIQR of $var = $siqr from $median ; quantiles [0.25,0.75] = [$quantile25,$quantile75] over $count elements\n";
    } elsif ($_[0] =~ /^variance\s+(\w+)$/o) {
	$var = $1;
	($variance,$mean,$count) = &variancecmd($var);
	print "variance of $var = $variance from $mean over $count elements\n";
    } elsif ($_[0] =~ /^stddev\s+(\w+)$/o) {
	$var = $1;
	($stddev,$mean,$count) = &stddevcmd($var);
	print "stddev of $var = $stddev from $mean over $count elements\n";
    } elsif ($_[0] =~ /^corr\s+(\w+)\s+(\w+)$/o) {
	$var1 = $1;
	$var2 = $2;
	($corr,$cov,$std1,$std2,$count) = &corrcmd($var1,$var2);
	print "Correlation Coefficient of $var1,$var2 = $corr\n";
    } elsif ($_[0] =~ /^randcorr\s+([\d.]+)\s+(\w+)\s+(\w+)$/o) {
	&randcorr($1,$2,$3);
    } elsif ($_[0] =~ /^corrpairs\s+(\S.*)$/o) {
	&corrpairscmd($1);
    } elsif ($_[0] =~ /^echo\s+(\S.+)$/o) {
	print $1 , "\n";
    } elsif ($_[0] =~ /^echo\s*$/o) {
	print "\n";
    } else {
	die("Unknown command '$_[0]'\n");
    }
}

sub meancmd {
    local($var) = @_;
    local($i,$sum,$count,$mean);

    $mean = $sum = $count = 0;
    my($varref) = $data{$var};
    for($i=0;$i<$ndata;++$i) {
	next if &skip($i);
	die("$var #$i not defined\n") unless defined $varref->[$i];
	++$count;
	$sum += $varref->[$i];
    }
    $mean = $sum / $count if $count > 0;
    return ($mean,$count);
}

sub quantilecmd {
    local($var,@quantiles) = @_;
    local(@qvals,$quantile,$count,@arr);

    die("$var not defined\n") unless defined $data{$var}[0];
    my($dataref) = $data{$var};
    for($i=0;$i<$ndata;++$i) {
	next if &skip($i);
	++$count;
	push(@arr,$dataref->[$i]);
    }
    @arr = sort {$a <=> $b} @arr;
    foreach $quantile (@quantiles) {
	push(@qvals,$arr[int($quantile*($count-1)+0.4999999)]); 
# 0.499999 approximates rounding to nearest int with choosing the lower of two if exactly in between.
    }
    return (@qvals,$count);
}
    
sub siqrcmd {
    local($var) = @_;

    local($quantile25,$quantile75,$count,$median);

    ($quantile25,$median,$quantile75,$count) = &quantilecmd($var,0.25,0.5,0.75);
    return (($quantile75-$quantile25)/2,$quantile25,$median,$quantile75,$count);
}

sub variance_meancmd {
    local($var,$mean,$count) = @_;
    local($i,$sum,$variance);

    $sum = $variance = 0;
    my($vref) = $data{$var};
    for($i=0;$i<$ndata;++$i) {
	next if &skip($i);
	$sum += ($vref->[$i] - $mean)**2;
    }
    $variance = $sum / ($count-1) if $count > 1;
    return ($variance,$mean,$count);
}
    
sub variancecmd {
    local($var) = @_;
    local($i,$sum,$mean,$count,$variance);

    ($mean,$count) = &meancmd($var);
    return &variance_meancmd($var,$mean,$count);
}

sub stddevcmd {
    local($var) = @_;

    local($variance,$mean,$count) = &variancecmd($var);
    return (sqrt($variance),$mean,$count);
}

sub covcmd {
    local($var1,$var2) = @_;
    local($i,$sum,$mean1,$mean2,$count,$cov);

    $sum = 0;
    ($mean1,$count) = &meancmd($var1);
    ($mean2,$count) = &meancmd($var2);
    my($ref1,$ref2);
    $ref1 = $data{$var1};
    $ref2 = $data{$var2};
    for($i=0;$i<$ndata;++$i) {
	next if &skip($i);
	$sum += ($ref1->[$i] - $mean1)*($ref2->[$i] - $mean2);
    }
    $cov = $sum / ($count-1) if $count > 1;
    return ($cov,$mean1,$mean2,$count);
}

sub corrcmd {
    local($var1,$var2) = @_;
    local($cov,$std1,$std2,$mean1,$mean2,$count,@rest);

    ($cov,$mean1,$mean2,$count) = &covcmd($var1,$var2);
#    print "($cov,$std1,$std2)\n";
    ($std1,@rest) = &variance_meancmd($var1,$mean1,$count);
    ($std2,@rest) = &variance_meancmd($var2,$mean2,$count);
    $std1 = sqrt($std1);
    $std2 = sqrt($std2);
    return ($cov/($std1*$std2),$cov,$std1,$std2,$count);
}

sub corrpairscmd {
    local($vars) = @_;

    local(@varlist) = split(/\s+/o,$vars);
    local($var1,$var2);
    local($corr,@foo);
    print "Correlation Pairs:\n";
    my(@shortvars);
    my(%corrs);
    foreach $var (@varlist) {
	$corrs{$var} = {};
	$corrs{$var}{$var} = 1;
	push(@shortvars,substr($var,0,7));
    }
    print join("\t",'',@shortvars) , "\n";
    foreach $var1 (@varlist) {
	print substr($var1,0,7), "\t";
	foreach $var2 (@varlist) {
	    if (defined $corrs{$var1}{$var2}) {
		$corr = $corrs{$var1}{$var2};
	    } else {
		($corr,@foo) = &corrcmd($var1,$var2);
	    }
	    $corrs{$var1}{$var2} = $corrs{$var2}{$var1} = $corr;
	    printf("%.4f\t",$corr);
	}
	print "\n";
    }
}

sub varcmd {
    my($newvar,$cmd) = @_;
    my($var,$i,$res);

    $cmd = &cvttoperl($cmd);
    
#    print "Yo:$main::ndata\n";
    my($subr) = '
package USERDATA;
return sub {
    my($i,$newref);
    $newref = [];
    my($cmdsub) = sub { ' . $cmd . ' };
    for($i=0;$i<$main::ndata;++$i) {
	next if &main::skip($i);
	$newref->[$i] = &$cmdsub;
    }
    $' . $newvar . ' = $newref;
    return $newref;
}';
    my($subrref) = &goodeval($subr);
    $data{$newvar} = &$subrref;
}

sub skipcmd {
    my($cmd) = @_;
    my($subrref,$subr,$left);

    $cmd = &cvttoperl($cmd);
    
    undef @PRIVDATA::skip;
    my($subr) = '
package USERDATA;
return sub {
    my($i,$newref,$left);
    my($cmdsub) = sub { ' . $cmd . ' };
    $left = 0;
    for($i=0;$i<$main::ndata;++$i) {
	$PRIVDATA::skip[$i] = &$cmdsub;
	++$left unless &main::skip($i);
    }
    return $left;
}';
    my($subrref) = &goodeval($subr);
    $left = &$subrref;
    print "After skipping, $left elements remain from $ndata\n";
}

sub cvttoperl {
    local($cmd) = @_;
    local(@foo);
    @foo = split(/\s+/o,$cmd);
    foreach $var (keys %data) {
	foreach $elem (@foo) {
	    $elem = '$USERDATA::' . $var . '->[$i]' if $elem eq $var;
	}
    }
    $cmd = join(' ',@foo);
    return $cmd;
}

sub xgobicmd {
    my(@vars) = split(/\s+/o,$_[0]);
    my($var,$i);
    foreach $var (@vars) {
	die("$var not defined??\n") unless defined $data{$var}[0];
    }

    open(XGOBI,"| xgobi");
    for($i=0;$i<$ndata;++$i) {
	undef @out;
	next if &skip($i);
	foreach $var (@vars) {
	    push(@out,$data{$var}[$i]);
	}
	print XGOBI join(" ",@out) , "\n";
    }
    close(XGOBI);
}

sub showcmd {
    my(@vars) = split(/\s+/o,$_[0]);
    my($var,$i);
    foreach $var (@vars) {
	die("$var not defined??\n") unless defined $data{$var}[0];
    }

    print join("\t",@vars) , "\n";
    for($i=0;$i<$ndata;++$i) {
	undef @out;
	next if &skip($i);
	foreach $var (@vars) {
	    push(@out,$data{$var}[$i]);
	}
	print join("\t",@out) , "\n";
    }
    close(XGOBI);
}

sub skip {
    return $PRIVDATA::skip[$_[0]];
}

sub histogramcmd {
    my($var,$nbuckets,$outdev,$outfile) = @_;

    my($min,$max,$i);
    
    die("$var not defined??\n") unless defined $data{$var}[0];

    my($dataref) = $data{$var};
    for($i=0;$i<$ndata;++$i) {
	next if &skip($i);
	$max = $min = $dataref->[$i] unless defined $min;
	$max = $dataref->[$i] if $dataref->[$i] > $max;
	$min = $dataref->[$i] if $dataref->[$i] < $min;
    }

    $max += 0.000001 * $max;
    my(@buckets,$val,$range);

    $range = $max - $min;
#    $nbuckets = 20;
    for($i=0;$i<$nbuckets;++$i) {
	$buckets[$i] = 0;
    }
    for($i=0;$i<$ndata;++$i) {
	next if &skip($i);
	$val = $dataref->[$i];
	$val = ($val - $min)/$range;
	$buckets[int($nbuckets * $val)] += 1;
    }
    open(TMP,">gnuplot.$$") || die("Can't open gnuplot.$$ for write:$! \n");
    my($bucketsize) = $range / $nbuckets;
    my($gnumin) = $min + $bucketsize / 2;
    my($ymax) = 0;
    for ($i=0;$i<$nbuckets;++$i) {
	$ymax = $buckets[$i] if $buckets[$i] > $ymax;
	print TMP $gnumin + $i * $bucketsize, " $buckets[$i] 0 0 $bucketsize\n";
    }
    close(TMP);
    $outfile = "$var\.$outdev" if defined $outdev && ! defined $outfile;
    my($cmd) = "| gnuplot";
    $cmd .= " > $outfile" if defined $outfile;
    open(GNUPLOT,$cmd) || die("no '$cmd':$!\n");
    my($oldfh) = select(GNUPLOT);$|=1;select($oldfh);
    print GNUPLOT "set term $outdev\n" if defined $outdev;
#    print GNUPLOT qq!set title "$var"\n!;
    my($xmin) = $gnumin - $bucketsize/2;
    my($xmax) = $gnumin + $nbuckets * $bucketsize;
    print GNUPLOT qq!plot [$xmin:$xmax] [0:$ymax] "gnuplot.$$" title "$var" with boxes\n!;
    if (defined $outdev) {
	print "Wrote histogram of $var to $outfile; bucketsize = $bucketsize\n";
    } else {
	print "Print histogram of $var; bucketsize = $bucketsize.\n";
	$| = 1;
	print "Hit Return to continue:";
	$_ = <STDIN>;
    }
    close(GNUPLOT);
    unlink("gnuplot.$$");
}

sub cdfcmd {
    my(@parts) = @_;

    local($_,$device,$outfile);

    $_ = shift @parts;
    while(defined ($_)) {
	if (/^-d$/o) {
	    $device = shift @parts;
	} elsif (/^-f$/o) {
	    $outfile = shift @parts;
	} elsif (/^\w+$/o) {
	    last;
	} else {
	    $_ = undef;
	    last;
	}
	$_ = shift @parts;
    }
    unless (defined $_ && $#parts < 0)
    {
	die("Usage: cdf [-d <device>] [-f <out-file>] var\n");
    }
    my($i,$val,$var,@vals);

    $var = $_;
    for($i=0;$i<$ndata;++$i) {
	next if &skip($i);
	die("$var not defined for entry #$i???\n")
	    unless defined $data{$var}[$i];
	push(@vals,$data{$var}[$i]);
    }
    @vals = sort {$a <=> $b} @vals;
    open(TMP,">gnuplot.$$") || die("Can't open gnuplot.$$ for write:$!\n");
    $i = 0;
    foreach $val (@vals) {
	++$i;
	print TMP "$val " , 100*$i/$ndata , "\n";
    }
    close(TMP);
    $outfile = "$var\.$device" 
	if defined $device && ! defined $outfile;
    my($cmd) = "| gnuplot";
    $cmd .= " > $outfile" if defined $outfile;
    open(GNUPLOT,$cmd) || die("no '$cmd':$!\n");
    my($oldfh) = select(GNUPLOT);$|=1;select($oldfh);
    print GNUPLOT "set term $device\n" if defined $device;
    print GNUPLOT "set ytics 0,10,100\n";
    print GNUPLOT qq!plot [:] [0:100] "gnuplot.$$" title "$var" with lines\n!;
    if (defined $device) {
	print "Wrote cdf of $var to $outfile.\n";
    } else {
	print "CDF of $var.\n";
	$|=1;
	print "Hit Return to continue:";
	$_ = <STDIN>;
    }
    close(GNUPLOT);
    unlink("gnuplot.$$");
}

sub goodeval {
    print "About to eval: '$_[0]'\n" if defined $_[1];
    my($ret) = eval($_[0]);
    die("eval of '$_[0]' failed: $@\n") unless $@ eq '';
    return $ret;
}

