#!/usr/bin/perl -w
# combine single locus sumfiles into a combined sumfile
# 
# Syntax: combine-sumfiles
# Options: none
#
# Use this perl script to join the sumfiles of independent loci once
# you have done all of the runs.  Then run the program using the new
# genealogy summaries as input (I have defaulted this new filename to
# "sumfile.combined").
#
# Running on multiple machines has cut my (Steven Irvin) run time 
# from 45 days on a single machine to 5 to 6 days on 36 machines 
# (12 to 24 hours per  microsatellite locus and 4 to 5 days to combine 
# estimates of  parameters across al loci).  
# See the note "HOW-TO PARALLEL" in migrate directory  about how to do this.
#
#
# tested using Perl 5.005-003 on LINUX (PPC, Intel), 
#                                SUN Solaris, True64 UNIX, 
# does not work yet on
# Macintosh (MacPerl), Windows (?????)
#
# created by Steven Irvin October 2000, Cornell
#            email: sdi1@cornell.edu
# modified by Peter Beerli October 2000, Seattle 
#             email: beerli@genetics.washington.edu
#---------------------------------------------------------
# $Id: combine-sumfiles,v 1.2 2000/11/03 01:50:35 beerli Exp $
$combined = "sumfile.combined";

#print "This combine-sumfile program needs sumfiles\n";
#print "from migrate-n version 0.9.7\n";

@dirlist     = &directories();
@sumfilelist = &sumfiles();

$has_dirs = scalar(@dirlist);
$has_sumfiles = scalar(@sumfilelist);
if($has_dirs>1 && $has_sumfiles>1)
{
    print "You have sumfiles in the working directory\n";
    print "and in its subdirectories\n";
    print "Which sumfile sets do you want to combine?\n";
    print "0 = sumfiles, 1=subdirectories\n";
    $answer = <STDIN>;
#-------this needs revision, I don't have a perl book here
    if($answer == 0)
    {
	$has_dirs=0;
    }
    else
    {
	if($answer != 1) 
	{
	    die "Please start again and give either  0 (zero) or 1 (one)\n"; 
	}
	$has_sumfiles=0;
    }
#--------end problematic code
} 
if($has_dirs) 
{    
    &combine(@dirlist);
}
else
{
    &combine(@sumfilelist);
}
print "done\n";
exit(0);


#======================================================
# subroutines
#
sub directories
{
    opendir(THEDIR, '.');       # open current directory
    local(@dirlist) = grep {!/^\.\.?$/ && -d "./$_"}  readdir(THEDIR);
    local($z)=0;
    local(@list)=();
    foreach $the_dir (@dirlist) 
    {
	    # open each subdirectory (from the listing) directory
	    @temp = <$the_dir/sumfile*>;
	    @temp = grep !/~/, @temp;
	    $list[$z] = join ',', @temp; 
	    $z++;
    }
    @list = split ',',(join ',', @list) ;
    closedir(THEDIR);
    return (@list);
}

sub sumfiles
{
    opendir(THEDIR, '.');       # open current directory
    local(@sumfilelist) = grep {/^sumfile/ && !/sumfile\.combined/ && -f "./$_"}  readdir(THEDIR);
    @sumfilelist = grep !/~/, @sumfilelist;
    return (@sumfilelist);
}

sub combine
{
    my @list =  @_;
    open SUMFILE, ">".$combined || 
	die "unable to write to sumfile.combined: $!\n"; 
    open INFILE, "<".$list[0];
    local(@lines) = <INFILE>;
    close INFILE;
    ($numpop, $replicates, $replicatenum, 
     $header, $tail) = &analyze(@lines);
    $numpop2 = $numpop * $numpop;
    printf SUMFILE "$header";
    $loci = scalar(@list);
    print "@list\n";
    print "loci=$loci\n";
    $locus = 0;
    printf SUMFILE "$loci $numpop $numpop2 $replicates $replicatenum\n";
    foreach $i (@list)
    {
	open INFILE, "<".$i;
	@lines = <INFILE>;
	close INFILE;
	shift @lines;
	shift @lines;
	shift @lines; # remove the first 3 lines
	pop @lines;   # remove last line
	$rep = 0;
	foreach $line (@lines)
	{
	    if(index( $line,"\#\#\#") >= 0)
	    {		
		if($rep==$replicatenum)
		{
		    $rep=0;
		    $locus++;
		}
		printf SUMFILE  "$locus $rep \#\#\#\#\#\# locus $locus, "; 
		printf SUMFILE "replicate $rep \#\#\#\#\#\#\#\#\n";
		$rep++;
	    }
	    else
	    {
		printf SUMFILE "$line";
	    }
	}
	$locus++;
    }
    printf SUMFILE "$tail\n";
    close(SUMFILE);
}

sub analyze
{
    my @lines = @_;
    local($header) = $lines[0].$lines[1];
    (local($locus), local($numpop), local($numpop2), local($replicates), local($replicatenum)) =
	split ' ', $lines[2]; 
    local($tail) = $lines[scalar(@lines)-1];
    return ($numpop, $replicates, $replicatenum, 
	    $header, $tail);
}



