/*
  mig-histogram code
 
  Peter Beerli
  beerli@csit.fsu.edu
 
    Copyright 2002 Peter Beerli
 
  This software is distributed free of charge for non-commercial use
 and is copyrighted. Of course, we do not guarantee that the software
 works and are not responsible for any damage you may cause or have.
 
  $Id: mig-histogram.c,v 1.6 2003/11/04 16:12:09 beerli Exp $
 
 */

#include "mig-histogram.h"
/* Forward declarations for routines defined further down in this file. */

/* Allocates the per-locus migration-event records when the mighist option is set. */
void setup_mighist (world_fmt * world, option_fmt * options);
/* Writes the raw events, the summary table and the ASCII histograms. */
void print_mighist (world_fmt * world);
/* Fills one plotfield_fmt with labels and freshly allocated plot buffers. */
void
setup_plotfield (plotfield_fmt * plotfield, char thisplotype,
                 long xsize, long ysize, char xlabel[], char ylabel[],
                 char yflabel[], char title[], boolean print);

/* Builds one summary table plus histogram input; returns the summed event count. */
long calc_migtable (double **migtable, histogram_fmt * histogram,
                    mighistloci_fmt * aa, world_fmt * world, long loci);
/* Weighted mean of vec; also reports a standard-error value (*se) and the total weight (*n). */
double average (double *vec, long size, long *weight, double *se, long *n);
/* Value found at the requested quantile of the weight-expanded, sorted sample. */
double quantile (double *vec, long size, long *weight, double quantile);

/* Number of histogram bins; print_mighist() uses it as plot x-size and bin count. */
#define NBINS 30
/* Bins the stored event times and prints one ASCII histogram per plotfield marked for printing. */
void
print_histogram_ascii (FILE * out, histogram_fmt ** histogram,
                       plotfield_fmt ** plotfield, long loci, long nmigs,
                       long bins, long *sum, double ***migtable);

/* Copies `count` times and weights into hist, (re)allocating its buffers. */
void
prepare_hist (histogram_fmt * hist, double *time, long count, long *weight);

/* Reports the smallest and largest time stored in hist. */
void minmax (histogram_fmt * hist, double *tempmin, double *tempmax);


/*
 * Dump every recorded migration event to the stream `mighist`.
 *
 * One line is written per event, holding the three values stored in
 * migevents[i][0], [1] and [2] (in that order), each formatted with "%f".
 * All loci and all records of each locus are visited in storage order.
 */
void
print_mighist_file (FILE * mighist, world_fmt * world)
{
    long locus;
    long rec;
    long ev;
    mighistloci_fmt *lochist;

    for (locus = 0; locus < world->loci; locus++)
    {
        lochist = &world->mighistloci[locus];
        for (rec = 0; rec < lochist->mighistnum; rec++)
        {
            for (ev = 0; ev < lochist->mighist[rec].migeventsize; ev++)
            {
                FPRINTF (mighist, "%f %f %f\n",
                         lochist->mighist[rec].migevents[ev][0],
                         lochist->mighist[rec].migevents[ev][1],
                         lochist->mighist[rec].migevents[ev][2]);
            }
        }
    }
}

/*
 * Fill the summary tables and histogram inputs for every locus and, when
 * there is more than one locus, for the "over all loci" slot that follows
 * the per-locus slots (index world->loci).
 *
 *   world      supplies loci, numpop2 and the recorded events (mighistloci)
 *   migtable   [slot][population pair][column]; column 2 is preset to
 *              NOAVERAGE before calc_migtable() runs
 *   histogram  [slot][population pair], filled by calc_migtable()
 *   total      receives the event count returned for each single locus
 *
 * The count returned for the over-all-loci slot is deliberately not stored:
 * print_mighist_output() derives total[world->loci] by summing the
 * per-locus totals.
 */
void
calc_mighistvalues (world_fmt * world, double ***migtable,
                    histogram_fmt ** histogram, long *total)
{
    long loc, p1;
    long nloci = world->loci;

    /* one table per locus */
    for (loc = 0; loc < nloci; loc++)
    {
        for (p1 = 0; p1 < world->numpop2; p1++)
            migtable[loc][p1][2] = NOAVERAGE;
        total[loc] = calc_migtable (migtable[loc], histogram[loc],
                                    &world->mighistloci[loc], world, 1);
    }

    /* summary over all loci; a single locus has no separate summary slot */
    if (nloci != 1)
    {
        for (p1 = 0; p1 < world->numpop2; p1++)
            migtable[nloci][p1][2] = NOAVERAGE;
        calc_migtable (migtable[nloci], histogram[nloci], world->mighistloci,
                       world, world->loci);
    }
}

/*
 * Print the "Summary of Migration Events" tables to `out`.
 *
 * One table is printed per locus; with more than one locus an additional
 * "Over all loci" table follows.  Before printing, the per-locus totals are
 * added into total[world->loci], which is used as the divisor for the
 * over-all-loci frequencies.
 *
 * Rows whose first two columns are equal are skipped.  A row whose column 2
 * still holds NOAVERAGE is reported as having no migration event; otherwise
 * columns 2, 3, 4 and column 5 divided by the slot total are printed.
 */
void
print_mighist_output (FILE * out, world_fmt * world, double ***migtable,
                      long *total)
{
    long tbl, pair;
    long ntables = world->loci == 1 ? 1 : world->loci + 1;
    double *row;

    FPRINTF (out, "\n\nSummary of Migration Events\n");
    FPRINTF (out, "===============================\n\n");

    /* accumulate the grand total in the slot after the per-locus totals */
    for (tbl = 0; tbl < world->loci; tbl++)
        total[world->loci] += total[tbl];

    for (tbl = 0; tbl < ntables; tbl++)
    {
        /* table heading: a locus number or the summary label */
        if (tbl == world->loci)
            FPRINTF (out, "Over all loci\n");
        else
            FPRINTF (out, "Locus %li\n", tbl + 1);
        FPRINTF (out,
                 "---------------------------------------------------------\n");
        FPRINTF (out,
                 "Population   Time                             Frequency\n");
        FPRINTF (out, "             -----------------------------\n");
        FPRINTF (out, "From    To   Average    Median     SE\n");
        FPRINTF (out,
                 "---------------------------------------------------------\n");

        for (pair = 0; pair < world->numpop2; pair++)
        {
            row = migtable[tbl][pair];
            if (row[0] == row[1])
                continue;
            if (row[2] == NOAVERAGE)
            {
                FPRINTF (out, "%4li %4li    No migration event encountered\n",
                         (long) row[0] + 1, (long) row[1] + 1);
                continue;
            }
            FPRINTF (out,
                     "%4li %4li    %3.5f    %3.5f    %3.5f    %3.5f\n",
                     (long) row[0] + 1, (long) row[1] + 1,
                     row[2], row[3], row[4], row[5] / total[tbl]);
        }

        FPRINTF (out,
                 "---------------------------------------------------------\n");
        FPRINTF (out, "\n");
    }
}

/*
 * print_mighist() writes the migration-event report when
 * world->options->mighist is set; otherwise it does nothing.
 *
 * Steps:
 *   1. dump the raw events to world->mighistfile,
 *   2. compute one table per locus and, for more than one locus, one
 *      additional table over all loci,
 *   3. print the summary table
 *        PopFrom PopTo  Average-Time Median-Time SE "Probability"
 *      to world->outfile,
 *   4. print ASCII histograms that split the observed time range into
 *      NBINS bins.
 *
 * All working storage is allocated here and released before returning,
 * including the time/weight arrays that prepare_hist() attaches to each
 * histogram entry.
 *
 * NOTE(review): the plot setup loop fills numpop * numpop entries while the
 * arrays are sized with numpop2; this assumes numpop2 == numpop * numpop.
 */
void
print_mighist (world_fmt * world)
{
    long loc, i, z;
    char plotype;
    char xlabel[255];
    char ylabel[255];
    char yflabel[255];
    char title[255];
    long xsize;
    long ysize;
    long to;
    long from;
    FILE *out = world->outfile;
    /* one slot per locus, plus a summary slot when there are several loci */
    long loci1 = world->loci == 1 ? 1 : world->loci + 1;
    long *total;
    double ***migtable;         /* [slot][pair][6 columns] */
    plotfield_fmt **plotfield;  /* [slot][pair] */
    histogram_fmt **histogram;  /* [slot][pair] */

    if (!world->options->mighist)
        return;

    /* total needs one extra slot: print_mighist_output() writes total[loci] */
    total = (long *) calloc (loci1 + 1, sizeof (long));
    plotfield = (plotfield_fmt **) calloc (loci1, sizeof (plotfield_fmt *));
    migtable = (double ***) calloc (loci1, sizeof (double **));
    histogram = (histogram_fmt **) calloc (loci1, sizeof (histogram_fmt *));
    for (loc = 0; loc < loci1; ++loc)
    {
        plotfield[loc] =
            (plotfield_fmt *) calloc (world->numpop2, sizeof (plotfield_fmt));
        migtable[loc] =
            (double **) calloc (world->numpop2, sizeof (double *));
        histogram[loc] =
            (histogram_fmt *) calloc (world->numpop2, sizeof (histogram_fmt));
        for (i = 0; i < world->numpop2; ++i)
        {
            /* prepare_hist() decides between calloc and realloc on these */
            histogram[loc][i].time = NULL;
            histogram[loc][i].weight = NULL;
            migtable[loc][i] = (double *) calloc (6, sizeof (double));
        }
    }

    /* set up one plot per (to, from) pair; the diagonal is not printed */
    plotype = 'a';
    xsize = NBINS;
    ysize = MIGHIST_YSIZE;
    strcpy (xlabel, "Time");
    strcpy (yflabel, "Frequency");
    strcpy (ylabel, "Count");
    for (loc = 0; loc < loci1; loc++)
    {
        z = 0;
        for (to = 0; to < world->numpop; ++to)
        {
            for (from = 0; from < world->numpop; ++from)
            {
                sprintf (title, "Migrations from population %li to %li",
                         from + 1, to + 1);
                setup_plotfield (&plotfield[loc][z], plotype, xsize, ysize,
                                 xlabel, ylabel, yflabel, title, from != to);
                z++;
            }
        }
    }

    print_mighist_file (world->mighistfile, world);
    calc_mighistvalues (world, migtable, histogram, total);
    print_mighist_output (world->outfile, world, migtable, total);

    print_histogram_ascii (out, histogram, plotfield, loci1, world->numpop2,
                           NBINS, total, migtable);
    fflush (out);

    /* release everything allocated above */
    free (total);
    for (loc = 0; loc < loci1; loc++)
    {
        for (i = 0; i < world->numpop2; ++i)
        {
            free (migtable[loc][i]);
            /* buffers attached by prepare_hist(); still NULL if never filled */
            free (histogram[loc][i].time);
            free (histogram[loc][i].weight);
            /* data stays NULL for an entry setup_plotfield() never touched */
            if (plotfield[loc][i].data != NULL)
                free (plotfield[loc][i].data[0]);
            free (plotfield[loc][i].data);
            free (plotfield[loc][i].y);
            free (plotfield[loc][i].yfreq);
        }
        free (migtable[loc]);
        free (plotfield[loc]);
        free (histogram[loc]);
    }
    free (migtable);
    free (plotfield);
    free (histogram);
}

/*
 * Initialise one plot description and allocate its buffers.
 *
 *   plotfield    structure to fill
 *   thisplotype  stored in plotfield->type
 *   xsize        number of rows in the character plot (one per bin)
 *   ysize        maximum bar length; each row holds ysize + 1 characters
 *   xlabel, ylabel, yflabel, title
 *                labels, copied with at most 254 characters each
 *   print        whether this plot is to be printed
 *
 * The caller owns the buffers and frees data[0], data, y and yfreq.
 *
 * y and yfreq are written once per bin by print_histogram_ascii(), i.e.
 * they are indexed along the x direction.  They are therefore sized to the
 * larger of xsize and ysize, so that a plot with more bins than bar length
 * cannot overrun them.
 *
 * NOTE(review): strncpy() does not terminate the destination when the source
 * has 254 or more characters; termination then relies on the structure
 * having been zero-filled and on the label fields holding at least 255
 * characters -- confirm against the plotfield_fmt declaration.
 * xsize is expected to be at least 1 because data[0] is always written.
 */
void
setup_plotfield (plotfield_fmt * plotfield, char thisplotype,
                 long xsize, long ysize, char xlabel[], char ylabel[],
                 char yflabel[], char title[], boolean print)
{
    long i;
    long ncounts = (xsize > ysize) ? xsize : ysize;

    plotfield->print = print;
    plotfield->type = thisplotype;
    plotfield->xsize = xsize;
    plotfield->ysize = ysize;
    strncpy (plotfield->xaxis, xlabel, 254);
    strncpy (plotfield->yaxis, ylabel, 254);
    strncpy (plotfield->yfaxis, yflabel, 254);
    strncpy (plotfield->title, title, 254);
    plotfield->yfreq = (double *) calloc (ncounts, sizeof (double));
    plotfield->y = (long *) calloc (ncounts, sizeof (long));
    /* one contiguous character block; data[i] points at the start of row i */
    plotfield->data = (char **) malloc (sizeof (char *) * xsize);
    plotfield->data[0] = (char *) calloc (xsize * (ysize + 1), sizeof (char));
    for (i = 1; i < xsize; i++)
    {
        plotfield->data[i] = plotfield->data[0] + i * (ysize + 1);
    }
}



/*
 * Build one summary table and the matching histogram input.
 *
 *   migtable   [numpop2][6] table to fill; for every off-diagonal pair:
 *                [0], [1]  the two population indices (printed by
 *                          print_mighist_output() as "From" and "To")
 *                [2]       weighted mean event time (average())
 *                [3]       median event time (quantile() at 0.5)
 *                [4]       standard-error value reported by average()
 *                [5]       total weight reported by average()
 *   histogram  [numpop2] entries that receive the times and weights
 *   aa         event records; a single locus when loci == 1, otherwise the
 *              array of all world->loci loci
 *   world      supplies numpop, numpop2 and loci
 *   loci       1 for a single locus, anything else for all loci
 *
 * Returns the sum of column [5] over all rows.
 *
 * Events are grouped by migevents[i][1] and migevents[i][2], which are used
 * as array indices without a range check (they are expected to lie in
 * 0 .. numpop-1).  Allocation results are not checked, in line with the
 * rest of this file.
 */
long
calc_migtable (double **migtable, histogram_fmt * histogram,
               mighistloci_fmt * aa, world_fmt * world, long loci)
{
    long p1, p2, pa, pb, i, j;
    long maxloci, locus;
    long copies;
    long n, total = 0, maxsize;
    double se;
    double ***migtime;          /* [p1][p2][event]  event times          */
    long ***gencount;           /* [p1][p2][event]  weight of each event */
    long **migcount;            /* [p1][p2]         events stored so far */
    long **size;                /* [p1][p2]         allocated capacity   */
    migtime = (double ***) calloc (world->numpop, sizeof (double **));
    gencount = (long ***) calloc (world->numpop, sizeof (long **));
    migcount = (long **) calloc (world->numpop, sizeof (long *));
    size = (long **) calloc (world->numpop, sizeof (long *));
    maxsize = 1;
    if (loci == 1)
        maxloci = 1;
    else
        maxloci = world->loci;
    /* initial capacity: the largest event list of any single record */
    for (locus = 0; locus < maxloci; locus++)
    {
        for (j = 0; j < aa[locus].mighistnum; j++)
        {
            if (maxsize < aa[locus].mighist[j].migeventsize)
                maxsize = aa[locus].mighist[j].migeventsize;
        }
    }
    for (p1 = 0; p1 < world->numpop; ++p1)
    {
        migtime[p1] = (double **) calloc (world->numpop, sizeof (double *));
        gencount[p1] = (long **) calloc (world->numpop, sizeof (long *));
        migcount[p1] = (long *) calloc (world->numpop, sizeof (long));
        size[p1] = (long *) calloc (world->numpop, sizeof (long));
        for (p2 = 0; p2 < world->numpop; ++p2)
        {
            migtime[p1][p2] = (double *) calloc (maxsize, sizeof (double));
            gencount[p1][p2] = (long *) calloc (maxsize, sizeof (long));
            size[p1][p2] = maxsize;
        }
    }
    /* collect times and weights per population pair */
    for (locus = 0; locus < maxloci; locus++)
    {
        for (j = 0; j < aa[locus].mighistnum; j++)
        {
            copies = aa[locus].mighist[j].copies;
            for (i = 0; i < aa[locus].mighist[j].migeventsize; i++)
            {
                p1 = aa[locus].mighist[j].migevents[i][1];
                p2 = aa[locus].mighist[j].migevents[i][2];
                if (migcount[p1][p2] >= size[p1][p2])
                {
                    /* grow by 10 slots and zero the new tail */
                    size[p1][p2] += 10;
                    gencount[p1][p2] =
                        (long *) realloc (gencount[p1][p2],
                                          sizeof (long) * size[p1][p2]);
                    memset (gencount[p1][p2] + migcount[p1][p2], 0,
                            sizeof (long) * 10);
                    migtime[p1][p2] =
                        (double *) realloc (migtime[p1][p2],
                                            sizeof (double) * size[p1][p2]);
                    memset (migtime[p1][p2] + migcount[p1][p2], 0,
                            sizeof (double) * 10);
                }
                gencount[p1][p2][migcount[p1][p2]] += copies;
                migtime[p1][p2][migcount[p1][p2]] +=
                    aa[locus].mighist[j].migevents[i][0];
                migcount[p1][p2] += 1;
            }
        }
    }
    /* summarise both directions of every unordered pair */
    for (p1 = 0; p1 < world->numpop; p1++)
    {
        for (p2 = 0; p2 < p1; p2++)
        {
            pa = p2 * world->numpop + p1;
            pb = p1 * world->numpop + p2;
            migtable[pa][0] = p1;
            migtable[pb][0] = p2;
            migtable[pa][1] = p2;
            migtable[pb][1] = p1;
            migtable[pa][2] = average (migtime[p1][p2],
                                       migcount[p1][p2],
                                       gencount[p1][p2], &se, &n);
            migtable[pa][4] = se;
            migtable[pa][5] = n;
            migtable[pb][2] = average (migtime[p2][p1],
                                       migcount[p2][p1],
                                       gencount[p2][p1], &se, &n);
            migtable[pb][4] = se;
            migtable[pb][5] = n;
            migtable[pa][3] = quantile (migtime[p1][p2],
                                        migcount[p1][p2],
                                        gencount[p1][p2], 0.5);
            migtable[pb][3] = quantile (migtime[p2][p1],
                                        migcount[p2][p1],
                                        gencount[p2][p1], 0.5);
            prepare_hist (&histogram[pa], migtime[p1][p2], migcount[p1][p2],
                          gencount[p1][p2]);
            prepare_hist (&histogram[pb], migtime[p2][p1], migcount[p2][p1],
                          gencount[p2][p1]);

        }
    }
    for (p1 = 0; p1 < world->numpop2; p1++)
    {
        total += migtable[p1][5];
    }
    /* release the scratch arrays, including the outer pointer tables */
    for (p1 = 0; p1 < world->numpop; ++p1)
    {
        free (migcount[p1]);
        free (size[p1]);
        for (p2 = 0; p2 < world->numpop; ++p2)
        {
            free (migtime[p1][p2]);
            free (gencount[p1][p2]);
        }
        free (migtime[p1]);
        free (gencount[p1]);
    }
    free (migtime);
    free (gencount);
    free (migcount);
    free (size);
    return total;
}

/*
 * Store a copy of `count` event times and weights in `hist`.
 *
 * hist->time and hist->weight are sized to count + 1 elements: allocated
 * zero-filled when the pointer is still NULL, resized with realloc()
 * otherwise.  The first `count` elements are then overwritten from `time`
 * and `weight`, and hist->count is set to `count`.
 */
void
prepare_hist (histogram_fmt * hist, double *time, long count, long *weight)
{
    long slots = count + 1;

    hist->count = count;

    if (hist->time != NULL)
        hist->time = realloc (hist->time, sizeof (double) * slots);
    else
        hist->time = (double *) calloc (slots, sizeof (double));

    if (hist->weight != NULL)
        hist->weight = realloc (hist->weight, sizeof (long) * slots);
    else
        hist->weight = (long *) calloc (slots, sizeof (long));

    memcpy (hist->time, time, sizeof (double) * count);
    memcpy (hist->weight, weight, sizeof (long) * count);
}

/*
 * Weighted mean of vec[0 .. size-1], where weight[i] is the number of times
 * vec[i] was observed.
 *
 *   se  receives the standard error of the mean,
 *         sqrt( sum_i w_i (x_i - mean)^2 / (N - 1) / N ),  N = sum_i w_i,
 *       or DBL_MAX when N <= 1 (no spread can be estimated)
 *   n   receives N
 *
 * Returns the mean, or NOAVERAGE when N is zero.
 *
 * The squared deviations are accumulated in a second pass around the mean,
 * which keeps the sum non-negative and avoids cancellation.
 */
double
average (double *vec, long size, long *weight, double *se, long *n)
{
    long i;
    long sumweight = 0;
    double mean, dev;
    double sum = 0., sumdev2 = 0.;

    for (i = 0; i < size; ++i)
    {
        sumweight += weight[i];
        sum += vec[i] * weight[i];
    }
    *n = sumweight;
    if (sumweight == 0)
    {
        *se = DBL_MAX;
        return NOAVERAGE;
    }

    mean = sum / sumweight;
    if (sumweight > 1)
    {
        for (i = 0; i < size; ++i)
        {
            dev = vec[i] - mean;
            sumdev2 += weight[i] * dev * dev;
        }
        if (sumdev2 < 0.)       /* only possible with negative weights */
            sumdev2 = 0.;
        *se = sqrt (sumdev2 / (sumweight - 1.) / sumweight);
    }
    else
        *se = DBL_MAX;
    return mean;
}

/*
 * Quantile of a weighted sample.
 *
 * Every vec[i] is repeated weight[i] times, the expanded sample is sorted
 * with numcmp, and the element at position (long) (N * quantile) is
 * returned, N being the expanded sample size.  The position is clamped to
 * 0 .. N-1, so quantile == 1.0 yields the largest value instead of reading
 * past the sorted data.  Entries with a weight of zero or less contribute
 * nothing.
 *
 * Returns 0.0 when the expanded sample is empty.
 *
 * NOTE(review): the allocation result is not checked, in line with the rest
 * of this file.
 */
double
quantile (double *vec, long size, long *weight, double quantile)
{
    long i, j, z = 0;
    long pos;
    double *tmp1;
    double val;
    long sumweight = 0;

    /* count exactly what the fill loop below will store */
    for (i = 0; i < size; ++i)
    {
        if (weight[i] > 0)
            sumweight += weight[i];
    }
    if (sumweight == 0)
        return 0.;

    tmp1 = (double *) calloc (sumweight, sizeof (double));

    for (i = 0; i < size; ++i)
    {
        for (j = 0; j < weight[i]; ++j)
            tmp1[z++] = vec[i];
    }
    qsort ((void *) tmp1, sumweight, sizeof (double), numcmp);

    pos = (long) (sumweight * quantile);
    if (pos < 0)
        pos = 0;
    if (pos > sumweight - 1)
        pos = sumweight - 1;
    val = tmp1[pos];
    free (tmp1);
    return val;
}

/*
 * Grow the record array of one locus by DEFAULTALLOCSIZE entries once fewer
 * than two free entries remain (allocsize <= mighistnum + 1).
 *
 * The new entries are zero-filled first, so that every field starts out the
 * same way as in the calloc()'ed entries created by setup_mighist(); each
 * one then gets an empty event list with room for DEFAULTALLOCSIZE events.
 *
 * NOTE(review): the realloc() result is not checked, in line with the rest
 * of this file.
 */
void
increase_mighist (mighistloci_fmt * mighistlocus)
{
    long i;
    long first;

    if (mighistlocus->allocsize > mighistlocus->mighistnum + 1)
        return;

    first = mighistlocus->allocsize;    /* index of the first new entry */
    mighistlocus->allocsize += DEFAULTALLOCSIZE;
    mighistlocus->mighist = (mighist_fmt *)
                            realloc (mighistlocus->mighist,
                                     sizeof (mighist_fmt) * mighistlocus->allocsize);
    memset (mighistlocus->mighist + first, 0,
            sizeof (mighist_fmt) * DEFAULTALLOCSIZE);
    for (i = first; i < mighistlocus->allocsize; i++)
    {
        mighistlocus->mighist[i].migeventsize = 0;
        mighistlocus->mighist[i].allocsize = DEFAULTALLOCSIZE;
        mighistlocus->mighist[i].migevents =
            (migevent_fmt *) calloc (DEFAULTALLOCSIZE, sizeof (migevent_fmt));
    }
}

/*
 * Allocate the migration-event storage for every locus.
 *
 * Does nothing unless world->options->mighist is set.  Otherwise one
 * mighistloci_fmt per locus is created; each starts with DEFAULTALLOCSIZE
 * zero-filled records, and every record gets an empty event list with room
 * for DEFAULTALLOCSIZE events.  world->mighistlocinum is reset to 0.
 *
 * The `options` argument is not used; the setting is read through `world`.
 */
void
setup_mighist (world_fmt * world, option_fmt * options)
{
    long nalloc = DEFAULTALLOCSIZE;
    long locus, slot;
    mighistloci_fmt *lochist;

    if (!world->options->mighist)
        return;

    world->mighistloci = (mighistloci_fmt *) calloc (world->loci, sizeof (mighistloci_fmt));
    world->mighistlocinum = 0;

    for (locus = 0; locus < world->loci; locus++)
    {
        lochist = &world->mighistloci[locus];
        lochist->allocsize = nalloc;
        lochist->mighistnum = 0;
        lochist->mighist =
            (mighist_fmt *) calloc (lochist->allocsize, sizeof (mighist_fmt));

        for (slot = 0; slot < nalloc; slot++)
        {
            lochist->mighist[slot].migeventsize = 0;
            lochist->mighist[slot].allocsize = nalloc;
            lochist->mighist[slot].migevents =
                (migevent_fmt *) calloc (nalloc, sizeof (migevent_fmt));
        }
    }
}

/*
 * Find the smallest and largest of the hist->count times stored in hist.
 *
 * For an empty histogram *tempmin is set to DBL_MAX and *tempmax to
 * -DBL_MAX.
 */
void
minmax (histogram_fmt * hist, double *tempmin, double *tempmax)
{
    double lowest = DBL_MAX;
    double highest = -DBL_MAX;
    double t;
    long k;

    for (k = 0; k < hist->count; k++)
    {
        t = hist->time[k];
        if (t < lowest)
            lowest = t;
        if (t > highest)
            highest = t;
    }
    *tempmax = highest;
    *tempmin = lowest;
}


/*
 * Bin the stored event times and print one ASCII histogram per plot.
 *
 *   out        report stream
 *   histogram  [loci][nmigs] times and weights per population pair
 *   plotfield  [loci][nmigs] plot descriptions; y, yfreq and data are
 *              filled here
 *   loci       number of tables; with more than one, the last is labelled
 *              "Over all loci"
 *   nmigs      number of population pairs per table
 *   bins       number of bins
 *   sum        per-table totals used to turn counts into frequencies
 *   migtable   entries whose column 2 equals NOAVERAGE have no events; their
 *              plots are switched off
 *
 * All plots share one binning that spans the smallest to the largest time
 * found in any histogram.  The limits are begin + (k + 0.5) * delta for
 * k = 0 .. bins-1; an event goes into the first bin whose limit is not
 * below its time, and events beyond the last limit are collected in an
 * extra slot that is not plotted.
 *
 * NOTE(review): plotfield y and yfreq are written for every bin, so they
 * must hold at least `bins` elements.
 * NOTE(review): the line written to stdout for every bin looks like
 * leftover debugging output; it is kept unchanged here -- confirm whether
 * it can be removed.
 */
void
print_histogram_ascii (FILE * out, histogram_fmt ** histogram,
                       plotfield_fmt ** plotfield, long loci, long nmigs,
                       long bins, long *sum, double ***migtable)
{
    long loc, i, j, z, zz;
    long stars;
    double biggest = 0.;
    double *binning;            /* bin limits                          */
    double *binvec;             /* weight per bin, plus overflow slot  */
    double tempmin = DBL_MAX;
    double tempmax = -DBL_MAX;
    double begin = DBL_MAX;
    double end = -DBL_MAX;
    double delta;
    double time;
    long weight;
    binning = (double *) calloc (bins + 1, sizeof (double));
    binvec = (double *) calloc (bins + 1, sizeof (double));

    /* overall time range; plots without events are switched off */
    for (loc = 0; loc < loci; loc++)
    {
        for (i = 0; i < nmigs; i++)
        {
            if (migtable[loc][i][2] == NOAVERAGE)
            {
                plotfield[loc][i].print = FALSE;
                continue;
            }
            minmax (&histogram[loc][i], &tempmin, &tempmax);
            if (tempmin < begin)
                begin = tempmin;
            if (tempmax > end)
                end = tempmax;
        }
    }
    delta = (end - begin) / bins;
    binning[0] = begin + 0.5 * delta;
    for (i = 1; i < bins; i++)
        binning[i] = delta + binning[i - 1];

    /* counts, frequencies and bars for every plot that has events */
    for (loc = 0; loc < loci; loc++)
    {
        for (i = 0; i < nmigs; i++)
        {
            if (migtable[loc][i][2] == NOAVERAGE)
                continue;
            memset (binvec, 0, sizeof (double) * (bins + 1));
            for (j = 0; j < histogram[loc][i].count; j++)
            {
                time = histogram[loc][i].time[j];
                weight = histogram[loc][i].weight[j];
                z = 0;
                /* test the index first so binning[bins] is never consulted */
                while (z < bins && time > binning[z])
                    z++;
                binvec[z] += weight;
            }
            biggest = 0.;
            for (j = 0; j < bins; j++)
            {
                plotfield[loc][i].y[j] = (long) binvec[j];
                plotfield[loc][i].yfreq[j] = binvec[j] = binvec[j] / sum[loc];
                if (biggest < binvec[j])
                    biggest = binvec[j];
            }
            for (j = 0; j < bins; j++)
            {
                /* bar length relative to the tallest bin; no bars when every
                   plotted bin is empty (avoids dividing by zero) */
                if (biggest > 0.)
                    stars =
                        (long) (binvec[j] * plotfield[loc][i].ysize / biggest);
                else
                    stars = 0;
                if (stars > plotfield[loc][i].ysize)
                    stars = plotfield[loc][i].ysize;
                for (zz = 0; zz < stars; zz++)
                    plotfield[loc][i].data[j][zz] = '+';
                plotfield[loc][i].data[j][zz] = '\0';
            }
        }
    }

    /* output */
    for (loc = 0; loc < loci; loc++)
    {
        if (loc == (loci - 1))
        {
            if (loci > 1)
                FPRINTF (out,
                         "\nOver all loci\n------------------------------------------------------------------\n");
            else
                FPRINTF (out,
                         "\nLocus %li\n------------------------------------------------------------------\n",
                         loc + 1);
        }
        else
            FPRINTF (out,
                     "\nLocus %li\n------------------------------------------------------------------\n",
                     loc + 1);

        for (i = 0; i < nmigs; i++)
        {
            if (plotfield[loc][i].print)
            {
                FPRINTF (out, "%s\n\n%10.10s %10.10s %10.10s\n",
                         plotfield[loc][i].title, plotfield[loc][i].xaxis,
                         plotfield[loc][i].yaxis, plotfield[loc][i].yfaxis);
                for (j = 0; j < bins; j++)
                {
                    FPRINTF (stdout,
                             "loc=%li i=%li j=%li %10.6f %10li %10.6f %s\n",
                             loc, i, j, binning[j], plotfield[loc][i].y[j],
                             plotfield[loc][i].yfreq[j],
                             plotfield[loc][i].data[j]);

                    FPRINTF (out,
                             "%10.6f %10li %10.6f %s\n", binning[j],
                             plotfield[loc][i].y[j], plotfield[loc][i].yfreq[j],
                             plotfield[loc][i].data[j]);
                }
                FPRINTF (out, " \n");
            }
        }
    }

    free (binvec);
    free (binning);
}
