/* Histogram builder for Bayesian histogram data:
   reads the bayesallfile back into the histogram structure so that
   calculate_hpd etc. and the pretty-printer functions can be used on it.
  
   (c) Peter Beerli 2007-2012

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies
or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

*/
#include "definitions.h"
#include "migration.h"
#include "bayes.h"
#include "tools.h"
#include "sighandler.h"
#include "reporter.h"
#include "correlation.h"
#include "pretty.h"
#include <errno.h>
// Defined in another translation unit; myID is printed as the prefix of the
// log lines emitted while the bayesallfile header is read.
extern int myID;
// Defined in another translation unit; not referenced by the functions
// visible in this part of the file.
extern int numcpu;
// Forward declaration of the bayesallfile reader; the handle type depends on
// whether the build uses the ZNZ (optionally compressed) file layer.
#ifdef ZNZ
void read_bayes_fromfile(znzFile fmdimfile, world_fmt *world,option_fmt *options, char **files, long fnum);
#else
void read_bayes_fromfile(FILE *fmdimfile, world_fmt *world,option_fmt *options, char **files, long fnum);
#endif

/// Returns a pointer just past the first @p skip characters of @p line, or an
/// empty string when the line is shorter than @p skip. This keeps the header
/// parser from reading stale bytes behind the terminator of a short line.
static const char *bayesmdim_after_prefix(const char *line, size_t skip)
{
  return (strlen(line) >= skip) ? line + skip : "";
}

/// Reads the header of a bayesallfile and fills the minimal set of fields
/// needed before the sample lines can be read back.
///
/// The file is scanned for the first line that starts with '#' and contains
/// "begin". The lines that follow are consumed in this order:
///   1. custom migration model   (text from column 9 -> options->custm)
///   2. second custom model      (text from column 9 -> options->custm2)
///   3. "loci numpop numpop2 replicate replicatenum use_M" (from column 3)
///   4. one line per population with its name (from column 3)
///
/// @param mdimfile open handle of the bayesallfile
/// @param world    receives loci, numpop and numpop2
/// @param options  receives custm, custm2, replicate, replicatenum,
///                 newpops_numpop and muloci; options->recordedusem is only
///                 written when the recorded flag differs from options->usem
/// @param data     receives numpop, loci, popnames and a zeroed skiploci
///                 array of loci+1 entries
#ifdef ZNZ
void read_from_bayesmdim_minimal_info(znzFile mdimfile, world_fmt *world,option_fmt *options, data_fmt *data)
#else
void read_from_bayesmdim_minimal_info(FILE *mdimfile, world_fmt *world,option_fmt *options, data_fmt *data)
#endif
{
  char *input;
  long pop;
  long tmp = 0; // stays 0 when the header line carries no replicate field
  int usem_flag;
  boolean done=FALSE;
  boolean recordedusem=TRUE;

  input = (char *) mycalloc(LINESIZE , sizeof(char));
#ifdef ZNZ
  while(done==FALSE && ZNZFGETS(input,LINESIZE,mdimfile) != EOF)
#else
  while(done==FALSE && FGETS(input,LINESIZE,mdimfile) != EOF)
#endif
    {
      if(input[0] == '#' && strstr(input,"begin"))
        {
          // line 1 after the marker: custom migration model
#ifdef ZNZ
          ZNZFGETS(input,LINESIZE,mdimfile);
#else
          FGETS(input,LINESIZE,mdimfile);
#endif
          printf("%i>>>>>>> read from bayesallfile <<<<<<<<<<<<<<<<<<<<<<\n",myID);
          options->custm = (char *) myrealloc(options->custm, sizeof(char) * (strlen(input)+1));
          strcpy(options->custm, bayesmdim_after_prefix(input, 9));
          printf("%i> custom       = %s\n",myID, options->custm);
          // line 2: second custom migration model
#ifdef ZNZ
          ZNZFGETS(input,LINESIZE,mdimfile);
#else
          FGETS(input,LINESIZE,mdimfile);
#endif
          options->custm2 = (char *) myrealloc(options->custm2, sizeof(char) * (strlen(input)+1));
          strcpy(options->custm2, bayesmdim_after_prefix(input, 9));
          printf("%i> custom2      = %s\n",myID, options->custm2);
          // line 3: dimensions and flags
#ifdef ZNZ
          ZNZFGETS(input,LINESIZE,mdimfile);
#else
          FGETS(input,LINESIZE,mdimfile);
#endif
          // %i stores an int, so scan into a real int and narrow afterwards
          // instead of writing through a boolean pointer.
          usem_flag = (int) recordedusem;
          sscanf (bayesmdim_after_prefix(input, 3), "%li %li %li %li %li %i",
                  &world->loci, &world->numpop,
                  &world->numpop2, &tmp, &options->replicatenum, &usem_flag);
          recordedusem = (boolean) usem_flag;
          printf("%i> loci         = %li\n",myID, world->loci);
          printf("%i> numpop       = %li\n",myID, world->numpop);
          printf("%i> numpop^2     = %li\n",myID, world->numpop2);
          printf("%i> replicate    = %li\n",myID, tmp);
          printf("%i> replicatenum = %li\n",myID, options->replicatenum);
          printf("%i> use_M        = %li\n",myID, (long) recordedusem);
          // fill some more...
          data->numpop = world->numpop;
          options->newpops_numpop = world->numpop;
          options->replicate = (boolean) tmp;
          // the recorded flag is only stored when it disagrees with the
          // current request (behaviour kept from the original code)
          if(options->usem != recordedusem)
            options->recordedusem = recordedusem;
          // lines 4..: one population name per line
          data->popnames = (char **) mymalloc (sizeof (char *) * world->numpop);
          for (pop = 0; pop < world->numpop; pop++)
            {
              data->popnames[pop] = (char *) mycalloc (1, sizeof (char) * LINESIZE);
#ifdef ZNZ
              ZNZFGETS(input,LINESIZE,mdimfile);
#else
              FGETS(input,LINESIZE,mdimfile);
#endif
              sscanf (bayesmdim_after_prefix(input, 3), "%s", data->popnames[pop]);
              printf("%i> population = %s\n",myID, data->popnames[pop]);
            }
          done=TRUE;
        }
    }
  options->muloci = data->loci = world->loci;
  data->skiploci =
    (boolean *) myrealloc (data->skiploci,
                           sizeof (boolean) * (data->loci + 1));
  memset (data->skiploci, 0, sizeof (boolean) * (data->loci + 1));
  data->numpop = world->numpop;
  printf("%i>>>>>>> end read from bayesallfile <<<<<<<<<<<<<<<<<<\n",myID);
  myfree(input);
}
			    

/// Copies the prior ranges into the per-parameter bound arrays and returns
/// the total number of histogram bins over all parameters.
///
/// Slot layout (same index in @p lowerbound and @p upperbound):
///   [0, numpop)                          -> options->bayespriortheta
///   [numpop, numpop2)                    -> options->bayespriorm
///   [numpop2, numpop2 + bayes->mu*loci)  -> options->bayespriorrate
///
/// @param lowerbound receives the prior minimum of every slot
/// @param upperbound receives the prior maximum of every slot
/// @param world      supplies numpop, numpop2, loci and bayes->mu
/// @param options    supplies the three priors
/// @param locus      not used by this function
/// @return sum of the bin counts of all slots that were filled
long get_fullbinsum(MYREAL *lowerbound, MYREAL *upperbound, world_fmt *world, option_fmt *options, long locus)
{
  long binsum = 0;
  long slot;

  (void) locus;

  // population-size slots
  slot = 0;
  while (slot < world->numpop)
    {
      lowerbound[slot] = options->bayespriortheta->min;
      upperbound[slot] = options->bayespriortheta->max;
      binsum += options->bayespriortheta->bins;
      ++slot;
    }

  // migration slots
  slot = world->numpop;
  while (slot < world->numpop2)
    {
      lowerbound[slot] = options->bayespriorm->min;
      upperbound[slot] = options->bayespriorm->max;
      binsum += options->bayespriorm->bins;
      ++slot;
    }

  // rate slots, present only when bayes->mu is non-zero
  slot = world->numpop2;
  while (slot < world->numpop2+world->bayes->mu * world->loci)
    {
      lowerbound[slot] = options->bayespriorrate->min;
      upperbound[slot] = options->bayespriorrate->max;
      binsum += options->bayespriorrate->bins;
      ++slot;
    }

  return binsum;
}


///
/// read bayesallfile from disk and creates all the needed parts to 
/// recreate the output and pdf output
#ifdef ZNZ
void read_bayes_fromfile(znzFile fmdimfile, world_fmt *world,option_fmt *options, char **files, long fnum)
#else
  void read_bayes_fromfile(FILE *fmdimfile, world_fmt *world,option_fmt *options, char **files, long fnum)
#endif
{
  const long nn = world->numpop2 + world->bayes->mu * world->loci + 1;// One is for Log(Prob(Data|Model)
  long *n = NULL;
  long j0, j, z0, z;
  long step;
  long locus;
  long frompop;
  long topop;
  long nnn=0;
  long t;
  long bin;
  const long numpop = world->numpop;
  const long numpop2 = world->numpop2;
  const long np = world->numpop2 + world->bayes->mu;
  const long hc = world->options->heated_chains; 
  long numbins = 0;
  long numbinsall = 0;
  char *input;
  char *inptr;
  bayes_fmt * bayes = world->bayes;
  bayeshistogram_fmt *hist;
  MYREAL post;
  MYREAL like;
  MYREAL *params;
  MYREAL *delta = bayes->deltahist;
  MYREAL *lowerbound;
  MYREAL *upperbound;
  MYREAL *autocorrelation;
  MYREAL *ess;
  boolean *done;
  boolean recordedusem = options->usem;
  MYREAL *oldmeans;
#ifndef ZNZ
  FILE *mdimfile = fmdimfile;
#else
  znzFile mdimfile = fmdimfile;
#endif
  long f;
  done = (boolean *) mycalloc(world->loci, sizeof(boolean));
  params = (MYREAL *) mycalloc(2+ numpop2 + world->bayes->mu, sizeof(MYREAL));
  oldmeans = (MYREAL *) mycalloc(2+ numpop2 + world->bayes->mu, sizeof(MYREAL));
  autocorrelation = (MYREAL *) mycalloc(2 * world->loci * nn, sizeof(MYREAL));
  ess = autocorrelation + world->loci * nn;
  lowerbound = (MYREAL *) mycalloc(nn, sizeof(MYREAL));
  upperbound = (MYREAL *) mycalloc(nn, sizeof(MYREAL));
  n = (long *) mycalloc(numpop2 + world->bayes->mu * world->loci, sizeof(long));
  input = (char *) mycalloc(SUPERLINESIZE, sizeof(char));
  printf("Begin reading the bayesallfile back into the system\n");
  // files has always at least one entry
  if (files!=NULL)
    {
      mdimfile = NULL;
    }
  else
    {
      error("Filelist should be filled\n");
    }
#ifdef ZNZ 
  unsigned long bytes = SUPERLINESIZE > ONEMEGABYTE ? SUPERLINESIZE : ONEMEGABYTE;
#endif
  for(f=0;f<fnum;f++)
    {
      input[0]='\0';
#ifdef ZNZ
	  mdimfile = znzopen(files[f], "r", options->use_compressed);      
#else
	  mdimfile = fopen(files[f],"r");      
#endif
	  printf("%s opened\n",files[f]);
	  if (mdimfile == NULL)
	    {
	      printf("errno = %s (%d).\n", strerror(errno), errno);
	      exit(1);
	    }
#ifdef ZNZ
      znzbuffer(mdimfile,bytes);
      while(ZNZFGETS(input,SUPERLINESIZE,mdimfile) != EOF)
#else
      while(FGETS(input,SUPERLINESIZE,mdimfile) != EOF)
#endif
	  {
	    //printf("%s> ",files[f]);//,input);
	  // grab the commentlines
	    if (input[0] == '#' || input[0]=='S' || input[0]=='\0')
	      continue;
	    //	    while(input[0]=='#' || input[0]=='S')
	    //{
	    //#ifdef ZNZ
	      // ZNZFGETS(input,SUPERLINESIZE,mdimfile);
	    //#else
	      //FGETS(input,SUPERLINESIZE,mdimfile);
	      //printf("#### %s\n",input);
	    //#endif
	    //}
	  // read the bayesallfile
	  //	  printf("%s\n",input);
	  //if(input[0] == '\0')
	  //  continue;
	    //	  if(input !=NULL && input[0] != '\0')
	    //{
	      inptr = input;
	      
	      step       = atol(strsep(&inptr,"\t"));
	      //printf("%li\n",step);
	      locus      = atol(strsep(&inptr,"\t"))-1;
	      if(locus == -1)
		error("help");
	      //  replicate  = atol(strsep(&inptr,"\t"))-1;
	      (void) strsep(&inptr,"\t");
	      post       = atof(strsep(&inptr,"\t"));
	      like       = atof(strsep(&inptr,"\t"));
	      //  probg      = atof( strsep(&inptr,"\t"));
	      (void) strsep(&inptr,"\t");
	      //  prior      = atof( strsep(&inptr,"\t"));
	      (void) strsep(&inptr,"\t");
	      //  T = atol(strsep(&inptr,"\t"))+1;
	      (void) strsep(&inptr,"\t");
	      //  treelength = atof(strsep(&inptr,"\t"));
	      (void) strsep(&inptr,"\t");
	      params[0] = post;
	      params[1] = like;
	      
	      if(!done[locus])
		{
		  
		  done[locus] = TRUE;
		  // allocate the number of bins for the histogram
		  bayes->histogram[locus].binsum = get_fullbinsum(lowerbound, upperbound, world, options, locus);
		  bayes->histogram[locus].results = (MYREAL *) mycalloc(bayes->histogram[locus].binsum + 1, sizeof(MYREAL));
		  bayes->histogram[locus].set95 = (char *) mycalloc(bayes->histogram[locus].binsum* 2 + 2, sizeof(char));
		  bayes->histogram[locus].set50 = world->bayes->histogram[locus].set95 + bayes->histogram[locus].binsum + 1;
		  memset(world->bayes->histogram[locus].results, 0 , sizeof(MYREAL) * (world->bayes->histogram[locus].binsum)); 
		  if(bayes->histogram[locus].covariance==NULL)
		    doublevec2d(&bayes->histogram[locus].covariance,world->numpop2, world->numpop2);
		}
	      hist = &bayes->histogram[locus];
	      numbinsall = 0;
	      for(j0=0;j0 < numpop2; j0++)
		{
		  if(shortcut(j0,bayes,&j))
		    {
		      continue;
		    }
		  else
		    {
		      params[j+2] =  atof(strsep(&inptr,"\t"));
		      if(j>=numpop && options->usem!=recordedusem)
			{
			  m2mm(j, numpop,&frompop,&topop);
			  if(recordedusem)// recorded bayesallfile was with M, new request is for 4Nm 
			    {
			      params[j+2] *= params[topop+2];
			    }
			  else //recorded bayesallfile was with xNm, new request is for M
			    {
			      params[j+2] /= params[topop+2];
			    }
			}
		      n[j] += 1;
		      oldmeans[j] = hist->means[j];
		      hist->means[j] += (params[j+2] - hist->means[j]) / n[j];
		    }
		  numbinsall += hist->bins[j];
		  numbins = numbinsall - hist->bins[j];
		  bin = (long) ((params[j+2]-lowerbound[j]) / delta[j]);
		  hist->minima[j0] = lowerbound[j];
		  hist->maxima[j0] = upperbound[j];
		  hist->results[numbins + bin] += 1.;
		  bayes->histtotal[locus * np + j] += 1;
		}
	      if(bayes->mu)
		{
		  numbins = numbinsall;
		  params[j0+2] = atof(strsep(&inptr,"\t"));
		  n[j0+locus] += 1;
		  hist->means[j0] += (params[j0+2] - hist->means[j0]) / n[j0+locus];
		  bin = (long) ((params[j0+2]-lowerbound[j0]) / delta[j0]); 
		  hist->minima[j0] = lowerbound[j0];
		  hist->maxima[j0] = upperbound[j0];
		  hist->results[numbins + bin] += 1.;
		  bayes->histtotal[locus * np + j0] += 1;
		}
	      // calculate covariance_bayes2(world,locus,params) here but this
	      // needs to be done for every line from the mdimfile, is not divided by (n-1)!!!
	      // see where the matrix is printed for that
	      hist->n = n[0]; //assumes that all are the same (should be!)
	      for(j0=0;j0 < numpop2; j0++)
		{
		  if(shortcut(j0,bayes,&j))
		    continue;
		  else
		    {
		      for(z0=0;z0 < numpop2; z0++)
			{
			  if(shortcut(z0,bayes,&z))
			    continue;
			  else
			    {
			      hist->covariance[j][z] += (params[z+2] - hist->means[z]) * (params[j+2]-oldmeans[j]); 
			    }
			}
		    }
		}
	      if(world->options->datatype == 'g')
		{
		  for(t=0;t<hc;t++)
		    {
		      world->bf[locus * hc + t] = atof(strsep(&inptr,"\t"));
		    }
		  // dummy read of thermo sum up to this point
		  //lsum = atof(strsep(&inptr,"\t"));
		  (void ) strsep(&inptr,"\t");
		  // harmonic mean: scaler contains the log value, hm contains 1.
		  world->hmscale[locus] = atof(strsep(&inptr,"\t"));
		  world->hm[locus] = 1.; 
		  //	      calculate_ess_frombayes (world, step, params, locus, &autocorrelation[locus*nn], &ess[locus*nn]);
		  calculate_ess_frombayes (world, step, params, locus, autocorrelation, ess);
		  covariance_bayes(world,locus);
		}
	      else
		{
		  //		  fprintf(stdout,"%s\n",inptr);
		  (void) strsep(&inptr,"\n");
		}
	  }
      //}
#ifdef ZNZ
      znzclose(mdimfile);
#else
      fclose(mdimfile);
#endif
      if(options->mdimdelete)
	{
	  remove(options->bayesmdimfilename);
	}
      fprintf(stdout,"%s closed\n",files[f]);
      mdimfile=NULL;
    }
  printf("End reading the bayesallfile back into the system\n");
  if(world->options->datatype == 'g')
    {
      // reset the archiving machinery
      memset(world->auto_archive,0, sizeof(MYREAL) * 2 * world->numpop2+options->bayesmurates * world->loci + 1);
      nnn = 1;
      for(j=0;j<world->loci;j++)
	{
	  
	  for(t=0;t<nn; t++)
	    {
	      // onepass mean of autocorrelation
	      world->auto_archive[t] += (autocorrelation[j*nn + t] - world->auto_archive[t])/nnn;
	      // summing ess values
	      world->ess_archive[t] += ess[j*nn + t];
	      //printf("j=%li t=%li %f\n", j, t, world->ess_archive[t]);
	    }
	  nnn++;
	}
    }

  myfree(params);
  myfree(oldmeans);
  myfree(autocorrelation);
  myfree(lowerbound);
  myfree(upperbound);
  myfree(n);
  myfree(done);
  myfree(input);
}
