/* MPI parts for migrate 
   started November 2000, Seattle
   Peter Beerli beerli@genetics.washington.edu

   
Copyright 2001 Peter Beerli and Joseph Felsenstein

$Id: migrate_mpi.c,v 1.9 2001/06/04 00:08:54 beerli Exp $
*/
#ifdef MPI
#include "migrate_mpi.h"
#include "broyden.h"
#include "combroyden.h"
#include "gammalike.h"
#include "options.h"

/* Task codes.  The master stores one of these in element 0 of the
   double buffer it sends to the workers (see mpi_likelihood_master,
   mpi_gradient_master, mpi_results_master and mpi_send_stop);
   mpi_maximize_worker() switches on that value:
     END      - leave the worker loop
     LIKE     - evaluate the likelihoods of the worker's loci
     GRADIENT - evaluate the derivatives for the worker's loci
     RESULT   - send the per-locus results back to the master */
#define END 0
#define LIKE 1
#define GRADIENT 2
#define RESULT 3

/* defined outside of this file; called by mpi_runloci_worker() */
extern void run_locus (world_fmt ** universe, int usize, 
		       option_fmt * options, data_fmt * data, 
		       tpool_t * heating_pool, long maxreplicate, 
		       long locus, long *treefilepos, long *Gmax);

/* forward declarations of functions defined further down in this file */
long pack_databuffer(char **buffer, data_fmt *data, option_fmt *options);
void mpi_gradient_master(nr_fmt *nr, world_fmt *world, int *who);

void mpi_results_worker(long bufsize, timearchive_fmt **atl, long maxrep,  
			long numpop, int *who);
void assignloci_worker(world_fmt *world);
void swap_atl(long from, long to, world_fmt *world);

/* Master side of the locus distribution.

   loci - number of loci to hand out
   who  - filled in here: who[locus] is the rank of the worker that
          reported that locus as finished

   The first MIN(loci, numcpu-1) loci are sent to workers 1, 2, ...
   with tag locus+1.  Afterwards the master waits for `loci` completion
   messages; each reporting worker receives either the next unassigned
   locus or, when none is left, a message with tag 0 that the worker
   treats as its stop condition. */
void mpi_runloci_master(long loci, int *who)
{
  MPI_Status status;
  long finished;		/* locus number reported by a worker */
  long received;		/* completion messages seen so far */
  long next = 0;		/* next locus that has not been sent yet */
  const long initial = MIN(loci, numcpu-1);

  /* prime the workers: locus i goes to rank i+1 */
  while (next < initial)
    {
      MPI_Send(&next, 1, MPI_LONG, next+1, next+1, MPI_COMM_WORLD);
      next++;
    }

  for (received = 0; received < loci; received++)
    {
      MPI_Recv(&finished, 1, MPI_LONG, MPI_ANY_SOURCE, MPI_ANY_TAG,
	       MPI_COMM_WORLD, &status);
      who[finished] = status.MPI_SOURCE;

      if (next >= loci)
	{
	  /* nothing left to do: tag 0 marks the end of loci */
	  MPI_Send(&loci, 1, MPI_LONG, status.MPI_SOURCE, 0,
		   MPI_COMM_WORLD);
	  continue;
	}

      MPI_Send(&next, 1, MPI_LONG, status.MPI_SOURCE, next+1,
	       MPI_COMM_WORLD);
      next++;
    }
}


/* Worker side of the locus distribution.

   Waits for locus numbers from the master, runs each one through
   run_locus() and reports the locus number back (tag locus+1).
   A message carrying tag 0 ends the loop.  Every processed locus is
   appended to universe[0]->who and the global counter locidone is
   advanced, so later stages know which loci belong to this worker. */
void mpi_runloci_worker(world_fmt ** universe, int usize, 
		       option_fmt * options, data_fmt * data, 
		       tpool_t * heating_pool, long maxreplicate, 
		       long *treefilepos, long *Gmax)
{
  MPI_Status status;
  long locus;

  for (;;)
    {
      MPI_Recv(&locus, 1, MPI_LONG, MASTER, MPI_ANY_TAG, 
	       MPI_COMM_WORLD, &status);

      /* tag 0 is the stop condition */
      if (status.MPI_TAG == 0)
	break;

      run_locus (universe, usize, options, data,
		 heating_pool, maxreplicate, locus, treefilepos, Gmax);
      MPI_Send(&locus, 1, MPI_LONG, MASTER, locus+1,
	       MPI_COMM_WORLD);

      /* remember which locus we worked on
	 - to control the work sent by master
	 - to use in setup_parameter0() [combroyden2.c] */
      universe[0]->who[locidone++] = locus; 
    }
}

/* Master side of a likelihood evaluation.

   param, lparam - parameter vectors; numelem-1 values of each are
                   shipped to the workers
   world         - supplies numpop2, loci and the gamma option
   nr            - nr->locilikes is rebuilt from the workers' answers
                   and nr->llike receives the sum
   helper, who   - not used here

   Message layout (numelem2 = 2*numelem doubles):
     [0]                      LIKE task code
     [1 .. numelem-1]         param
     [numelem .. numelem2-2]  lparam
   Every worker answers with a vector of world->loci doubles that is
   zero for the loci it did not evaluate, so the answers can simply be
   added up.

   Returns the sum of all per-locus log-likelihoods. */
double mpi_likelihood_master(double *param, double *lparam,
			     world_fmt *world, nr_fmt *nr, 
			     helper_fmt *helper,
			     int *who)
{
  long locus, worker;
  MPI_Status status;
  double logres=0.0;
  double *temp;			/* request sent to the workers */
  double *tmp;			/* one worker's per-locus answer */

  long numelem = world->numpop2 + (world->options->gamma ? 2 : 1);
  long numelem2 = numelem *2 ;
  tmp = (double *) calloc(world->loci, sizeof(double));
  temp = (double *) calloc(numelem2, sizeof(double));
  temp[0] = LIKE;
  memcpy(temp+1,param, (numelem-1) * sizeof(double));
  memcpy(temp+numelem,lparam, (numelem-1) * sizeof(double));
  memset(nr->locilikes,0,sizeof(double)*world->loci);
  for (worker = 1; worker < numcpu; worker++)
    {
      MPI_Send(temp, (int) numelem2, MPI_DOUBLE, worker, 
	       worker, MPI_COMM_WORLD);
    }
  for (worker = 1; worker < numcpu; worker++)
    {
      MPI_Recv(tmp, (int) world->loci, MPI_DOUBLE, MPI_ANY_SOURCE, 
	       MPI_ANY_TAG,
	       MPI_COMM_WORLD, &status);
      // the worker sends a vector of values 
      // e.g. (0,0,0,-12,0,-34,0,0,0), these are all loci
      // of which most of them were not evaluated;
      // the loop updates the master copy of locilikes
      for(locus=0; locus<world->loci;locus++)
	{
	  nr->locilikes[locus] += tmp[locus];
	}
    }
  for(locus=0; locus<world->loci;locus++)
    {
      logres += nr->locilikes[locus];
    }
  nr->llike = logres;
  /* both scratch buffers are local to this call; the original freed
     only temp and leaked tmp on every evaluation */
  free(tmp);
  free(temp);
  return logres;
}


/* Worker side of a likelihood evaluation.

   Clears helper->nr->locilikes and then fills in the entries of the
   loci this worker owns (the first locidone entries of nr->world->who).
   With the gamma option the value comes from gamma_locus_like(),
   otherwise from the function pointer calc_like.  rep is not used. */
void mpi_likelihood_worker(world_fmt *world, helper_fmt *helper,
			  long rep)
{
  nr_fmt *nr = helper->nr;
  long slot;

  memset(nr->locilikes, 0, world->loci * sizeof(double));

  for (slot = 0; slot < locidone; slot++)
    {
      const long locus = nr->world->who[slot];

      if (world->options->gamma)
	{
	  helper->locus = locus;
	  nr->locilikes[locus] = gamma_locus_like(nr, helper->expxv,
						  helper->xv, locus);
	  continue;
	}

      nr->locilikes[locus] = (*calc_like) (nr, nr->param, nr->lparam, locus);
    }
}


/* Master side of the Gmax reduction.

   Broadcasts 0 as the starting value, collects one long from each of
   the numcpu-1 workers, keeps the largest in *Gmax and broadcasts that
   maximum.  world is not used. */
void mpi_gmax_master(world_fmt *world, long *Gmax)
{
  MPI_Status status;
  long candidate;
  int got;

  *Gmax = 0;
  MPI_Bcast(Gmax, 1, MPI_LONG, MASTER, MPI_COMM_WORLD);

  for (got = 0; got < numcpu-1; got++)
    {
      MPI_Recv(&candidate, 1, MPI_LONG, MPI_ANY_SOURCE, 
	       MPI_ANY_TAG,
	       MPI_COMM_WORLD, &status);
      fflush(stdout);
      if (candidate > *Gmax)
	*Gmax = candidate;
    }

  /* tell everybody the overall maximum */
  MPI_Bcast(Gmax, 1, MPI_LONG, MASTER, MPI_COMM_WORLD);
}

/* Worker side of the Gmax reduction.

   Receives the starting value through the broadcast, raises it to the
   largest atl[r][locus].T over the replicate range delivered by
   set_replicates() and over the loci owned by this worker, and sends
   the result to the master with tag myID. */
void mpi_gmax_worker(world_fmt *world)
{
  long first_rep;
  long last_rep;
  long slot;
  long best = 1000000;		/* overwritten by the broadcast below */

  MPI_Bcast(&best, 1, MPI_LONG, MASTER, MPI_COMM_WORLD);
  set_replicates (world, world->repkind, world->options->replicatenum,
		  &first_rep, &last_rep);

  for (slot = 0; slot < locidone; slot++)
    {
      const long locus = world->who[slot];
      long rep = first_rep;

      while (rep < last_rep)
	{
	  if (world->atl[rep][locus].T > best)
	    best = world->atl[rep][locus].T;
	  rep++;
	}
    }

  MPI_Send(&best, 1, MPI_LONG, MASTER, myID,
	   MPI_COMM_WORLD);	  
}


/* Sends the END task to every worker.

   The message has the same length (numelem2 doubles) as the other task
   messages so that the receive posted in mpi_maximize_worker() matches;
   only element 0, the task code, carries information.  Tag is 0. */
void mpi_send_stop(world_fmt *world)
{
  const long half = world->numpop2 + (world->options->gamma ? 2 : 1);
  const long count = 2 * half;
  double *msg = (double *) calloc(count, sizeof(double));
  long rank = 1;

  msg[0] = END;
  while (rank < numcpu)
    {
      MPI_Send(msg, (int) count, MPI_DOUBLE, rank, 0,
	       MPI_COMM_WORLD); //end of loci
      rank++;
    }
  free(msg);
}

/* Sends a single long (value 0) with tag 0 to every worker rank
   1 .. numcpu-1. */
void mpi_results_stop(void)
{
  long zero = 0;
  long rank = 1;

  while (rank < numcpu)
    {
      MPI_Send(&zero, 1, MPI_LONG, rank, 0, MPI_COMM_WORLD);
      rank++;
    }
}

/* Master side of a gradient evaluation.

   nr    - source of param, lparam, indeks and profilenum; nr->d
           receives the workers' derivative vectors
   world - supplies numpop2 and the gamma option
   who   - not used here

   Each worker gets two messages:
     1. numelem2 doubles: [0] GRADIENT, [1..numelem-1] param,
        [numelem..numelem2-2] lparam, [numelem2-1] profilenum
     2. numelem longs: the first numelem-1 entries of nr->indeks
   and answers with numelem-1 doubles that are received into nr->d.
   copy_and_clear_d()/add_back_d() bracket every receive; presumably
   they save the running total and add it back to the fresh values
   (defined elsewhere - confirm). */
void mpi_gradient_master(nr_fmt *nr, world_fmt *world, int *who)
{
  long worker;
  MPI_Status status;

  double *temp;
  long *tempindex;
  long numelem = world->numpop2 + (world->options->gamma ? 2 : 1);
  long numelem2 = 2 * numelem;
  temp = (double *) calloc(numelem2, sizeof(double));
  tempindex = (long *) calloc(numelem, sizeof(long));
  temp[0] = GRADIENT;
  memcpy(temp+1,nr->param, (numelem-1) * sizeof(double));
  memcpy(tempindex,nr->indeks, (numelem-1) * sizeof(long));
  memcpy(temp+numelem, nr->lparam, (numelem-1) * sizeof(double));
  temp[numelem2-1] = nr->profilenum;
  for (worker = 1; worker < numcpu; worker++)
    {
      MPI_Send(temp, (int) numelem2, MPI_DOUBLE, worker, 
	       worker, MPI_COMM_WORLD);
      MPI_Send(tempindex, (int) numelem, MPI_LONG, worker, 
	       worker, MPI_COMM_WORLD);
    }
  memset(nr->d,0,sizeof(double)*(numelem-1));
  for (worker = 1; worker < numcpu; worker++)
    {
      copy_and_clear_d (nr);
      MPI_Recv(nr->d, (int)(numelem-1), MPI_DOUBLE, MPI_ANY_SOURCE, 
	       MPI_ANY_TAG,
	       MPI_COMM_WORLD, &status);
      add_back_d (nr);
    }
  /* both buffers are local to this call; the original freed only temp
     and leaked tempindex on every evaluation */
  free(tempindex);
  free(temp);
}

/* Worker side of a gradient evaluation.

   Zeroes nr->d (nr->partsize entries) and then, for each locus owned
   by this worker, calls simple_loci_derivatives() bracketed by
   copy_and_clear_d()/add_back_d().  helper is not used. */
void mpi_gradient_worker(helper_fmt *helper, nr_fmt * nr, 
			 timearchive_fmt ** tyme)
{
  long slot = 0;

  memset(nr->d, 0, nr->partsize * sizeof(double));

  while (slot < locidone)
    {
      const long locus = nr->world->who[slot];

      copy_and_clear_d (nr);
      simple_loci_derivatives (nr->d, nr, tyme, locus);
      add_back_d (nr);
      slot++;
    }
}

/* Worker main loop for the maximization phase.

   world - this worker's world; supplies numpop2, loci, repkind, atl, who
   rep   - replicate selector passed to set_replicates() and on to
           mpi_likelihood_worker()

   After receiving Gmax through a broadcast and setting up the nr
   structure, the worker waits for task messages of numelem2 doubles
   from the master and dispatches on element 0:
     LIKE     - copy param/lparam out of the message, evaluate the
                likelihoods and send nr->locilikes back
     GRADIENT - copy param/lparam/profilenum, receive the index vector
                in a second message, evaluate and send nr->d back
     RESULT   - element 1 is the buffer size; send the packed results
     END      - leave the loop
   Any other code prints a message and terminates the process. */
void mpi_maximize_worker(world_fmt *world, long rep)
{
  boolean done = FALSE;
  long locus;
  MPI_Status status;
  nr_fmt *nr;
  helper_fmt helper;
  long repstart, repstop, Gmax;
  long numelem = world->numpop2 + (world->options->gamma ? 2 : 1);
  long numelem2 = numelem * 2;
  double *temp;
  temp = (double *) calloc(numelem2, sizeof(double));
  nr = (nr_fmt *) calloc(1, sizeof(nr_fmt));
  set_replicates (world, world->repkind, rep, &repstart, &repstop);

  which_calc_like (world->repkind);

  /* Gmax is written by the broadcast; the worker is not the root */
  MPI_Bcast(&Gmax, 1, MPI_LONG, MASTER, MPI_COMM_WORLD);
  create_nr (nr, world, Gmax, 0, world->loci, world->repkind, 
	     repstart);
  setup_parameter0 (world, nr, world->repkind, 
		    repstart, repstop, world->loci, MULTILOCUS, TRUE);

  while(!done)
    {
      MPI_Recv(temp, (int) numelem2, MPI_DOUBLE, MASTER, MPI_ANY_TAG, 
	       MPI_COMM_WORLD, &status);
      /* taken from the task message before any further receive
	 overwrites status; reused as tag (locus+1) for the answer */
      locus = world->locus = status.MPI_TAG - 1;
      switch((long) temp[0])
	{
	case LIKE:
	  memset(nr->locilikes,0,sizeof(double)*world->loci);
	  memcpy(nr->param,temp+1,sizeof(double)*(numelem-1));
	  memcpy(nr->lparam,temp+numelem,sizeof(double)*(numelem-1));
	  fill_helper (&helper, nr->param, nr->lparam, world, nr);
	  mpi_likelihood_worker(world, &helper, rep);
	  MPI_Send(nr->locilikes, (int) world->loci, MPI_DOUBLE, MASTER, locus+1,
		   MPI_COMM_WORLD);
	  break;
	case GRADIENT:
	  memcpy(nr->param,temp+1,sizeof(double)*(numelem-1));
	  memcpy(nr->lparam,temp+numelem,sizeof(double)*(numelem-1));
	  fill_helper (&helper, nr->param, nr->lparam, world, nr);
	  nr->profilenum = temp[numelem2-1];
	  /* second message of the gradient request: the index vector */
	  MPI_Recv(nr->indeks, (int) numelem, MPI_LONG, MASTER, MPI_ANY_TAG, 
		   MPI_COMM_WORLD, &status);
	  mpi_gradient_worker(&helper, nr, world->atl);
	  MPI_Send(nr->d, (int) nr->partsize, MPI_DOUBLE, MASTER, locus+1,
		   MPI_COMM_WORLD);
	  break;
	case RESULT:
	  /* temp[1] carries the buffer size chosen by the master */
	  mpi_results_worker((long) temp[1], world->atl, repstop, world->numpop, world->who);
	  break;
	case END:
	  done = TRUE;
	  break;
	default:
	  fprintf(stdout, "%i> does not understand task\n", myID);
	  exit(0);
	}
    }
  /* the message buffer was leaked in the original */
  free(temp);
  destroy_nr(nr, world);
  printf("%i> I am dead now\n",myID);fflush(stdout);
}

/* Master side of the options broadcast.

   Serializes the options into a zero-filled buffer of MAXBUFSIZE chars
   with save_options_buffer() and broadcasts the whole buffer.

   The full MAXBUFSIZE is broadcast on purpose: broadcast_options_worker()
   posts its MPI_Bcast with count MAXBUFSIZE, and MPI requires count and
   datatype to match on every rank.  The original used the packed length
   here, which did not match the workers' count. */
void broadcast_options_master(option_fmt *options)
{
  char *buffer;
  buffer = (char *) calloc(MAXBUFSIZE, sizeof(char));
  /* the returned length is not needed; unused bytes stay zero */
  save_options_buffer(buffer,options);
  MPI_Bcast(buffer, MAXBUFSIZE, MPI_CHAR, MASTER, MPI_COMM_WORLD);
  free(buffer);
} 


/* Worker side of the options broadcast.

   Receives MAXBUFSIZE chars from the master and hands them to
   read_options_worker().  That function gets a pointer to a separate
   cursor variable, so the original allocation can still be freed
   afterwards. */
void broadcast_options_worker(option_fmt *options)
{
  char *storage = (char *) calloc(MAXBUFSIZE, sizeof(char));
  char *cursor = storage;

  MPI_Bcast(storage, MAXBUFSIZE, MPI_CHAR, MASTER, MPI_COMM_WORLD);
  read_options_worker(&cursor, options);
  free(storage);
}


/* Master side of the data broadcast.

   Packs the data description into a zero-filled buffer of MAXBUFSIZE
   chars with pack_databuffer() and broadcasts the whole buffer.

   The full MAXBUFSIZE is broadcast on purpose: broadcast_data_worker()
   posts its MPI_Bcast with count MAXBUFSIZE, and MPI requires count and
   datatype to match on every rank.  The original used the packed length
   here, which did not match the workers' count. */
void broadcast_data_master(data_fmt *data, option_fmt *options)
{
  char *buffer;
  buffer = (char *) calloc(MAXBUFSIZE, sizeof(char));
  /* the returned length is not needed; unused bytes stay zero */
  pack_databuffer(&buffer,data, options);
  MPI_Bcast(buffer, MAXBUFSIZE, MPI_CHAR, MASTER, MPI_COMM_WORLD);
  free(buffer);
} 

/* Worker side of the data broadcast.

   Takes part in the broadcast with a MAXBUFSIZE receive buffer and
   frees it again; the received text is not unpacked here, and data
   and options are not touched. */
void broadcast_data_worker(data_fmt *data, option_fmt *options)
{
  char *incoming = (char *) calloc(MAXBUFSIZE, sizeof(char));

  MPI_Bcast(incoming, MAXBUFSIZE, MPI_CHAR, MASTER, MPI_COMM_WORLD);
  free(incoming);
} 

/* Writes a text description of the data set into *buffer.

   Line 1: datatype, numpop, loci, dlm and the title (written without
           separators between the fields)
   Line 2: the number of sites of every locus, each followed by a blank
   Line 3: addon and fracchange

   Each piece is appended at the current end of the string.  The caller
   must supply a buffer that is large enough; no bound is checked here.

   Returns the length of the resulting string. */
long pack_databuffer(char **buffer, data_fmt *data, option_fmt *options)
{
  char *tail = *buffer;		/* always points at the terminating NUL */
  long i;

  sprintf(tail, "%c%li%li%c%s\n", options->datatype,data->numpop,
	  data->loci, data->dlm, options->title);
  tail += strlen(tail);

  for (i = 0; i < data->loci; i++)
    {
      sprintf(tail, "%li ", data->seq->sites[i]);
      tail += strlen(tail);
    }

  sprintf(tail, "\n%li %f", data->seq->addon,
	  data->seq->fracchange);
  tail += strlen(tail);

  return (long) (tail - *buffer);
}

/* Reads the results of one locus from a text buffer produced by
   pack_result_buffer().

   For every replicate 0 .. maxrep-1 the buffer holds one line with
   param_like followed by numpop*numpop lines with the parameters;
   the values are stored into atl[rep][locus]. */
void
unpack_result_buffer(char *buffer, timearchive_fmt **atl, 
		     long maxrep, long locus, long numpop)
{
  const long nparam = numpop * numpop;
  char line[LINESIZE];
  char *cursor = buffer;	/* advanced by sgets() */
  long r;

  for (r = 0; r < maxrep; r++)
    {
      timearchive_fmt *entry = &atl[r][locus];
      long p;

      sgets(line, LINESIZE, &cursor);
      entry->param_like = atof(line);

      for (p = 0; p < nparam; p++)
	{
	  sgets(line, LINESIZE, &cursor);
	  entry->param[p] = atof(line);
	}
    }
}

/* Appends the results of one locus to the string in buffer.

   For every replicate 0 .. maxrep-1 one "%f" line with param_like is
   written, followed by numpop*numpop "%f" lines with the parameters.
   buffer must already hold a NUL-terminated string (possibly empty)
   and be large enough; no bound is checked here. */
void
pack_result_buffer(char *buffer, timearchive_fmt **atl, 
		   long maxrep, long locus, long numpop)
{
  const long nparam = numpop * numpop;
  char line[LINESIZE];
  char *end = buffer + strlen(buffer);	/* current end of the string */
  long r;

  for (r = 0; r < maxrep; r++)
    {
      long p;

      sprintf(line, "%f\n", atl[r][locus].param_like);
      strcpy(end, line);
      end += strlen(line);

      for (p = 0; p < nparam; p++)
	{
	  sprintf(line, "%f\n", atl[r][locus].param[p] );
	  strcpy(end, line);
	  end += strlen(line);
	}
    }
}

/* Master side of the result collection.

   world  - supplies numpop2, loci and the gamma option
   atl    - receives the unpacked results
   maxrep - number of replicates packed per locus
   numpop - number of populations
   who    - not used here

   Sends a RESULT task to every worker (element 1 of the message is the
   buffer size the worker has to use), then receives one text buffer
   per locus and unpacks it into atl; the locus number is the message
   tag minus 1.

   The buffer size allows for numpop*numpop parameters plus the
   param_like value per replicate (pack_result_buffer() writes both)
   and one byte for the terminating NUL.  The original left out
   param_like and the terminator. */
void 
mpi_results_master(world_fmt *world, timearchive_fmt **atl, long maxrep, 
		   long numpop, int *who)
{
  const int maxbufsize =
    (int) ((numpop * numpop + 1) * maxrep * MAXPRINTVALLENGTH + 1);
  char *buffer;
  double *temp;
  int worker;
  long received;
  MPI_Status status;
  long numelem = world->numpop2 + (world->options->gamma ? 2 : 1);
  long numelem2 = 2 * numelem;
  temp = (double *) calloc(numelem2, sizeof(double));

  buffer = (char *) calloc(maxbufsize, sizeof(char));
  temp[0] = RESULT;
  temp[1] = maxbufsize;
  for(worker=1; worker < numcpu; worker++)
    {
      MPI_Send(temp, (int) numelem2, MPI_DOUBLE, 
	       worker, worker, MPI_COMM_WORLD);
    }

  /* exactly one message per locus is expected */
  for(received=0; received < world->loci; received++)
    {
      /* clear leftovers so the text is always NUL-terminated */
      memset(buffer,0,sizeof(char)*maxbufsize);
      MPI_Recv(buffer, maxbufsize, MPI_CHAR, 
	       MPI_ANY_SOURCE, MPI_ANY_TAG,
	       MPI_COMM_WORLD, &status);
      /* workers tag the message with locus+1 */
      unpack_result_buffer(buffer, atl, maxrep, status.MPI_TAG-1, numpop);
    }
  free(buffer);
  free(temp);
}

/* Worker side of the result collection.

   bufsize - size of the text buffer, as dictated by the master
   atl     - source of the results
   maxrep  - number of replicates packed per locus
   numpop  - number of populations
   who     - loci owned by this worker (first locidone entries)

   For every owned locus the results are packed into a zeroed buffer
   and the whole buffer is sent to the master with tag locus+1.

   The buffer is obtained with one checked allocation; the original
   allocated one byte, grew it with an unchecked realloc and would have
   passed NULL to memset() on failure. */
void 
mpi_results_worker(long bufsize, timearchive_fmt **atl, long maxrep,  
		   long numpop, int *who)
{
  int ww, locus;
  char *buffer;

  buffer = (char *) calloc(bufsize, sizeof(char));
  if(buffer == NULL && bufsize > 0)
    {
      fprintf(stderr, "%i> cannot allocate result buffer of %li bytes\n",
	      myID, bufsize);
      exit(EXIT_FAILURE);
    }
  
  for(ww=0; ww < locidone ; ww++)
    {
      locus = who[ww];
      /* pack_result_buffer() appends, so start from an empty string */
      memset(buffer,0,sizeof(char)*bufsize);
      pack_result_buffer(buffer, atl, maxrep, locus, numpop);
      MPI_Send(buffer, (int) bufsize, MPI_CHAR, MASTER, locus+1, 
	       MPI_COMM_WORLD);
    }
  free(buffer);
}
/*
// send the data over all loci/replicates to all nodes
// including the master node, so that all nodes can then 
// start calculating profiles [see calc_profiles()]
//
void distribute_locidata(world_fmt *world)
{
  char *buffer;
  pack_loci_data(world, &buffer);
  MPI_allgather(buffer);
  unpack_loci_data(buffer, world);
  free(buffer);
}

void pack_loci_data(world_fmt *world, char **buffer)
{
  long replicates = world->options->repl
  *buffer = realloc(*buffer,LINESIZE);
  hits = sscanf (input, "%li %li %li %li %li", &world->loci, &world->numpop, &world->numpop2, &tmp, &replicates);  
}
*/
// necessary for analyzing old sumfiles using MPI
//
// master is reusing  mpi_runloci_master()
//
// The worker receives locus numbers until a message with tag 0
// arrives.  For each locus it calls swap_atl(locus, locidone, world),
// reports the locus back to the master (tag locus+1) and appends it
// to world->who, advancing the global counter locidone.
void assignloci_worker(world_fmt *world)
{
  MPI_Status status;
  long locus;

  for (;;)
    {
      MPI_Recv(&locus, 1, MPI_LONG, MASTER, MPI_ANY_TAG, 
	       MPI_COMM_WORLD, &status);

      /* tag 0 is the stop condition */
      if (status.MPI_TAG == 0)
	break;

      swap_atl(locus, locidone, world);
      MPI_Send(&locus, 1, MPI_LONG, MASTER, locus+1,
	       MPI_COMM_WORLD);

      /* remember which locus we worked on
	 - to control the work sent by master
	 - to use in setup_parameter0() [combroyden2.c] */
      world->who[locidone++] = locus; 
    }
}

/* Exchanges the timearchive entries atl[r][from] and atl[r][to] for
   every replicate r in 0 .. world->options->replicatenum-1.

   The temporary is a copy of the structure, not a pointer to it: the
   original kept a pointer to the `to` slot, so after the first
   assignment it read back the value it had just written and both
   slots ended up with the `from` entry.  The copy is shallow; pointer
   members of the entries are exchanged, not duplicated.

   NOTE(review): other functions in this file index atl by the locus
   number stored in who[], not by slot position - confirm that callers
   of assignloci_worker() expect the entries to be moved. */
void swap_atl(long from, long to, world_fmt *world)
{
  long r;
  timearchive_fmt tmp;

  if(from == to)
    return;			/* nothing to exchange */

  for(r=0;r<world->options->replicatenum;r++)
    {
      tmp = world->atl[r][to];
      world->atl[r][to] = world->atl[r][from];
      world->atl[r][from] = tmp;
    }
}
#endif








