/* MPI parts for migrate 
   started November 2000, Seattle
   Peter Beerli beerli@genetics.washington.edu

   
Copyright 2001 Peter Beerli and Joseph Felsenstein

$Id: migrate_mpi.c,v 1.10 2001/07/25 19:27:24 beerli Exp $
*/
#ifdef MPI
#include "migrate_mpi.h"
#include "broyden.h"
#include "combroyden.h"
#include "gammalike.h"
#include "options.h"
#include "world.h"

extern void run_locus (world_fmt ** universe, int usize,
		       option_fmt * options, data_fmt * data,
		       tpool_t * heating_pool, long maxreplicate,
		       long locus, long *treefilepos, long *Gmax);

long pack_databuffer (char **buffer, data_fmt * data, option_fmt * options);
void mpi_gradient_master (nr_fmt * nr, world_fmt * world, int *who);
void mpi_results_master (double sendtype, world_fmt * world,
			 long maxrep,
			 void (*unpack) (char *buffer, world_fmt * world,
					 long locus, long maxrep,
					 long numpop));

void mpi_results_worker (long bufsize, world_fmt * world,
			 long maxrep,
			 long (*pack) (char **buffer, world_fmt * world,
				       long locus, long maxrep, long numpop));
void assignloci_worker (world_fmt * world);
void swap_atl (long from, long to, world_fmt * world);

/* global variable pack points to function pack_stuff() */
//static void (*pack) (node *, world_fmt *, long);
/* global variable unpack points to function unpack_stuff() */
//static void (*unpack) (node *, world_fmt *, long);

void
mpi_runloci_master (long loci, int *who)
{
  long locus;
  int sender;
  long locusdone;
  MPI_Status status;
  long numsent = 0;
  for (locus = 0; locus < MIN (loci, numcpu - 1); locus++)
    {
      MPI_Send (&locus, 1, MPI_LONG, locus + 1, locus + 1, MPI_COMM_WORLD);
      numsent++;
    }
  for (locus = 0; locus < loci; locus++)
    {
      MPI_Recv (&locusdone, 1, MPI_LONG, MPI_ANY_SOURCE, MPI_ANY_TAG,
		MPI_COMM_WORLD, &status);
      sender = status.MPI_SOURCE;
      who[locusdone] = sender;
      if (numsent < loci)
	{
	  MPI_Send (&numsent, 1, MPI_LONG, sender, numsent + 1,
		    MPI_COMM_WORLD);
	  numsent++;
	}
      else
	{
	  MPI_Send (&loci, 1, MPI_LONG, sender, 0, MPI_COMM_WORLD);	//end of loci
	}
    }
}


void
mpi_runloci_worker (world_fmt ** universe, int usize,
		    option_fmt * options, data_fmt * data,
		    tpool_t * heating_pool, long maxreplicate,
		    long *treefilepos, long *Gmax)
{
  boolean done = FALSE;
  long locus;
  MPI_Status status;
  while (!done)
    {
      MPI_Recv (&locus, 1, MPI_LONG, MASTER, MPI_ANY_TAG,
		MPI_COMM_WORLD, &status);
      if (status.MPI_TAG != 0)	//stop condition
	{
	  run_locus (universe, usize, options, data,
		     heating_pool, maxreplicate, locus, treefilepos, Gmax);
	  MPI_Send (&locus, 1, MPI_LONG, MASTER, locus + 1, MPI_COMM_WORLD);
	  /* we want to know what locus we worked for
	     - to control the work sent by master
	     - to use in setup_parameter0() [combroyden2.c] */
	  universe[0]->who[locidone++] = locus;
	}
      else
	{
	  done = TRUE;
	}
    }
}

double
mpi_likelihood_master (double *param, double *lparam,
		       world_fmt * world, nr_fmt * nr,
		       helper_fmt * helper, int *who)
{
  long locus, worker;
  long sender;
  MPI_Status status;
  double logres = 0.0;
  double *temp;
  double *tmp;
  int tag;

  long numelem = world->numpop2 + (world->options->gamma ? 2 : 1);
  long numelem2 = numelem * 2;
  tmp = (double *) calloc (world->loci, sizeof (double));
  temp = (double *) calloc (numelem2, sizeof (double));
  temp[0] = MIGMPI_LIKE;
  memcpy (temp + 1, param, (numelem - 1) * sizeof (double));
  memcpy (temp + numelem, lparam, (numelem - 1) * sizeof (double));
  memset (nr->locilikes, 0, sizeof (double) * world->loci);
  for (worker = 1; worker < numcpu; worker++)
    {
      MPI_Send (temp, (int) numelem2, MPI_DOUBLE, worker,
		worker, MPI_COMM_WORLD);
    }
  for (worker = 1; worker < numcpu; worker++)
    {
      //      printf("%i> wait for like calc\n",myID); fflush(stdout);
      MPI_Recv (tmp, (int) world->loci, MPI_DOUBLE, MPI_ANY_SOURCE,
		MPI_ANY_TAG, MPI_COMM_WORLD, &status);
      sender = status.MPI_SOURCE;
      tag = status.MPI_TAG;
      //      printf("%i> like calc got %i from %i\n",myID, sender, tag);
      // the worker send a vector of values 
      // e.g. (0,0,0,-12,0,-34,0,0,0), these are all loci
      // of which most of them were not evaluated
      // the loop updates the master copy of locilikes
      for (locus = 0; locus < world->loci; locus++)
	{
	  nr->locilikes[locus] += tmp[locus];
	}
    }
  for (locus = 0; locus < world->loci; locus++)
    {
      logres += nr->locilikes[locus];
    }
  nr->llike = logres;
  //  printf("%i> Log(L) = %f -------------------------\n",myID, logres);
  free (temp);
  return logres;
}


void
mpi_likelihood_worker (world_fmt * world, helper_fmt * helper, long rep)
{
  long locus, ww;
  nr_fmt *nr = helper->nr;
  memset (nr->locilikes, 0, sizeof (double) * world->loci);
  for (ww = 0; ww < locidone; ww++)
    {
      locus = nr->world->who[ww];
      if (!world->options->gamma)
	{
	  //      printf("%i> in mpi_likelihood_worker\n",myID);
	  nr->locilikes[locus] =
	    (*calc_like) (nr, nr->param, nr->lparam, locus);
	  //      printf("%i> locilikes[locus=%li]=%f\n", myID, locus, nr->locilikes[locus]);
	}
      else
	{
	  helper->locus = locus;
	  helper->nr->locilikes[locus] = gamma_locus_like (nr,
							   helper->expxv,
							   helper->xv, locus);
	}
    }
}


void
mpi_gmax_master (world_fmt * world, long *Gmax)
{
  long sender;
  MPI_Status status;
  long tmp;
  int tag;
  int numreceived = 0;
  //  printf("%i> waiting for gmax\n",myID); 
  *Gmax = 0.;
  MPI_Bcast (Gmax, 1, MPI_LONG, MASTER, MPI_COMM_WORLD);
  while (numreceived < numcpu - 1)
    {
      //      printf("%i> waiting for gmax in loop %i\n",myID, numreceived); 
      MPI_Recv (&tmp, 1, MPI_LONG, MPI_ANY_SOURCE,
		MPI_ANY_TAG, MPI_COMM_WORLD, &status);
      sender = status.MPI_SOURCE;
      tag = status.MPI_TAG;
      //      printf("%i> received gmax=%li from %li with tag %i\n",myID, 
      //             tmp, sender,tag); 
      fflush (stdout);
      if (*Gmax < tmp)
	*Gmax = tmp;
      numreceived++;
    }
//  printf("%i> broadcast gmax=%li\n",myID,*Gmax); 
  MPI_Bcast (Gmax, 1, MPI_LONG, MASTER, MPI_COMM_WORLD);
}

void
mpi_gmax_worker (world_fmt * world)
{
  long ww;
  long repstart;
  long repstop;
  long r;
  long locus;
  long Gmax = 1000000;
//  printf("%i> before bcast gmax in worker\n",myID);fflush(stdout); 
  MPI_Bcast (&Gmax, 1, MPI_LONG, MASTER, MPI_COMM_WORLD);
  set_replicates (world, world->repkind, world->options->replicatenum,
		  &repstart, &repstop);

  for (ww = 0; ww < locidone; ww++)
    {
      locus = world->who[ww];
      for (r = repstart; r < repstop; r++)
	{
	  if (Gmax < world->atl[r][locus].T)
	    Gmax = world->atl[r][locus].T;
	}
    }
//  printf("%i> will send gmax=%li for locus %li\n",myID,Gmax,locus+1); 
//  fflush(stdout);
  MPI_Send (&Gmax, 1, MPI_LONG, MASTER, myID, MPI_COMM_WORLD);
}


void
mpi_send_stop (world_fmt * world)
{
  long worker;
  double *temp;
  long numelem = world->numpop2 + (world->options->gamma ? 2 : 1);
  long numelem2 = 2 * numelem;
  //  printf("%i> will send stop signal\n",myID);fflush(stdout);
  temp = (double *) calloc (numelem2, sizeof (double));
  temp[0] = MIGMPI_END;
  for (worker = 1; worker < numcpu; worker++)
    {
      //      printf("%i> stop signal: numcpu=%i, loci=%li, numpop2=%li\n",myID, 
      //     numcpu, world->loci,numelem2);fflush(stdout);
      MPI_Send (temp, (int) numelem2, MPI_DOUBLE, worker, 0, MPI_COMM_WORLD);	//end of loci
      //printf("%i> sent stop signal %f to worker %li\n",myID,
      //temp[0], worker);
    }
  free (temp);
}

void
mpi_results_stop (void)
{
  long worker;
  long dummy = 0;
  for (worker = 1; worker < numcpu; worker++)
    {
      //printf("%i> will send stop\n", myID);
      MPI_Send (&dummy, 1, MPI_LONG, worker, 0, MPI_COMM_WORLD);
      //printf("%i> have sent stop to worker %i\n", myID, worker);
    }
}

void
mpi_gradient_master (nr_fmt * nr, world_fmt * world, int *who)
{
  long locus;
  long sender;
  MPI_Status status;
  int tag;

  double *temp;
  long *tempindex;
  long numelem = world->numpop2 + (world->options->gamma ? 2 : 1);
  long numelem2 = 2 * numelem;
  temp = (double *) calloc (numelem2, sizeof (double));
  tempindex = (long *) calloc (numelem, sizeof (long));
  temp[0] = MIGMPI_GRADIENT;
  //  printf("%i> indeks = %li %li \n", myID, nr->indeks[0], nr->indeks[1]);
  memcpy (temp + 1, nr->param, (numelem - 1) * sizeof (double));
  memcpy (tempindex, nr->indeks, (numelem - 1) * sizeof (long));
  memcpy (temp + numelem, nr->lparam, (numelem - 1) * sizeof (double));
  temp[numelem2 - 1] = nr->profilenum;
  for (locus = 1; locus < numcpu; locus++)
    {
      MPI_Send (temp, (int) numelem2, MPI_DOUBLE, locus,
		locus, MPI_COMM_WORLD);
      MPI_Send (tempindex, (int) numelem, MPI_LONG, locus,
		locus, MPI_COMM_WORLD);
    }
  memset (nr->d, 0, sizeof (double) * (numelem - 1));
  for (locus = 1; locus < numcpu; locus++)
    {
      copy_and_clear_d (nr);
      MPI_Recv (nr->d, (int) (numelem - 1), MPI_DOUBLE, MPI_ANY_SOURCE,
		MPI_ANY_TAG, MPI_COMM_WORLD, &status);
      add_back_d (nr);
      sender = status.MPI_SOURCE;
      tag = status.MPI_TAG;
    }
  free (temp);
}

void
mpi_gradient_worker (helper_fmt * helper, nr_fmt * nr,
		     timearchive_fmt ** tyme)
{
  long ww, locus;
  memset (nr->d, 0, sizeof (double) * nr->partsize);
  for (ww = 0; ww < locidone; ww++)
    {
      locus = nr->world->who[ww];
      copy_and_clear_d (nr);
      //      printf("called derivatives for locus %li\n",locus);
      simple_loci_derivatives (nr->d, nr, tyme, locus);
      //      printf("%i-%li> %f %f %f %f\n", myID, locus, nr->d[0], nr->d[1], nr->d[2], nr->d[3]);
      add_back_d (nr);
    }
}

void
mpi_maximize_worker (world_fmt * world, long rep)
{
  boolean done = FALSE;
  long locus;
  MPI_Status status;
  nr_fmt *nr;
  helper_fmt helper;
  long repstart, repstop, Gmax;
  long numelem = world->numpop2 + (world->options->gamma ? 2 : 1);
  long numelem2 = numelem * 2;
  double *temp;
  temp = (double *) calloc (numelem2, sizeof (double));
  nr = (nr_fmt *) calloc (1, sizeof (nr_fmt));
  set_replicates (world, world->repkind, rep, &repstart, &repstop);

  which_calc_like (world->repkind);

  MPI_Bcast (&Gmax, 1, MPI_LONG, MASTER, MPI_COMM_WORLD);
  //  printf("%i> received broadcasted gmax=%li\n",myID,Gmax);fflush(stdout); 
  create_nr (nr, world, Gmax, 0, world->loci, world->repkind, repstart);
  setup_parameter0 (world, nr, world->repkind,
		    repstart, repstop, world->loci, MULTILOCUS, TRUE);

  while (!done)
    {
      //      printf("%i> wait for work\n",myID);
      MPI_Recv (temp, (int) numelem2, MPI_DOUBLE, MASTER, MPI_ANY_TAG,
		MPI_COMM_WORLD, &status);
      //     printf("%i> got signal: %li\n",myID, (long) temp[0]);
      locus = world->locus = status.MPI_TAG - 1;
      switch ((long) temp[0])
	{
	case MIGMPI_LIKE:
	  //      printf("%i> in LIKE\n",myID);
	  memset (nr->locilikes, 0, sizeof (double) * world->loci);
	  memcpy (nr->param, temp + 1, sizeof (double) * (numelem - 1));
	  memcpy (nr->lparam, temp + numelem,
		  sizeof (double) * (numelem - 1));
	  fill_helper (&helper, nr->param, nr->lparam, world, nr);
	  mpi_likelihood_worker (world, &helper, rep);
	  MPI_Send (nr->locilikes, (int) world->loci, MPI_DOUBLE, MASTER,
		    locus + 1, MPI_COMM_WORLD);
	  break;
	case MIGMPI_GRADIENT:
	  //      printf("%i> in gradient\n",myID);
	  memcpy (nr->param, temp + 1, sizeof (double) * (numelem - 1));
	  memcpy (nr->lparam, temp + numelem,
		  sizeof (double) * (numelem - 1));
	  fill_helper (&helper, nr->param, nr->lparam, world, nr);
	  nr->profilenum = temp[numelem2 - 1];
	  MPI_Recv (nr->indeks, (int) numelem, MPI_LONG, MASTER, MPI_ANY_TAG,
		    MPI_COMM_WORLD, &status);
	  //      printf("%i> indeks = %li %li \n", myID, nr->indeks[0], nr->indeks[1]);
	  mpi_gradient_worker (&helper, nr, world->atl);
	  MPI_Send (nr->d, (int) nr->partsize, MPI_DOUBLE, MASTER, locus + 1,
		    MPI_COMM_WORLD);
	  //      printf("%i> sent derivatives\n",myID); fflush(stdout);
	  break;
	case MIGMPI_RESULT:
	  mpi_results_worker (temp[1], world, repstop, pack_result_buffer);
	  break;
	case MIGMPI_SUMFILE:
	  mpi_results_worker (temp[1], world, repstop, pack_sumfile_buffer);
	  break;
	case MIGMPI_MIGHIST:
	  mpi_results_worker (temp[1], world, repstop, pack_mighist_buffer);
	  break;
	case MIGMPI_END:
	  done = TRUE;
	  break;
	default:
	  fprintf (stdout, "%i> does not understand task\n", myID);
	  exit (0);
	}
    }
  destroy_nr (nr, world);
  //  printf ("%i> I am dead now\n", myID);
  //fflush (stdout);
}


void
broadcast_options_master (option_fmt * options)
{
  int bufsize = MAXBUFSIZE;
  char *buffer;
  buffer = (char *) calloc (MAXBUFSIZE, sizeof (char));
  bufsize = save_options_buffer (buffer, options);
  MPI_Bcast (buffer, bufsize, MPI_CHAR, MASTER, MPI_COMM_WORLD);
  free (buffer);
}


void
broadcast_options_worker (option_fmt * options)
{
  int bufsize = MAXBUFSIZE;
  char *buffer;
  char *sbuffer;
  sbuffer = (char *) calloc (MAXBUFSIZE, sizeof (char));
  buffer = sbuffer;
  MPI_Bcast (sbuffer, bufsize, MPI_CHAR, MASTER, MPI_COMM_WORLD);
  read_options_worker (&buffer, options);
  free (sbuffer);
}


void
broadcast_data_master (data_fmt * data, option_fmt * options)
{
  int bufsize = MAXBUFSIZE;
  char *buffer;
  buffer = (char *) calloc (MAXBUFSIZE, sizeof (char));
  bufsize = pack_databuffer (&buffer, data, options);
  MPI_Bcast (buffer, bufsize, MPI_CHAR, MASTER, MPI_COMM_WORLD);
  free (buffer);
}

void
broadcast_data_worker (data_fmt * data, option_fmt * options)
{
  int bufsize = MAXBUFSIZE;
  char *buffer;
  buffer = (char *) calloc (MAXBUFSIZE, sizeof (char));
  MPI_Bcast (buffer, bufsize, MPI_CHAR, MASTER, MPI_COMM_WORLD);
  free (buffer);
}

long
pack_databuffer (char **buffer, data_fmt * data, option_fmt * options)
{
  long locus;
  long position = 0;
  sprintf (*buffer, "%c%li%li%c%s\n", options->datatype, data->numpop,
	   data->loci, data->dlm, options->title);
  position = strlen (*buffer);
  for (locus = 0; locus < data->loci; locus++)
    {
      sprintf ((*buffer) + position, "%li ", data->seq->sites[locus]);
      position = strlen (*buffer);
    }
  sprintf ((*buffer) + position, "\n%li %f", data->seq->addon,
	   data->seq->fracchange);
  position = strlen (*buffer);
  return position;
}

void
unpack_result_buffer (char *buffer, world_fmt * world,
		      long locus, long maxrep, long numpop)
{
  long rep, pop;
  timearchive_fmt **atl = world->atl;
  char input[LINESIZE];
  char *buf = buffer;
  strcpy (input, "");
  for (rep = 0; rep < maxrep; rep++)
    {
      sgets (input, LINESIZE, &buf);
      atl[rep][locus].param_like = atof (input);
      for (pop = 0; pop < numpop * numpop; pop++)
	{
	  sgets (input, LINESIZE, &buf);
	  atl[rep][locus].param[pop] = atof (input);
	}
    }
}

long
pack_result_buffer (char **buffer, world_fmt * world,
		    long locus, long maxrep, long numpop)
{
  long rep, pop;
  char input[LINESIZE];
  timearchive_fmt **atl = world->atl;
  long temp;
  long bufsize = strlen (*buffer);
  strcpy (input, "");
  if (bufsize < (temp = (maxrep * (20 + (20 * numpop * numpop)))))
    {
      (*buffer) = (char *) realloc (*buffer, sizeof (char) * temp);
      bufsize = temp;
    }
  memset (*buffer, 0, sizeof (char) * bufsize);
  for (rep = 0; rep < maxrep; rep++)
    {
      sprintf (input, "%f\n", atl[rep][locus].param_like);
      strcat (*buffer, input);
      for (pop = 0; pop < numpop * numpop; pop++)
	{
	  sprintf (input, "%f\n", atl[rep][locus].param[pop]);
	  strcat (*buffer, input);
	}
    }
  return strlen (*buffer) + 1;
}

void
unpack_mighist_buffer (char *buffer, world_fmt * world,
		       long locus, long maxrep, long numpop)
{
  long i, j;
  mighistloci_fmt *aa;
  char input[LINESIZE];
  char *buf = buffer;
  aa = &world->mighistloci[locus];
  sgets (input, LINESIZE, &buf);
  sscanf (input, "%li\n", &aa->mighistnum);
  aa->mighist = (mighist_fmt *) realloc (aa->mighist,
					 sizeof (mighist_fmt) *
					 aa->mighistnum);
  for (j = 0; j < aa->mighistnum; j++)
    {
      sgets (input, LINESIZE, &buf);
      sscanf (input, "%li %li %li\n", &aa->mighist[j].copies,
	      &aa->mighist[j].weight, &aa->mighist[j].migeventsize);
      aa->mighist[j].migevents =
	(migevent_fmt *) realloc (aa->mighist[j].migevents,
				  sizeof (migevent_fmt) *
				  aa->mighist[j].migeventsize + 1);
      for (i = 0; i < aa->mighist[j].migeventsize; i++)
	{
	  sgets (input, LINESIZE, &buf);
	  sscanf (input, "%lf %lf %lf\n",
		  &aa->mighist[j].migevents[i][0],
		  &aa->mighist[j].migevents[i][1],
		  &aa->mighist[j].migevents[i][2]);
	}
    }
}


long
pack_mighist_buffer (char **buffer, world_fmt * world,
		     long locus, long maxrep, long numpop)
{
  long j, i;
  mighistloci_fmt *aa;
  char input[LINESIZE];
  long bufsize = strlen ((*buffer));
  aa = &world->mighistloci[locus];
  strcpy (input, "");
  for (j = 0; j < aa->mighistnum; j++)
    bufsize += 100 + aa->mighist[j].migeventsize * 100;
  (*buffer) = (char *) realloc ((*buffer), sizeof (char) * bufsize);
  memset (*buffer, 0, sizeof (char) * bufsize);
  sprintf (input, "%li\n", aa->mighistnum);
  strcat ((*buffer), input);

  for (j = 0; j < aa->mighistnum; j++)
    {
      sprintf (input, "%li %li %li\n", aa->mighist[j].copies,
	       aa->mighist[j].weight, aa->mighist[j].migeventsize);
      strcat ((*buffer), input);
      for (i = 0; i < aa->mighist[j].migeventsize; i++)
	{
	  sprintf (input, "%f %f %f\n",
		   aa->mighist[j].migevents[i][0],
		   aa->mighist[j].migevents[i][1],
		   aa->mighist[j].migevents[i][2]);
	  strcat ((*buffer), input);
	}
    }
  return strlen (*buffer) + 1;
}

void
unpack_sumfile_buffer (char *buffer, world_fmt * world,
		       long locus, long maxrep, long numpop)
{
  long r, i, j;
  long l = locus;
  char input[LINESIZE * world->numpop2];
  timearchive_fmt **ta = world->atl;
  char *buf = buffer;
  strcpy (input, "");
  for (r = 0; r < maxrep; r++)
    {
      sgets (input, LINESIZE, &buf);
      sscanf (input, "%li %li %li %lf\n", &ta[r][l].T, &ta[r][l].numpop,
	      &ta[r][l].sumtips, &ta[r][locus].param_like);
      ta[r][l].allocT = 0;
      increase_timearchive (world, l, ta[r][l].T, world->numpop, r);
      for (i = 0; i < ta[r][l].T; i++)
	{
	  sgets (input, LINESIZE, &buf);
	  sscanf (input, "%li ", &ta[r][l].tl[i].copies);
	  for (j = 0; j < world->numpop; j++)
	    {
	      sgets (input, LINESIZE, &buf);
	      sscanf (input, "%lf %lf %lf\n", &ta[r][l].tl[i].km[j],
		      &ta[r][l].tl[i].kt[j], &ta[r][l].tl[i].p[j]);
	    }
	  sgets (input, world->numpop2 * LINESIZE, &buf);
	  for (j = 0; j < world->numpop2; j++)
	    {
	      sscanf (input, "%lf ", &ta[r][l].tl[i].mindex[j]);
	    }
	}
      for (i = 0; i < world->numpop2; i++)
	{
	  sgets (input, LINESIZE, &buf);
	  sscanf (input, "%lg %lg\n", &ta[r][l].param[i],
		  &ta[r][l].param0[i]);
	}
      log_param0 (ta[r][l].param0, ta[r][l].lparam0, world->numpop2);
      sgets (input, LINESIZE, &buf);
      sscanf (input, "%li %lg\n", &ta[r][l].trials, &ta[r][l].normd);
    }
}


long
pack_sumfile_buffer (char **buffer, world_fmt * world,
		     long locus, long maxrep, long numpop)
{
  long r, i, j;
  long numpop2 = numpop * numpop;
  long l = locus;
  char input[LINESIZE];
  timearchive_fmt **ta = world->atl;
  // calc bufsize needed
  long bufsize = strlen ((*buffer));
  for (r = 0; r < maxrep; r++)
    {
      bufsize += ta[r][l].T * (numpop * 100 + numpop2 * 20);
      bufsize += numpop2 * 50;
    }
  (*buffer) = (char *) realloc ((*buffer), sizeof (char) * bufsize);
  memset (*buffer, 0, sizeof (char) * bufsize);
  for (r = 0; r < maxrep; r++)
    {
      sprintf (input, "%li %li %li %f\n", ta[r][l].T, ta[r][l].numpop,
	       ta[r][l].sumtips, ta[r][l].param_like);
      strcat ((*buffer), input);
      for (i = 0; i < ta[r][l].T; i++)
	{
	  sprintf (input, "%li\n", ta[r][l].tl[i].copies);
	  strcat ((*buffer), input);
	  for (j = 0; j < numpop; j++)
	    {
	      sprintf (input, "%20.20f %20.20f %f\n",
		       ta[r][l].tl[i].km[j], ta[r][l].tl[i].kt[j],
		       ta[r][l].tl[i].p[j]);
	      strcat ((*buffer), input);
	    }
	  for (j = 0; j < numpop2; j++)
	    {
	      sprintf (input, "%f ", ta[r][l].tl[i].mindex[j]);
	      strcat ((*buffer), input);
	    }
	  sprintf (input, "\n");
	  strcat ((*buffer), input);
	}
      for (i = 0; i < numpop2; i++)
	{
	  sprintf (input, "%20.20e %20.20e\n", ta[r][l].param[i],
		   ta[r][l].param0[i]);
	  strcat ((*buffer), input);
	}
      sprintf (input, "%li %20.20e\n", ta[r][l].trials, ta[r][l].normd);
      strcat ((*buffer), input);
    }
  return strlen (*buffer) + 1;
}

void
mpi_results_master (double sendtype, world_fmt * world, long maxreplicate,
		    void (*unpack) (char *buffer, world_fmt * world,
				    long locus, long maxrep, long numpop))
{
  long numpop = world->numpop;
  int maxbufsize =
    (int) (numpop * numpop * maxreplicate * sizeof (char) *
	   MAXPRINTVALLENGTH);
  boolean done = FALSE;
  char *buffer, *sbuffer;
  double *temp;
  int locus;
  long z, tag, sender;
  MPI_Status status;
  long numelem = world->numpop2 + (world->options->gamma ? 2 : 1);
  long numelem2 = 2 * numelem;
  temp = (double *) calloc (numelem2, sizeof (double));

  buffer = (char *) calloc (maxbufsize, sizeof (char));
  temp[0] = sendtype;
  temp[1] = maxbufsize;
  for (locus = 1; locus < numcpu; locus++)
    {
      MPI_Send (temp, numelem2, MPI_DOUBLE, locus, locus, MPI_COMM_WORLD);
    }
  z = 0;
  while (!done)
    {
      if (z++ >= world->loci)
	{
	  break;
	}
      MPI_Recv (&maxbufsize, 1, MPI_LONG, MPI_ANY_SOURCE, MPI_ANY_TAG,
		MPI_COMM_WORLD, &status);
      printf ("%i> ----------- wait for buffer with  bufsize: %i\n", myID,
	      maxbufsize);
      fflush (stdout);

      buffer = (char *) realloc (buffer, sizeof (char) * (maxbufsize + 1));
      memset (buffer, 0, sizeof (char) * (maxbufsize + 1));
      sender = status.MPI_SOURCE;
      tag = status.MPI_TAG;
      MPI_Recv (buffer, maxbufsize - 1, MPI_CHAR,
		sender, tag, MPI_COMM_WORLD, &status);
      //printf
      //        ("%i> have received buflen=%i from %li for locus %li and this is z=%li\n",
      // myID, strlen (buffer), sender, tag, z);
      //      printf ("%i> have received buffer:====\n %s\n======\n", myID, buffer);
      //fflush (stdout);
      sbuffer = buffer;
      (*unpack) (buffer, world, tag - 1, maxreplicate, numpop);
    }
  free (sbuffer);
  free (temp);
}

void
mpi_results_worker (long bufsize, world_fmt * world, long maxrep,
		    long (*pack) (char **buffer, world_fmt * world,
				  long locus, long maxrep, long numpop))
{
  long numpop = world->numpop;
  int ww, locus;
  char *buffer;
  char **buf;
  buffer = (char *) calloc (1, sizeof (char));	//some small value
  for (ww = 0; ww < locidone; ww++)
    {
      buffer = (char *) realloc (buffer, sizeof (char));	//some small value
      buf = &buffer;
      locus = world->who[ww];
      bufsize = (*pack) (&buffer, world, locus, maxrep, numpop);
      printf ("%i> will send locus=%i-buffer\nsssss\n %s\nssssss\n", myID,
	      locus, buffer);
      fflush (stdout);
      MPI_Send (&bufsize, 1, MPI_LONG, MASTER, locus + 1, MPI_COMM_WORLD);
      MPI_Send (buffer, bufsize - 1, MPI_CHAR, MASTER, locus + 1,
		MPI_COMM_WORLD);
      printf ("%i> have sent buffer with size %li\n", myID, bufsize);
      fflush (stdout);
    }
  free (*buf);
}

/*
// send the data over all loci/replicates to all nodes
// including the master node, so that all nodes can then 
// start calculating profiles [see calc_profiles()]
//
void distribute_locidata(world_fmt *world)
{
  char *buffer;
  pack_loci_data(world, &buffer);
  MPI_allgather(buffer);
  unpack_loci_data(buffer, world);
  free(buffer);
}

void pack_loci_data(world_fmt *world, char **buffer)
{
  long replicates = world->options->repl
  *buffer = realloc(*buffer,LINESIZE);
  hits = sscanf (input, "%li %li %li %li %li", &world->loci, &world->numpop, &world->numpop2, &tmp, &replicates);  
}
*/
// necessary for analyzing old sumfiles using MPI
//
// master is reusing  mpi_runloci_master()
void
assignloci_worker (world_fmt * world)
{
  boolean done = FALSE;
  long locus;
  MPI_Status status;
  while (!done)
    {
      MPI_Recv (&locus, 1, MPI_LONG, MASTER, MPI_ANY_TAG,
		MPI_COMM_WORLD, &status);
      if (status.MPI_TAG != 0)	//stop condition
	{
	  swap_atl (locus, locidone, world);
	  MPI_Send (&locus, 1, MPI_LONG, MASTER, locus + 1, MPI_COMM_WORLD);
	  /* we want to know what locus we worked for
	     - to control the work sent by master
	     - to use in setup_parameter0() [combroyden2.c] */
	  world->who[locidone++] = locus;
	}
      else
	{
	  done = TRUE;
	}
    }
}

void
swap_atl (long from, long to, world_fmt * world)
{
  long r;
  timearchive_fmt *tmp;
  for (r = 0; r < world->options->replicatenum; r++)
    {
      tmp = &world->atl[r][to];
      world->atl[r][to] = world->atl[r][from];
      world->atl[r][from] = *tmp;
    }
}

#endif
