/*------------------------------------------------------
 Maximum likelihood estimation 
 of migration rate  and effectice population size
 using a Metropolis-Hastings Monte Carlo algorithm                            
 -------------------------------------------------------                        
 D A T A   R O U T I N E S 

 creates data structures,
 read data (Electrophoretic loci, sequences, microsats),
 feeds data into tree (?),
 prints data,
 destroys data.
 
 Peter Beerli 1996, Seattle
 beerli@genetics.washington.edu
 
Copyright 2001 Peter Beerli and Joseph Felsenstein

$Id: data.c,v 1.37 2001/09/07 23:56:13 beerli Exp $

-------------------------------------------------------*/
#include <string.h>

#include "migration.h"

extern long number_genomes (char type);

#ifdef DMALLOC_FUNC_CHECK
#include <dmalloc.h>
#endif
/* prototypes ----------------------------------------- */
void create_data (data_fmt ** data);
void get_data (FILE * infile, data_fmt * data, option_fmt * options);
void print_data (world_fmt * world, option_fmt * options, data_fmt * data);
void print_data_summary (FILE * file, world_fmt * world, option_fmt * options,
			 data_fmt * data);
void free_datapart (data_fmt * data, option_fmt * options, long locus);
/*private functions */
void init_data_structure1 (data_fmt ** data, option_fmt * options);
void read_header (FILE * infile, data_fmt * data, option_fmt * options);
void read_sites (data_fmt * data);
void init_data_structure2 (data_fmt ** data, option_fmt * options, long pop);
void init_data_structure3 (data_fmt * data);
void read_popheader (FILE * infile, data_fmt * data, long pop);
void read_indname (FILE * file, data_fmt * data, long pop, long ind,
		   long nmlength);
void read_popdata (FILE * file, data_fmt * data, long pop,
		   option_fmt * options);
void read_microalleles (FILE * infile, data_fmt * data, long pop, long ind);
void read_alleles (FILE * infile, data_fmt * data, long pop, long ind);
long read_ind_seq (FILE * infile, data_fmt * data, option_fmt * options,
		   long locus, long pop, long ind, long baseread);
void read_distance_fromfile (FILE * dfile, long tips, long nmlength,
			     double **m);
void finish_read_seq (FILE * infile, data_fmt * data, option_fmt * options,
		      long pop, long baseread);
void print_alleledata (world_fmt * world, data_fmt * data,
		       option_fmt * options);
void print_seqdata (world_fmt * world, option_fmt * options, data_fmt * data);

void print_header (FILE * outfile, long pop, world_fmt * world,
		   option_fmt * options, data_fmt * data);
void create_alleles (data_fmt * data);
void addAllele (data_fmt * data, char s[], long locus, long *z);
void set_numind (data_fmt * data);
void print_seq_pop (long locus, long pop, world_fmt * world,
		    option_fmt * options, data_fmt * data);
void print_seq_ind (long locus, long pop, long ind, world_fmt * world,
		    option_fmt * options, data_fmt * data);
void print_locus_head (long locus, world_fmt * world, option_fmt * options,
		       data_fmt * data);
void find_delimiter (char *title, char *dlm);
void read_geofile (data_fmt * data, option_fmt * options, long numpop);
void read_uep_fromfile (FILE * uepfile, long tips, long nmlength, long **uep,
			long *uepsites, long datatype);
void read_uepfile (data_fmt * data, option_fmt * options, long numpop);


/*=====================================================*/
void
create_data (data_fmt ** data)
{
  (*data) = (data_fmt *) calloc (1, sizeof (data_fmt));
}

/*
void
init_data (data_fmt * data)
{

}

void
destroy_data (data_fmt * data)
{
  free (data);
}

*/

void
get_data (FILE * infile, data_fmt * data, option_fmt * options)
{
  long pop;
  data->hasghost = FALSE;
  read_header (infile, data, options);
  init_data_structure1 (&data, options);
  switch (options->datatype)
    {
    case 's':
    case 'f':
      read_sites (data);
      break;
    case 'n':
      read_sites (data);
      data->seq->addon = 4;
      break;
    case 'u':
      read_sites (data);
      data->seq->addon = 4;
      break;
    default:
      data->seq->fracchange = 1.0;
      break;
    }
  if (options->progress)
    fprintf (stdout, "\n\n");
  if (options->writelog)
    fprintf (options->logfile, "\n\n");
  for (pop = 0; pop < data->numpop; pop++)
    {
      read_popheader (data->infile, data, pop);
      if (options->progress)
	fprintf (stdout, "Reading %s ...\n", data->popnames[pop]);
      if (options->writelog)
	fprintf (options->logfile, "Reading %s ...\n", data->popnames[pop]);
      init_data_structure2 (&data, options, pop);
      read_popdata (data->infile, data, pop, options);
    }
  read_geofile (data, options, data->numpop);
#ifdef UEP
  read_uepfile (data, options, data->numpop);
#endif
  if (options->progress)
    fprintf (stdout, "\n\n");
  init_data_structure3 (data);
  /* replace this, if loci can have different number of ind */
  set_numind (data);
  switch (options->datatype)
    {
    case 'a':
      create_alleles (data);
      break;
    case 'b':
    case 'm':
      create_alleles (data);
      for (pop = 0; pop < data->loci; pop++)
	data->maxalleles[pop] = options->micro_stepnum;
      break;
    }
}

/* private functions ========================================== */

void
init_data_structure1 (data_fmt ** data, option_fmt * options)
{
  long pop;
  if ((*data)->yy == NULL)
    {
      (*data)->yy =
	(char *****) malloc (sizeof (char ****) * (*data)->numpop);
      (*data)->seq = (seqmodel_fmt *) calloc (1, sizeof (seqmodel_fmt));
      (*data)->popnames =
	(char **) malloc (sizeof (char *) * (*data)->numpop);
      (*data)->indnames =
	(char ***) malloc (sizeof (char **) * (*data)->numpop);
      (*data)->numind = (long **) malloc (sizeof (long *) * (*data)->numpop);
      for (pop = 0; pop < (*data)->numpop; pop++)
	{
	  (*data)->popnames[pop] =
	    (char *) calloc (1, sizeof (char) * LINESIZE);
	  (*data)->numind[pop] =
	    (long *) malloc (sizeof (long) * (*data)->loci);
	}
      (*data)->seq->sites =
	(long *) calloc (1, sizeof (long) * (*data)->loci);
    }
  else
    {
      error ("Problem with initialization of data matrix yy\n");
    }
}


void
init_data_structure2 (data_fmt ** data, option_fmt * options, long pop)
{
  long ind, locus;
  long indalloc = (*data)->numind[pop][FLOC];
  if (indalloc == 0)
    indalloc = 2;
  (*data)->yy[pop] = (char ****) malloc (sizeof (char ***) * indalloc);
  (*data)->indnames[pop] = (char **) calloc (1, sizeof (char *) * indalloc);
  for (ind = 0; ind < indalloc; ind++)
    {
      (*data)->indnames[pop][ind] =
	(char *) calloc (1, sizeof (char) * (1 + options->nmlength));
      (*data)->yy[pop][ind] =
	(char ***) malloc (sizeof (char **) * (*data)->loci);
      for (locus = 0; locus < (*data)->loci; locus++)
	{
	  if (!strchr (SEQUENCETYPES, options->datatype))
	    {
	      (*data)->yy[pop][ind][locus] =
		(char **) calloc (1, sizeof (char *) * 2);
	      (*data)->yy[pop][ind][locus][0] =
		(char *) calloc (1, sizeof (char) * options->allelenmlength);
	      (*data)->yy[pop][ind][locus][1] =
		(char *) calloc (1, sizeof (char) * options->allelenmlength);
	    }
	  else
	    {
	      (*data)->yy[pop][ind][locus] =
		(char **) calloc (1, sizeof (char *));
	      (*data)->yy[pop][ind][locus][0] =
		(char *) calloc (1,
				 sizeof (char) * (*data)->seq->sites[locus]);
	    }
	}
    }
}


void
free_datapart (data_fmt * data, option_fmt * options, long locus)
{
  long ind, pop;
  long genomes = number_genomes (options->datatype);
  for (pop = 0; pop < data->numpop; pop++)
    {
      for (ind = 0; ind < data->numind[pop][locus] / genomes; ind++)
	{
	  if (strchr (SEQUENCETYPES, options->datatype))
	    {
	      free (data->yy[pop][ind][locus][0]);
	      free (data->yy[pop][ind][locus][1]);
	      free (data->yy[pop][ind][locus]);
	    }
	  else
	    {
	      free (data->yy[pop][ind][locus][0]);
	      free (data->yy[pop][ind][locus]);
	    }
	}
    }
}


void
init_data_structure3 (data_fmt * data)
{
  long locus, pop, maxi;
  data->allele =
    (allele_fmt **) calloc (1, sizeof (allele_fmt *) * data->loci);
  for (locus = 0; locus < data->loci; locus++)
    {
      maxi = 0;
      for (pop = 0; pop < data->numpop; pop++)
	maxi += data->numind[pop][FLOC] * 2;
      data->allele[locus] =
	(allele_fmt *) calloc (1, sizeof (allele_fmt) * maxi);
    }
  data->maxalleles = (long *) calloc (1, sizeof (long) * data->loci);
  data->skiploci =
    (boolean *) calloc (1, sizeof (boolean) * (data->loci + 1));
}


void
read_header (FILE * infile, data_fmt * data, option_fmt * options)
{
  char input[LINESIZE], *p;
  FGETS (input, sizeof (input), infile);
  if ((p = (char *) strpbrk (input, CRLF)) != NULL)
    *p = '\0';
  switch (lowercase (input[0]))
    {
    case 'a':
      sscanf (input, "%1s%ld%ld%[^\n]", &options->datatype, &(data->numpop),
	      &(data->loci), options->title);
      find_delimiter (options->title, &data->dlm);
      break;
    case 'b':
    case 'm':
      sscanf (input, "%1s%ld%ld%1s%[^\n]", &options->datatype,
	      &(data->numpop), &(data->loci), &data->dlm, options->title);
      break;
    case 's':
    case 'n':
    case 'u':
    case 'f':
      sscanf (input, "%1s%ld%ld%[^\n]", &options->datatype, &(data->numpop),
	      &(data->loci), options->title);
      break;
    case 'g':			/* fall through if a menu change forces to analyze data
				   instead of using the already sampled genealogies */
      if (options->datatype == 'g')
	break;
      else
	memmove (input, input + 1, (strlen (input) - 1) * sizeof (char));
    default:
      switch (options->datatype)
	{
	case 'a':
	  sscanf (input, "%ld%ld%[^\n]", &(data->numpop), &(data->loci),
		  options->title);
	  find_delimiter (options->title, &data->dlm);
	  break;
	case 'b':
	case 'm':
	  sscanf (input, "%ld%ld%1s%[^\n]", &(data->numpop), &(data->loci),
		  &(data->dlm), options->title);
	  break;
	case 's':
	case 'n':
	case 'u':
	case 'f':
	  sscanf (input, "%ld%ld%[^\n]", &(data->numpop), &(data->loci),
		  options->title);
	  break;
	default:
	  usererror ("Datatype is wrong, please use a valid data type!");
	}
    }
  options->datatype = lowercase (options->datatype);
}

void
find_delimiter (char *title, char *dlm)
{
  char *p = title;
  long z = 0;
  while (*p == ' ')
    {
      p++;
      z++;
    }
  if (isalnum (*p))
    memmove (title, p, sizeof (char) * (strlen (title) - z));
  else
    {
      *dlm = *p;
      p++;
      while (*p == ' ')
	{
	  p++;
	  z++;
	}
      memmove (title, p, sizeof (char) * (strlen (title) - z));
    }
}


void
read_sites (data_fmt * data)
{
  long locus;
  char *input, *p, *a;
  input = (char *) calloc (LINESIZE, sizeof (char));
  FGETS (input, LINESIZE, data->infile);
  if ((p = (char *) strpbrk (input, CRLF)) != NULL)
    *p = '\0';
  p = input;
  for (locus = 0; locus < data->loci; locus++)
    {
      while (isspace ((int) *p))
	p++;
      if (locus == 0)
	a = strtok (p, " ");
      else
	a = strtok (NULL, " ");
      data->seq->sites[locus] = atoi (a);
      if (data->seq->sites[locus] == 0)
	{
	  warning ("This does look like sequence data\n");
	  warning ("I just read a number of sites=0\n");
	  warning ("If you use the wrong data type, the program\n");
	  usererror ("will crash anyway, so I stop now\n");
	}
    }
  free (input);

}

void
read_popheader (FILE * infile, data_fmt * data, long pop)
{
  char input[LINESIZE], *p;
  FGETS (input, sizeof (input), infile);
  if ((p = (char *) strpbrk (input, CRLF)) != NULL)
    *p = '\0';
  sscanf (input, "%ld%[^\n]", &(data->numind[pop][FLOC]),
	  data->popnames[pop]);
  translate (data->popnames[pop], ' ', '_');
  translate (data->popnames[pop], '\t', '_');
  if (data->numind[pop][FLOC] == 0)
    data->hasghost = TRUE;
}

void
read_indname (FILE * file, data_fmt * data, long pop, long ind, long nmlength)
{
  long i = 0;
  while (i < nmlength)
    data->indnames[pop][ind][i++] = getc (file);
  data->indnames[pop][ind][nmlength] = '\0';
}

void
read_popdata (FILE * infile, data_fmt * data, long pop, option_fmt * options)
{
  long ind, baseread = 0;
  long locus = 0;
  for (ind = 0; ind < data->numind[pop][FLOC]; ind++)
    {
      read_indname (infile, data, pop, ind, options->nmlength);
      switch (options->datatype)
	{
	case 'a':
	case 'b':
	case 'm':
	  if (data->dlm == '\0')
	    read_alleles (infile, data, pop, ind);
	  else
	    read_microalleles (infile, data, pop, ind);
	  break;
	case 's':
	case 'n':
	case 'u':
	case 'f':
	  baseread = read_ind_seq (infile, data, options, locus, pop, ind, 0);
	  break;
	default:
	  usererror
	    ("Wrong datatype, only the types a, m, s, n\n       (electrophoretic alleles, \n       microsatellite data,\n       sequence data,\n       SNP polymorphism)\n        are allowed.\n");
	  break;
	}
    }
  if (!strchr (SEQUENCETYPES, options->datatype))
    return;
  else
    {
      finish_read_seq (infile, data, options, pop, baseread);
    }
}

void
read_microalleles (FILE * infile, data_fmt * data, long pop, long ind)
{
  char *input, *isave, dlm[2], ddlm[2], *p, *a, *a1, *a2;
  long locus, i;
  input = (char *) calloc (1, sizeof (char) * (LONGLINESIZE + 1));
  isave = input;
  a = (char *) calloc (1, sizeof (char) * LINESIZE);
  a1 = (char *) calloc (1, sizeof (char) * LINESIZE);
  a2 = (char *) calloc (1, sizeof (char) * LINESIZE);
  dlm[0] = data->dlm, dlm[1] = '\0';
  ddlm[0] = ' ', ddlm[1] = '\0';
  FGETS (input, LONGLINESIZE, infile);
  if ((p = (char *) strpbrk (input, CRLF)) != NULL)
    *p = '\0';
  for (locus = 0; locus < data->loci; locus++)
    {
      while (isspace ((int) *input))
	input++;
      if (input[0] == '\0')
	FGETS (input, LONGLINESIZE, infile);
      i = 0;
      while (input[i] != ' ' && input[i] != dlm[0])
	{
	  a1[i] = input[i];
	  i++;
	}
      a1[i] = '\0';
      input += i;
      i = 0;
      if (input[i] == dlm[0])
	{
	  input++;
	  while (input[i] != ' ' && input[i] != '\0')
	    {
	      a2[i] = input[i];
	      i++;
	    }
	  a2[i] = '\0';
	  if (a2[0] == '\0')
	    {
	      strcpy (a2, a1);
	    }
	  input += i;
	}
      else
	{
	  strcpy (a2, a1);
	}
      strcpy (data->yy[pop][ind][locus][0], a1);
      strcpy (data->yy[pop][ind][locus][1], a2);
    }
  free (a);
  free (a1);
  free (a2);
  free (isave);
}

void
read_alleles (FILE * infile, data_fmt * data, long pop, long ind)
{
  char *input, *isave, *p, *a;
  long locus;
  a = (char *) calloc (1, sizeof (char) * LINESIZE);

  input = (char *) calloc (1, sizeof (char) * LONGLINESIZE);
  isave = input;
  FGETS (input, LONGLINESIZE, infile);
  if ((p = (char *) strpbrk (input, CRLF)) != NULL)
    *p = '\0';
  for (locus = 0; locus < data->loci; locus++)
    {
      while (isspace ((int) *input))
	{
	  input++;
	}
      if (sscanf (input, "%s", a) == 1)
	{
	  input += strlen (a);
	}

      data->yy[pop][ind][locus][0][0] = a[0];
      data->yy[pop][ind][locus][0][1] = '\0';
      if (a[1] == '\0')
	{
	  data->yy[pop][ind][locus][1][0] = a[0];
	  data->yy[pop][ind][locus][1][1] = '\0';
	}
      else
	{
	  data->yy[pop][ind][locus][1][0] = a[1];
	  data->yy[pop][ind][locus][1][1] = '\0';
	}
    }
  free (a);
  free (isave);
}

long
read_ind_seq (FILE * infile, data_fmt * data, option_fmt * options,
	      long locus, long pop, long ind, long baseread)
{
  long j;
  char charstate;
  j = (options->interleaved) ? baseread : 0;
  charstate = getc (infile);
  ungetc ((int) charstate, infile);
  while (j < data->seq->sites[locus]
	 && !(options->interleaved && charstate == '\n'))
    {
      charstate = getc (infile);
      if (charstate == '\n')
	{
	  if (options->interleaved)
	    return j;
	  else
	    charstate = ' ';
	}
      if (charstate == '\r' || charstate == ' '
	  || (charstate >= '0' && charstate <= '9') || charstate == '\\')
	continue;
      charstate = uppercase (charstate);
      //      printf("%c",charstate);
      if ((strchr ("ABCDGHKMNRSTUVWXY?O-", (int) charstate)) == NULL)
	{
	  printf
	    ("ERROR: BAD BASE: %c AT POSITION %5ld OF INDIVIDUUM %3li in POPULATION %ld\n",
	     charstate, j, ind, pop);
	  printf
	    ("Last complete sequences was in population %li, individual %li and locus %li:\n%s",
	     pop + 1, ind - 1, locus, data->indnames[pop][ind - 1]);
	  for (j = 0; j < data->seq->sites[locus]; j++)
	    printf ("%c", data->yy[pop][ind - 1][locus][0][j]);
	  exit (EXIT_FAILURE);
	}
      data->yy[pop][ind][locus][0][j++] = charstate;
    }
  charstate = getc (infile);	/* swallow the \n or \r */
#ifndef MAC
  if (charstate == '\r')
    charstate = getc (infile);	/* swallow the \n */
#endif
  return j;
}

void
read_distance_fromfile (FILE * dfile, long tips, long nmlength, double **m)
{
  char input[LINESIZE];
  long i, j;

  if (dfile != NULL)
    {
      // assumes that the dfile is in PHYLIP format
      // and that all n x n cells are filled.
      FGETS (input, LINESIZE, dfile);	//reads header line with
      for (i = 0; i < tips; i++)	// of individuals
	{
	  //reads header line with
	  FGETS (input, nmlength + 1, dfile);
	  for (j = 0; j < tips; j++)
	    {
	      fscanf (dfile, "%lf", &m[i][j]);
	    }
	  // reads the last \n from the
	  // data matrix
	  FGETS (input, LINESIZE, dfile);
	}
    }
}

#ifdef UEP
// uep function 

void
read_uep_fromfile (FILE * uepfile, long tips, long nmlength, long **uep,
		   long *uepsites, long datatype)
{
  char input[LINESIZE];
  long i, j;
  long thistips;
  if (uepfile != NULL)
    {
      // assumes that the uepfile is in PHYLIP format
      // and that all n cells are filled.
      FGETS (input, LINESIZE, uepfile);	//reads header line with
      // of individuals and uep sites
      sscanf (input, "%li%li", &thistips, uepsites);
      if (thistips != tips)
	error ("UEP datafile and infile are inconsistent!");
      if (strchr (SEQUENCETYPES, datatype))
	{
	  for (i = 0; i < tips; i++)
	    {
	      uep[i] = (long *) calloc (*uepsites, sizeof (long));
	      FGETS (input, nmlength + 1, uepfile);	//reads each line
	      for (j = 0; j < *uepsites; ++j)
		fscanf (uepfile, "%li", &uep[i][j]);
	      // reads the last \n from the data matrix
	      FGETS (input, LINESIZE, uepfile);
	    }
	}
      else
	{
	  for (i = 0; i < tips; i++)
	    {
	      uep[i] = (long *) calloc (*uepsites, sizeof (long));
	      uep[i + tips] = (long *) calloc (*uepsites, sizeof (long));
	      FGETS (input, nmlength + 1, uepfile);	//reads each line
	      for (j = 0; j < *uepsites; ++j)
		fscanf (uepfile, "%li", &uep[i][j]);
	      // finished reading first allele, no onto the second
	      for (j = 0; j < *uepsites; ++j)
		fscanf (uepfile, "%li", &uep[i + tips][j]);
	      // reads the last \n from the data matrix
	      FGETS (input, LINESIZE, uepfile);
	    }
	}
    }
}
#endif

void
finish_read_seq (FILE * infile, data_fmt * data, option_fmt * options,
		 long pop, long baseread)
{

  long ind, baseread2 = 0, locus = 0;
  if (options->interleaved)
    {
      while (baseread < data->seq->sites[0])
	{
	  for (ind = 0; ind < data->numind[pop][FLOC]; ind++)
	    {
	      baseread2 =
		read_ind_seq (infile, data, options, locus, pop, ind,
			      baseread);
	    }
	  baseread = baseread2;
	}
    }
  for (locus = 1; locus < data->loci; locus++)
    {
      baseread = 0;
      for (ind = 0; ind < data->numind[pop][FLOC]; ind++)
	{
	  read_indname (infile, data, pop, ind, options->nmlength);
	  baseread = read_ind_seq (infile, data, options, locus, pop, ind, 0);
	}
      if (options->interleaved)
	{
	  while (baseread < data->seq->sites[locus])
	    {
	      for (ind = 0; ind < data->numind[pop][FLOC]; ind++)
		{
		  baseread2 =
		    read_ind_seq (infile, data, options, locus, pop, ind,
				  baseread);
		}
	      baseread = baseread2;
	    }
	}
    }
}

void
print_data_summary (FILE * file, world_fmt * world, option_fmt * options,
		    data_fmt * data)
{
  long total = 0;
  long pop;
  char dstring[LINESIZE];

  fprintf (file, "Summary of data:\n");
  fprintf (file, "---------------\n");
  switch (options->datatype)
    {
    case 'a':
      strcpy (dstring, "Allelic data");
      break;
    case 'f':
    case 's':
      strcpy (dstring, "Sequence data");
      break;
    case 'b':
    case 'm':
      strcpy (dstring, "Microsatellite data");
      break;
    case 'n':
    case 'u':
      strcpy (dstring, "Allelic data");
      break;
    default:
      strcpy (dstring, "Unknown data [ERROR]");
      break;
    }
  fprintf (file, "Datatype:                                %20s\n", dstring);
  fprintf (file, "Number of loci:                          %20li\n\n",
	   data->loci);

  fprintf (file,
	   "Population                                        Individuals\n");
  fprintf (file,
	   "-------------------------------------------------------------\n");
  for (pop = 0; pop < world->numpop; pop++)
    {
      fprintf (file, "%3li %-50.50s %6li\n", pop + 1, data->popnames[pop],
	       data->numind[pop][FLOC]);
      total += data->numind[pop][FLOC];
    }
  fprintf (file,
	   "Total of all populations                               %6li\n\n",
	   total);
  fflush (file);
}

void
print_data (world_fmt * world, option_fmt * options, data_fmt * data)
{
  if (options->printdata)
    {
      switch (options->datatype)
	{
	case 'a':
	case 'b':
	case 'm':
	  print_alleledata (world, data, options);
	  break;
	case 's':
	case 'n':
	case 'u':
	case 'f':
	  print_seqdata (world, options, data);
	  break;
	}
    }
}

void
print_alleledata (world_fmt * world, data_fmt * data, option_fmt * options)
{
  long i, pop, ind, locus, mult80;
  for (pop = 0; pop < data->numpop; pop++)
    {
      print_header (world->outfile, pop, world, options, data);
      for (ind = 0; ind < data->numind[pop][FLOC]; ind++)
	{
	  fprintf (world->outfile, "%-*.*s ", (int) options->nmlength,
		   (int) options->nmlength, data->indnames[pop][ind]);
	  mult80 = options->nmlength;
	  for (locus = 0; locus < data->loci; locus++)
	    {
	      mult80 +=
		1 + strlen (data->yy[pop][ind][locus][0]) +
		strlen (data->yy[pop][ind][locus][1]);
	      if (mult80 >= 80)
		{
		  mult80 = 0;
		  fprintf (world->outfile, "\n");
		  for (i = 0; i < options->nmlength; i++)
		    fputc (' ', world->outfile);
		}
	      fprintf (world->outfile, " %s.%-s",
		       data->yy[pop][ind][locus][0],
		       data->yy[pop][ind][locus][1]);
	    }
	  fprintf (world->outfile, "\n");
	}
      fprintf (world->outfile, "\n");
    }
  fprintf (world->outfile, "\n\n");
  fflush (world->outfile);
}

void
print_seqdata (world_fmt * world, option_fmt * options, data_fmt * data)
{
  long pop, locus;
  for (pop = 0; pop < data->numpop; pop++)
    {
      print_header (world->outfile, pop, world, options, data);
      for (locus = 0; locus < data->loci; locus++)
	{
	  print_locus_head (locus, world, options, data);
	  print_seq_pop (locus, pop, world, options, data);
	}
    }
  fflush (world->outfile);
}

void
print_header (FILE * outfile, long pop, world_fmt * world,
	      option_fmt * options, data_fmt * data)
{
  long i;
  long locus, mult80 = 80;
  char input[LINESIZE];
  fprintf (outfile, "\n%-s", data->popnames[pop]);
  for (i = 0; i < (long) (80 - strlen (data->popnames[pop])); i++)
    fputc ('-', outfile);
  fprintf (outfile, "\n\n");
  if (!strchr (SEQUENCETYPES, options->datatype))
    {
      fprintf (outfile, "%-s  ", (data->loci == 1 ? "locus" : "loci "));
      for (i = 0; i < (options->nmlength - 6); i++)
	fputc (' ', outfile);
      for (locus = 0; locus < data->loci; locus++)
	{
	  if (locus * 4 + options->nmlength > mult80)
	    {
	      mult80 += 80;
	      fprintf (outfile, "\n");
	      for (i = 0; i < options->nmlength; i++)
		fputc (' ', outfile);
	    }
	  fprintf (outfile, "  %2li", locus + 1);
	}
      fprintf (outfile, "\n%-s\n",
	       strncpy (input, "indiv.", options->nmlength));
    }
}



void
create_alleles (data_fmt * data)
{
  long locus, pop, ind;
  long z;
  char a1[DEFAULT_ALLELENMLENGTH];
  char a2[DEFAULT_ALLELENMLENGTH];
  for (locus = 0; locus < data->loci; locus++)
    {
      z = 0;
      for (pop = 0; pop < data->numpop; pop++)
	{
	  for (ind = 0; ind < data->numind[pop][locus]; ind++)
	    {
	      strcpy (a1, data->yy[pop][ind][locus][0]);
	      strcpy (a2, data->yy[pop][ind][locus][1]);
	      if (!strcmp (a1, a2))
		{
		  addAllele (data, a1, locus, &z);
		}
	      else
		{
		  addAllele (data, a1, locus, &z);
		  addAllele (data, a2, locus, &z);
		}
	    }
	}
      data->maxalleles[locus] = z + 1;
      /* + 1: for all the unencountered alleles */
    }
}

void
addAllele (data_fmt * data, char s[], long locus, long *z)
{
  long found = 0;
  while ((data->allele[locus][found++][0] != '\0')
	 && (strcmp (s, data->allele[locus][found - 1])))
    ;
  if (found > (*z))
    {
      strcpy (data->allele[locus][*z], s);
      (*z)++;
    }
}

void
set_numind (data_fmt * data)
{
  long locus, pop;
  for (locus = 1; locus < data->loci; locus++)
    {
      for (pop = 0; pop < data->numpop; pop++)
	{
	  data->numind[pop][locus] = data->numind[pop][FLOC];
	}
    }
}


void
print_seq_pop (long locus, long pop, world_fmt * world, option_fmt * options,
	       data_fmt * data)
{
  long ind;
  for (ind = 0; ind < data->numind[pop][locus]; ind++)
    {
      print_seq_ind (locus, pop, ind, world, options, data);
    }
}

void
print_seq_ind (long locus, long pop, long ind, world_fmt * world,
	       option_fmt * options, data_fmt * data)
{
  long site;
  char blank[2] = " ";
  fprintf (world->outfile, "%-*.*s", (int) options->nmlength,
	   (int) options->nmlength, data->indnames[pop][ind]);
  fprintf (world->outfile, " %c", data->yy[pop][ind][locus][0][0]);
  for (site = 1; site < data->seq->sites[locus]; site++)
    {
      if ((site) % 60 == 0)
	{
	  fprintf (world->outfile, "\n%-*.*s %c", (int) options->nmlength,
		   (int) options->nmlength, blank,
		   data->yy[pop][ind][locus][0][site]);
	}
      else
	{
	  if ((site) % 10 == 0)
	    {
	      fprintf (world->outfile, " ");
	    }
	  fprintf (world->outfile, "%c", data->yy[pop][ind][locus][0][site]);
	}
    }
  fprintf (world->outfile, "\n");
}


void
print_locus_head (long locus, world_fmt * world, option_fmt * options,
		  data_fmt * data)
{
  char *head;
  head = (char *) calloc (1, sizeof (char) * MAX (10, options->nmlength));
  sprintf (head, "Locus %li", locus);
  fprintf (world->outfile, "%-*.*s --------10 --------20 --------30",
	   (int) options->nmlength, (int) options->nmlength, head);
  fprintf (world->outfile, " --------40 --------50 --------60\n");

  free (head);
}

void
read_geofile (data_fmt * data, option_fmt * options, long numpop)
{
  long i, j, pop;
  long numpop2 = numpop * numpop;
  data->geo = (double *) calloc (1, sizeof (double) * numpop2);
  data->lgeo = (double *) calloc (1, sizeof (double) * numpop2);
  if (!options->geo)
    {
      for (i = 0; i < numpop2; i++)
	data->geo[i] = 1.0;
    }
  else
    {
      data->ogeo = (double **) calloc (1, sizeof (double *) * numpop);
      data->ogeo[0] = (double *) calloc (1, sizeof (double) * numpop2);
      for (pop = 1; pop < numpop; pop++)
	data->ogeo[pop] = data->ogeo[0] + numpop * pop;
      read_distance_fromfile (data->geofile, numpop, options->nmlength,
			      data->ogeo);
      for (i = 0; i < numpop; i++)
	for (j = i + 1; j < numpop; j++)
	  {

	    data->geo[mm2m (i, j, numpop)] = data->geo[mm2m (j, i, numpop)] =
	      1./data->ogeo[i][j];
	    data->lgeo[mm2m (i, j, numpop)] =
	      data->lgeo[mm2m (j, i, numpop)] =
	      data->ogeo[i][j] > 0.0 ? log (1./data->ogeo[i][j]) : -DBL_MAX;;
	  }
    }
}

#ifdef UEP
void
read_uepfile (data_fmt * data, option_fmt * options, long numpop)
{
  long i;
  long sumtips = 0;

  if (!options->uep)
    return;

  for (i = 0; i < numpop; ++i)
    sumtips += data->numind[i][FLOC];
  data->uep = (long **) calloc (number_genomes (options->datatype) * sumtips,
				sizeof (long *));
  read_uep_fromfile (data->uepfile, sumtips, options->nmlength, data->uep,
		     &data->uepsites, options->datatype);
}

#endif
