/*------------------------------------------------------
 Maximum likelihood estimation 
 of migration rate  and effectice population size
 using a Metropolis-Hastings Monte Carlo algorithm                            
 -------------------------------------------------------                        
 S E Q U E N C E S   R O U T I N E S 

 used in menu.c

 Peter Beerli 1996, Seattle
 beerli@genetics.washington.edu
 $Id: sequence.c,v 1.1.1.1 1998/06/06 06:09:51 beerli Exp $
-------------------------------------------------------*/

#include "migration.h"

#ifdef DMALLOC_FUNC_CHECK
#include "dmalloc.h"
#endif
/* prototypes ------------------------------------------- */
void make_sequences (world_fmt * world, long locus);
void init_sequences (world_fmt * world, long locus);
void init_sequences2 (seqmodel_fmt * seq, option_fmt * options, long locus);
void initratio (option_fmt * options);
void initfreqs (double *freqa, double *freqc, double *freqg, double *freqt);
void initcatn (long *categs);
void initcategs (long categs, double *rate);
void initprobcat (long categs, double *probsum, double *probcat);
void init_tbl (world_fmt * world, long locus);
void print_weights (world_fmt * world, long locus);
void print_tbl (world_fmt * world, long locus);
double treelike_seq (world_fmt * world, long locus);
/*private functions */
void getbasefreqs (option_fmt * options, seqmodel_fmt * seq, long locus);
void empiricalfreqs (world_fmt * world, seqmodel_fmt * seq, long locus);
void makeweights (world_fmt * world, long locus);
void makevalues_seq (world_fmt * world, long locus);
void sitecombine2 (world_fmt * world, long sites, long locus);
void sitesort2 (world_fmt * world, long sites, long locus);
void sitescrunch2 (world_fmt * world, long sites,
		   long i, long j, long locus);
void inputoptions (world_fmt * world, long locus);
void inputweights (world_fmt * world, long chars);
void inputcategs (long a, long b, world_fmt * world);
void initlambda (option_fmt * options);
void printweights (world_fmt * world, short inc, long chars, short *weight, char *letters);
void print_seqfreqs (world_fmt * world);

const short C = 1;
const short G = 2;
const short T = 3;


/* allocation things */
void
init_sequences (world_fmt * world, long locus)
{
  long sites = world->data->seq->sites[locus];
  if (locus == 0)
    {
      world->data->seq->alias = (long *) calloc (1, sizeof (long) * sites);
      world->data->seq->ally = (long *) calloc (1, sizeof (long) * sites);
      world->data->seq->aliasweight = (long *) calloc (1, sizeof (long) * sites);
      world->data->seq->location = (long *) calloc (1, sizeof (long) * sites);
      world->data->seq->category = (long *) calloc (1, sizeof (long) * sites);
      world->data->seq->weight = (short *) calloc (1, sizeof (short) * sites);
    }
  else
    {
      world->data->seq->alias = (long *) realloc (world->data->seq->alias, sizeof (long) * sites);
      world->data->seq->ally = (long *) realloc (world->data->seq->ally, sizeof (long) * sites);
      world->data->seq->aliasweight = (long *) realloc (world->data->seq->aliasweight, sizeof (long) * sites);
      world->data->seq->location = (long *) realloc (world->data->seq->location, sizeof (long) * sites);
      world->data->seq->category = (long *) realloc (world->data->seq->category, sizeof (long) * sites);
      world->data->seq->weight = (short *) realloc (world->data->seq->weight, sizeof (short) * sites);
    }
  inputoptions (world, locus);
  if (!world->options->freqsfrom)
    getbasefreqs (world->options, world->data->seq, locus);
  makeweights (world, locus);

}



/* menu material ----------------------------------------- */
void
initratio (option_fmt * options)
{
  long z = 0;
  char *tmp;
  char input[LINESIZE];
  printf ("Transition/transversion ratio?\nEnter a value for each locus, spaced by blanks or commas\n");
  fgets (input, LINESIZE, stdin);
  tmp = strtok (input, " ,\n");
  while (tmp != NULL)
    {
      options->ttratio[z++] = atof (tmp);
      tmp = strtok (NULL, " ,;\n");
      options->ttratio = (double *) realloc (options->ttratio, sizeof (double) * (z + 1));
      options->ttratio[z] = 0.0;
    }
}

void
initfreqs (double *freqa, double *freqc, double *freqg, double *freqt)
{
  char input[LINESIZE];
  int scanned;
  printf ("Base frequencies for A, C, G, T/U (use blanks to separate)?\n");
  for (;;)
    {
      fgets (input, LINESIZE, stdin);
      scanned = sscanf (input, "%lf%lf%lf%lf%*[^\n]", freqa, freqc, freqg, freqt);
      if (scanned == 4)
	break;
      else
	printf ("Please enter exactly 4 values.\n");
    };
}


void
initcatn (long *categs)
{				/* initialize category number */

  do
    {
      printf ("Number of categories (1-%d)?\n", MAXCATEGS);
      scanf ("%ld%*[^\n]", categs);
      getchar ();
    }
  while (*categs > MAXCATEGS || *categs < 1);
}


void
initcategs (long categs, double *rate)
{				/* initialize category rates */
  long i;
  char input[LINESIZE];
  char rest[LINESIZE];
  int scanned;
  boolean done;

  for (;;)
    {
      printf ("Rate for each category? (use a space to separate)\n");
      fgets (input, LINESIZE, stdin);
      done = TRUE;
      for (i = 0; i < categs; i++)
	{
	  scanned = sscanf (input, "%lf %[^\n]", &rate[i], rest);
	  if ((scanned < 2 && i < (categs - 1)) ||
	      (scanned < 1 && i == (categs - 1)))
	    {
	      printf ("Please enter exactly %ld values.\n", categs);
	      done = FALSE;
	      break;
	    }
	  strcpy (input, rest);
	}
      if (done)
	break;
    }
}


void
initprobcat (long categs, double *probsum, double *probcat)
{
  long i;
  boolean done;
  char input[LINESIZE];
  char rest[LINESIZE];
  int scanned;

  do
    {
      printf ("Probability for each category?");
      printf (" (use a space to separate)\n");
      fgets (input, LINESIZE, stdin);
      done = TRUE;
      for (i = 0; i < categs; i++)
	{
	  scanned = sscanf (input, "%lf %[^\n]", &probcat[i], rest);
	  if ((scanned < 2 && i < (categs - 1)) ||
	      (scanned < 1 && i == (categs - 1)))
	    {
	      done = FALSE;
	      printf ("Please enter exactly %ld values.\n", categs);
	      break;
	    }
	  strcpy (input, rest);
	}
      if (!done)
	continue;
      *probsum = 0.0;
      for (i = 0; i < categs; i++)
	*probsum += probcat[i];
      if (fabs (1.0 - (*probsum)) > 0.001)
	{
	  done = FALSE;
	  printf ("Probabilities must add up to");
	  printf (" 1.0, plus or minus 0.001.\n");
	}
    }
  while (!done);
}


/*data read material ===================================== */

void
make_sequences (world_fmt * world, long locus)
{
  makevalues_seq (world, locus);
  if (world->options->freqsfrom)
    {
      empiricalfreqs (world, world->data->seq, locus);
      getbasefreqs (world->options, world->data->seq, locus);
    }
}

/* private functions================================== */
void
getbasefreqs (option_fmt * options, seqmodel_fmt * seq, long locus)
{
  long l;
  double aa, bb;
  if(locus==0)
    seq->ttratio = options->ttratio[0];
  else
    {
      for (l=1;l<=locus;l++)
	{
	  if(options->ttratio[l]==0.0)
	    seq->ttratio = options->ttratio[l-1];
	    break;
	}
      if(l>locus)
	seq->ttratio = options->ttratio[locus];
    }
  seq->freqa = options->freqa;
  seq->freqc = options->freqc;
  seq->freqg = options->freqg;
  seq->freqt = options->freqt;
  seq->freqr = seq->freqa + seq->freqg;
  seq->freqy = seq->freqc + seq->freqt;
  seq->freqar = seq->freqa / seq->freqr;
  seq->freqcy = seq->freqc / seq->freqy;
  seq->freqgr = seq->freqg / seq->freqr;
  seq->freqty = seq->freqt / seq->freqy;
  aa = seq->ttratio * (seq->freqr) * (seq->freqy)
    - seq->freqa * seq->freqg - seq->freqc * seq->freqt;
  bb = seq->freqa * (seq->freqgr) + seq->freqc * (seq->freqty);
  seq->xi = aa / (aa + bb);
  seq->xv = 1.0 - seq->xi;
  if (seq->xi <= 0.0)
    {
      printf ("\n WARNING: This transition/transversion ratio\n");
      printf (" is impossible with these base frequencies!\n");
      seq->xi = 0.0;
      seq->xv = 1.0;
      seq->ttratio = (seq->freqa * seq->freqg
		      + seq->freqc * seq->freqt)
	/ ((seq->freqr) * (seq->freqy));
      printf (" Transition/transversion parameter reset\n");
      printf ("  so transition/transversion ratio is %10.6f\n\n", (seq->ttratio));
    }
  seq->fracchange = (seq->xi) * (2 * seq->freqa * (seq->freqgr)
				 + 2 * seq->freqc * (seq->freqty)) +
    (seq->xv) * (1.0 - seq->freqa * seq->freqa
		 - seq->freqc * seq->freqc - seq->freqg
		 * seq->freqg
		 - seq->freqt * seq->freqt);
}

/*===================================================*/

void
makeweights (world_fmt * world, long locus)
{
  /* make up weights vector to avoid duplicate computations */
  long i;

  for (i = 1; i <= world->data->seq->sites[locus]; i++)
    {
      world->data->seq->alias[i - 1] = i;
      world->data->seq->ally[i - 1] = 0;
      world->data->seq->aliasweight[i - 1] = world->data->seq->weight[i - 1];
      world->data->seq->location[i - 1] = 0;
    }
  sitesort2 (world, world->data->seq->sites[locus], locus);
  sitecombine2 (world, world->data->seq->sites[locus], locus);
  sitescrunch2 (world, world->data->seq->sites[locus], 1, 2, locus);
  for (i = 1; i <= world->data->seq->sites[locus]; i++)
    {
      if (world->data->seq->aliasweight[i - 1] > 0)
	world->data->seq->endsite = i;
    }
  for (i = 1; i <= world->data->seq->endsite; i++)
    {
      world->data->seq->location[world->data->seq->alias[i - 1] - 1] = i;
      world->data->seq->ally[world->data->seq->alias[i - 1] - 1] = world->data->seq->alias[i - 1];
    }
  init_sequences2 (world->data->seq, world->options, locus);
}				/* makeweights */

void
init_sequences2 (seqmodel_fmt * seq, option_fmt * options, long locus)
{
  /*long i;
     seq->term = (double **) malloc(seq->endsite * sizeof(double *));
     for (i = 0; i <seq->endsite; i++)
     seq->term[i] = (double *) malloc(options->rcategs * sizeof(double));
     seq->slopeterm = (double **) malloc(seq->endsite * sizeof(double *));
     for (i = 0; i <seq->endsite; i++)
     seq->slopeterm[i] = (double *) malloc(options->rcategs * sizeof(double));
     seq->curveterm = (double **) malloc(seq->endsite * sizeof(double *));
     for (i = 0; i <seq->endsite; i++)
     seq->curveterm[i] = (double *) malloc(options->rcategs * sizeof(double));
     seq->mp = (val *) malloc(seq->sites[locus] * sizeof(val)); */
  if (locus == 0)
    seq->contribution = (contribarr *) malloc (seq->endsite * sizeof (contribarr));
  else
    seq->contribution = (contribarr *) realloc (seq->contribution, seq->endsite * sizeof (contribarr));

}


void
makevalues_seq (world_fmt * world, long locus)
{
  long i, ii, j, k, l, pop;
  long b;
  node **treenode = world->nodep;
  for (k = 0; k < world->data->seq->endsite; k++)
    {
      j = world->data->seq->alias[k] - 1;
      i = -1;
      for (pop = 0; pop < world->numpop; pop++)
	{
	  for (ii = 0; ii < world->data->numind[pop][locus]; ii++)
	    {
	      i++;
	      if (!world->options->usertree)
		strcpy (treenode[i]->nayme, world->data->indnames[pop][ii]);
	      for (l = 0; l < world->options->rcategs; l++)
		{		/*?????????????????was categs need to ask joe?????????????? */
		  for (b = 0; b < 4; b++)
		    treenode[i]->x.s[k][l][b] = 0.0;
		  switch (world->data->yy[pop][ii][locus][0][j])
		    {

		    case 'A':
		      treenode[i]->x.s[k][l][0] = 1.0;
		      break;

		    case 'C':
		      treenode[i]->x.s[k][l][C] = 1.0;
		      break;

		    case 'G':
		      treenode[i]->x.s[k][l][G] = 1.0;
		      break;

		    case 'T':
		      treenode[i]->x.s[k][l][T] = 1.0;
		      break;

		    case 'U':
		      treenode[i]->x.s[k][l][T] = 1.0;
		      break;

		    case 'M':
		      treenode[i]->x.s[k][l][0] = 1.0;
		      treenode[i]->x.s[k][l][C] = 1.0;
		      break;

		    case 'R':
		      treenode[i]->x.s[k][l][0] = 1.0;
		      treenode[i]->x.s[k][l][G] = 1.0;
		      break;

		    case 'W':
		      treenode[i]->x.s[k][l][0] = 1.0;
		      treenode[i]->x.s[k][l][T] = 1.0;
		      break;

		    case 'S':
		      treenode[i]->x.s[k][l][C] = 1.0;
		      treenode[i]->x.s[k][l][G] = 1.0;
		      break;

		    case 'Y':
		      treenode[i]->x.s[k][l][C] = 1.0;
		      treenode[i]->x.s[k][l][T] = 1.0;
		      break;

		    case 'K':
		      treenode[i]->x.s[k][l][G] = 1.0;
		      treenode[i]->x.s[k][l][T] = 1.0;
		      break;

		    case 'B':
		      treenode[i]->x.s[k][l][C] = 1.0;
		      treenode[i]->x.s[k][l][G] = 1.0;
		      treenode[i]->x.s[k][l][T] = 1.0;
		      break;

		    case 'D':
		      treenode[i]->x.s[k][l][0] = 1.0;
		      treenode[i]->x.s[k][l][G] = 1.0;
		      treenode[i]->x.s[k][l][T] = 1.0;
		      break;

		    case 'H':
		      treenode[i]->x.s[k][l][0] = 1.0;
		      treenode[i]->x.s[k][l][C] = 1.0;
		      treenode[i]->x.s[k][l][T] = 1.0;
		      break;

		    case 'V':
		      treenode[i]->x.s[k][l][0] = 1.0;
		      treenode[i]->x.s[k][l][C] = 1.0;
		      treenode[i]->x.s[k][l][G] = 1.0;
		      break;

		    case 'N':
		      for (b = 0; b < 4; b++)
			treenode[i]->x.s[k][l][b] = 1.0;
		      break;

		    case 'X':
		      for (b = 0; b < 4; b++)
			treenode[i]->x.s[k][l][b] = 1.0;
		      break;

		    case '?':
		      for (b = 0; b < 4; b++)
			treenode[i]->x.s[k][l][b] = 1.0;
		      break;

		    case 'O':
		      for (b = 0; b < 4; b++)
			treenode[i]->x.s[k][l][b] = 1.0;
		      break;

		    case '-':
		      for (b = 0; b < 4; b++)
			treenode[i]->x.s[k][l][b] = 1.0;
		      break;
		    }
		}
	    }
	}
    }
}

void
sitesort2 (world_fmt * world, long sites, long locus)
{
  long gap, i, j, jj, jg, k, kk, itemp, pop;
  boolean flip, tied, samewt;
  seqmodel_fmt *seq;
  long *tempsum;
  tempsum = (long *) calloc (1, sizeof (long) * world->numpop);
  tempsum[0] = world->data->numind[0][FLOC];
  for (i = 1; i < world->numpop; i++)
    {
      tempsum[i] = tempsum[i - 1] + world->data->numind[i][FLOC];
    }
  seq = world->data->seq;
  gap = sites / 2;
  while (gap > 0)
    {
      for (i = gap + 1; i <= sites; i++)
	{
	  j = i - gap;
	  flip = TRUE;
	  while (j > 0 && flip)
	    {
	      jj = seq->alias[j - 1];
	      jg = seq->alias[j + gap - 1];
	      samewt = ((seq->weight[jj - 1] != 0) && (seq->weight[jg - 1] != 0))
		|| ((seq->weight[jj - 1] == 0) && (seq->weight[jg - 1] == 0));
	      tied = samewt && (seq->category[jj - 1] == seq->category[jg - 1]);
	      flip = ((!samewt) && (seq->weight[jj - 1] == 0))
		|| (samewt && (seq->category[jj - 1] > seq->category[jg - 1]));
	      k = 0;
	      pop = 0;
	      kk = -1;
	      while (k < world->sumtips && tied)
		{
		  if (k == tempsum[pop])
		    {
		      kk = 0;
		      pop++;
		    }
		  else
		    {
		      kk++;
		    }
		  flip = (world->data->yy[pop][kk][locus][0][jj - 1] >
			  world->data->yy[pop][kk][locus][0][jg - 1]);
		  tied = (tied && world->data->yy[pop][kk][locus][0][jj - 1]
			  == world->data->yy[pop][kk][locus][0][jg - 1]);
		  k++;
		}
	      if (!flip)
		break;
	      itemp = seq->alias[j - 1];
	      seq->alias[j - 1] = seq->alias[j + gap - 1];
	      seq->alias[j + gap - 1] = itemp;
	      itemp = seq->aliasweight[j - 1];
	      seq->aliasweight[j - 1] = seq->aliasweight[j + gap - 1];
	      seq->aliasweight[j + gap - 1] = itemp;
	      j -= gap;
	    }
	}
      gap /= 2;
    }
  free (tempsum);
}				/* sitesort2 */


void
sitecombine2 (world_fmt * world, long sites, long locus)
{
  long i, j, k, kk, pop;
  boolean tied, samewt;
  seqmodel_fmt *seq;
  long *tempsum;
  tempsum = (long *) calloc (1, sizeof (long) * world->numpop);
  tempsum[0] = world->data->numind[0][FLOC];
  for (i = 1; i < world->numpop; i++)
    {
      tempsum[i] = tempsum[i - 1] + world->data->numind[i][FLOC];
    }

  seq = world->data->seq;
  i = 1;
  while (i < sites)
    {
      j = i + 1;
      tied = TRUE;
      while (j <= sites && tied)
	{
	  samewt = ((seq->aliasweight[i - 1] != 0) && (seq->aliasweight[j - 1] != 0))
	    || ((seq->aliasweight[i - 1] == 0) && (seq->aliasweight[j - 1] == 0));
	  tied = samewt
	    && (seq->category[seq->alias[i - 1] - 1] == seq->category[seq->alias[j - 1] - 1]);
	  k = 0;
	  pop = 0;
	  kk = -1;
	  while (k < world->sumtips && tied)
	    {
	      if (k == tempsum[pop])
		{
		  kk = 0;
		  pop++;
		}
	      else
		{
		  kk++;
		}
	      tied = (tied &&
		   world->data->yy[pop][kk][locus][0][seq->alias[i - 1] - 1]
	      == world->data->yy[pop][kk][locus][0][seq->alias[j - 1] - 1]);
	      k++;
	    }
	  if (!tied)
	    break;
	  seq->aliasweight[i - 1] += seq->aliasweight[j - 1];
	  seq->aliasweight[j - 1] = 0;
	  seq->ally[seq->alias[j - 1] - 1] = seq->alias[i - 1];
	  j++;
	}
      i = j;
    }
}				/* sitecombine2 */


void
sitescrunch2 (world_fmt * world, long sites,
	      long i, long j, long locus)
{
  /* move so positively weighted sites come first */
  /* used by dnainvar, dnaml, dnamlk, & restml */
  long itemp;
  boolean done, found;
  seqmodel_fmt *seq;
  seq = world->data->seq;
  done = FALSE;
  while (!done)
    {
      found = FALSE;
      if (seq->aliasweight[i - 1] > 0)
	i++;
      else
	{
	  if (j <= i)
	    j = i + 1;
	  if (j <= sites)
	    {
	      found = FALSE;
	      do
		{
		  found = (seq->aliasweight[j - 1] > 0);
		  j++;
		}
	      while (!(found || j > sites));
	      if (found)
		{
		  j--;
		  itemp = seq->alias[i - 1];
		  seq->alias[i - 1] = seq->alias[j - 1];
		  seq->alias[j - 1] = itemp;
		  itemp = seq->aliasweight[i - 1];
		  seq->aliasweight[i - 1] = seq->aliasweight[j - 1];
		  seq->aliasweight[j - 1] = itemp;
		}
	      else
		done = TRUE;
	    }
	  else
	    done = TRUE;
	}
      done = (done || i >= sites);
    }
}				/* sitescrunch2 */

void
inputoptions (world_fmt * world, long locus)
{
  long i;
  long sites = world->data->seq->sites[locus];
  for (i = 0; i < sites; i++)
    world->data->seq->category[i] = 1;
  for (i = 0; i < sites; i++)
    world->data->seq->weight[i] = 1;
  if (world->options->weights)
    inputweights (world, sites);
  world->data->seq->weightsum = 0;
  for (i = 0; i < sites; i++)
    world->data->seq->weightsum += world->data->seq->weight[i];
  if (world->options->categs > 1)
    {
      inputcategs (0, sites, world);
    }
}				/* inputoptions */

void
inputweights (world_fmt * world, long chars)
{
  /* input the character weights, 0-9 and A-Z for weights 0 - 35 */
  char ch;
  long i;

  for (i = 0; i < chars; i++)
    {
      do
	{
	  ch = getc (world->data->weightfile);
	  if (ch == '\n')
	    ch = ' ';
	}
      while (ch == ' ');
      world->data->seq->weight[i] = 1;
      if (isdigit ((int) ch))
	world->data->seq->weight[i] = ch - '0';
      else if (isalpha ((int) ch))
	{
	  ch = uppercase (ch);
	  world->data->seq->weight[i] = ch - 'A' + 10;
	}
      else
	{
	  printf ("BAD WEIGHT CHARACTER: %c\n", ch);
	  exit (EXIT_FAILURE);
	}
    }
}				/* inputweights */

void
inputcategs (long a, long b, world_fmt * world)
{
  /* input the categories, 1-9 */
  char ch;
  long i;
  option_fmt *options = world->options;

  ch = getc (world->data->catfile);
  while (ch == '#')
    {
      while (ch != '\n')
	ch = getc (world->data->catfile);
      ch = getc (world->data->catfile);
    }
  ungetc (ch, world->data->catfile);
  fscanf (world->data->catfile, "%ld", &options->categs);
  options->rate = (double *) realloc (options->rate, sizeof (double)
				      * options->categs);
  for (i = 0; i < options->categs; i++)
    {
      fscanf (world->data->catfile, "%lf", &options->rate[i]);
    }

  for (i = a; i < b; i++)
    {
      do
	{
	  ch = getc (world->data->catfile);
	  if (ch == '\n')
	    ch = ' ';
	}
      while (ch == ' ');
      if ((ch >= '1') && (ch <= ('0' + world->options->categs)))
	world->data->seq->category[i] = ch - '0';
      else
	{
	  printf ("BAD CATEGORY CHARACTER: %c -- CATEGORIES ARE CURRENTLY 1-%ld\n",
		  ch, world->options->categs);
	  exit (EXIT_FAILURE);
	}
    }
}				/* inputcategs */




void
empiricalfreqs (world_fmt * world, seqmodel_fmt * seq, long locus)
{
  /* Get empirical base frequencies from the data */
  /* used in dnaml & dnamlk */
  long i, j, k;
  double sum, suma, sumc, sumg, sumt, w;

  world->options->freqa = 0.25;
  world->options->freqc = 0.25;
  world->options->freqg = 0.25;
  world->options->freqt = 0.25;
  for (k = 1; k <= 8; k++)
    {
      suma = 0.0;
      sumc = 0.0;
      sumg = 0.0;
      sumt = 0.0;
      for (i = 0; i < world->sumtips; i++)
	{
	  for (j = 0; j < seq->endsite; j++)
	    {
	      w = seq->aliasweight[j];
	      sum = (world->options->freqa) * world->nodep[i]->x.s[j][0][0];
	      sum += (world->options->freqc) * world->nodep[i]->x.s[j][0][C];
	      sum += (world->options->freqg) * world->nodep[i]->x.s[j][0][G];
	      sum += (world->options->freqt) * world->nodep[i]->x.s[j][0][T];
	      suma += w * (world->options->freqa) * world->nodep[i]->x.s[j][0][0] / sum;
	      sumc += w * (world->options->freqc) * world->nodep[i]->x.s[j][0][C] / sum;
	      sumg += w * (world->options->freqg) * world->nodep[i]->x.s[j][0][G] / sum;
	      sumt += w * (world->options->freqt) * world->nodep[i]->x.s[j][0][T] / sum;
	    }
	}
      sum = suma + sumc + sumg + sumt;
      world->options->freqa = suma / sum;
      world->options->freqc = sumc / sum;
      world->options->freqg = sumg / sum;
      world->options->freqt = sumt / sum;
    }
}				/* empiricalfreqs */


void
initlambda (option_fmt * options)
{
  do
    {
      printf ("Mean block length of sites having the same rate (greater than 1)?\n");
      scanf ("%lf%*[^\n]", &options->lambda);
      getchar ();
    }
  while (options->lambda <= 1.0);
  options->lambda = 1.0 / options->lambda;
}



void
init_tbl (world_fmt * world, long locus)
{
  /* Define a lookup table. Precompute values and print them out in tables */
  long i, j;
  double sumrates;
  long categs = world->options->categs;
  long rcategs = world->options->rcategs;
  world->data->seq->tbl = (valrec ***) malloc (rcategs * sizeof (valrec **));
  for (i = 0; i < rcategs; i++)
    {
      world->data->seq->tbl[i] = (valrec **) malloc (categs * sizeof (valrec *));
      for (j = 0; j < categs; j++)
	world->data->seq->tbl[i][j] = (valrec *) malloc (sizeof (valrec));
    }
  for (i = 0; i < rcategs; i++)
    {
      for (j = 0; j < categs; j++)
	{
	  world->data->seq->tbl[i][j]->rat =
	    world->options->rrate[i] * world->options->rate[j];
	  world->data->seq->tbl[i][j]->ratxi =
	    world->data->seq->tbl[i][j]->rat * world->data->seq->xi;
	  world->data->seq->tbl[i][j]->ratxv =
	    world->data->seq->tbl[i][j]->rat * world->data->seq->xv;
	}
    }
  sumrates = 0.0;
  for (i = 0; i < world->data->seq->endsite; i++)
    {
      for (j = 0; j < rcategs; j++)
	sumrates += world->data->seq->aliasweight[i]
	  * world->options->probcat[j]
	  * world->data->seq->tbl[j][world->data->seq->category[world->data->seq->alias[i] - 1] - 1]->rat;
    }
  sumrates /= (double) world->data->seq->sites[locus];
  for (i = 0; i < rcategs; i++)
    for (j = 0; j < categs; j++)
      {
	world->data->seq->tbl[i][j]->rat /= sumrates;
	world->data->seq->tbl[i][j]->ratxi /= sumrates;
	world->data->seq->tbl[i][j]->ratxv /= sumrates;
      }
}				/* inittable */

void
print_weights (world_fmt * world, long locus)
{
  if (world->options->weights)
    {
      if (world->options->printdata)
	{
	  printweights (world, 0, world->data->seq->sites[locus],
			world->data->seq->weight, "Sites");
	}
    }
}

void
print_tbl (world_fmt * world, long locus)
{
  long i;

  option_fmt *opt;
  FILE *outfile;
  outfile = world->outfile;

  opt = world->options;
  if (opt->rcategs > 1)
    {
      fprintf (outfile, "Region type     Rate of change    Probability\n");
      fprintf (outfile, "---------------------------------------------\n");
      for (i = 0; i < opt->rcategs; i++)
	fprintf (outfile, "%9ld%16.3f%17.3f\n", i + 1, opt->rrate[i], opt->probcat[i]);
      putc ('\n', outfile);
      if (opt->autocorr)
	fprintf (outfile,
	"Expected length of a patch of sites having the same rate = %8.3f\n",
		 1. / opt->lambda);
      putc ('\n', outfile);
    }
  if (opt->categs > 1)
    {
      fprintf (outfile, "Site category   Rate of change\n");
      fprintf (outfile, "------------------------------\n");
      for (i = 0; i < opt->categs; i++)
	fprintf (outfile, "%9ld%16.3f\n", i + 1, opt->rate[i]);
    }
  if ((opt->rcategs > 1) || (opt->categs > 1))
    fprintf (outfile, "\n");
}


void
printweights (world_fmt * world, short inc, long chars, short *weight, char *letters)
{
  /* print out the weights of sites */
  long i, j;
  FILE *filename;
  filename = world->outfile;
  fprintf (filename, "\n    %s are weighted as follows:\n", letters);
  for (i = 0; i < chars; i++)
    {
      if (i % 60 == 0)
	{
	  putc ('\n', filename);
	  for (j = 1; j <= world->options->nmlength + 3; j++)
	    putc (' ', filename);
	}
      fprintf (filename, "%hd", weight[i + inc]);
      if ((i + 1) % 10 == 0 && (i + 1) % 60 != 0)
	putc (' ', filename);
    }
  fprintf (filename, "\n\n");
}				/* printweights */



void
print_seqfreqs (world_fmt * world)
{
  option_fmt *options = world->options;
  if (world->locus == 0)
    {
      if (options->freqsfrom)
	fprintf (world->outfile, "Empirical ");
      fprintf (world->outfile, "Base Frequencies\n");
      fprintf (world->outfile, "------------------------------------------------------------\n");
      fprintf (world->outfile, "Locus     Nucleotide                        Transition/\n");
      fprintf (world->outfile, "          ------------------------------  Transversion ratio\n");
      fprintf (world->outfile, "          A       C       G       T(U)\n");
      fprintf (world->outfile, "------------------------------------------------------------\n");
    }
  /*  fprintf(world->outfile,  "1234      0.245  0.333  0.345  0.234 */
  fprintf (world->outfile, "%4li      %6.4f  %6.4f  %6.4f  %6.4f    %10.5f\n",
	   world->locus + 1, options->freqa, options->freqc,
	   options->freqg, options->freqt, world->data->seq->ttratio);
}


double
treelike_seq (world_fmt * world, long locus)
{
  contribarr tterm;
  double sum, sum2, sumc, sumterm, lterm;
  long i, j, k, lai;
  double termtest;
  node *p;
  sitelike x1;
  option_fmt *opt;
  seqmodel_fmt *seq;
  opt = world->options;
  seq = world->data->seq;
  p = crawlback (world->root->next);
  sum = 0.0;

  /*  y = p->v; */
/*   lz = -y; */
  for (i = 0; i < seq->endsite; i++)
    {
      termtest = 0.0;
      k = seq->category[seq->alias[i] - 1] - 1;
      for (j = 0; j < opt->rcategs; j++)
	{
	  memcpy (x1, p->x.s[i][j], sizeof (sitelike));
	  tterm[j] = seq->freqa * x1[0] + seq->freqc * x1[1] +
	    seq->freqg * x1[2] + seq->freqt * x1[3];
	  termtest += tterm[j];
	}
      if (termtest == 0.0)
	{
	  error ("Encountered tree incompatible with data\n");
	}
      sumterm = 0.0;
      for (j = 0; j < opt->rcategs; j++)
	sumterm += opt->probcat[j] * tterm[j];
      lterm = log (sumterm);
      for (j = 0; j < opt->rcategs; j++)
	seq->clai[j] = tterm[j] / sumterm;
      memcpy (seq->contribution[i], seq->clai, sizeof (contribarr));
      sum += seq->aliasweight[i] * lterm;
    }
  for (j = 0; j < opt->rcategs; j++)
    seq->like[j] = 1.0;
  for (i = 0; i < seq->sites[locus]; i++)
    {
      sumc = 0.0;
      for (k = 0; k < opt->rcategs; k++)
	sumc += opt->probcat[k] * seq->like[k];
      sumc *= opt->lambda;
      if ((seq->ally[i] > 0) && (seq->location[seq->ally[i] - 1] > 0))
	{
	  lai = seq->location[seq->ally[i] - 1];
	  memcpy (seq->clai, seq->contribution[lai - 1], sizeof (contribarr));
	  for (j = 0; j < opt->rcategs; j++)
	    seq->nulike[j] = ((1.0 - opt->lambda) * seq->like[j] + sumc) * seq->clai[j];
	}
      else
	{
	  for (j = 0; j < opt->rcategs; j++)
	    seq->nulike[j] = ((1.0 - opt->lambda) * seq->like[j] + sumc);
	}
      memcpy (seq->like, seq->nulike, sizeof (contribarr));
    }
  sum2 = 0.0;
  for (i = 0; i < opt->rcategs; i++)
    sum2 += opt->probcat[i] * seq->like[i];
  sum += log (sum2);
  return sum;
}				/* evaluate */
