/*------------------------------------------------------
 Maximum likelihood estimation 
 of migration rate  and effectice population size
 using a Metropolis-Hastings Monte Carlo algorithm                            
 -------------------------------------------------------                        
 C O M B I N E L O C I (NEWTON RAPHSON)  R O U T I N E S 

 combines loci by estimating a gamma shape parameter
 

 Peter Beerli 1996, Seattle
 beerli@genetics.washington.edu
 $Id: combine.c,v 1.1.1.1 1998/06/06 06:09:51 beerli Exp $
-------------------------------------------------------*/

#include "migration.h"
#include "parameter.h"
#include "world.h"


#ifdef DMALLOC_FUNC_CHECK
#include "dmalloc.h"
#endif

/* structure for passing derivatives between functions */
typedef struct derive_fmt
  {
    double l0[GAMMA_INTERVALS];
    double l1000[GAMMA_INTERVALS];
    double l0100[GAMMA_INTERVALS];
    double l0010[GAMMA_INTERVALS];
    double l0001[GAMMA_INTERVALS];
    double l2000[GAMMA_INTERVALS];
    double l1100[GAMMA_INTERVALS];
    double l1010[GAMMA_INTERVALS];
    double l1001[GAMMA_INTERVALS];
    double l0200[GAMMA_INTERVALS];
    double l0110[GAMMA_INTERVALS];
    double l0101[GAMMA_INTERVALS];
    double l0020[GAMMA_INTERVALS];
    double l0011[GAMMA_INTERVALS];
    double l0002[GAMMA_INTERVALS];
    double tba[GAMMA_INTERVALS];
  }
derive_fmt;



/* prototypes ------------------------------------------- */
long combine_loci (world_fmt * world);
void create_nr (nr_fmt * nr, long numpop, long G);
void destroy_nr (nr_fmt * nr);
double calc_loci_like (nr_fmt * nr, timearchive_fmt * atl, long loci, boolean gamma);

void calc_gamma (nr_fmt * nr);	/* calculates gamma values a specified probabilities */

/* private functions prototypes-------------------------- */
/*derivative calculations */
long dgamma_nr_driver (timearchive_fmt * tyme, long loci,
		       world_fmt * world, char **plane);
void dgamma_derivatives (nr_fmt * nr,
       timearchive_fmt * tyme, double *param, long loci, boolean boolgamma);
void simple_loci_derivatives (nr_fmt * nr,
			  timearchive_fmt * tyme, double *param, long loci);
void dgamma_parts (derive_fmt * derive,
		   nr_fmt * nr, timearchive_fmt * tyme,
		   double tb1, double tb2, double m1, double m2,
		   double a);
void dgamma_first (double integral, derive_fmt * derive,
		   nr_fmt * nr, timearchive_fmt * tyme,
		   double tb1, double tb2, double r1, double r2,
		   double a);
void dgamma_secon (double integral, derive_fmt * derive,
		   nr_fmt * nr, timearchive_fmt * tyme,
		   double tb1, double tb2, double r1, double r2,
		   double a);
void dgamma_sum (double *intergral, double tba[],
		 double l0[], nr_fmt * nr, timearchive_fmt * atl);
/* likelihood calculations with gamma variation */
double calc_like2 (nr_fmt * nr, double t, double *param0, tarchive_fmt * tyme, long G);

/* Newton-Raphson stuff and other specific helper functions */
void calc_loci_param (nr_fmt * nr, double *param, double lamda);
void create_apg0 (nr_fmt * nr, timearchive_fmt * tyme);
void reset_parts (derive_fmt * derive, nr_fmt * nr);
void init_part (double *x, double value, long n);
void copy_and_clear_ddd (nr_fmt * nr, double *d, double **dd);
void add_back_ddd (nr_fmt * nr, double *d, double **dd);
/* printing sthings */
void print_menu_finalestimate (option_fmt * options, char text[]);

/* public functions---------------------------------------------- */
long
combine_loci (world_fmt * world)
{
  long trials;
  trials = dgamma_nr_driver (world->atl, world->data->loci,
			     world, world->plane[world->loci]);
  if (world->options->plot)
    create_loci_plot (world, world->plane[world->loci], world->atl, world->loci);
  return trials;
}

long
dgamma_nr_driver (timearchive_fmt * tyme, long loci,
		  world_fmt * world, char **plane)
{

  boolean notinverse, tttt, savety_belt = FALSE, do_newton = TRUE;
  char *strllike, kind[20];
  long pop, locus, trials = -1, panic, elem = 5, Gmax = 1;
  double **idd, *nld, lam1 = 0.0, lam2 = 0.0, lamda = 1., nld2;
  double llike = -DBL_MAX, normd = DBL_MAX /*, maxllike */ ;
  nr_fmt *nr;
  double lamdasign = 1;

  print_menu_finalestimate (world->options, "Newton-Raphson method");
  nr = (nr_fmt *) calloc (1, sizeof (nr_fmt) * 1);
  nr->numpop = world->numpop;
  if (world->options->gamma)
    nr->numpop2 = world->numpop * 2 + 1;
  else
    nr->numpop2 = world->numpop * 2;
  for (locus = 1; locus < loci + 1; locus++)
    {
      if (Gmax < tyme[locus].T)
	{
	  Gmax = tyme[locus].T;
	}
    }
  nr->partsize = (nr->numpop2 + nr->numpop2 * nr->numpop2);
  /* initialize local stuff: mallocing */
  strllike = (char *) calloc (1, sizeof (char) * 128);
  nr->gammaI = GAMMA_INTERVALS;
  nr->gamma = (double *) calloc (1, nr->gammaI * sizeof (double));
  nr->parts = (double *) calloc (1, nr->partsize * sizeof (double));
  nr->d = (double *) malloc (nr->numpop2 * sizeof (double));
  nr->param = (double *) malloc ((1 + nr->numpop2) * sizeof (double));
  nr->oparam = (double *) malloc ((1 + nr->numpop2) * sizeof (double));
  nr->datalike = (double *) malloc (Gmax * sizeof (double));
  nr->apg0 = (double *) malloc (Gmax * sizeof (double));
  nr->apg = (double *) malloc (Gmax * sizeof (double));
  nld = (double *) malloc ((1 + NTRIALS) * sizeof (double));
  nr->skiploci = world->data->skiploci;
  nr->dd = world->cov[loci];
  idd = (double **) calloc (1, sizeof (double *) * nr->numpop2);
  idd[0] = (double *) calloc (1, sizeof (double) * nr->numpop2 * nr->numpop2);
  for (pop = 1; pop < nr->numpop2; pop++)
    {
      idd[pop] = idd[0] + pop * nr->numpop2;
    }
  world->param0 = (double *) realloc (world->param0, sizeof (double) * nr->numpop2);
  memset (world->param0, 0, sizeof (double) * nr->numpop2);
  for (locus = 1; locus < loci + 1; locus++)
    {
      for (pop = 0; pop < nr->numpop2; pop++)
	{
	  if (!world->data->skiploci[locus - 1])
	    world->param0[pop] += world->atl[locus].param[pop];
	}
    }
  for (pop = 0; pop < nr->numpop2; pop++)
    {
      world->param0[pop] /= (loci - world->skipped);
    }
  if (!world->options->gamma)
    {
      elem = nr->numpop2;
      nr->gammaI = 1;
      nr->gamma[0] = 1.;
    }
  else
    {
      elem = nr->numpop2;
      world->param0[nr->numpop2 - 1] = 1.0;
    }
  memcpy (nr->param, world->param0, sizeof (double) * nr->numpop2);
  nr->ollike = -DBL_MAX;
  /*  maxllike= -DBL_MAX; */
  while (trials++ < NTRIALS)
    {
#ifdef MAC
      eventloop ();
#endif
      reset_nr (nr);
      if (world->options->gamma)
	calc_gamma (nr);
      calc_loci_like (nr, tyme, loci, world->options->gamma);
      dgamma_derivatives (nr, tyme, world->param0, loci, world->options->gamma);
      normd = norm (nr->d, nr->numpop2);
      if (normd == 0.0)
	break;
      notinverse = is_singular (nr->dd, elem /*number rows in matrix */ );
      if (!notinverse)
	{
	  for (pop = 0; pop < nr->numpop2; pop++)
	    memcpy (idd[pop], nr->dd[pop], sizeof (double) * nr->numpop2);
	  invert_matrix (idd, elem);
	  tttt = nrcheck (nr->dd, idd, nr->d, elem, &lam1, &lam2, do_newton);
	  if (tttt)
	    {
	      strcpy (kind, "NEWTON: ");
	      lamda = 1.;
	      nld[trials] = 1.0;
	    }
	  else
	    notinverse = TRUE;
	}
      if (notinverse)
	{
	  if (lam2 > 0)
	    {
	      lamda = lam1 / lam2;
	      if (lamda >= 1.0)
		nld[trials] = normd;
	      else
		nld[trials] = normd * (lamda);
	    }
	  else
	    {
	      if (trials == 0)
		{
		  lamda = 1.0;
		  nld[0] = normd;
		}
	      else
		{
		  nld2 = normd;
		  if (nld2 == 0.0)
		    {
		      nld[trials] = 0.0;
		      lamda = 0.0;
		      fprintf (stderr, "norm(d) is 0.0 we should stop!");
		    }
		  else
		    {
		      lamda = vector_max (nld, trials) / nld2;
		      if (lamda >= 1.0)
			nld[trials] = normd;
		      else
			nld[trials] = nld2 * (lamda);
		    }
		}
	    }
	  strcpy (kind, "SLOW:   ");
	}
      if (world->options->gamma)
	calc_loci_param (nr, world->param0, lamda);
      else
	calc_param (nr, world->param0, lamda);
      nr->ollike = nr->llike;
      calc_loci_like (nr, tyme, loci, world->options->gamma);
      panic = 0;
      memcpy (nr->oparam, nr->param, nr->numpop2 * sizeof (double));
      if (nr->ollike >= nr->llike)
	{
	  lamda = lamdasign * lamda;
	  while (((nr->llike - nr->ollike) < 0.) && (panic++ < 20))
	    {
	      memcpy (nr->oparam, world->param0, nr->numpop2 * sizeof (double));
	      lamda /= 2.;
	      if (world->options->gamma)
		calc_loci_param (nr, world->param0, lamda);
	      else
		calc_param (nr, world->param0, lamda);
	      calc_loci_like (nr, tyme, loci, world->options->gamma);
	    }
	  if (panic > 20)
	    {
	      memcpy (world->param0, nr->param, nr->numpop2 * sizeof (double));
	      if (!savety_belt)
		{
		  fprintf (stdout, "Halfing limit reached! Disable Newton-steps and use only gradient\n");
		  ;
		  do_newton = FALSE;
		  if (world->options->gamma)
		    calc_loci_param (nr, world->param0, lamda);
		  else
		    calc_param (nr, world->param0, lamda);
		  calc_loci_like (nr, tyme, loci, world->options->gamma);
		  savety_belt = !savety_belt;
		  panic = 0;
		}
	      else
		{
		  fprintf (stdout, "Halfing limit reached! Rescue failed!\n");
		  break;
		}
	    }
	  memcpy (world->param0, nr->param, nr->numpop2 * sizeof (double));
	}
      else
	{
	  savety_belt = FALSE;
	  while (nr->llike - nr->ollike > EPSILON && panic++ < 10)
	    {
	      memcpy (nr->oparam, nr->param, nr->numpop2 * sizeof (double));
	      lamda *= 2.;
	      if (world->options->gamma)
		calc_loci_param (nr, world->param0, lamda);
	      else
		calc_param (nr, world->param0, lamda);
	      llike = nr->ollike;
	      nr->ollike = nr->llike;
	      nr->oPGC = nr->PGC;
	      memcpy (world->param0, nr->param, nr->numpop2 * sizeof (double));
	      calc_loci_like (nr, tyme, loci, world->options->gamma);
	    }
	  if (panic > 0)
	    {
	      memcpy (world->param0, nr->oparam, nr->numpop2 * sizeof (double));
	      memcpy (nr->param, nr->oparam, nr->numpop2 * sizeof (double));
	      nr->llike = nr->ollike;
	      nr->ollike = llike;
	      nr->PGC = nr->oPGC;
	    }
	  else
	    {
	      if (world->options->gamma)
		calc_loci_param (nr, world->param0, lamda);
	      else
		calc_param (nr, world->param0, lamda);
	      memcpy (world->param0, nr->param, nr->numpop2 * sizeof (double));
	    }
	}
      if (!((((normd > LOCI_NORM) && (trials < NTRIALS))) || trials == 0))
	{
	  break;
	}
    }
  world->param_like = nr->llike;
  tyme[loci + 1].param = (double *) realloc (tyme[loci + 1].param,
					     sizeof (double) * elem);
  tyme[loci + 1].param_like = nr->llike;
  tyme[loci + 1].normd = normd;
  tyme[loci + 1].trials = trials;
  memcpy (tyme[loci + 1].param, world->param0, sizeof (double) * elem);
  if (!world->options->simulation)
    calc_cov (nr->dd, nr->d, world->param0, elem);
  free (strllike);
  free_nr (nr);
  free (nld);
  free (idd[0]);
  free (idd);
  if (world->options->progress)
    {
      fprintf (stdout, "           Newton-Raphson cycles needed: %li\n", trials);
      fprintf (stdout, "           (norm(first derivatives: %f)\n", normd);
    }
  return trials;
}

/* calculates the likelihood over all loci for the new parameter set */
double
calc_loci_like (nr_fmt * nr, timearchive_fmt * atl, long loci,
		boolean boolgamma)
{
  static double oldalpha, oldtheta;
  long /*lazy, */ locus, i, maxloc = 0;
  double *temp, *param0;
  double ll, theta, result, logres = 0, tmax;
  temp = (double *) malloc (sizeof (double) * nr->gammaI);
  param0 = (double *) malloc (sizeof (double) * nr->numpop * 2);

  for (locus = 1; locus < loci + 1; locus++)
    {
      if (nr->skiploci[locus - 1])
	continue;
      result = 0.;
      tmax = -DBL_MAX;
      memcpy (param0, atl[locus].param0, sizeof (double) * nr->numpop * 2);
      if (!boolgamma)
	{
	  create_apg0 (nr, &atl[locus]);
	  logres += calc_like (nr, atl[locus].tl, atl[locus].T);
	}
      else
	{
	  if (oldalpha != nr->param[4] || oldtheta != nr->param[0])
	    {
	      calc_gamma (nr);
	      oldalpha = nr->param[4];
	      oldtheta = nr->param[0];
	    }
/*       lazy=0; */
	  for (i = nr->gammaI - 1; i >= 0; i--)
	    {
	      theta = nr->gamma[i];
	      ll = calc_like2 (nr, theta, param0,
			       atl[locus].tl, atl[locus].T);
	      temp[i] = -theta / (nr->param[0] * nr->param[4]) +
		(1. / nr->param[4] - 1.) * log (theta) + ll;

	      if (temp[i] > tmax)
		{
		  maxloc = i;
		  tmax = temp[i];
		}
	      /*      else {
	         lazy++;
	         if ((lazy>10) && (temp[i]< tmax-40.)){
	         lowerbound=i;
	         break;
	         }
	         } */
	    }
	  for (i = maxloc; i < nr->gammaI; i++)
	    {
	      temp[i] -= tmax;
	      if (temp[i] > -40)
		result += exp (temp[i]);
	      else
		{
		  break;
		}
	    }
	  for (i = maxloc - 1; i >= 0; i--)
	    {
	      temp[i] -= tmax;
	      if (temp[i] > -40)
		result += exp (temp[i]);
	      else
		{
		  break;
		}
	    }
	  logres += tmax + log (result);
	  logres = logres - (1. / nr->param[4] * (log (nr->param[0]) +
			  log (nr->param[4])) + lgamma (1. / nr->param[4]));
	}
    }
  free (param0);
  free (temp);
  nr->llike = logres;
  return logres;
}

void
create_nr (nr_fmt * nr, long numpop, long G)
{
  long pop;
  nr->numpop = numpop;
  nr->numpop2 = numpop * 2;
  nr->partsize = (nr->numpop2 + nr->numpop2 * nr->numpop2);
  nr->parts = (double *) calloc (1, nr->partsize * sizeof (double));
  nr->d = (double *) malloc (nr->numpop2 * sizeof (double));
  nr->dd = (double **) calloc (1, sizeof (double *) * nr->numpop2);
  nr->dd[0] = (double *) calloc (1, sizeof (double) * nr->numpop2 * nr->numpop2);
  for (pop = 1; pop < nr->numpop2; pop++)
    {
      nr->dd[pop] = nr->dd[0] + pop * nr->numpop2;
    }
  nr->param = (double *) malloc ((1 + nr->numpop2) * sizeof (double));
  nr->oparam = (double *) malloc ((1 + nr->numpop2) * sizeof (double));
  nr->datalike = (double *) malloc (G * sizeof (double));
  nr->apg0 = (double *) malloc (G * sizeof (double));
  nr->apg = (double *) malloc (G * sizeof (double));
  nr->gammaI = GAMMA_INTERVALS;
  nr->gamma = (double *) malloc (nr->gammaI * sizeof (double));
}

void
destroy_nr (nr_fmt * nr)
{
  free (nr->parts);
  free (nr->d);
  free (nr->param);
  free (nr->oparam);
  free (nr->datalike);
  free (nr->apg0);
  free (nr->apg);
  free (nr->gamma);
}

/* calculation of theta values following a gamma distribution for
   given probability values */
void
calc_gamma (nr_fmt * nr)
{
  long i, panic;
  double low, mid, high, xlow, xhigh, tmp, freq = 0, x = 10, alpha = 1. / nr->param[4],
    elements = (double) nr->gammaI, theta = nr->param[0];
  freq = -(0.5 / elements);	/*so we have midpoints instead of endpoints */
  for (i = 0; i < elements; i++)
    {
      low = 0;
      mid = /*exp(-lgamma(alpha)) */ incompletegamma (10., alpha);
      high = 1.;
      freq += 1. / (elements);
      if (freq < mid)
	{
	  high = mid;
	  xlow = 0;
	  xhigh = 10.;
	  x = 5.;
	}
      else
	{
	  low = mid;
	  xhigh = 1e10;
	  xlow = 10.;
	  x = 1e5;
	}
      panic = 0;
      while (panic++ < 1000 && fabs (low - high) > 0.0001 && x > SMALLEST_THETA)
	{
	  mid = /*exp(-lgamma(alpha)) */ incompletegamma (x, alpha);
	  if (freq < mid)
	    {
	      high = mid;
	      tmp = x;
	      x = (x + xlow) / 2.;
	      xhigh = tmp;
	    }
	  else
	    {
	      low = mid;
	      tmp = x;
	      x = (x + xhigh) / 2.;
	      xlow = tmp;
	    }
	}
      nr->gamma[i] = x * theta / alpha;
      if (x >= 10e10)
	{
	  nr->gammaI = i + 1;
	  return;
	}
    }
}

/* private functions---------------------------------------------- */
/* derivatives */
void
dgamma_derivatives (nr_fmt * nr,
	timearchive_fmt * tyme, double *param, long loci, boolean boolgamma)
{
  long locus;
  derive_fmt derive;
  double integral;
  double tb1 = nr->param[0];
  double tb2 = nr->param[1];
  double r1 = nr->param[2];
  double r2 = nr->param[3];
  double invalpha = 0, alpha = 0;
  if (boolgamma)
    {
      invalpha = nr->param[4];
      alpha = 1. / invalpha;
    }
  for (locus = 1; locus < loci + 1; locus++)
    {
      if (nr->skiploci[locus - 1])
	continue;
      reset_parts (&derive, nr);
      if (!boolgamma)
	{
	  simple_loci_derivatives (nr, tyme, param, locus);
	}
      else
	{
	  dgamma_parts (&derive, nr, &tyme[locus], tb1,
			tb2, r1, r2, alpha);
	  dgamma_sum (&integral, derive.tba, derive.l0, nr, &tyme[locus]);
	  dgamma_first (integral, &derive,
			nr, &tyme[locus], tb1, tb2, r1, r2, invalpha);
	  dgamma_secon (integral, &derive,
			nr, &tyme[locus], tb1, tb2, r1, r2, invalpha);
	}
    }
  derivatives_to_logderivatives (nr);
}

void
simple_loci_derivatives (nr_fmt * nr,
			 timearchive_fmt * tyme, double *param, long locus)
{
  long g;
  static double *d, **dd;
  d = (double *) calloc (1, sizeof (double) * nr->numpop2);
  dd = (double **) calloc (1, sizeof (double *) * nr->numpop2);
  dd[0] = (double *) calloc (1, sizeof (double) *
			     nr->numpop2 * nr->numpop2);
  for (g = 1; g < nr->numpop2; g++)
    {
      dd[g] = dd[0] + g * nr->numpop2;
    }
  copy_and_clear_ddd (nr, d, dd);
  nr->PGC = 0;
  nr->apg_max = -DBL_MAX;
  for (g = 0; g < tyme[locus].T; g++)
    {
      nr->apg0[g] = probG (tyme[locus].param0, &tyme[locus].tl[g], nr->numpop);
    }
  for (g = 0; g < tyme[locus].T; g++)
    {
      nr->apg[g] = probG (nr->param, &tyme[locus].tl[g], nr->numpop) - nr->apg0[g];
      if (nr->apg[g] > nr->apg_max)
	nr->apg_max = nr->apg[g];
    }
  for (g = 0; g < tyme[locus].T; g++)
    {
      nr->apg[g] -= nr->apg_max;
      nr->PGC += tyme[locus].tl[g].copies * exp (nr->apg[g]);
    }
  derivatives (1, nr, tyme[locus].tl, tyme[locus].T,
	       param, 0);
  add_back_ddd (nr, d, dd);
  free (dd[0]);
  free (dd);
  free (d);
}

void
dgamma_parts (derive_fmt * derive,
	      nr_fmt * nr, timearchive_fmt * tyme,
	      double tb1, double tb2, double m1, double m2,
	      double a)
{
  long g, gg, gsum = 0;
  double x1000, x0100, x0010, x0001, x1100, x1010, x1001, x2000, x0200;
  double x0020, x0002, t, /*st, */ k1, k2, kk1, kk2, l1, l2, p1, p2;
  double logt1, logt2, logr1, logr2, el0;
  double l0max = -DBL_MAX, tbamax = -DBL_MAX;
  double stb1 = tb1 * tb1, stb2 = tb2 * tb2;
  double tb10 = tyme->param0[0], tb20 = tyme->param0[1];
  double m10 = tyme->param0[2], m20 = tyme->param0[3];
  double logt, lgsum, logpt1, logpt2, logpr1, logpr2;
  double **l0;
  l0 = (double **) calloc (1, sizeof (double *) * nr->gammaI);
  for (gg = 0; gg < tyme->T; gg++)
    {
      gsum += tyme->tl[gg].copies;
    }
  lgsum = log ((double) gsum);
  logpr1 = log (m1 * tb1 / (m10));
  logpr2 = log (m2 * tb1 / (m20));
  logpt1 = log (tb10);
  logpt2 = log (tb20 * tb1 / (tb2));
  for (g = 0; g < nr->gammaI; g++)
    {
      l0[g] = (double *) calloc (1, sizeof (double) * gsum);
      t = nr->gamma[g];
/*      st = t * t; */
      logt = log (t);
      logr1 = logpr1 - logt;
      logr2 = logpr2 - logt;
      logt1 = logpt1 - logt;
      logt2 = logpt2 - logt;
      for (gg = 0; gg < tyme->T; gg++)
	{
	  kk1 = tyme->tl[gg].kt[0];
	  kk2 = tyme->tl[gg].kt[1];
	  k1 = tyme->tl[gg].km[0];
	  k2 = tyme->tl[gg].km[1];
	  l1 = tyme->tl[gg].l[0];
	  l2 = tyme->tl[gg].l[1];
	  p1 = tyme->tl[gg].p[0];
	  p2 = tyme->tl[gg].p[1];

	  l0[g][gg] = k1 * m10 + k2 * m20 - kk1 / t - (k1 * m1 * tb1) / t
	    - (k2 * m2 * tb1) / t + kk1 / tb10 - (kk2 * tb1) / (t * tb2)
	    + kk2 / tb20 + l1 * logr1 + l2 * logr2
	    + p1 * logt1 + p2 * logt2;
	  if (l0[g][gg] > l0max)
	    l0max = l0[g][gg];
	}
      derive->tba[g] = (a - 1) * logt - (t * a / tb1);
      if (derive->tba[g] > tbamax)
	tbamax = derive->tba[g];
    }
  for (g = 0; g < nr->gammaI; g++)
    {
      for (gg = 0; gg < tyme->T; gg++)
	{
	  l0[g][gg] -= l0max;
	  derive->l0[g] += exp (l0[g][gg]) * tyme->tl[gg].copies;
	}
      if (derive->l0[g] == 0.0)
	derive->l0[g] = -DBL_MAX;
      else
	derive->l0[g] = log (derive->l0[g]) - lgsum;
      derive->tba[g] -= tbamax;
    }

  for (g = 0; g < nr->gammaI; g++)
    {
      t = nr->gamma[g];

      for (gg = 0; gg < tyme->T; gg++)
	{
	  el0 = exp (l0[g][gg] - lgsum + log ((double) tyme->tl[gg].copies));
	  kk1 = tyme->tl[gg].kt[0];
	  kk2 = tyme->tl[gg].kt[1];
	  k1 = tyme->tl[gg].km[0];
	  k2 = tyme->tl[gg].km[1];
	  l1 = tyme->tl[gg].l[0];
	  l2 = tyme->tl[gg].l[1];
	  p2 = tyme->tl[gg].p[1];

	  x1000 = (-kk2 / tb2 - k1 * m1 - k2 * m2) / t + (l1 + l2 + p2) / tb1;
	  x0100 = kk2 * tb1 / (stb2 * t) - p2 / tb2;
	  x0010 = l1 / m1 - k1 * tb1 / t;
	  x0001 = l2 / m2 - k2 * tb1 / t;

	  x2000 = -(l1 + l2 + p2) / stb1;
	  x0200 = p2 / stb2 - 2. * kk2 * tb1 / (t * stb2 * tb2);
	  x0020 = -l1 / (m1 * m1);
	  x0002 = -l2 / (m2 * m2);

	  x1100 = kk2 / (t * stb2);
	  x1010 = -k1 / t;
	  x1001 = -k2 / t;


	  derive->l1000[g] += el0 * x1000;
	  derive->l0100[g] += el0 * x0100;
	  derive->l0010[g] += el0 * x0010;
	  derive->l0001[g] += el0 * x0001;

	  derive->l2000[g] += el0 * (x1000 * x1000 + x2000);
	  derive->l0200[g] += el0 * (x0100 * x0100 + x0200);
	  derive->l0020[g] += el0 * (x0010 * x0010 + x0020);
	  derive->l0002[g] += el0 * (x0001 * x0001 + x0002);

	  derive->l1100[g] += el0 * (x1000 * x0100 + x1100);
	  derive->l1010[g] += el0 * (x1000 * x0010 + x1010);
	  derive->l1001[g] += el0 * (x1000 * x0001 + x1001);
	  derive->l0110[g] += el0 * x0100 * x0010;
	  derive->l0101[g] += el0 * x0100 * x0001;
	  derive->l0011[g] += el0 * x0010 * x0001;
	}
      free (l0[g]);
    }
  free (l0);
}

void
dgamma_sum (double *integral, double tba[],
	    double l0[], nr_fmt * nr, timearchive_fmt * atl)
{
  long g;

  *integral = 0.0;
  for (g = 0; g < nr->gammaI; g++)
    {
      *integral += exp (tba[g] + l0[g]);
    }
  if (*integral == 0.0)
    *integral = DBL_EPSILON;
}

/* first derivatives */
void
dgamma_first (double integral, derive_fmt * derive,
	      nr_fmt * nr, timearchive_fmt * tyme,
	      double tb1, double tb2, double r1, double r2,
	      double ia)
{
  long g;
  double t, logt, pt, el0;

  for (g = 0; g < nr->gammaI; g++)
    {
      el0 = exp (derive->l0[g]);
      pt = exp (derive->tba[g]);
      t = nr->gamma[g];
      logt = log (t);
      /*tb1--------------------------------------------- */
      nr->parts[0] += pt * (t * el0 / (ia * tb1 * tb1) + derive->l1000[g]);
      /*tb2--------------------------------------------- */
      nr->parts[1] += pt * derive->l0100[g];
      /*r1 and r2--------------------------------------------- */
      nr->parts[2] += pt * derive->l0010[g];
      nr->parts[3] += pt * derive->l0001[g];
      /*a-------------------------------------------------- */
      nr->parts[4] += pt * el0 * (t - tb1 * logt) / (ia * ia * tb1);

    }

  for (g = 0; g < 5; g++)
    {
      nr->parts[g] /= integral;
    }
}

void
dgamma_secon (double integral, derive_fmt * derive,
	      nr_fmt * nr, timearchive_fmt * tyme,
	      double tb1, double tb2, double r1, double r2,
	      double ia)
{
  long g;
  double t, st, logt, el0, pt, twoat, stb1 = tb1 * tb1;
  double a = 1. / ia;
  double sa = a * a;
  for (g = 0; g < nr->gammaI; g++)
    {
      el0 = exp (derive->l0[g]);
      pt = exp (derive->tba[g]);
      t = nr->gamma[g];
      st = t * t;
      logt = log (t);
      twoat = 2. * a * t;
      /*tb1,tb1--------------------------------------------- */
      nr->parts[5] += (pt * (sa * st * el0 - twoat * tb1 * el0 + twoat * stb1 * derive->l1000[g] + stb1 * stb1 *
			     derive->l2000[g])) / (stb1 * stb1);
/*    ((el0/stb1*(st*sa - twoat *tb1) + twoat * derive->l1000[g])/stb1 
   + derive->l2000[g]); */
      /*tb2,tb2--------------------------------------------- */
      nr->parts[6] += pt * derive->l0200[g];
      /*r1, r1--------------------------------------------- */
      nr->parts[7] += pt * derive->l0020[g];
      /*r2, r2--------------------------------------------- */
      nr->parts[8] += pt * derive->l0002[g];
      /*a,a--------------------------------------------- */
      nr->parts[9] += pt * el0 * (-t + tb1 * logt) *
	(-t + 2. * ia * tb1 + tb1 * logt) / (ia * ia * ia * ia * stb1);
      /*tb1,tb2--------------------------------------------- */
      nr->parts[10] += pt * (a * t * derive->l0100[g] / stb1 + derive->l1100[g]);
      /*tb1,r1--------------------------------------------- */
      nr->parts[11] += pt * (a * t * derive->l0010[g] / stb1 + derive->l1010[g]);
      /* tb1,r2-------------------------- */
      nr->parts[12] += pt * (a * t * derive->l0001[g] / stb1 + derive->l1001[g]);
      /* tb1,a-------------------------- */
      nr->parts[13] += pt * (st * el0 - ia * t * tb1 * el0 - t * tb1 * el0 * logt +
			     ia * t * stb1 * derive->l1000[g] -
			     ia * stb1 * tb1 * logt * derive->l1000[g]) / (ia * ia * ia * stb1 * tb1);
      /* tb2,r1-------------------------- */
      nr->parts[14] += pt * derive->l0110[g];
      /* tb2,r2-------------------------- */
      nr->parts[15] += pt * derive->l0101[g];
      /* tb2,a-------------------------- */
      nr->parts[16] += pt * (t - tb1 * logt) * derive->l0100[g] / (ia * ia * tb1);
      /* r1,r2-------------------------- */
      nr->parts[17] += pt * derive->l0011[g];
      /* r1,a-------------------------- */
      nr->parts[18] += pt * (t - tb1 * logt) * derive->l0010[g] / (ia * ia * tb1);
      /* r2,a-------------------------- */
      nr->parts[19] += pt * (t - tb1 * logt) * derive->l0001[g] / (ia * ia * tb1);
    }
  nr->dd[0][0] -= 1. / (ia * stb1) - nr->parts[0] * nr->parts[0] + nr->parts[5] / integral;
  nr->dd[1][1] -= -(nr->parts[1] * nr->parts[1]) + nr->parts[6] / integral;

  nr->dd[2][2] -= -(nr->parts[2] * nr->parts[2]) + nr->parts[7] / integral;
  nr->dd[3][3] -= -(nr->parts[3] * nr->parts[3]) + nr->parts[8] / integral;
  nr->dd[4][4] -= (3. - 2. * log (ia * tb1) - 2. * polygamma (0, a) - polygamma (1, a) / ia) / (ia * ia * ia)
    - nr->parts[4] * nr->parts[4] + nr->parts[9] / integral;
  nr->dd[1][0] -= -(nr->parts[0] * nr->parts[1]) + nr->parts[10] / integral;
  nr->dd[2][0] -= -(nr->parts[0] * nr->parts[2]) + nr->parts[11] / integral;
  nr->dd[3][0] -= -(nr->parts[0] * nr->parts[3]) + nr->parts[12] / integral;
  nr->dd[4][0] -= 1. / (ia * ia * tb1) - (nr->parts[0] * nr->parts[4]) + nr->parts[13] / integral;
  nr->dd[2][1] -= -(nr->parts[1] * nr->parts[2]) + nr->parts[14] / integral;
  nr->dd[3][1] -= -(nr->parts[1] * nr->parts[3]) + nr->parts[15] / integral;
  nr->dd[4][1] -= -(nr->parts[1] * nr->parts[4]) + nr->parts[16] / integral;
  nr->dd[3][2] -= -(nr->parts[2] * nr->parts[3]) + nr->parts[17] / integral;
  nr->dd[4][2] -= -(nr->parts[2] * nr->parts[4]) + nr->parts[18] / integral;
  nr->dd[4][3] -= -(nr->parts[3] * nr->parts[4]) + nr->parts[19] / integral;
  nr->d[0] -= (-1. / (ia * tb1) + nr->parts[0]);
  nr->d[1] -= nr->parts[1];
  nr->d[2] -= nr->parts[2];
  nr->d[3] -= nr->parts[3];
  nr->d[4] -= (-1 + log (tb1 * ia) + polygamma (0, a)) / (ia * ia) + nr->parts[4];
}


/* likelihood calculations */
double
calc_like2 (nr_fmt * nr, double t, double *param0, tarchive_fmt * tyme, long G)
{
  long g;
  double k1, k2, kk1, kk2, l1, l2, p1, p2, apg_max = -DBL_MAX, gsum = 0.,
    logt1, logt2, logr1, logr2, m10, m20, /*r1, r2, r10, r20, */ tb1 = nr->param[0],
    tb2 = nr->param[1], m1 = nr->param[2], m2 = nr->param[3], tb10 = param0[0],
    tb20 = param0[1];
  nr->PGC = 0;
/*    r1 = m1 * tb1; */
/*    r2 = m2 * tb2; */
  m10 = param0[2];
  m20 = param0[3];
/*    r10 = m10 * tb10; */
/*    r20 = m20 * tb20; */
  logr1 = log (m1 * tb1 / (m10 * t));
  logr2 = log (m2 * tb1 / (m20 * t));
  logt2 = log (tb20 * tb1 / (tb2 * t));
  logt1 = log (tb10 / t);

  for (g = 0; g < G; g++)
    {
      kk1 = tyme[g].kt[0];
      kk2 = tyme[g].kt[1];
      k1 = tyme[g].km[0];
      k2 = tyme[g].km[1];
      l1 = tyme[g].l[0];
      l2 = tyme[g].l[1];
      p1 = tyme[g].p[0];
      p2 = tyme[g].p[1];
      nr->apg[g] = k1 * m10 + k2 * m20 - kk1 / t - (k1 * m1 * tb1) / t
	- (k2 * m2 * tb1) / t + kk1 / tb10 - (kk2 * tb1) / (t * tb2)
	+ kk2 / tb20 + l1 * logr1 + l2 * logr2 + p1 * logt1 + p2 * logt2;
      if (nr->apg[g] > apg_max)
	apg_max = nr->apg[g];
    }
  for (g = 0; g < G; g++)
    {
      gsum += tyme[g].copies;
      nr->apg[g] -= apg_max;
      nr->PGC += tyme[g].copies * exp (nr->apg[g]);
    }
  return apg_max + log (nr->PGC) - log (gsum);
}


/* Newton-Raphson stuff and other specific helper functions */
void
calc_loci_param (nr_fmt * nr, double *param, double lamda)
{
  long i;
  for (i = 0; i < nr->numpop2; i++)
    nr->param[i] = param[i] * exp ((MAX (-100, MIN (-lamda * nr->d[i], 100))));
  param_all_adjust (nr, param, 1);
}


void
reset_parts (derive_fmt * derive, nr_fmt * nr)
{
  init_part (derive->l0, 0., GAMMA_INTERVALS);
  init_part (derive->l1000, 0., GAMMA_INTERVALS);
  init_part (derive->l0100, 0., GAMMA_INTERVALS);
  init_part (derive->l0010, 0., GAMMA_INTERVALS);
  init_part (derive->l0001, 0., GAMMA_INTERVALS);
  init_part (derive->l2000, 0., GAMMA_INTERVALS);
  init_part (derive->l1100, 0., GAMMA_INTERVALS);
  init_part (derive->l1010, 0., GAMMA_INTERVALS);
  init_part (derive->l1001, 0., GAMMA_INTERVALS);
  init_part (derive->l0200, 0., GAMMA_INTERVALS);
  init_part (derive->l0110, 0., GAMMA_INTERVALS);
  init_part (derive->l0101, 0., GAMMA_INTERVALS);
  init_part (derive->l0020, 0., GAMMA_INTERVALS);
  init_part (derive->l0011, 0., GAMMA_INTERVALS);
  init_part (derive->l0002, 0., GAMMA_INTERVALS);
  init_part (nr->parts, 0., nr->partsize);

}

void
init_part (double *x, double value, long n)
{
  long i;
  if (value == 0)
    {
      memset (x, 0, sizeof (double) * n);
    }
  else
    {
      for (i = 0; i < n; i++)
	{
	  x[i] = value;
	}
    }
}

void
copy_and_clear_ddd (nr_fmt * nr, double *d, double **dd)
{
  long pop;
  memcpy (d, nr->d, sizeof (double) * nr->numpop2);
  memset (nr->d, 0, sizeof (double) * nr->numpop2);
  for (pop = 0; pop < nr->numpop2; pop++)
    {
      memcpy (dd[pop], nr->dd[pop], sizeof (double) * nr->numpop2);
      memset (nr->dd[pop], 0, sizeof (double) * nr->numpop2);
    }
}
void
add_back_ddd (nr_fmt * nr, double *d, double **dd)
{
  long pop, p;
  for (pop = 0; pop < nr->numpop2; pop++)
    {
      nr->d[pop] += d[pop];
      for (p = 0; p < nr->numpop2; p++)
	{
	  nr->dd[pop][p] += dd[pop][p];
	}
    }
}

void
create_apg0 (nr_fmt * nr, timearchive_fmt * tyme)
{
  long g;
  /* Prob(G|Param0) */
  for (g = 0; g < tyme->T; g++)
    {
      nr->apg0[g] = probG (tyme->param0, &tyme->tl[g], nr->numpop);
    }
}

void
print_menu_finalestimate (option_fmt * options, char text[])
{
  char nowstr[LINESIZE];
  if (options->progress)
    {
      get_time (nowstr, "%H:%M:%S");
      fprintf (stdout, "%s   Final parameter estimation over all loci\n           using %s\n", nowstr, text);
    }
}
