/*------------------------------------------------------
 Maximum likelihood estimation 
 of migration rate  and effectice population size
 using a Metropolis-Hastings Monte Carlo algorithm                            
 -------------------------------------------------------                        
 P A R A M E T E R E S T I M A T I O N   R O U T I N E S 

 estimates parameter for each locus
 using a Broyden minimization
 

 Peter Beerli 1997, Seattle
 beerli@genetics.washington.edu
 $Id: broyden.c,v 1.36 2000/07/26 23:03:20 beerli Exp $
-------------------------------------------------------*/
extern double norm (double *d, long size);

#include "migration.h"
#include "world.h"
#include "random.h"
#include "combroyden.h"
#include "joint-chains.h"

#ifdef DMALLOC_FUNC_CHECK
#include <dmalloc.h>
#endif



/* prototypes ----------------------------------------- */
double absmaxvec (double *v, long n);
void create_nr (nr_fmt * nr, world_fmt * world, long G, long profilenum,
		long thislocus, long repkind, long rep);
void reset_hess (double **hess, long n);
void destroy_nr (nr_fmt * nr, world_fmt * world);
double calc_locus_like (nr_fmt * nr, double *param, long locus);
void calc_param (nr_fmt * nr, double *param, double lamda);
void param_all_adjust (nr_fmt * nr, boolean boolgamma);
void gradient (double *d, nr_fmt * nr, long locus);
inline double probG (double *param, double *lparam, tarchive_fmt * tl,
		     nr_fmt * nr);
double logprob_noevent (world_fmt * world, long interval);
double sum_migprob (world_fmt * world, long pop, long interval);
double sum_mig (double *param, long msta, long msto);
void calc_cov (double **dd, double *d, double *param, long n);
boolean is_singular (double **dd, long n);
void print_contribution (nr_fmt * nr, timearchive_fmt **atl, long G);

void calc_dv (double *dv, double **hess, double *gxv, long n);
double calc_line (helper_fmt * helper, double a, double b, double c,
		  double (*psi) (double lamda, helper_fmt * helper));
void calc_hessian (double **hess, long n, double *delta, double *gama);
double psi (double lamda, helper_fmt * helper);
void grad2loggrad (nr_fmt * nr, double *d, double PGC, long nn);
void log_param0 (double *param, double *lparam, long nn);
void create_apg0 (double *apg0, nr_fmt * nr, timearchive_fmt * tyme);
void create_multiapg0 (double *apg0, nr_fmt * nr, long rep, long locus);

void force_sametheta (nr_fmt * nr);
void force_samemigration (nr_fmt * nr);
void calc_symmetric_d (nr_fmt * nr, long nn, long start, long stop);
void force_symmetric_d (long model, nr_fmt * nr, long nn);
void check_symmetric_d (nr_fmt * nr, long nn);
void check_matrix_arbitrary (nr_fmt * nr);
void print_menu_contribution(FILE *file, long contribution[]);

void
calc_dv (double *dv, double **hess, double *gxv, long n)
{
  long i, j;
  memset (dv, 0, sizeof (double) * n);
  for (i = 0; i < n; i++)
    {
      for (j = 0; j < n; j++)
	{
	  dv[i] += hess[i][j] * gxv[j];
	}
    }
}
// line searcher
// finds the maximum in a direction
// this should be replaced with something more efficient.
#define PP 0.61803399
#define QQ 0.38196601
#define MOVE3(a,b,c,d) (a)=(b);(b)=(c);(c)=(d)

double
calc_line (helper_fmt * helper, double a, double b, double c,
	   double (*fpsi) (double lamda, helper_fmt * helper))
{
  /* a < b < c AND psia > psib < psic */
  double d, psib, psic;
  d = c;
  if (fabs (c - b) > fabs (b - a))
    {
      c = b + QQ * (c - b);
    }
  else
    {
      c = b;
      b = b - QQ * (b - a);
    }
  psib = (*fpsi) (b, helper);
  psic = (*fpsi) (c, helper);
  while (fabs (d - a) > EPSILON * (fabs (b) + fabs (c)))
    {
      if (psic < psib)
	{
	  MOVE3 (a, b, c, PP * b + QQ * d);
	  psib = psic;
	  psic = (*fpsi) (c, helper);
	}
      else
	{
	  MOVE3 (d, c, b, PP * c + QQ * a);
	  psic = psib;
	  psib = (*fpsi) (b, helper);
	}
    }
  if (psib < psic)
    {
      return b;
    }
  else
    {
      return c;
    }
}

double
psi (double lamda, helper_fmt * helper)
{
  double like;
  calc_loci_param (helper->nr, helper->nr->oparam, lamda, FALSE);
  like = (*calc_like) (helper->nr, helper->nr->param, helper->nr->world->locus);
  return -like;
}


void
calc_hessian (double **hess, long n, double *delta, double *gama)
{
  double **dd, *temp, t;
  long i, j, k;
  double dtg;
  temp = (double *) calloc (n, sizeof (double));
  dd = (double **) calloc (n, sizeof (double *));
  dd[0] = (double *) calloc (n * n, sizeof (double));
  dtg = delta[0] * gama[0];
  for (i = 1; i < n; i++)
    {
      dd[i] = dd[0] + n * i;
      dtg += delta[i] * gama[i];
    }
  if (dtg != 0.0)
    dtg = 1. / dtg;
  else
    {
      reset_hess (hess, n);
      return;
    }
  for (i = 0; i < n; i++)
    {
      for (j = 0; j < n; j++)
	{
	  temp[i] += gama[j] * hess[j][i];
	}
    }
  t = 0.0;
  for (i = 0; i < n; i++)
    t += temp[i] * gama[i];
  t = (1.0 + t * dtg) * dtg;
  for (i = 0; i < n; i++)
    {
      for (j = 0; j < n; j++)
	{
	  for (k = 0; k < n; k++)
	    {
	      dd[i][j] += delta[i] * gama[k] * hess[k][j];
	    }
	}
    }
  for (i = 0; i < n; i++)
    {
      temp[i] = 0.0;
      for (j = 0; j < n; j++)
	{
	  temp[i] += hess[i][j] * gama[j];
	}
    }
  for (i = 0; i < n; i++)
    {
      for (j = 0; j < n; j++)
	{
	  dd[i][j] += temp[i] * delta[j];
	  dd[i][j] *= dtg;
	  hess[i][j] += delta[i] * delta[j] * t - dd[i][j];
	}
    }
  free (temp);
  free (dd[0]);
  free (dd);
}

double
absmaxvec (double *v, long n)
{
  long i;
  double max = fabs (v[0]);
  for (i = 1; i < n; i++)
    {
      if (max < fabs (v[i]))
	max = fabs (v[i]);
    }
  return max;
}

void
create_nr (nr_fmt * nr, world_fmt * world, long G, long profilenum,
	   long thislocus, long repkind, long rep)
{
  long i, j;
  nr->numpop = world->numpop;
  nr->numpop2 = world->numpop2;
  nr->skiploci = world->data->skiploci;
  nr->world = world;
  nr->mstart = world->mstart;
  nr->mend = world->mend;
  nr->repkind = repkind;
  nr->repstart =(repkind == SINGLECHAIN) ? rep : 0; 
  nr->repstop = (repkind == SINGLECHAIN) ? rep+1 : world->repstop;
  nr->profilenum = profilenum;
  if (world->options->gamma && world->locus >= world->loci)
    {
      nr->gammaI = GAMMA_INTERVALS;
      nr->gamma = (double *) malloc (nr->gammaI * sizeof (double));
      nr->partsize = world->numpop2 + 1;
    }
  else
    {
      if (!world->options->gamma && world->locus >= world->loci)
	{
	  nr->gammaI = 1;
	  nr->gamma = (double *) malloc (nr->gammaI * sizeof (double));
	  nr->partsize = world->numpop2;
	}
      else
	{
	  nr->gammaI = 3;	/* debug: this traps any looping */
	  nr->gamma = NULL;
	  nr->partsize = world->numpop2;
	}
    }
  //  nr->numg = G;
  nr->atl = world->atl;
  nr->parts = (double *) calloc (1, nr->partsize * sizeof (double));
  nr->d = (double *) calloc (1, nr->partsize * sizeof (double));
  nr->od = (double *) calloc (1, nr->partsize * sizeof (double));
  nr->dv = (double *) calloc (1, nr->partsize * sizeof (double));
  nr->delta = (double *) malloc (nr->partsize * sizeof (double));
  nr->gdelta = (double *) calloc (1, nr->partsize * sizeof (double));
  nr->param = (double *) malloc ((nr->partsize+1) * sizeof (double));
  memcpy (nr->param, world->param0, nr->partsize * sizeof (double));
  nr->oparam = (double *) malloc ((nr->partsize+1) * sizeof (double));
  memcpy (nr->oparam, world->param0, nr->partsize * sizeof (double));
  nr->datalike = (double *) malloc (G * sizeof (double));
  nr->locilikes = (double *) malloc ((world->loci + 1) * sizeof (double));
  nr->apg0 = (double ***) calloc (1, (world->repstop) * sizeof (double **));
  for(j=0;j<world->repstop;j++)
    {
      nr->apg0[j] = (double **) calloc (world->loci + 1,  sizeof (double *));
      //      nr->apg0[j][0] = (double *) calloc (1, G * (world->loci + 1) * 
      //				  sizeof (double));
      for (i = 0; i < world->loci + 1; i++)
	{
	  nr->apg0[j][i]=(double *) calloc (G, sizeof (double));
	  //  nr->apg0[j][i] = nr->apg0[j][0] + i * G;
	}
    }
    nr->apg = (double **) calloc (1, (world->repstop+1) * sizeof (double*));
    nr->apg[0] = (double *) calloc (1, G * (world->repstop+1) * sizeof (double));
     for (i = 1; i < world->repstop+1; i++)
  	{
  	  nr->apg[i] = nr->apg[0] + i * G;
  	}
}

void
reset_hess (double **hess, long n)
{
  long pop;
  memset (hess[0], 0, sizeof (double) * n * n);
  for (pop = 1; pop < n; pop++)
    {
      hess[pop] = hess[0] + pop * n;
      hess[pop][pop] = 1.0;
    }
  hess[0][0] = 1.0;
}

void
destroy_nr (nr_fmt * nr, world_fmt * world)
{
  long j,i;
  free (nr->parts);
  free (nr->d);
  free (nr->od);
  free (nr->dv);
  free (nr->delta);
  free (nr->gdelta);
  free (nr->param);
  free (nr->oparam);
  free (nr->locilikes);
  free (nr->datalike);
  for(j=world->repstop-1;j>=0;j--)
    {
      for(i=world->loci;i>=0;i--)
	free (nr->apg0[j][i]);
      free (nr->apg0[j]);
    }
  free(nr->apg0);
  //free (nr->apg[0]);
  free (nr->apg);

  if (nr->gamma != NULL)
    free (nr->gamma);
  free (nr);
}

double
calc_locus_like (nr_fmt * nr, double *param,
		 long locus)
{
  //static 
  double *lparam;

  //static boolean done = FALSE;
  long g, r, copies;
  double gsum = 0;
  double ***apg0;
  //if (!done)
  //  {
      lparam = (double *) calloc (nr->numpop2, sizeof (double));
      //    done = TRUE;
      // }
  for (g = 0; g < nr->numpop2; g++)
    lparam[g] = log (param[g]);
  nr->PGC = 0.0;
  nr->apg_max = -DBL_MAX;
  apg0 = nr->apg0;
  for(r=nr->repstart; r < nr->repstop; r++)
    {
      //      printf("calclike: rep:%li loc:%li %f\n",
      //	     r,locus, 
      //     nr->atl[r][locus].tl[0].km[0]);
      gsum = 0;
      for (g = 0; g < nr->atl[r][locus].T; g++)
	{
	  //gsum += nr->atl[r][locus].tl[g].copies;
	  nr->apg[r][g] = probG (param, lparam, &(nr->atl[r][locus].tl[g]), nr) 
	    - apg0[r][locus][g];
	  if (nr->apg[r][g] > nr->apg_max)
	    nr->apg_max = nr->apg[r][g];
	}
      
      //gsum = log (gsum-1);
      for (g = 0; g < nr->atl[r][locus].T; g++)
	{
	  copies = (g>0) ? nr->atl[r][locus].tl[g].copies : nr->atl[r][locus].tl[g].copies -1;
	  gsum += copies;
	  nr->apg[r][g] -= nr->apg_max;
	  nr->PGC += copies * exp (nr->apg[r][g]);
	}
    }
  nr->llike = nr->apg_max + log (nr->PGC) -  log(gsum) ;
  //   - log(nr->repstop - nr->repstart);
  free (lparam); 
  return nr->llike;
}

void
calc_param (nr_fmt * nr, double *param, double lamda)
{
  long i;
  double elem = nr->partsize;
  for (i = 0; i < elem; i++)
    {
      nr->param[i] =
	param[i] * exp ((MAX (-100, MIN (-lamda * nr->dv[i], 100))));
    }
  //param_all_adjust (nr, FALSE);
}

void
force_sametheta (nr_fmt * nr)
{
  long i;
  double summ = 0;
  for (i = 0; i < nr->numpop; i++)
    {
      summ += nr->param[i];
    }
  summ /= nr->numpop;
  for (i = 0; i < nr->numpop; i++)
    {
      nr->param[i] = summ;
    }
}

void
force_samemigration (nr_fmt * nr)
{
  long i;
  double summ = 0;
  for (i = nr->numpop; i < nr->numpop2; i++)
    {
      summ += nr->param[i];
    }
  summ /= nr->numpop * (nr->numpop - 1);
  for (i = nr->numpop; i < nr->numpop2; i++)
    {
      nr->param[i] = summ;
    }
}

void
param_all_adjust (nr_fmt * nr, boolean boolgamma)
{
/*  const double minima[3] =
    { SMALLEST_THETA, SMALLEST_MIGRATION, 1. / BIGGEST_GAMMA };
  const double maxima[3] =
    { BIGGEST_THETA, BIGGEST_MIGRATION, 1. / SMALLEST_GAMMA };

  double ff, f = 1., denom;
  long i = 0, ii, j, z = 0;
  boolean overboard = FALSE;

  for (ii = 0; ii < nr->numpop2 - nr->profilenum; ii++)
    {
      i = (nr->profilenum > 0) ? nr->indeks[z++] : ii;
      if (i < nr->numpop)
	{
	  if (nr->param[i] < minima[0] || nr->param[i] > maxima[0])
	    {
	      overboard = TRUE;
	      break;
	    }
	}
      else
	{
	  if (nr->param[i] < minima[1] || nr->param[i] > maxima[1])
	    {
	      overboard = TRUE;
	      break;
	    }
	}
    }
  if (boolgamma)
    {
      if (nr->param[i] < minima[2] || nr->param[i] > maxima[2])
	{
	  overboard = TRUE;
	}
    }
  if (overboard)
    {
      for (i = 0; i < nr->partsize; i++)
	{
	  denom = nr->param[i] - nr->oparam[i];
	  if (fabs (denom) > 10e-10)
	    {
	      j = (i < nr->numpop ? 0 : (i < nr->numpop2 ? 1 : 2));
	      ff = MIN (1., fabs ((minima[j] - nr->oparam[i]) / denom));
	      ff = MIN (ff, fabs ((maxima[j] - nr->oparam[i]) / denom));
	    }
	  else
	    ff = 1.;
	  if (ff < f)
	    f = ff;
	}
      if (f < 1.)
	{
	  for (i = 0; i < nr->partsize; i++)
	    {
	      nr->param[i] =
		nr->oparam[i] + f * (nr->param[i] - nr->oparam[i]);
	      if (nr->param[i] <
		  MIN3 (SMALLEST_THETA, SMALLEST_MIGRATION,
			1. / SMALLEST_GAMMA))
		{
		  if (nr->world->options->custm2[i] != '0')
		    nr->param[i] = (i < nr->numpop) ? SMALLEST_THETA :
		      ((i < nr->numpop2) ? SMALLEST_MIGRATION :
		       1. / SMALLEST_GAMMA);
		}
	    }
	}
        }*/
  switch (nr->world->options->migration_model)
    {
    case MATRIX:
      break;
    case MATRIX_ARBITRARY:
      check_matrix_arbitrary (nr);
      break;
    case MATRIX_SAMETHETA:
      force_sametheta (nr);
      break;
    case ISLAND:
      force_sametheta (nr);
      force_samemigration (nr);
      break;
    case ISLAND_VARTHETA:
      force_samemigration (nr);
      break;
    }
}

void
check_matrix_arbitrary (nr_fmt * nr)
{

  /*static*/ char *custm = NULL;
  /*static*/ long zeron = 0;
  /*static*/ long constn = 0;
  /*static*/ long symn = 0;
  /*static*/ long sym2n = 0;
  /*static*/ long numpop = 0;
  /*static*/ boolean done = FALSE;
  /*static*/ twin_fmt *syms = NULL;
  /*static*/ quad_fmt *sym2s = NULL;
  /*static*/ long *zeros = NULL;
  /*static*/ long *consts = NULL;
  /*static*/ long z = 0, i;
  /*static*/ char *p;
  /*static*/ double mm;
  /*static*/ long locus = 0;
  //  long to, from;
  if (locus != nr->world->locus)
    {
      done = FALSE;
      locus = nr->world->locus;
    }
  //if (!done)			/* on system with working static this will execute once
  //			   on others it may fail :-( */
  //{
  //  done = TRUE;
      numpop = nr->numpop;
      custm = nr->world->options->custm2;
      zeron = nr->world->options->zeron;
      constn = nr->world->options->constn;
      symn = nr->world->options->symn;
      sym2n = nr->world->options->sym2n;
      if (symn)
	syms = nr->world->options->symparam;
      if (sym2n)
	sym2s = nr->world->options->sym2param;
      if (zeron)
	zeros = nr->world->options->zeroparam;
      if (constn)
	consts = nr->world->options->constparam;
      p = custm;
      if (*p == 'm')
	{
	  while (*p == 'm')
	    {
	      p++;
	      z++;
	    }
	}
      // }

  if (z >= numpop)
    force_sametheta (nr);

  for (i = 0; i < zeron; i++)
    {
      nr->param[zeros[i]] = 0.;
    }

  // for (i = 0; i < constn; i++)
  // {
  //   if(consts[i]<nr->world->numpop)
  //	nr->param[consts[i]] = nr->world->options->thetag[consts[i]];
  //   if(consts[i]>=nr->world->numpop)
  //	{
  //	  m2mm(consts[i],nr->world->numpop,&from,&to);
  //	  nr->param[consts[i]] = 
  //	  nr->world->options->mg[consts[i]-nr->world->numpop]/
  //	  nr->world->options->thetag[to];
  //	}
  // }

  for (i = 0; i < symn; i++)
    {
      mm = (nr->param[syms[i][0]] + nr->param[syms[i][1]]) / 2.;
      nr->param[syms[i][0]] = nr->param[syms[i][1]] = mm;
    }

  for (i = 0; i < sym2n; i++)
    {
      mm = (nr->param[sym2s[i][0]] * nr->param[sym2s[i][2]] +
	    nr->param[sym2s[i][1]] * nr->param[sym2s[i][3]]) / 2.;
      nr->param[sym2s[i][0]] = mm / nr->param[sym2s[i][2]];
      nr->param[sym2s[i][1]] = mm / nr->param[sym2s[i][3]];
    }
}


void
gradient (double *d, nr_fmt * nr, long locus)
{
  long z, r, copies;
  long g, i, ii, pop, msta, msto;
  long numpop = nr->numpop;
  long nn = nr->partsize - nr->profilenum;
  double expapg, *thetas, *m;
  double *geo = nr->world->data->geo;
  tarchive_fmt *tl;

  //  long model = nr->world->options->migration_model;
  memset (d, 0, sizeof (double) * nr->numpop2);
  thetas = nr->param;
  m = nr->param + numpop;
  for (r = nr->repstart; r < nr->repstop; r++)
    {
      for (g = 0; g < nr->world->atl[r][locus].T; g++)
	{ 
	  tl = nr->world->atl[r][locus].tl;
	  if (nr->apg[r][g] > -100)
	    {
	      for (pop = 0; pop < numpop; pop++)
		{
		  nr->parts[pop] =
		    -tl[g].p[pop] / thetas[pop] +
		    tl[g].kt[pop] / (thetas[pop] * thetas[pop]);
		  
		  msta = nr->mstart[pop];
		  msto = nr->mend[pop];
		  z = 0;
		  for (i = msta; i < msto; i++)
		    {
		      nr->parts[i] = 
			((tl[g].mindex[i]  / nr->param[i]) - geo[i] * tl[g].km[pop]);
		      //debug (tl[g].l[m2mml2(i,pop,numpop)] / nr->param[i]) - tl[g].km[pop];
		    }
		}
	      copies = (g>0) ? tl[g].copies : tl[g].copies -1;
	      expapg = copies * exp (nr->apg[r][g]);
	      z = 0;
	      for (i = 0; i < nn; i++)
		{
		  ii = (nr->profilenum > 0) ? nr->indeks[z++] : i;
		  d[i] += expapg * nr->parts[ii];
		}
	    }
	}
    }
  //  force_symmetric_d (model, nr, nn);
}

/* calculates the derivatives for the different
   migration models
 */
void
force_symmetric_d (long model, nr_fmt * nr, long nn)
{
  switch (model)
    {
    case MATRIX:
      break;
    case MATRIX_ARBITRARY:
      check_symmetric_d (nr, nn);
      break;
    case MATRIX_SAMETHETA:
      calc_symmetric_d (nr, nn, 0, nr->numpop);
      break;
    case ISLAND:
      calc_symmetric_d (nr, nn, 0, nr->numpop);
      calc_symmetric_d (nr, nn, nr->numpop, nr->numpop2);
      break;
    case ISLAND_VARTHETA:
      calc_symmetric_d (nr, nn, nr->numpop, nr->numpop2);
      break;
    }
}

void
calc_symmetric_d (nr_fmt * nr, long nn, long start, long stop)
{
  long i, ii;
  long z = 0;
  double dt = 0;
  for (i = 0; i < nn; i++)
    {
      ii = (nr->profilenum > 0) ? nr->indeks[z++] : i;
      if (ii >= start && ii < stop)
	dt += nr->d[i];
    }
  dt /= (stop - start);
  z = 0;
  for (i = 0; i < nn; i++)
    {
      ii = (nr->profilenum > 0) ? nr->indeks[z++] : i;
      if (ii >= start && ii < stop)
	nr->d[i] = dt;
    }
}

void
check_symmetric_d (nr_fmt * nr, long nn)
{
  /*static*/ char *custm = NULL;
  /*static*/ long zeron = 0;
  /*static*/ long constn = 0;
  /*static*/ long symn = 0;
  /*static*/ long sym2n = 0;
  /*static*/ long mmn = 0;
  /*static*/ long numpop = 0;
  /*static boolean done = FALSE;*/
  /*static*/ twin_fmt *syms = NULL;
  /*static*/ quad_fmt *sym2s = NULL;
  /*static*/ long *zeros = NULL;
  /*static*/ long *consts = NULL;
  /*static*/ long *mms = NULL;
  /*static*/ long z = 0, i;
  /*static*/ char *p;
  /*static*/ double mm;
  double summ;
  double sign;

  //if (!done)
  //  {
  //    done = TRUE;
      custm = nr->world->options->custm2;
      zeron = nr->world->options->zeron;
      constn = nr->world->options->constn;
      symn = nr->world->options->symn;
      sym2n = nr->world->options->sym2n;
      mmn = nr->world->options->mmn;
      if (symn)
	syms = nr->world->options->symparam;
      if (sym2n)
	sym2s = nr->world->options->sym2param;
      if (zeron)
	zeros = nr->world->options->zeroparam;
      if (constn)
	consts = nr->world->options->constparam;
      if (mmn)
	mms = nr->world->options->mmparam;
      p = custm;
      custm = nr->world->options->custm2;
      numpop = nr->numpop;
      p = custm;
      if (*p == 'm')
	{
	  while (*p == 'm')
	    {
	      p++;
	      z++;
	    }
	}
      // }
/* if thetas = m then use only a mean nr->d for them*/
  if (z >= numpop)
    calc_symmetric_d (nr, nn, 0, nr->numpop);
/* do not zero anything, we are only recording the 
   non-zero nr->d  
  for (i = 0; i < zeron; i++)
    {
	nr->d[zeros[i]] = 0.;
    }
*/
/* if there are symmetric migration rates M go here*/
  for (i = 0; i < symn; i++)
    {
      mm = (nr->d[syms[i][0]] + nr->d[syms[i][1]]) / 2.;
      nr->d[syms[i][0]] = nr->d[syms[i][1]] = mm;
    }
/* if there are symmetric migration rates 4Nm do nothing but
   you need to do something when the parameters are changed, TEST*/
  for (i = 0; i < sym2n; i++)
    {
      mm = (sqrt ((nr->d[sym2s[i][0]] * nr->d[sym2s[i][0]]) +
		  (nr->d[sym2s[i][2]] * nr->d[sym2s[i][2]])) +
	    sqrt ((nr->d[sym2s[i][1]] * nr->d[sym2s[i][1]]) +
		  (nr->d[sym2s[i][3]] * nr->d[sym2s[i][3]]))) / 2.;
      mm *= mm;
      sign = nr->d[sym2s[i][0]] * nr->d[sym2s[i][2]] < 0 ? -1 : 1;
      nr->d[sym2s[i][0]] =
	sqrt (mm - (nr->d[sym2s[i][2]] * nr->d[sym2s[i][2]]));
      sign = nr->d[sym2s[i][1]] * nr->d[sym2s[i][3]] < 0 ? -1 : 1;
      nr->d[sym2s[i][1]] =
	sign * sqrt (mm - (nr->d[sym2s[i][3]] * nr->d[sym2s[i][3]]));
    }


/* if there is a mixture of zeros (0) and m than go here,
 e.g. stepping stone */
  if (mmn > 0)
    {
      summ = 0;
      for (i = nr->numpop; i < nr->numpop + mmn; i++)
	{
	  summ += nr->d[i];
	}
      summ /= mmn;
      for (i = nr->numpop; i < mmn + nr->numpop; i++)
	{
	  nr->d[i] = summ;
	}
    }
}


void
grad2loggrad (nr_fmt * nr, double *d, double PGC, long nn)
{
  long i, ii, z = 0;
  for (i = 0; i < nn; i++)
    {
      ii = (nr->profilenum > 0) ? nr->indeks[z++] : i;
      d[i] = -nr->param[ii] * d[i] / PGC;
/* to log derivatives */
/* the division by PGC
   (the underflow protected uncorrected likelihood)
   is made here, because
   in multiple loci where I have to sum over loci
   before "taking logs"
   I use the derivatives of L with PGC=1 instead of Log(L) 
   where I use PGC = nr->PGC, the minus is for minimizing
   the function -L
   instead of maximizing L
*/
    }
}

inline double
probG (double *param, double *lparam,
       tarchive_fmt * tl, nr_fmt * nr)
{
  const long numpop = nr->numpop;
  long i, j, msta, msto;
  double result = 0, sm;
  double *geo = nr->world->data->geo;
  double *lgeo = nr->world->data->lgeo;

  for (i = 0; i < numpop; i++)
    {
      if (lparam[i] <= -DBL_MAX)
	return DBL_MAX;
      result += tl->p[i] * (LOG2 - lparam[i]);
      msta = nr->mstart[i];
      msto = nr->mend[i];
      sm = 0.0;
      for (j = msta; j < msto; j++)
	{
	  if (param[j] > 0.0)
          {
	    result += tl->mindex[j] * (lgeo[j]+ lparam[j]);
	  sm += geo[j] * param[j];
          }
	}
      result += -tl->km[i] * sm - tl->kt[i] / param[i];
//      result += -tl->km[i] * summig[i] - tl->kt[i] / param[i];
    }
  return result;
}


boolean is_singular (double **dd, long n)
{
  long i, j;
  double temp;
  boolean singular = FALSE;
  for (i = 0; i < n; i++)
    {
      temp = 0.0;
      for (j = 0; j < n; j++)
	{
	  temp += dd[i][j];
	}
      if (temp == 0.0)
	{
	  singular = TRUE;
	  break;
	}
    }
  for (i = 0; i < n; i++)
    {
      temp = 0.0;
      for (j = 0; j < n; j++)
	{
	  temp += dd[i][j];
	}
      if (temp == 0.0)
	{
	  singular = TRUE;
	  break;
	}
    }
  return singular;
}


void
calc_cov (double **dd, double *d, double *param, long n)
{
  long i, j;
  if (!is_singular (dd, n))
    invert_matrix (dd, n);
  else
    {
      reset_hess (dd, n);
      return;
    }
  for (i = 0; i < n; i++)
    {
      for (j = 0; j < i; j++)
	{
	  dd[i][j] /= (param[i] * param[j]);
	  dd[j][i] = dd[i][j];
	}
      dd[i][i] = (dd[i][i] - param[i] * d[i]) / (param[i] * param[i]);
    }
  if (!is_singular (dd, n))
    invert_matrix (dd, n);
  else
    reset_hess (dd, n);
}


void
print_contribution (nr_fmt * nr, timearchive_fmt **atl, long G)
{
  long g, i, r;
  double temp = 0, temp2 = 0, maxtemp;
  FILE *mixfile = NULL;
  long events = 0;
  long contribution[11];
  tarchive_fmt *tyme;
  for (g = 0; g < 11; g++)
    contribution[g] = 0;
  if (nr->world->options->mixplot)
    {
      mixfile = fopen ("mixfile", "a");
      fprintf (mixfile, "\n\n");
    }
  maxtemp = -DBL_MAX;
  for (r = nr->repstart; r < nr->repstop; r++)
    {
      for (g = 0; g < G; g++)
	{
	  tyme = atl[r][nr->world->locus].tl;
	  temp = nr->apg[r][g] + nr->apg0[r][nr->world->locus][g];
	  temp2 = temp + nr->world->likelihood[g];
	  if (temp2 > maxtemp)
	    maxtemp = temp2;
	  if (nr->world->options->mixplot)
	    {
	      events = 0;
	      for (i = nr->world->numpop; i < nr->world->numpop2; i++)
		events += tyme[g].mindex[i];
	      for (i = 0; i < tyme[g].copies; i++)
		fprintf (mixfile, "%li %f %f ", events, temp,
			 nr->world->likelihood[g]);
	    }
	  temp2 -= maxtemp;
	  if (temp2 > -20)
	    {
	      contribution[9 - (long) (fabs (temp2) / 2)] += (g>0) ? tyme[g].copies : tyme[g].copies -1;
	    }
	  contribution[10] += (g>0) ? tyme[g].copies : tyme[g].copies -1;
	}
    }
  if(nr->world->options->progress)
    {
      print_menu_contribution(stdout,contribution);
      if(nr->world->options->writelog)
	print_menu_contribution(nr->world->options->logfile,contribution);
    }
  if (nr->world->options->mixplot)
    fclose (mixfile);
}

void print_menu_contribution(FILE *file, long contribution[])
{
  long g;
  fprintf (file, "           log(P(g|Param) * P(data|g)\n");
  fprintf (file, "                            -20 to ");
  for (g = -18; g <= 0; g += 2)
    {
      fprintf (file, "%4li ", g);
    }
  fprintf (file, "  All\n");
  fprintf (file, "           Counts                  ");
  for (g = 0; g < 10; g++)
    {
      fprintf (file, "%4li ", contribution[g]);
    }
  fprintf (file, "%5li\n", contribution[10]);
}

/* calculates log(parameters)*/
void
log_param0 (double *param, double *lparam, long nn)
{
  long i;
  for (i = 0; i < nn; i++)
    {
      if (param[i] > 0)
	lparam[i] = log (param[i]);
      else
	lparam[i] = -DBL_MAX;
    }
}


void
create_apg0 (double *apg0, nr_fmt * nr, timearchive_fmt * tyme)
{
  long g;
  /* Prob(G|Param0) */
  for (g = 0; g < tyme->T; g++)
    {
      apg0[g] = probG (tyme->param0, tyme->lparam0, &tyme->tl[g], nr);
    }
}

void
create_multiapg0 (double *apg0, nr_fmt * nr, long rep, long locus)
{
  long g, r;
  //  double lsteps = log((double) nr->world->options->lsteps) ;
  double *tmp;
  double tmpmax;
  //double gsum=0;
  tmp = (double*) calloc(nr->repstop,sizeof(double));
  for (g = 0; g < nr->world->atl[rep][locus].T; g++)
    {
      //gsum += nr->atl[rep][locus].tl[g].copies;
      tmpmax= -DBL_MAX;
      for(r=nr->repstart; r < nr->repstop; r++)
	{
	  tmp[r] =  //lsteps +
	    probG (nr->world->atl[r][locus].param0, nr->world->atl[r][locus].lparam0, 
		   &nr->world->atl[rep][locus].tl[g], nr) - nr->world->chainlikes[locus][r];
	  if(tmp[r]>tmpmax)
	    tmpmax = tmp[r];
	}
      apg0[g] = 0.0;
      for(r=nr->repstart; r < nr->repstop; r++)
	apg0[g] += exp(tmp[r]-tmpmax);
      apg0[g] = log(apg0[g]) + tmpmax ;//
      //-  log(nr->repstop-nr->repstart);//+ log(nr->repstop-nr->repstart);
    }
  free(tmp);
}

double
logprob_noevent (world_fmt * world, long interval)
{
  long pop, k;
  double result = 0.0;
  for (pop = 0; pop < world->numpop; pop++)
    {
      k = world->treetimes[0].tl[interval].lineages[pop];
      result +=
	(k * (k - 1) / world->param0[pop]) + sum_migprob (world, pop, interval);
    }
  return result;
}

double
sum_migprob (world_fmt * world, long pop, long interval)
{
  long i;
  double result = 0.0;
  long *lineages = world->treetimes[0].tl[interval].lineages;
  long msta = world->mstart[pop];
  long msto = world->mend[pop];
  for (i = msta; i < msto; i++)
    {
      result += world->param0[i];
    }
  return result * lineages[pop];
}

double
sum_mig (double *param, long msta, long msto)
{
  long i;
  double result = 0.0;
  for (i = msta; i < msto; i++)
    {
      result += param[i];
    }
  return result;
}
