/*------------------------------------------------------
 Maximum likelihood estimation 
 of migration rate  and effectice population size
 using a Metropolis-Hastings Monte Carlo algorithm                            
 -------------------------------------------------------                        
 Likelihood ratio test   R O U T I N E S 
 
 moved out from world.c                                                                                                               
 Peter Beerli 2000, Seattle
 beerli@genetics.washington.edu
 
 Copyright 2001 Peter Beerli and Joseph Felsenstein
 
$Id: lrt.c,v 1.13 2001/07/25 19:27:24 beerli Exp $
 
-------------------------------------------------------*/
#define SICK_VALUE    -1
#include "migration.h"
//#include "mcmc.h"

//#include "fst.h"
//#include "random.h"
#include "tools.h"
#include "broyden.h"
#include "combroyden.h"
#include "options.h"
#ifndef LAGUERRE
#include "derivatives2.h"
#endif
#include "sort.h"

//#ifdef UEP
//#include "uep.h"
//#endif
#ifdef DMALLOC_FUNC_CHECK
#include <dmalloc.h>
#endif

void print_lratio_test (world_fmt * world, long *Gmax);
void test_loci_like (nr_fmt * nr, double *param0,
                     double *param1, long df, long zeros,
                     long loci, world_fmt * world,
                     long *maxwhich, long maxnum,
                     boolean withhead, char *this_string1,
                     char *this_string2);
long set_test_param (double *param, char *strp, world_fmt * world,
                     long lrline, long locus, long *maxwhich,
                     long *maxnum, long *zeros);

boolean
mywhitespace (char ch)
{
  if (!(ch == ',' || ch == '\0' || ch == '\n' || ch == ';'))
    {
      return TRUE;
    }
  return FALSE;
}


void
print_lratio_test (world_fmt * world, long *Gmax)
{
  long c;
  long r, locus;
  long df;
  long zeros;
  int header;
  nr_fmt *nr;
  double *param0;
  double *param1;
  long *maxwhich;
  long nparam;
  long maxnum = 0;
  long rep = !world->options->replicate ? 0 :
             (world->options->replicatenum == 0 ?
              world->options->lchains : world->options->replicatenum);
  long repstop = world->repstop;
  param0 = (double *) calloc (1, sizeof (double) * (world->numpop2 + 1));
  param1 = (double *) calloc (1, sizeof (double) * (world->numpop2 + 1));
  maxwhich = (long *) calloc (1, sizeof (long) * (world->numpop2 + 1));


  if (world->options->progress)
    fprintf (stdout, "           Printing likelihood ratio tests\n");

  nr = (nr_fmt *) calloc (1, sizeof (nr_fmt));
  create_nr (nr, world, *Gmax, 0, world->loci, world->repkind, world->rep);
  for (locus = 0; locus < world->loci; locus++)
    {
      if (world->options->replicate)
        for (r = 0; r < repstop; r++)
          create_multiapg0 (nr->apg0[r][locus], nr, r, locus);
      else
        create_apg0 (nr->apg0[0][locus], nr, &world->atl[0][locus], locus);
    }
  if (world->loci > 1)
    {
      locus = world->loci;
      rep = 0;
    }
  else
    {
      locus = 0;
    }
  PAGEFEEDWORLD;
  nparam = world->options->gamma ? world->numpop2 + 1 : world->numpop2;
  for (c = 0; c < world->options->lratio->counter; c++)
    {
      header = (c == 0) ? HEADER : NOHEADER;
      if (world->options->lratio->data[c].type == MLE)
        {
          memcpy (param1, world->atl[rep][locus].param,
                  sizeof (double) * nparam);
          df = set_test_param (param0,
                               world->options->lratio->data[c].value1,
                               world, 0, -1, maxwhich, &maxnum, &zeros);
          test_loci_like (nr, param0, param1,
                          df, zeros, world->loci, world, maxwhich,
                          maxnum, header,
                          world->options->lratio->data[c].value1, NULL);
        }
      else			/*arbitrary */
        {
          df = set_test_param (param0, world->options->lratio->data[c].value1,
                               world, 0, -1, maxwhich, &maxnum, &zeros);
          df = set_test_param (param1, world->options->lratio->data[c].value2,
                               world, 0, -1, maxwhich, &maxnum, &zeros);
          test_loci_like (nr, param0, param1,
                          df, zeros, world->loci, world, maxwhich,
                          maxnum, HEADER,
                          world->options->lratio->data[c].value1,
                          world->options->lratio->data[c].value2);
        }
      //      fprintf (world->outfile, "Remember: Tests based on a single locus may not be valid\n");
    }
  fflush (world->outfile);
  free (param0);
  free (param1);
  free (maxwhich);
  destroy_nr (nr, world);
}

void
tell_chiprob2 (char *message, double zeros, double df, double chi)
{
  if (chi > chiboundary (zeros, df - zeros, 0.001))
    {
      if (chi > chiboundary (zeros, df - zeros, 0.0001))
        strcpy (message, "<0.0001");
      else
        strcpy (message, "<0.001 ");
    }
  else
    {
      if (chi > chiboundary (zeros, df - zeros, 0.01))
        strcpy (message, "<0.01  ");
      else
        {
          if (chi > chiboundary (zeros, df - zeros, 0.05))
            strcpy (message, "<0.05  ");
          else
            strcpy (message, ">0.05  ");
        }
    }
}


void
test_locus_like (nr_fmt * nr, double *param0, double *param1, long df,
                 long zeros, long locus, world_fmt * world, long *maxwhich,
                 long maxnum, boolean withhead, char *this_string)
{
  char *teststat, temp[LRATIO_STRINGS];
  double like1, like0, testval, chiprob, chiprob2;
  int length;
  char tmp[LRATIO_STRINGS];
  long zi, i, part1len = 1, part2len = 1;
  long elem, w = 0, z = 0, pop;
  double normd = 0;
  long *which;
  double /**values,*/ *saveparam0, tparam;
  double *lparam0;
  double *lparam1;
  long spaces = 0;
  char spacer[100];
  char *message;
  helper_fmt helper;
  message = (char *) calloc (20, sizeof (char));
  which = (long *) calloc (1, sizeof (long) * (world->numpop2 + 1));
  lparam0 = (double *) calloc (world->numpop2 + 1, sizeof (double));
  lparam1 = (double *) calloc (world->numpop2 + 1, sizeof (double));
  saveparam0 = (double *) malloc (sizeof (double) * (world->numpop2 + 1));
  memcpy (saveparam0, param0, sizeof (double) * (world->numpop2 + 1));
  nr->skiploci = world->data->skiploci;
  if (world->options->gamma)
    elem = (nr->numpop2 = world->numpop2) + 1;
  else
    elem = nr->numpop2 = world->numpop2;
  nr->numpop = world->numpop;
  if (maxnum > 0)
    {
      for (i = 0; i < elem; i++)
        {
          if (i != maxwhich[z])
            {
              which[w] = i;
              nr->values[w++] = param0[i];
            }
          else
            {
              if (maxnum > z + 1)
                z++;
            }
        }
      nr->profilenum = w;
      maximize (param0, world, nr, PROFILE, world->repkind);
      like0 = nr->llike;
      normd = nr->normd;
      memcpy (param0, world->param0, sizeof (double) * nr->partsize);
    }
  else
    {
      set_logparam (lparam0, param0, elem);
      fill_helper (&helper, param0, lparam0, world, nr);
      like0 = CALCLIKE (&helper, FALSE, FALSE);
    }
  set_logparam (lparam1, param1, elem);
  fill_helper (&helper, param1, lparam1, world, nr);
  like1 = CALCLIKE (&helper, param1, lparam1);
  testval = -2. * (like0 - like1);
  chiprob = probchi (df, testval);
  //tell_chiprob2(message, zeros, df, testval);
  chiprob2 = probchiboundary (testval, zeros, df);
  sprintf (message, " %f ", chiprob2);
  teststat = (char *) calloc (1, sizeof (char) * LRATIO_STRINGS);
  length = MAX (0, 5 - sprintf (tmp, "%i", ((int) param0[0])));
  sprintf (teststat, "Loc%3li: %.*f", locus, length, param0[0]);
  for (i = 1; i < elem; i++)
    {
      zi = mml2m (i, world->numpop);
      if (zi >= world->numpop)
        {
          pop = (zi - world->numpop) / (world->numpop - 1);
          tparam = param0[zi] * saveparam0[pop];
        }
      else
        tparam = param0[zi];
      length = MAX (0, 5 - sprintf (tmp, "%i", ((int) tparam)));
      sprintf (temp, " %.*f", length, tparam);
      strcat (teststat, temp);
      if ((i + 1) % world->numpop == 0)
        strcat (teststat, "\n        ");
    }
  sprintf (temp, "=assumed to be equal to=");
  strcat (teststat, temp);
  part1len = 43;		//strlen (teststat);
  length = MAX (0, 5 - sprintf (tmp, "%i", ((int) param1[0])));
  sprintf (temp, "\n        %.*f", length, param1[0]);
  strcat (teststat, temp);
  for (i = 1; i < elem; i++)
    {
      zi = mml2m (i, world->numpop);
      if (zi >= world->numpop)
        {
          pop = (zi - world->numpop) / (world->numpop - 1);
          tparam = param1[zi] * param1[pop];
        }
      else
        tparam = param1[zi];
      length = MAX (0, 5 - sprintf (tmp, "%i", ((int) tparam)));
      sprintf (temp, " %.*f", length, tparam);
      strcat (teststat, temp);
      if ((i + 1) % world->numpop == 0)
        strcat (teststat, "\n       ");
    }
  //  sprintf (temp, "}");
  //  strcat (teststat, temp);
  part2len = 43;		// strlen (teststat);
  spaces = 43 - 5 * (1 + world->numpop);
  strcpy (spacer, "                          ");

  if (withhead)
    {
      fprintf (world->outfile,
               "==============================================================================\n");
      fprintf (world->outfile, "Likelihood ratio tests\n");
      fprintf (world->outfile,
               "==============================================================================\n");
      fprintf (world->outfile, "Over all loci\n");
      fprintf (world->outfile,
               "[Theta values are on the diagonal of the Migration matrix,\nmigration rates are specified as Theta*M]\n");
      part1len = MAX (part1len, part2len - part1len);
      fprintf (world->outfile,
               "%*.*s           %-10.10s   %-3.3s   %-4.4s %-5.5s\n",
               (int) part1len, (int) part1len, " ", "Test value", "Df",
               "Prob", "Prob*");
      for (i = 0; i < part1len + 35; i++)
        fputc ('-', world->outfile);
      fprintf (world->outfile, "\n");
    }
  if (testval > 10000 && chiprob < EPSILON)
    sprintf (temp, ">10000.00");
  else
    sprintf (temp, "%10.2f", testval);
  fprintf (world->outfile, "You entered\n[%s]\n", this_string);
  fprintf (world->outfile, "%s %s -2(%6.3f-%6.3f)=%11.11s %3li %8.5f %s\n",
           teststat, spacer, like0, like1, temp, df, chiprob, message);
  free (teststat);
  free (lparam0);
  free (lparam1);
  free (saveparam0);
  free (which);
  free (message);
}

//remember: param0 is the parameterset to test and
// NOT the parameterset from migrate.
void
test_loci_like (nr_fmt * nr, double *param0, double *param1, long df,
                long zeros, long loci, world_fmt * world, long *maxwhich,
                long maxnum, boolean withhead, char *this_string1,
                char *this_string2)
{

  char *teststat, temp[LRATIO_STRINGS];
  double like1, like0, testval, chiprob, chiprob2;
  int length;
  char tmp[LRATIO_STRINGS];
  long i, j, g = 0, part1len = 1, part2len = 1;
  long elem, zi, z = 0, w = 0, pop;
  double normd = 0;
  long *which;
  double /**values,*/ *saveparam0, tparam;
  long spaces = 0;
  char spacer[100];
  helper_fmt helper;
  double *lparam0;
  double *lparam1;
  char *message;
  lparam0 = (double *) calloc (world->numpop2 + 1, sizeof (double));
  lparam1 = (double *) calloc (world->numpop2 + 1, sizeof (double));
  which = (long *) calloc (1, sizeof (long) * (world->numpop2 + 1));
  message = (char *) calloc (20, sizeof (char));
  //  values = (double *) calloc (1, sizeof (double) * (world->numpop2 + 1));
  saveparam0 = (double *) malloc (sizeof (double) * (world->numpop2 + 1));
  memcpy (saveparam0, param0, sizeof (double) * (world->numpop2 + 1));
  for (i = 0; i < loci; i++)
    {
      for (j = 0; j < world->repstop; j++)
        {
          if (g < world->atl[j][i].T)
            g = world->atl[j][i].T;
        }
    }
  elem = world->options->gamma ? nr->numpop2 + 1 : nr->numpop2;
  nr->skiploci = world->data->skiploci;
  helper.multilocus = world->loci == 1 ? FALSE : TRUE;
  if (maxnum > 0)
    {
      for (i = 0; i < elem; i++)
        {
          if (i != maxwhich[z])
            {
              which[w] = i;
              nr->values[w++] = param0[i];
            }
          else
            {
              if (maxnum > z + 1)
                z++;
            }
        }
      nr->profilenum = w;
      maximize (param0, world, nr, PROFILE, world->repkind);
      like0 = nr->llike;
      normd = nr->normd;
      memcpy (param0, world->param0, sizeof (double) * nr->partsize);
    }
  else
    {
      set_logparam (lparam0, param0, elem);
      fill_helper (&helper, param0, lparam0, world, nr);
      like0 = CALCLIKE (&helper, param0, lparam0);
    }
  set_logparam (lparam1, param1, elem);
  fill_helper (&helper, param1, lparam1, world, nr);
  like1 = CALCLIKE (&helper, param1, lparam1);
  testval = -2. * (like0 - like1);
  chiprob = probchi (df, testval);
  //tell_chiprob2(message, zeros, df, testval);
  chiprob2 = probchiboundary (testval, zeros, df);
  sprintf (message, " %f ", chiprob2);
  teststat = (char *) calloc (1, sizeof (char) * LRATIO_STRINGS);
  length = MAX (0, 5 - sprintf (tmp, "%i", ((int) param0[0])));
  sprintf (teststat, "H0:%.*f", length, param0[0]);
  for (i = 1; i < elem; i++)
    {
      zi = mml2m (i, world->numpop);
      if (zi >= world->numpop)
        {
          pop = (zi - world->numpop) / (world->numpop - 1);
          tparam = param0[zi] * param0[pop];
        }
      else
        tparam = param0[zi];
      length = MAX (0, 5 - sprintf (tmp, "%i", ((int) tparam)));
      sprintf (temp, " %.*f", length, tparam);
      strcat (teststat, temp);
      if ((i + 1) % world->numpop == 0)
        strcat (teststat, "\n  ");
    }
  sprintf (temp, "=assumed to be equal to=    -2(%6.3f-%6.3f)=", like0,
           like1);
  strcat (teststat, temp);
  if (testval > 10000 && chiprob < EPSILON)
    sprintf (temp, ">10000.00  %3li %8.5f %s\n", df, chiprob, message);
  else
    sprintf (temp, "%-9.2f  %3li %8.5f %s\n", testval, df, chiprob, message);
  strcat (teststat, temp);
  part1len = 43;		//strlen (teststat);
  length = MAX (0, 5 - sprintf (tmp, "%i", ((int) param1[0])));
  sprintf (temp, "   %.*f", length, param1[0]);
  strcat (teststat, temp);
  for (i = 1; i < elem; i++)
    {
      zi = mml2m (i, world->numpop);
      if (zi >= world->numpop)
        {
          pop = (zi - world->numpop) / (world->numpop - 1);
          tparam = param1[zi] * param1[pop];
        }
      else
        tparam = param1[zi];
      length = MAX (0, 5 - sprintf (tmp, "%i", ((int) tparam)));
      sprintf (temp, " %.*f", length, tparam);
      strcat (teststat, temp);
      if ((i + 1) % world->numpop == 0)
        strcat (teststat, "\n  ");
    }
  //  sprintf (temp, "}");
  part2len = 43;		//strlen (teststat);
  spaces = 43 - 5 * (1 + world->numpop);

  strcpy (spacer, "                          ");

  if (withhead)
    {
      fprintf (world->outfile,
               "==============================================================================\n");
      fprintf (world->outfile, "Likelihood ratio tests\n");
      fprintf (world->outfile,
               "==============================================================================\n");
      fprintf (world->outfile, "Over all loci\n");
      fprintf (world->outfile,
               "[Theta values are on the diagonal of the Migration matrix,\nmigration rates are specified as Theta*M]\n");
      part1len = MAX (part1len, part2len - part1len);
      fprintf (world->outfile,
               "%*.*s      %-10.10s   %-3.3s   %-4.4s %-5.5s\n",
               (int) part1len, (int) part1len, " ", "Test value", "Df",
               "Prob", "Prob*");
      for (i = 0; i < part1len + 35; i++)
        fputc ('-', world->outfile);
      fprintf (world->outfile, "\n");
    }

  if (testval > 10000 && chiprob < EPSILON)
    sprintf (temp, ">10000.00");
  else
    sprintf (temp, "%10.2f", testval);
  fprintf (world->outfile, "You entered\n[%s]\n", this_string1);
  //fprintf (world->outfile, "%s %s -2(%6.3f-%6.3f)=%11.11s %3li %8.5f %s\n", teststat, spacer, like0, like1, temp, df, chiprob, message);
  fprintf (world->outfile, "%s\n", teststat);
  free (teststat);
  free (lparam0);
  free (lparam1);
  free (saveparam0);
  free (which);
  free (message);
}


long
set_test_param (double *param, char *strp, world_fmt * world, long lrline,
                long locus, long *maxwhich, long *maxnum, long *zeros)
{
  long i = 0, z = 0, zi = 0, zz = 0, zzz = 0, df = 0;
  long offset = 0, limit = 0, pop, pop1, pop2;
  long numpop = world->numpop;
  char *tmp, *ss, *paramtype;
  double *meanparam, mean;
  long elem = world->options->gamma ? world->numpop2 + 1 : world->numpop2;
  long repstop = !world->options->replicate ? 0 :
                 (world->options->replicatenum == 0 ?
                  world->options->lchains : world->options->replicatenum);
  *zeros = 0;
  ss = (char *) calloc (1, sizeof (char) * LINESIZE);
  tmp = (char *) calloc (1, sizeof (char) * LINESIZE);
  paramtype = (char *) calloc (1, sizeof (char) * elem);

  *maxnum = 0;
  strcpy (ss, strp);

  if (world->loci - world->skipped > 1)
    meanparam = world->atl[0][world->loci].param;
  else
    meanparam = world->atl[repstop][0].param;
  while (ss[zzz] != '\0')
    {
      tmp[i] = ss[zzz++];
      if (mywhitespace (tmp[i]))
        {
          if (tmp[i] != ' ')
            i++;
        }
      else
        {
          tmp[i] = '\0';
          i = 0;
          zi = mml2m (z, numpop);
          m2mm (zi, numpop, &pop1, &pop2);
          switch (tmp[0])
            {
            case 'x':
              paramtype[zi] = '-';
              param[zi] = meanparam[zi];
              maxwhich[(*maxnum)++] = zi;
              z++;
              df++;
              break;
            case '*':
              paramtype[zi] = '-';
              param[zi] = meanparam[zi];
              z++;
              break;
            case 't':
            case 'm':
              paramtype[zi] = '-';
              zz = atol (tmp) - 1;
              df++;
              if (zz < 0)
                {
                  mean = 0.0;
                  offset = (zi >= world->numpop) ? world->numpop : 0;
                  limit =
                    (zi >= world->numpop) ? world->numpop2 : world->numpop;
                  for (zz = offset; zz < limit; zz++)
                    mean += meanparam[zz];
                  mean /= limit - offset;
                  param[zi] = mean;
                }
              else
                {
                  param[zi] = meanparam[zz];
                }
              z++;
              break;
            case 'M':
              zz = atol (tmp) - 1;
              df++;
              if (zz < 0)
                {
                  mean = 0.0;
                  offset = (zi >= world->numpop) ? world->numpop : 0;
                  limit =
                    (zi >= world->numpop) ? world->numpop2 : world->numpop;
                  if (offset < numpop)
                    {
                      paramtype[zi] = '-';
                      for (zz = offset; zz < limit; zz++)
                        mean += meanparam[zz];
                      mean /= limit - offset;
                      param[zi] = mean;
                    }
                  else
                    {
                      paramtype[zi] = '+';
                      for (zz = offset; zz < limit; zz++)
                        {
                          m2mm (zz, numpop, &pop1, &pop2);
                          mean += meanparam[zz] * meanparam[pop2];
                        }
                      mean /= limit - offset;
                      param[zi] = mean;
                    }
                }
              else
                {
                  paramtype[zi] = '-';
                  param[zi] = meanparam[zz];
                }
              z++;
              break;
            case 's':
              df++;
              if (zi < world->numpop)
                {
                  paramtype[zi] = '-';
                  param[zi] = meanparam[zi];
                  z++;
                }
              else
                {
                  paramtype[zi] = '-';
                  param[zi] =
                    (meanparam[zi] +
                     meanparam[mm2m (pop2, pop1, world->numpop)]) / 2.;
                  z++;
                }
              break;
            case 'S':
              df++;
              if (zi < world->numpop)
                {
                  paramtype[zi] = '-';
                  param[zi] = meanparam[zi];
                  z++;
                }
              else
                {
                  paramtype[zi] = '+';
                  param[zi] =
                    (meanparam[zi] * meanparam[pop2] +
                     meanparam[mm2m (pop2, pop1, world->numpop)] *
                     meanparam[pop1]) / 2.;
                  z++;
                }
              break;
            default:
              paramtype[zi] = '+';
              df++;
              if (tmp[0] == '0')
                (*zeros)++;
              param[zi] = MAX (atof (tmp), SMALLEST_THETA);
              z++;
              break;
            }
        }
    }
  for (zi = world->numpop; zi < world->numpop2; zi++)
    {
      if (paramtype[zi] == '+')
        {
          pop = (zi - world->numpop) / (world->numpop - 1);
          param[zi] /= param[pop];
        }
    }
  free (paramtype);
  free (ss);
  free (tmp);
  return df;
}
