/*
   This file is part of Numerix.  Numerix is free software; you can
   redistribute it and/or modify it under the terms of the GNU General
   Public License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
   USA 
*/

/* +------------------------------------------------------------------------+
   |                                                                        |
   |                      Entiers de longueur arbitraire                    |
   |                                                                        |
   |                        FFT modulo m = BASE^n+1                         |
   |                                                                        |
   +------------------------------------------------------------------------+ */


/* M. Quercia, 31/01/2001 */

#include "long_int.h"
#include "long_int-s.h"

                     /* +--------------------+
                        |  c <- a + b mod m  |
                        +--------------------+ */


#ifndef have_sn_sc_add
void xn(sc_add)(naturel a, naturel b, naturel c, longueur n) {
  zdouble ret;
  longueur i;

  /* additionne les chiffres de 0  n */
  for (ret=0, i=0; i<=n; i++) {
    ret += (ndouble)a[i] + (ndouble)b[i];
    c[i] = ret;
    ret >>= HW;
  }

  /* si dbordement, retranche la partie haute - 1 */
  if (ret) {
    ret = -BASE - (ndouble)c[n] + 1;
    c[n] = 1;
    for (i=0; ret; i++) {
      ret += (ndouble)c[i];
      c[i] = ret;
      ret >>= HW;
    }
  }
}
#else
void xn(sc_add)(naturel a, naturel b, naturel c, longueur n);
#endif

                     /* +--------------------+
                        |  c <- a - b mod m  |
                        +--------------------+ */


#ifndef have_sn_sc_sub
void xn(sc_sub)(naturel a, naturel b, naturel c, longueur n) {
  zdouble ret;
  longueur i;

  /* soustrait les chiffres de 0  n */
  for (ret=0, i=0; i<=n; i++) {
    ret += (ndouble)a[i] - (ndouble)b[i];
    c[i] = ret;
    ret >>= HW;
  }

  /* si dbordement, retranche la partie haute */
  if (ret) {
    ret = BASE - (ndouble)c[n];
    c[n] = 0;
    for (i=0; ret; i++) {
      ret += (ndouble)c[i];
      c[i] = ret;
      ret >>= HW;
    }
  }
}
#else
void xn(sc_sub)(naturel a, naturel b, naturel c, longueur n);
#endif


                      /* +------------------+
                         |  c <- a*b mod m  |
                         +------------------+ */

#ifndef have_sn_sc_mul
void xn(sc_mul)(naturel a, naturel b, naturel c, longueur n) {
  chiffre *buff = xn(alloc_tmp)(2*n+2);
  zdouble ret;

  /* produit dans N => c = x + base^n*y + base^(2n)*z = x-y+z mod m */
  xn(mul_k)(a,n+1,b,n+1,buff);
  ret  = xn(dec)(buff,n,buff+n,n);
  ret += xn(inc)(buff,n,buff+2*n,2);

  /* si < 0, ajoute BASE^n+1 */
  if (ret) buff[n] = xn(inc_1)(buff,n,1); else buff[n] = 0;

  xn(cpy)(c,buff,n+1);
  xn(free)(buff);

}
#else
void xn(sc_mul)(naturel a, naturel b, naturel c, longueur n);
#endif

                     /* +---------------------+
                        |  b <- a << k mod m  |
                        +---------------------+ */

/* 0 < k < n*HW */
#ifndef have_sn_sc_shift
void xn(sc_shift)(naturel a, naturel b, longueur k, longueur n) {
  chiffre *buff = xn(alloc_tmp)(2*n+2);
  longueur p = 1+((k+HW-1)/HW);
  zdouble ret;

  /* dcale dans N => b = x + base^n*y = x - y mod m */
  xn(shl)(a,n+1,k,buff);
  if (p <= n) ret = xn(dec)(buff,n,buff+n,p);
  else {
    ret  = xn(dec)(buff,n,buff+n,n);
    ret += xn(inc)(buff,n,buff+2*n,p-n);
  }

  /* si < 0, ajoute BASE^n+1 */
  if (ret) buff[n] = xn(inc_1)(buff,n,1); else buff[n] = 0;

  xn(cpy)(b,buff,n+1);
  xn(free)(buff);
}
#else
void xn(sc_shift)(naturel a, naturel b, longueur k, longueur n);
#endif

                        /* +---------------+
                           |  b <- fft(a)  |
                           +---------------+ */

/* dcoupe a en blocs de k mots et calcule la transforme de Fourier */
/* directe de a sur 2^l points.                                      */
/* Lg(a) <= k*2^l, capacit(b) >= (n+1)*2^l.                         */
#ifndef have_sn_sc_fft
void xn(sc_fft)(naturel a, longueur la, naturel b, longueur k, longueur l, longueur n) {
  longueur n1 = n+1, d,i,p,r,s,t;
  chiffre *x,*y, *buff = xn(alloc_tmp)(n+1);

  /* dcoupe a en blocs de k mots */
  xn(clear)(b,n1<<l);
  for (x=b; la>0; a+=k, la-=k, x+=n1) xn(cpy)(x,a, min(la,k));

  /* fft itrative */
  for (d=1<<(l-1); d; d>>=1) {
    for (x=b, y=x+d*n1, s=0, r=0, i=1<<(l-1); i; ) {
      if (r == 0) xn(cpy)(buff,y,n1); else xn(sc_shift)(y,buff,r,n);
      xn(sc_sub)(x,buff,y,n);
      xn(sc_add)(x,buff,x,n);
      x+=n1; y+=n1; i--;
      if ((i&(d-1)) == 0) {
	x=y; y+=d*n1;
	s++;
	/* inversion binaire sur l-1 bits */
	for (r=0, t=s, p=l-1; p; p--) {r = (r<<1)+(t&1); t >>= 1;}
	r *= ((n*HW) >> (l-1));
      }
    }
  }
  xn(free)(buff);
}
#else
void xn(sc_fft)(naturel a, longueur la, naturel b, longueur k, longueur l, longueur n);
#endif

                        /* +-------------------+
                           |  b <- fft inv(a)  |
                           +-------------------+ */

#ifndef have_sn_sc_fft_inv
void xn(sc_fft_inv)(naturel a, naturel b, longueur lb, longueur k, longueur l, longueur n) {
  longueur n1 = n+1, d,i,p,r,s,t;
  chiffre *x,*y, *buff = xn(alloc_tmp)(n+1);
  zdouble z;

  /* fft inverse itrative */
  for (d=1; d < (1<<l); d <<= 1) {
    for (x=a, y=x+d*n1, s=0, r=0, i=1<<(l-1); i; ) {
      if (r == 0) {
	xn(sc_sub)(x,y,buff,n);
	xn(sc_add)(x,y,x,n);
	xn(cpy)(y,buff,n1);
      }
      else {
	xn(sc_sub)(y,x,buff,n);
	xn(sc_add)(x,y,x,n);
	xn(sc_shift)(buff,y,r,n);
      }
      x+=n1; y+=n1; i--;
      if ((i&(d-1)) == 0) {
	x=y; y+=d*n1;
	s++;
	/* inversion binaire sur l-1 bits */
	for (r=0, t=s, p=l-1; p; p--) {r = (r<<1)+(t&1); t >>= 1;}
	r = ((1<<(l-1))-r) * ((n*HW) >> (l-1));
      }
    }
  }

  /* normalise et divise par 2^l */
  for (x=a, i=1<<l; i; x+=n1, i--) {
    z = xn(dec_1)(x,n,x[n]);
    if (z) z += xn(inc_1)(x,n,1);
    x[n] = z;
    xn(shr)(x,n+1,l,x);
  }

  /* value le polynme en base^k */
  xn(clear)(b,lb);
  for (x=a, i=1<<l; (i>0) && (lb>0); i--, x+=n1, b+=k, lb-=k)
    xn(inc)(b,lb,x,min(lb,n+1));

  xn(free)(buff);
    
}
#else
void xn(sc_fft_inv)(naturel a, naturel b, longueur lb, longueur k, longueur l, longueur n);
#endif

               /* +--------------------------------+
                  |  Cherche les paramtres n,k,l  |
                  +--------------------------------+ */

/*
  choix des paramtres n,k,l :
  - n*HW doit tre un multiple de 2^(l-1)
  - 2k*HW + l <= n*HW
  - k*2^l >= la
*/

#ifndef have_sn_sc_get_nkl
void xn(sc_get_nkl)(longueur la, longueur *n, longueur *k, longueur *l) {
  longueur k1,l1,n1,c1,c,d;

#define lmin 5
#define lmax 15

  /* on fait varier l de lmin  lmax et on dtermine k et n en fonction */
  /* de l. Ensuite, on retient la solution tq n^1.5*2^l soit minimal    */
  
  c = 100000;
  for (l1=lmin; l1<=lmax; l1++) {
    k1 = (la + (1<<l1) - 1) >> l1;
    n1 = 2*k1 + (l1+HW-1)/HW;
    d = (1<<l1)/HW; if (d) n1 = d*((n1+d-1)/d);

    /* calcul approch de c1 = 10*log(n^1.5*2^l) */
    for (c1=0, d=n1; d >= 4; c1 += 10, d = (d+1)/2);
    if (d == 2) c1 += 10; else if (d == 3) c1 += 15;
    c1 = 3*c1/2 + 10*l1;

    if (c > c1) {*l = l1; *k = k1; *n = n1; c = c1;}
  }

}
#else
void xn(sc_get_nkl)(longueur la, longueur *n, longueur *k, longueur *l);
#endif

                         /* +------------+
                            |  b <- a^2  |
                            +------------+ */

#ifndef have_sn_sc_fftsqr
void xn(sc_fftsqr)(naturel a, longueur la, naturel b) {
  longueur k,l,n,i;
  naturel aa;

  xn(sc_get_nkl)(2*la,&n,&k,&l);
  aa = xn(alloc_tmp)(((n+1)<<l));
  xn(sc_fft)(a,la,aa,k,l,n);
  for (i=((n+1)<<l)-n-1; i>=0; i-=n+1) xn(sc_mul)(aa+i,aa+i,aa+i,n);
  xn(sc_fft_inv)(aa,b,2*la,k,l,n);
  xn(free)(aa);

}
#endif

                         /* +------------+
                            |  c <- ab   |
                            +------------+ */

#ifndef have_sn_sc_fftmul
void xn(sc_fftmul)(naturel a, longueur la, naturel b, longueur lb, naturel c) {
  longueur k,l,n,i;
  naturel aa, bb;

  xn(sc_get_nkl)(la+lb,&n,&k,&l);
  aa = xn(alloc_tmp)(((n+1)<<l));
  bb = xn(alloc_tmp)(((n+1)<<l));
  xn(sc_fft)(a,la,aa,k,l,n);
  xn(sc_fft)(b,lb,bb,k,l,n);
  for (i=((n+1)<<l)-n-1; i>=0; i-=n+1) xn(sc_mul)(aa+i,bb+i,aa+i,n);
  xn(sc_fft_inv)(aa,c,la+lb,k,l,n);
  xn(free)(bb); xn(free)(aa);

}
#endif

