/* nsintegrator.cpp: time-integration class for spectral Navier-Stokes DNS
 * Channelflow-0.9
 *
 * Copyright (C) 2001  John F. Gibson  
 *  
 * jgibson@mail.sjcsf.edu  
 * John F. Gibson 
 * St. John's College
 * 1160 Camino de la Cruz Blanca
 * Santa Fe, NM 87501
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, U
 */

#include "nsintegrator.h"
#include "orrsommfunc.h"
//#include <fstream> // tmp debugging need

// Small tolerance added to a Real ratio before it is truncated to int (see
// the TimeStep constructor below), so that a ratio that is an integer up to
// roundoff does not truncate to the next-lower integer.
const Real EPSILON = 1e-12;
// Forward declaration; the definition appears later in this file.
void assignOrrSommNonlin(FlowField& nonlin, Real t, Complex omega, const BasisFunc& oseig);


// Build a variable time step that always divides the interval dT into an
// integer number of equal steps.
//   dt           : initial step; n_ = int(dT/dt) steps per interval
//   dtmin, dtmax : bounds on the step, stored as nmax_ and nmin_ steps
//   dT           : the fixed interval
//   CFLmin/max   : CFL window used by adjust()
// EPSILON protects the int truncations against roundoff in the ratios.
TimeStep::TimeStep(Real dt, Real dtmin, Real dtmax, Real dT,
                   Real CFLmin, Real CFLmax)
  : n_(int(dT/dt + EPSILON)),
    nmin_(int(dT/dtmax + EPSILON)),
    nmax_(int(dT/dtmin + EPSILON)),
    dT_(dT),
    CFLmin_(CFLmin),
    CFL_((CFLmax+CFLmin)/2),   // placeholder until the first adjust() call
    CFLmax_(CFLmax)
{
  // Sanity checks on the requested step and on the derived step counts.
  assert(dt>0 && dt<=dT);
  assert(dt>=dtmin && dt<=dtmax);
  assert(n_>=nmin_ &&  n_<=nmax_);
}

// Record the measured CFL number and, if it lies outside [CFLmin_, CFLmax_],
// pick a new number of steps per interval so that the CFL number moves to
// the middle of that window (CFL scales linearly with dt == dT_/n_).
//
// The new step count is clamped to [max(nmin_,1), nmax_]. The lower bound
// of 1 matters when dtmax > dT (so nmin_ == 0): without it a small CFL
// could produce new_n == 0, and both the CFL rescale below and dt()
// would divide by zero.
//
// Returns true if the step count (and hence dt) was changed. In that case
// CFL_ is rescaled to the value expected at the new dt.
bool TimeStep::adjust(Real CFL) {
  CFL_ = CFL;
  int new_n = n_;
  if (CFL<CFLmin_ || CFL>CFLmax_) {
    const Real CFLtarget = 0.5*(CFLmin_ + CFLmax_);
    new_n = int(n_*CFL/CFLtarget);

    // Never allow fewer than one step per interval.
    const int nfloor = (nmin_ > 1) ? nmin_ : 1;
    if (new_n < nfloor) {
      cerr << "TimeStep::adjust(CFL) : topping out at dt==" << dT_/nfloor<<endl;
      new_n = nfloor;
    }
    if (new_n > nmax_) {
      cerr << "TimeStep::adjust(CFL) : bottoming out at dt=="<<dT_/nmax_<<endl;
      new_n = nmax_;
    }
  }
  const bool adjustment = (new_n != n_);
  if (adjustment) {
    CFL_ *= Real(n_)/Real(new_n);  // i.e. *= (new dt)/(old dt)
    n_ = new_n;
  }
  return adjustment;
}
    
// Most recently recorded (or rescaled) CFL number.
Real TimeStep::CFL() const {
  return CFL_;
}

// Current time step: the interval dT_ split into n_ equal steps.
Real TimeStep::dt() const {
  return dT_/n_;
}

// The fixed interval that dt always divides evenly.
Real TimeStep::dT() const {
  return dT_;
}

// Implicit conversion to Real yields the current time step.
TimeStep::operator Real() const {
  return dT_/n_;
}

// Number of steps per interval dT_.
int TimeStep::n() const {
  return n_;
}

//====================================================================
// Release the [Nsubsteps_][Nx_][Nz_] TauSolver table, innermost arrays first.
NSIntegrator::~NSIntegrator() {
  for (int s=0; s<Nsubsteps_; ++s) {
    TauSolver** plane = tausolver_[s];
    for (int i=0; i<Nx_; ++i)
      delete[] plane[i];   // undo new #3
    delete[] plane;        // undo new #2
  }
  delete[] tausolver_;     // undo new #1
}

// Default constructor: an empty integrator with all sizes and parameters
// zero, default-constructed work arrays, and no TauSolver table. Intended
// to be filled in later via operator=.
NSIntegrator::NSIntegrator() 
  :
  Nx_(0),
  Ny_(0),
  Nyd_(0),
  Nz_(0),
  kxd_max_(0),
  kzd_max_(0),
  Nsubsteps_(0),
  Lx_(0),
  Lz_(0),
  a_(0),
  b_(0),
  nu_(0),
  dt_(0),
  flags_(), 
  alternator_(false),
  dPdxRef_(0),
  dPdxAct_(0),
  UbulkRef_(0), 
  UbulkAct_(0), 
  UbulkBase_(0), 
  t_(0),
  cfl_(0),
  Ubase_(),
  Ubasey_(),
  UbaseyT_(),
  Ubaseyy_(), 
  vortn_(),
  fn_(),
  fn1_(),
  tausolver_(0),
  uk_(),
  vk_(),
  wk_(),
  Rxk_(),
  Ryk_(),
  Rzk_(),
  fxk_(),
  fyk_(),
  fzk_(),
  Pk_(),
  Pyk_()
{
  // Give the time-stepping coefficients defined values. operator= copies
  // these arrays element-wise from its source, so leaving them
  // uninitialized here would make it read indeterminate values when the
  // source is a default-constructed integrator. Indices 0..2 are the ones
  // the main constructor fills in for the three-substep scheme.
  for (int i=0; i<3; ++i) {
    alpha_[i] = 0.0;
    beta_[i]  = 0.0;
    gamma_[i] = 0.0;
    zeta_[i]  = 0.0;
  }
}  

// Construct an integrator for the velocity field u about base flow Ubase.
//   u     : 3-component velocity field in (Spectral, Spectral) state (both
//           asserted below). Modified in place: when flags request xz
//           dealiasing, the aliased modes are set to zero.
//   Ubase : base-flow profile. If its length is zero the base-flow
//           derivative members stay default-constructed.
//   nu    : viscosity, passed on to reset() for the TauSolvers.
//   dt    : time step, passed on to reset() for the TauSolvers.
//   flags : DNS options (time stepping, nonlinearity, dealiasing,
//           mean constraint, verbosity).
//   t0    : initial value of the internal time counter t_.
NSIntegrator::NSIntegrator(FlowField& u, const ChebyCoeff& Ubase,
			   Real nu, Real dt, const DNSFlags& flags, Real t0)
  :
  Nx_(u.numXmodes()),
  Ny_(u.numYmodes()),
  // With y dealiasing only the lowest 2/3 (rounded as below) of the y modes
  // are carried by the per-mode work arrays.
  Nyd_(flags.dealias_y() ? 2*(u.numYmodes()-1)/3 + 1 : u.numYmodes()),
  Nz_(u.numZmodes()),
  kxd_max_(flags.dealias_xz() ? 2*u.kxmax()/3 : u.kxmax()),
  kzd_max_(flags.dealias_xz() ? 2*u.kzmax()/3 : u.kzmax()),
  Nsubsteps_(flags.numSubsteps()),
  Lx_(u.Lx()),
  Lz_(u.Lz()),
  a_(u.a()),
  b_(u.b()),
  nu_(nu),
  dt_(dt),
  //alpha_(Nsubsteps_),
  //beta_(Nsubsteps_),
  //gamma_(Nsubsteps_),
  //zeta_(Nsubsteps_),
  flags_(flags),
  alternator_(false),
  dPdxRef_(0),
  dPdxAct_(0),
  UbulkRef_(0), 
  UbulkAct_(0), 
  UbulkBase_(0), 
  t_(t0),
  cfl_(0),
  Ubase_(Ubase),
  Ubasey_(),
  UbaseyT_(),
  Ubaseyy_(), 
  // vortn_, fn_ and fn1_ start as copies of u (so they inherit its
  // dimensions) and are zeroed in the body.
  vortn_(u),
  fn_(u),
  fn1_(u),
  tausolver_(0),
  // Per-(kx,kz) work arrays: Nyd_ spectral coefficients on [a_, b_].
  uk_(Nyd_,a_,b_,Spectral),
  vk_(Nyd_,a_,b_,Spectral),
  wk_(Nyd_,a_,b_,Spectral),
  Rxk_(Nyd_,a_,b_,Spectral),
  Ryk_(Nyd_,a_,b_,Spectral),
  Rzk_(Nyd_,a_,b_,Spectral),
  fxk_(Nyd_,a_,b_,Spectral),
  fyk_(Nyd_,a_,b_,Spectral),
  fzk_(Nyd_,a_,b_,Spectral),
  Pk_(Nyd_,a_,b_,Spectral),
  Pyk_(Nyd_,a_,b_,Spectral)
{
  vortn_.setToZero();
  fn_.setToZero();
  fn1_.setToZero();

  u.assertState(Spectral, Spectral);
  assert(u.vectorDim() == 3);

  //int nlspec_count = int(flags_.rotational)+int(flags.skewsymmetric)+int(flags_.linearized);
  //if (nlspec_count != 1) {
  //cerr << "NSIntegrator ctor() : please choose just one of the\n"
  //"rotational, skewsymmetric, or linearized DNSFlags for claculation of\n"
  //"the nonlinear term\n";
  //exit(1);
  //}

  // Set the timestepping constants
  // (one entry per substep: CNAB2 uses index 0 only, RK3 uses 0..2).
  switch (flags_.timestepping) {
  case CNAB2: 
    alpha_[0] = 0.5;
    beta_[0]  = 0.5;
    gamma_[0] = 1.5;
    zeta_[0]  = -0.5;
    break;
  case RK3:
  default:
    alpha_[0]=29.0/96.0; alpha_[1]=-3.0/40.0; alpha_[2]=1.0/6.0;
    beta_[0] =37.0/160.0; beta_[1]= 5.0/24.0; beta_[2] =1.0/6.0;
    gamma_[0]=8.0/15.0;  gamma_[1]= 5.0/12.0; gamma_[2]=3.0/4.0;
    zeta_[0] =0.0;        zeta_[1]=-17.0/60.0; zeta_[2]=-5.0/12.0;
    break;
  }

  // Set the aliased modes to zero
  // NOTE(review): the zeroing of the y modes ny >= Nyd_ sits inside the
  // dealias_xz() branch, so with y-only dealiasing the initial field keeps
  // its high y modes -- confirm whether that is intended.
  Complex zero = 0.0;
  if (flags_.dealias_xz()) {
    for (int i=0; i<3; ++i)
      for (int nx=0; nx<Nx_; ++nx)
	for (int nz=0; nz<Nz_; ++nz) {
	  if (isAliasedMode(u.kx(nx), u.kz(nz)))
	    for (int ny=0; ny<Nyd_; ++ny)
	      u.cmplx(nx,ny,nz,i) = zero;
	  for (int ny=Nyd_; ny<Ny_; ++ny)
	    u.cmplx(nx,ny,nz,i) = zero;
	}
  }
	    
  // Calculate Ubasey_ and related quantities if nonzero. Require that 
  // base flow is quadratic (solves an equilibrium problem).
  // The transform is built for Ny_ points, so this presumably assumes
  // Ubase_ has length Ny_ -- TODO confirm against callers.
  ChebyTransform t(Ny_);
  if (Ubase_.length() != 0) {
    t.makeSpectral(Ubase_);
    UbulkBase_ = Ubase_.mean();
    Ubasey_ = diff(Ubase_);
    UbaseyT_ = Ubasey_;
    Ubaseyy_ = diff(Ubasey_);
    t.makePhysical(Ubase_);
    t.makePhysical(Ubasey_);
    // keep Ubaseyy_ as spectral for calc of R in advance() method..
  }
  
  // Calculate nonlin term and vorticity in order to get initial CFL number.
  switch (flags_.nonlinearity) {
  case SkewSymmetric:
    // SkewSymmetric requires a 9d (3x3) tmp flowfield
    vortn_.resize(u.Nx(), u.Ny(), u.Nz(), 9, u.Lx(), u.Lz(), u.a(), u.b());
    u.nonlinearitySkewSymmetric(Ubase_, Ubasey_, UbaseyT_, vortn_, fn_); 
    break;
  case Alternating:
    // Alternating likewise uses a 9d (3x3) tmp flowfield
    vortn_.resize(u.Nx(), u.Ny(), u.Nz(), 9, u.Lx(), u.Lz(), u.a(), u.b());
    if (alternator_) 
      u.nonlinearityConvection(Ubase_, Ubasey_, UbaseyT_, vortn_, fn_); 
    else 
      u.nonlinearityDivergence(Ubase_, Ubasey_, UbaseyT_, vortn_, fn_); 
    // This initial evaluation also flips the alternator, so the first call
    // to advance() starts with the other form.
    alternator_ = !alternator_;
    break;
  case Linearized:
    u.nonlinearityLinearized(Ubase_, Ubasey_, UbaseyT_, vortn_, fn_); 
    break;
  case Rotational:
  default:
    if (flags_.dealias_y())
      u.nonlinearityDealiased(Ubase_, Ubasey_, UbaseyT_, vortn_, fn_); 
    else
      u.nonlinearityRotational(Ubase_, Ubasey_, UbaseyT_, vortn_, fn_); 
  }
    
  // Initial CFL number: u.CFLfactor() scaled by dt and a dealiasing-
  // dependent constant (same formula as at the end of advance()).
  cfl_ = u.CFLfactor();
  cfl_ *= flags_.dealias_xz() ? 2.0*pi/3.0*dt_ : pi*dt_;

  // Determine actual Ubulk and dPdx from initial data Ubase + u. 
  // u00 is the real part of the (nx,nz) == (0,0) mode of the x-velocity.
  ChebyCoeff u00(Ny_,a_,b_,Spectral);
  for (int ny=0; ny<Ny_; ++ny)
    u00[ny] = Re(u.cmplx(0,ny,0,0));
  ChebyCoeff du00dy = diff(u00);

  UbulkAct_ = UbulkBase_ + u00.mean();
  dPdxAct_  = nu*(du00dy.eval_b() - du00dy.eval_a())/(b_-a_);
  if (Ubase_.length() != 0)
    dPdxAct_  += nu*(Ubasey_.eval_b() - Ubasey_.eval_a())/(b_-a_);
    
  // Set whichever reference value is being held const.
  if (flags_.constraint == BulkVelocity)
    UbulkRef_ = UbulkAct_;
  else
    dPdxRef_ = dPdxAct_;

  // Allocate memory for [Nsubsteps x Nx_ x Nz_] Tausolver array
  tausolver_ = new TauSolver**[Nsubsteps_]; // new #1
  for (int substep=0; substep<Nsubsteps_; ++substep) {
    tausolver_[substep] = new TauSolver*[Nx_];       // new #2
    for (int nx=0; nx<Nx_; ++nx) 
      tausolver_[substep][nx] = new TauSolver[Nz_];  // new #3
  }

  // Initialize the Tausolvers with correct viscosity and dt parameters.
  reset(nu, dt);
  // Clear the scratch fields. fn_ deliberately keeps the nonlinear term
  // computed above (its setToZero is commented out); advance() swaps it
  // into fn1_ on its first substep.
  vortn_.setToZero();
  //fn_.setToZero();
  fn1_.setToZero();
}

// Copy assignment. Releases this object's TauSolver table, copies every
// parameter, flag, and work array from ns, then allocates a fresh
// [Nsubsteps_][Nx_][Nz_] TauSolver table and initializes it with reset().
// The TauSolvers are rebuilt from (nu_, dt_) rather than copied from ns.
NSIntegrator& NSIntegrator::operator=(const NSIntegrator& ns) {
  // Self-assignment: nothing to copy, so skip the teardown and rebuild.
  if (this == &ns)
    return *this;

  // Remove any existing tausolver arrays (sized by the OLD dimensions, so
  // this must happen before Nsubsteps_ and Nx_ are overwritten below).
  if (tausolver_) {
    for (int substep=0; substep<Nsubsteps_; ++substep) {
      for (int nx=0; nx<Nx_; ++nx) 
        delete[] tausolver_[substep][nx];  // undo new #3
      delete[] tausolver_[substep];        // undo new #2
    } 
    delete[] tausolver_;              // undo new #1
    tausolver_ = 0;
  }
  Nx_ = ns.Nx_;
  Ny_ = ns.Ny_;
  Nyd_ = ns.Nyd_;
  Nz_ = ns.Nz_;
  kxd_max_ = ns.kxd_max_;
  kzd_max_ = ns.kzd_max_;
  Nsubsteps_ = ns.Nsubsteps_;
  Lx_ = ns.Lx_;
  Lz_ = ns.Lz_;
  a_ = ns.a_;
  b_ = ns.b_;
  nu_ = ns.nu_;
  dt_ = ns.dt_;
  flags_ = ns.flags_;
  alternator_ = ns.alternator_;
  dPdxRef_ = ns.dPdxRef_;
  dPdxAct_ = ns.dPdxAct_;
  UbulkRef_ = ns.UbulkRef_;
  UbulkAct_ = ns.UbulkAct_; 
  UbulkBase_ = ns.UbulkBase_;
  t_ = ns.t_;
  cfl_ = ns.cfl_;
  Ubase_ = ns.Ubase_;
  Ubasey_ = ns.Ubasey_;
  UbaseyT_ = ns.UbaseyT_;
  Ubaseyy_ = ns.Ubaseyy_;
  vortn_ = ns.vortn_;
  // Current and previous nonlinear terms. (The original copied ns.fn1_
  // into both, which discarded the current nonlinear term.)
  fn_ = ns.fn_;
  fn1_ = ns.fn1_;
  uk_ = ns.uk_;
  vk_ = ns.vk_;
  wk_ = ns.wk_;
  Rxk_ = ns.Rxk_;
  Ryk_ = ns.Ryk_;
  Rzk_ = ns.Rzk_;
  fxk_ = ns.fxk_;
  fyk_ = ns.fyk_;
  fzk_ = ns.fzk_;
  Pk_ = ns.Pk_;
  Pyk_ = ns.Pyk_;
  
  // Copy the timestepping constants, one entry per substep.
  for (int i=0; i<flags_.numSubsteps(); ++i) {
    alpha_[i] = ns.alpha_[i];
    beta_[i]  = ns.beta_[i];
    gamma_[i] = ns.gamma_[i];
    zeta_[i]  = ns.zeta_[i];
  }

  // Allocate new null tausolvers in the new array dimensions
  tausolver_ = new TauSolver**[Nsubsteps_]; // new #1
  for (int substep=0; substep<Nsubsteps_; ++substep) {
    tausolver_[substep] = new TauSolver*[Nx_];       // new #2
    for (int nx=0; nx<Nx_; ++nx) 
      tausolver_[substep][nx] = new TauSolver[Nz_];  // new #3
  }
  // Now initialize the tausolvers correctly.
  reset(nu_,dt_);

  // reset() rescales cfl_ by dt/dt_, which is 0/0 when the source has
  // dt_ == 0 (e.g. a default-constructed integrator). Restore the copied
  // value so the assignment is an exact copy in every case.
  cfl_ = ns.cfl_;
  return (*this);
}


// Install new integration parameters (viscosity nu, time step dt) and
// rebuild the TauSolvers to match. Callable from outside to change the
// parameters mid-run; the constructor and operator= also use it to
// initialize the TauSolver table. The stored CFL number is rescaled by
// (new dt)/(old dt).
void NSIntegrator::reset(Real nu, Real dt) { 
  cfl_ *= dt/dt_;
  nu_ = nu;
  dt_ = dt;

  const Real four_pisq_nu = 4.0*square(pi)*nu;
  const bool dealiasing_xz = flags_.dealias_xz();

  // Overwrite each tausolver_[s][ix][iz] with a TauSolver built for the
  // new parameters.
  for (int s=0; s<Nsubsteps_; ++s) {
    const Real inv_beta_dt = 1.0/(beta_[s]*dt_);

    for (int ix=0; ix<Nx_; ++ix) {
      const int kx = vortn_.kx(ix);

      for (int iz=0; iz<Nz_; ++iz) {
        const int kz = vortn_.kz(iz);

        // Dealiased modes are set to zero rather than advanced with the
        // momentum equations, so their TauSolvers are left untouched.
        if (dealiasing_xz && isAliasedMode(kx,kz))
          continue;

        const Real lambda = (inv_beta_dt
                             + four_pisq_nu*(square(kx/Lx_) + square(kz/Lz_)));
        tausolver_[s][ix][iz] = TauSolver(kx, kz, Lx_, Lz_, a_, b_,
                                          lambda, nu, Nyd_, true);
      }
    }
  }
}

// Overwrite the integrator's internal time counter.
void NSIntegrator::resettime(Real t) {
  t_ = t;
}

// Switch to the fixed-pressure-gradient constraint with reference value
// dPdx; the bulk-velocity reference is cleared.
void NSIntegrator::resetdPdx(Real dPdx) {
  flags_.constraint = PressureGradient;
  UbulkRef_ = 0.0;
  dPdxRef_ = dPdx;
}
// Switch to the fixed-bulk-velocity constraint with reference value Ubulk;
// the pressure-gradient reference is cleared.
void NSIntegrator::resetUbulk(Real Ubulk) {
  flags_.constraint = BulkVelocity;
  UbulkRef_ = Ubulk;
  dPdxRef_ = 0.0;
}

// Read-only access to the integration parameters and running diagnostics.

// Current time step.
Real NSIntegrator::dt() const {
  return dt_;
}

// CFL number from the most recent advance() (or the constructor).
Real NSIntegrator::CFL() const {
  return cfl_;
}

// Current value of the internal time counter.
Real NSIntegrator::time() const {
  return t_;
}

// Actual and reference mean pressure gradient.
Real NSIntegrator::dPdx() const {
  return dPdxAct_;
}
Real NSIntegrator::dPdxRef() const {
  return dPdxRef_;
}

// Actual and reference bulk velocity.
Real NSIntegrator::Ubulk() const {
  return UbulkAct_;
}
Real NSIntegrator::UbulkRef() const {
  return UbulkRef_;
}

// Largest x and z wavenumbers retained after dealiasing.
int NSIntegrator::kxMaxDealiased() const {
  return kxd_max_;
}
int NSIntegrator::kzMaxDealiased() const {
  return kzd_max_;
}

// This function is for debugging a problem in NSIntegrator::advance.
// The problem appears when no dealiasing is done (i.e. no high-order 
// modes are set to zero). The solution un is divergence-free immediately
// after solving the time-stepping equations. But transforming un to 
// Physical and back introduces order-1e-8 changes in the kx,kz == kxmax,0 
// and kx,kz == *,kzmax modes. These 1e-8 changes cause order-1 divergence
// in those modes. The only way I have found to eliminate them is to
// *always dealias* kx,kz == kxmax,* and kx,kz == *,kzmax. 1/31/04.
// Possibly this is a problem with FFTW or my use of it. 

void NSIntegrator::debugfunc(const FlowField& u) const {

  FlowField v(u);
  v.makePhysical();
  v.makeSpectral();
  FlowField w(u);
  w -= v;

  cout << "L2Norm2(u)   == " << L2Norm2(u) << endl;
  cout << "L2Norm2(v)   == " << L2Norm2(v) << endl;
  cout << "L2Norm2(u-v) == " << L2Norm2(w) << endl;
  cout << "L2Dist2(u,v) == " << L2Dist2(u,v) << endl;
  cout << "    div(u)   == " << u.divergence() << endl;
  cout << "    div(v)   == " << v.divergence() << endl;
  cout << "    div(u-v) == " << w.divergence() << endl;

  u.saveSpectrum("u");
  v.saveSpectrum("v");
  w.saveSpectrum("w");

  u.saveDivSpectrum("ud");
  v.saveDivSpectrum("vd");
  w.saveDivSpectrum("wd");
  cfpause();
}
  

// Advance the fields nSteps time steps of size dt_.
//   un     : velocity field, updated in place.
//   qn     : pressure-like field paired with un, updated in place.
//   nSteps : number of full time steps (each is Nsubsteps_ substeps).
// Each substep recomputes the nonlinear term fn_ (the previous one moves to
// fn1_), then for every (kx,kz) Fourier mode assembles a right-hand side
// and solves for the new u,v,w,P with that mode's TauSolver. On return t_
// has advanced by nSteps*dt_, cfl_ holds the CFL number of the final
// field, and dPdxAct_/UbulkAct_ reflect the last kx==kz==0 solve.
//
// Changes relative to the earlier version of this function:
//  * Modes that are zeroed instead of advanced (kx==kxmax, kz==kzmax, or
//    aliased) no longer terminate the nz loop with `break`. That skipped
//    every remaining nz of the same nx, so e.g. the kx==kxmax, kz>0 modes
//    were neither zeroed nor advanced. The loop now `continue`s.
//  * The base-flow curvature term is added over at most Nyd_ entries. The
//    loop previously ran to Ny_, past the end of Rxk_ (length Nyd_)
//    whenever y dealiasing makes Nyd_ < Ny_.
void NSIntegrator::advance(FlowField& un, FlowField& qn, int nSteps) {

  const bool verify = (flags_.verbosity == VerifyTauSolve || 
                       flags_.verbosity == PrintAll);
  const bool printTime = (flags_.verbosity == PrintTime ||
                          flags_.verbosity == PrintAll);

  for (int stepsRemaining=nSteps; stepsRemaining>0; --stepsRemaining) {
    for (int substep=0; substep<Nsubsteps_; ++substep) {

      //cout << "Before integration..." << endl;
      //debugfunc(un);

      // Take a time step. Current time is t=n*dt. Calculate u,q at t=(n+1)*dt
      // and store it in un,qn. I.e. map
      // (u(n), q(n), f(n), f(n-1)) ->  (u(n+1), q(n+1), f(n+1), f(n)). 

      // Update nonlinear terms fn and fn1. Swap() switches pointers to the 
      // data inside the objects, so saving a copy. 
      swap(fn1_,fn_);

      switch (flags_.nonlinearity) {
      case SkewSymmetric:
        un.nonlinearitySkewSymmetric(Ubase_, Ubasey_, UbaseyT_, vortn_, fn_); 
        break;
      case Alternating:
        if (alternator_) 
          un.nonlinearityConvection(Ubase_, Ubasey_, UbaseyT_, vortn_, fn_); 
        else 
          un.nonlinearityDivergence(Ubase_, Ubasey_, UbaseyT_, vortn_, fn_); 
        break;
      case Linearized:
        un.nonlinearityLinearized(Ubase_, Ubasey_, UbaseyT_, vortn_, fn_); 
        break;
      case Rotational:
      default:
        if (flags_.dealias_y())
          un.nonlinearityDealiased(Ubase_, Ubasey_, UbaseyT_, vortn_, fn_); 
        else
          un.nonlinearityRotational(Ubase_, Ubasey_, UbaseyT_, vortn_, fn_); 
      }
      //cout << "After nonlinearity calc..." << endl;
      //debugfunc(un);

      //cerr << "NSI: SUBSTEP==" << substep << "L2Norm(fn) == " << L2Norm(fn_) << endl;
      // Ratios of the time-stepping coefficients for this substep.
      Real ab = alpha_[substep]/beta_[substep]; 
      Real ngb = -gamma_[substep]/beta_[substep]; 
      Real nzb = -zeta_[substep]/beta_[substep];  
      Real abnu = ab*nu_;           

      // Update with the time-stepping algorithm.
      // Each (kx,kz) pair is an independent calculation, so parallelism
      // can be implemented at this level in the code.
      int kxmax = un.kxmax();
      int kzmax = un.kzmax();
      for (int nx=0; nx<Nx_; ++nx) {
        const int kx = un.kx(nx);
        for (int nz=0; nz<Nz_; ++nz) {
          const int kz = un.kz(nz);

          // The highest wavenumbers (always) and the aliased modes (when xz
          // dealiasing is on) are set to zero instead of being advanced.
          if ((kx==kxmax || kz==kzmax) ||
              (flags_.dealias_xz() && isAliasedMode(kx,kz))) {
            for (int ny=0; ny<Nyd_; ++ny) {
              un.cmplx(nx,ny,nz,0) = 0.0;
              un.cmplx(nx,ny,nz,1) = 0.0;
              un.cmplx(nx,ny,nz,2) = 0.0;
              qn.cmplx(nx,ny,nz,0) = 0.0;
            }
            // Move on to the next nz; the remaining modes of this nx must
            // still be either zeroed or advanced.
            continue;
          }

          // Put alpha/beta nu u_n into (uk,vk,wk),
          //  -(gamma/beta fn + zeta/beta fn1) into (fxk,fyk,fzk),
          // and alpha/beta qn into Pk.
          int ny; // MSVC++ FOR-SCOPE BUG
          for (ny=0; ny<Nyd_; ++ny) {
            uk_.set(ny, abnu*un.cmplx(nx,ny,nz,0));
            vk_.set(ny, abnu*un.cmplx(nx,ny,nz,1));
            wk_.set(ny, abnu*un.cmplx(nx,ny,nz,2));
            Pk_.set(ny, ab*qn.cmplx(nx,ny,nz,0));
            fxk_.set(ny, ngb*( fn_.cmplx(nx,ny,nz,0)) 
                     +   nzb*(fn1_.cmplx(nx,ny,nz,0)));
            fyk_.set(ny, ngb*( fn_.cmplx(nx,ny,nz,1)) 
                     +   nzb*(fn1_.cmplx(nx,ny,nz,1)));
            fzk_.set(ny, ngb*( fn_.cmplx(nx,ny,nz,2)) 
                     +   nzb*(fn1_.cmplx(nx,ny,nz,2)));
          }
          
          // Put abnu un" into Rx, etc., using Pyk_ as a tmp.
          diff2(uk_, Rxk_, Pyk_);
          diff2(vk_, Ryk_, Pyk_);
          diff2(wk_, Rzk_, Pyk_);

          // Compute y-comp of pressure gradient
          diff(Pk_, Pyk_); 
      
          // Add remainder of RHS terms to R. After this step,
          // R = a/b nu un" + [1/(b dt)- a/b nu kappa2] un - a/b grad P 
          //       - g/b fn - z/b fn1

          // At this point the variable uk_ == nu a/b un. So
          //    [1/(a nu dt) - kappa2] * uk 
          // == [1/(a nu dt) - kappa2] * nu a/b un 
          // == [1/(beta dt) - nu a/b kappa2] un 
          // which is what 4/09/01 notes call for in next term

          Real kappa2 = 4*pi*pi*(square(kx/Lx_) + square(kz/Lz_));
          Real c = 1.0/(alpha_[substep]*nu_*dt_) - kappa2;
          const Complex pi2IkxLx = I*(2*pi*kx/Lx_);
          const Complex pi2IkzLz = I*(2*pi*kz/Lz_);
          for (ny=0; ny<Nyd_; ++ny) {
            Rxk_.add(ny, c*uk_[ny] + fxk_[ny] - pi2IkxLx*Pk_[ny]);
            Ryk_.add(ny, c*vk_[ny] + fyk_[ny] - Pyk_[ny]);
            Rzk_.add(ny, c*wk_[ny] + fzk_[ny] - pi2IkzLz*Pk_[ny]);
          }

          // kx == kz == 0 is handled specially, and differently for const
          // press grad and const ubulk cases.
          if (kx==0 && kz==0) {

            // The x-comp for R has an additional term from base flow
            // curvature, in both cases of const mass flux and const press grad.
            // Rxk_ holds Nyd_ coefficients, so add at most that many (and
            // no more than Ubaseyy_ provides).
            const Real cbase = nu_*(ab+1.0);
            const int Nyy = (Ubaseyy_.length() < Nyd_) ? Ubaseyy_.length() : Nyd_;
            for (int my=0; my<Nyy; ++my)
              Rxk_.re[my] += cbase*Ubaseyy_[my];
            
            if (flags_.constraint == PressureGradient) { 
              // CONST PRESSURE GRADIENT 
              // The x comp of R has another term for the mean pressure 
              // gradient. Distinguish between dPdxAct and dPdxRef for the 
              // rare case that dPdxRef has just been changed externally. 
              // Thesis notes 4/09/01.
              Rxk_.re[0] -= ab*dPdxAct_ + dPdxRef_;

              tausolver_[substep][nx][nz].solve(uk_,vk_,wk_,Pk_, Rxk_,Ryk_,Rzk_);
              if (verify) 
                tausolver_[substep][nx][nz].verify(uk_,vk_,wk_,Pk_, Rxk_,Ryk_,Rzk_);
              UbulkAct_ = UbulkBase_ + uk_.re.mean();
              dPdxAct_ = dPdxRef_;
            }
            else { 
              // CONST MASS FLUX
              // Add the previous time-step's dPdx to the RHS. The next 
              // timestep's dPdx term appears on LHS as unknown.
              Rxk_.re[0] -= ab*dPdxAct_;

              // Use tausolver with additional variable and constraint:
              // free variable: dPdxAct at next time-step, 
              // constraint:    UbulkBase + mean(u) = UbulkRef.
              tausolver_[substep][nx][nz].solve(uk_, vk_, wk_, Pk_, dPdxAct_, 
                                                Rxk_, Ryk_, Rzk_, 
                                                UbulkRef_ - UbulkBase_);
              if (verify)
                tausolver_[substep][nx][nz].verify(uk_, vk_, wk_, Pk_, 
                                                   dPdxAct_,  Rxk_, Ryk_, 
                                                   Rzk_, UbulkRef_-UbulkBase_);

              UbulkAct_ = UbulkBase_ + uk_.re.mean(); // should == UbulkRef_
              //Real UbulkDiff = UbulkRef_ - UbulkAct_;
              //cout << "mass flux check : " << endl;
              //cout << "meanval(uk) == " << uk_.mean() << endl;
              //cout << "       dPdx == " << dPdx_ << endl;
              //cout << "UbulkRef_ - UbulkAct_ == " << UbulkDiff << endl;
            }
          }
          // for all cases but kx==kz==0, both const press grad and const flux.
          else {
            tausolver_[substep][nx][nz].solve(uk_,vk_,wk_,Pk_, Rxk_,Ryk_,Rzk_);
            if (verify)
              tausolver_[substep][nx][nz].verify(uk_,vk_,wk_,Pk_, Rxk_,Ryk_,Rzk_,true);
          }
          // Load solutions back into the full 3d data arrays.
          for (ny=0; ny<Nyd_; ++ny) {
            un.cmplx(nx,ny,nz,0) = uk_[ny];
            un.cmplx(nx,ny,nz,1) = vk_[ny];
            un.cmplx(nx,ny,nz,2) = wk_[ny];
            qn.cmplx(nx,ny,nz,0) = Pk_[ny];
          }
          // Zero the velocity's y modes above the dealiased range
          // (an empty loop when Nyd_ == Ny_).
          for (ny=Nyd_; ny<Ny_; ++ny) {
            un.cmplx(nx,ny,nz,0) = 0.0;
            un.cmplx(nx,ny,nz,1) = 0.0;
            un.cmplx(nx,ny,nz,2) = 0.0;
          }
        }
      }
      //cout << "Check un after calcs" << endl;
      //debugfunc(un);
    }
    t_ += dt_;
    if (printTime) {
      cout << t_ << ' ';
      //cout.form("%4.2f ", t_);
      cout.flush();
    }
    alternator_ = !alternator_;  // swap Convection/Divergence if Alternating
  }
  //cout << "After integration..." << endl;
  //debugfunc(un);

  // No need to compute CFL during substeps, since variable timesteps are
  // controlled externally to NSIntegrator (by checking NSIntegrator::CFL()
  // and calling NSIntegrator::reset(nu,dt) if necessary).
  cfl_ = un.CFLfactor();
  cfl_ *= flags_.dealias_xz() ? 2.0*pi/3.0*dt_ : pi*dt_;

  //cout << "After CFL calc..." << endl;
  //debugfunc(un);

  if (printTime) 
    cout << endl;
}


// Fill `nonlin` with terms built from the eigenfunction `oseig` at time t.
// The field is put in (Spectral, Physical) state and zeroed, then the
// x-index 1 and x-index Nx-1 modes (z-index 0, components 0 and 1) are
// set at every y gridpoint, and finally nonlin.chebyfft_y() is applied.
// The Nx-1 mode is built from the complex conjugates of the eigenfunction
// values and the opposite-sign phase factor.
//   omega : complex frequency entering the phase factors exp(-/+ i omega t)
void assignOrrSommNonlin(FlowField& nonlin, Real t, Complex omega, const BasisFunc& oseig) { 
  nonlin.setState(Spectral, Physical);
  nonlin.setToZero();

  // Eigenfunction components u, v and du/dy, all taken to physical y.
  ComplexChebyCoeff ueig = oseig.u();
  ComplexChebyCoeff veig = oseig.v();
  ChebyTransform trans(ueig.length());
  trans.makeSpectral(ueig); 
  ComplexChebyCoeff ueigy = diff(ueig);
  trans.makePhysical(ueig); 
  trans.makePhysical(veig); 
  trans.makePhysical(ueigy); 

  // x-mode indices receiving the first harmonic and its conjugate partner.
  const int nxFwd = 1;
  const int nxBwd = nonlin.numXmodes() - 1;

  const Complex phaseFwd = exp(-1.0*I*omega*t);
  const Complex phaseBwd = exp(1.0*I*omega*t);
  //Complex phase2 = exp(-2.0*I*omega*t);
  //Complex phase2s = exp(2.0*I*omega*t);

  const int Nygrid = nonlin.numYgridpts();
  for (int j=0; j<Nygrid; ++j) {
    const Real y = nonlin.y(j);

    const Complex u  = ueig[j];
    const Complex uy = ueigy[j];
    const Complex v  = veig[j];

    // Starred quantities are complex conjugates.
    const Complex us  = conjugate(u);
    const Complex uys = conjugate(uy);
    const Complex vs  = conjugate(v);

    const Complex c0 = v*I - uy; 
    const Complex c1 = (-1.0*vs)*I - uys; 

    // Zeroth-harmonic terms, currently disabled:
    //nonlin.cmplx(0,j,0,0) = -2*(c0*vs - c1*v);
    //nonlin.cmplx(0,j,0,1) =  2*(c0*us + c1*u  + 2*y*(1-y*y));

    nonlin.cmplx(nxFwd,j,0,0) = phaseFwd*(-2*y*v);
    nonlin.cmplx(nxFwd,j,0,1) = phaseFwd*(2*y*u  + c0*(1-y*y));
    nonlin.cmplx(nxBwd,j,0,0) = phaseBwd*(-2*y*vs);
    nonlin.cmplx(nxBwd,j,0,1) = phaseBwd*(2*y*us + c1*(1-y*y));

    // Second-harmonic terms, currently disabled (Nx2 == Nx-2):
    //nonlin.cmplx(2,  j,0,0) = phase2*(-c0*v);
    //nonlin.cmplx(2,  j,0,1) = phase2*(c0*u);
    //nonlin.cmplx(Nx2,j,0,0) = phase2s*(-c1*vs);
    //nonlin.cmplx(Nx2,j,0,1) = phase2*(c1*us);
  }
  nonlin.chebyfft_y();
}

void Uuq2p(const ChebyCoeff& Ubase, const FlowField& un, const FlowField& q, 
	   FlowField& p, bool linearized) {
  

  if (linearized) {
    p = q;
    return;
  }
  ChebyCoeff U(Ubase);
  ChebyTransform trans(Ubase.numModes());
  U.makePhysical(trans);
  
  int Nx=un.numXgridpts();
  int Ny=un.numYgridpts();
  int Nz=un.numZgridpts();

  p = q; 
  p.makePhysical();

  FlowField& u = (FlowField&) un;
  fieldstate uxzstate = u.xzstate();
  fieldstate uystate = u.ystate();
  u.makePhysical();

  // At this point 
  // u == utot - U
  // p == q == ptrue + 1/2 (utot dot utot)
  
  // Remove 1/2 utot dot utot from p
  for (int ny=0; ny<Ny; ++ny) {
    //Real Uny = Ubase[ny];
    for (int nx=0; nx<Nx; ++nx)
      for (int nz=0; nz<Nz; ++nz) 
	p(nx,ny,nz,0) -= 0.5*(square(u(nx,ny,nz,0)) + //  + Uny) + 
			      square(u(nx,ny,nz,1)) + 
			      square(u(nx,ny,nz,2))); 
  }
  u.makeState(uxzstate, uystate); 
  p.makeState(q.xzstate(), q.ystate()); 
}

void Uup2q(const ChebyCoeff& Ubase, const FlowField& un, const FlowField& p, 
	   FlowField& q, bool linearized) {

  if (linearized) {
    q=p;
    return;
  }


  ChebyCoeff U(Ubase);
  ChebyTransform trans(Ubase.numModes());
  U.makePhysical(trans);
 
  int Nx=un.numXgridpts();
  int Ny=un.numYgridpts();
  int Nz=un.numZgridpts();
  
  q = p;
  q.makePhysical();

  FlowField& u = (FlowField&) un;
  fieldstate uxzstate = u.xzstate();
  fieldstate uystate = u.ystate();

  u.makePhysical();
  // At this point 
  // u == utot - U
  // q == p
  
  // Add 1/2 utot dot utot from p
  for (int ny=0; ny<Ny; ++ny) {
    //Real Uny = Ubase[ny];
    for (int nx=0; nx<Nx; ++nx)
      for (int nz=0; nz<Nz; ++nz) 
	q(nx,ny,nz,0) += 0.5*(square(u(nx,ny,nz,0)) + //  + Uny) + 
			      square(u(nx,ny,nz,1)) + 
			      square(u(nx,ny,nz,2))); 
  }
  u.makeState(uxzstate, uystate); 
  q.makeState(p.xzstate(), p.ystate()); 
}

void changeBaseFlow(const ChebyCoeff& ubase0, const FlowField& ufluc0, 
		const FlowField& q0arg, 
		const ChebyCoeff& ubase1, FlowField& u1, FlowField& q1){
  ChebyCoeff& U0 = (ChebyCoeff&) ubase0;
  fieldstate U0state = U0.state();

  ChebyCoeff& U1 = (ChebyCoeff&) ubase1;
  fieldstate U1state = U1.state();

  FlowField& u0 = (FlowField&) ufluc0;
  fieldstate u0xzstate = u0.xzstate();
  fieldstate u0ystate = u0.ystate();
  
  FlowField& q0 = (FlowField&) q0arg;
  fieldstate q0xzstate = q0.xzstate();
  fieldstate q0ystate = q0.ystate();
  
  int Nx=u0.numXgridpts();
  int Ny=u0.numYgridpts();
  int Nz=u0.numZgridpts();

  u1 = u0; // want u1 FPF
  u1.makeState(Spectral, Physical);
  u0.makePhysical();
  q0.makePhysical();
  q1 = q0; // want q1 physical

  // At this point 
  // u1 == utot - U0
  // q1 == p + 1/2 u0 dot u0
  
  // Remove 1/2 u0 dot u0 from q1
  for (int ny=0; ny<Ny; ++ny)
    for (int nx=0; nx<Nx; ++nx)
      for (int nz=0; nz<Nz; ++nz) 
	q1(nx,ny,nz,0) -= 0.5*(square(u0(nx,ny,nz,0)) + 
			       square(u0(nx,ny,nz,1)) + 
			       square(u0(nx,ny,nz,2)));
  // At this point 
  // u1 == utot - U0
  // q1 == p 
  
  ChebyTransform t(U0.numModes());
  U0.makePhysical(t);
  U1.makePhysical(t);

  // Add U0-U1 to u1 
  ChebyCoeff delta_U(U0);
  delta_U -= U1;
  u1.addProfile(delta_U);
  u1.makePhysical();

  // At this point 
  // u1 == utot - U1
  // q1 == p 
  
  // Add 1/2 u1 dot u1 to q1
  for (int ny=0; ny<Ny; ++ny)
    for (int nx=0; nx<Nx; ++nx)
      for (int nz=0; nz<Nz; ++nz) 
	q1(nx,ny,nz,0) += 0.5*(square(u1(nx,ny,nz,0)) + 
			      square(u1(nx,ny,nz,1)) + 
			      square(u1(nx,ny,nz,2)));
  // At this point 
  // u1 == utot - U1
  // q1 == p + 1/2 u1 dot u1
  // et, voila

  U0.makeState(U0state,t);
  U1.makeState(U1state,t);
  u0.makeState(u0xzstate, u0ystate); 
  q0.makeState(q0xzstate, q0ystate); 
  u1.makeState(u0xzstate, u0ystate); 
  q1.makeState(q0xzstate, q0ystate); 
}
  
// Bundle the DNS option settings. If y dealiasing is requested together
// with a nonlinearity other than Rotational, a warning is printed and the
// nonlinearity is forced to Rotational.
DNSFlags::DNSFlags(MeanConstraint  constraint_,
                   TimeStepMethod  timestepping_,
                   NonlinearMethod nonlinearity_,
                   Dealiasing      dealiasing_,
                   Verbosity       verbosity_)
  : constraint(constraint_),
    timestepping(timestepping_),
    nonlinearity(nonlinearity_),
    dealiasing(dealiasing_),
    verbosity(verbosity_)
{
  // Nothing to correct unless y dealiasing meets a non-Rotational method.
  if (!dealias_y() || nonlinearity == Rotational)
    return;

  cerr << "DNSFlags::DNSFlags: DealiasY and DealiasXYZ work only with\n"
       << "Rotational nonlinearity in the current version of channelflow.\n"
       << "Setting nonlinearity to Rotational." << endl;
  nonlinearity = Rotational;
}

// True when the dealiasing setting includes the x and z directions.
bool DNSFlags::dealias_xz() const {
  if (dealiasing == DealiasXZ)
    return true;
  return dealiasing == DealiasXYZ;
}

// True when the dealiasing setting includes the y direction.
bool DNSFlags::dealias_y() const {
  if (dealiasing == DealiasY)
    return true;
  return dealiasing == DealiasXYZ;
}

// Number of substeps per time step for the selected time-stepping method:
// 1 for CNAB2, 3 for RK3 and for any other value.
int DNSFlags::numSubsteps() const {
  if (timestepping == CNAB2)
    return 1;
  return 3;
}

