/* flowfield.h: Class for N-dim Fourier x Chebyshev x Fourier expansions
 * Channelflow-0.9
 *
 * Copyright (C) 2001-2005  John F. Gibson  
 *  
 * jgibson@mail.sjcsf.edu  
 * John F. Gibson 
 * St. John's College
 * 1160 Camino de la Cruz Blanca
 * Santa Fe, NM 87501
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, U
 */

#include <fstream>
#include <iomanip>

#include "flowfield.h"
#include "chebyshev.h"

// File-scope tolerance constant (1e-4).
// NOTE(review): no use of EPSILON appears in the code visible here -- confirm
// whether it is still needed.
const Real EPSILON=1e-4;

// Default constructor: an empty field. All dimensions and lengths are zero,
// every data/scratch pointer and FFTW plan is null, and both the xz and y
// states start out as Spectral.
FlowField::FlowField()
  : Nx_(0), Ny_(0), Nz_(0), Nzpad_(0), Nzpad2_(0), Nd_(0),
    Lx_(0), Lz_(0), a_(0), b_(0),
    dealiasIO_(false),
    rdata_(0), cdata_(0), q_(0), Q_(0), scratch_(0),
    sin_table_(0), cos_table_(0),
    xzstate_(Spectral), ystate_(Spectral),
    xz_plan_(0), xz_iplan_(0), y_plan_(0), y_iplan_(0)
{}

// Sized constructor. Members are first set to the same empty values as the
// default constructor (except that the requested xz/y states are recorded),
// then resize() does the actual sizing, allocation and FFTW plan setup.
FlowField::FlowField(int Nx, int Ny, int Nz, int Nd, Real Lx, Real Lz,
                     Real a, Real b, fieldstate xzstate, fieldstate ystate,
                     int fftw_flags)
  : Nx_(0), Ny_(0), Nz_(0), Nzpad_(0), Nzpad2_(0), Nd_(0),
    Lx_(0), Lz_(0), a_(0), b_(0),
    dealiasIO_(false),
    rdata_(0), cdata_(0), q_(0), Q_(0), scratch_(0),
    sin_table_(0), cos_table_(0),
    xzstate_(xzstate), ystate_(ystate),
    xz_plan_(0), xz_iplan_(0), y_plan_(0), y_iplan_(0)
{
  // Not the classic initializer-list idiom, but it keeps every piece of
  // sizing/initialization code in one place (resize), at negligible cost.
  resize(Nx, Ny, Nz, Nd, Lx, Lz, a, b, fftw_flags);
}

// Copy constructor: builds a field with the same dimensions, domain, states
// and dealiasIO flag as f, then copies f's data array element by element.
//
// resize() allocates the arrays and already calls fftw_initialize(), which
// creates the FFTW plans. It must therefore NOT be followed by another
// fftw_initialize() call: that would overwrite the plan pointers and leak the
// plans resize() just created.
FlowField::FlowField(const FlowField& f)
  :
  Nx_(0),
  Ny_(0),
  Nz_(0),
  Nzpad_(0),
  Nzpad2_(0),
  Nd_(0),
  Lx_(0),
  Lz_(0),
  a_(0),
  b_(0),
  dealiasIO_(f.dealiasIO_),
  rdata_(0),
  cdata_(0),
  q_(0),
  Q_(0),
  scratch_(0),
  sin_table_(0),
  cos_table_(0),
  xzstate_(Spectral),
  ystate_(Spectral),
  xz_plan_(0),
  xz_iplan_(0),
  y_plan_(0),
  y_iplan_(0)

{
  resize(f.Nx_, f.Ny_, f.Nz_, f.Nd_, f.Lx_, f.Lz_, f.a_, f.b_);
  setState(f.xzstate_, f.ystate_);
  dealiasIO_ = f.dealiasIO_;

  // Raw copy of the (padded) real data array.
  int N = Nd_*Ny_*Nx_*Nzpad_;
  for (int i=0; i<N; ++i)
    rdata_[i] = f.rdata_[i];
}

// Construct a field from the binary file "<filebase>.ff".
//
// The header (Nx, Ny, Nz, Nd, xz-state, y-state, Lx, Lz, a, b, dealiasIO) is
// read first, in the same order binarySave() writes it, and the field is then
// sized with resize(). If the file was stored dealiased and the xz state is
// Spectral, only the retained modes are present in the file; all other
// entries are set to zero. Otherwise the whole padded data array is read.
// Aborts if the file cannot be opened.
FlowField::FlowField(const string& filebase)
  :
  Nx_(0),
  Ny_(0),
  Nz_(0),
  Nzpad_(0),
  Nzpad2_(0),
  Nd_(0),
  Lx_(0.0),
  Lz_(0.0),
  a_(0.0),
  b_(0.0),
  dealiasIO_(false),
  rdata_(0),
  cdata_(0),
  q_(0),
  Q_(0),
  scratch_(0),
  sin_table_(0),
  cos_table_(0),
  xzstate_(Spectral),
  ystate_(Spectral),
  xz_plan_(0),
  xz_iplan_(0),
  y_plan_(0),
  y_iplan_(0)
{

  string filename(filebase);
  filename += string(".ff");
  ifstream is(filename.c_str());
  if (!is.good()) {
    cerr << "FlowField::FlowField(filename) : can't open file " << filename << endl;
    abort();
  }

  read(is, Nx_);
  read(is, Ny_);
  read(is, Nz_);
  read(is, Nd_);
  read(is, xzstate_);
  read(is, ystate_);
  read(is, Lx_);
  read(is, Lz_);
  read(is, a_);
  read(is, b_);
  read(is, dealiasIO_);
  resize(Nx_, Ny_, Nz_, Nd_, Lx_, Lz_, a_, b_);

  // Read data only for non-aliased modes, assume 0 for aliased.
  if (dealiasIO_ == true && xzstate_ == Spectral) {
    int Nxd=2*(Nx_/6);
    int Nzd=2*(Nz_/3)+1;

    // The traversal order (i, ny, nx, nz) matches binarySave().
    for (int i=0; i<Nd_; ++i) {
      for (int ny=0; ny<Ny_; ++ny) {

        // Low nx block: stored entries, then zero the rest of each row.
        for (int nx=0; nx<=Nxd; ++nx) {
          for (int nz=0; nz<=Nzd; ++nz)
            read(is, rdata_[flatten(nx,ny,nz,i)]);
          for (int nz=Nzd+1; nz<Nzpad_; ++nz)
            rdata_[flatten(nx,ny,nz,i)] = 0.0;
        }

        // Middle nx block: not stored in the file, zero whole rows.
        // (The previous bounds "nx<=Nxd" / "nz<=Nzpad_" made this loop dead
        // and would have overrun the row had it executed.)
        for (int nx=Nxd+1; nx<Nx_-Nxd; ++nx)
          for (int nz=0; nz<Nzpad_; ++nz)
            rdata_[flatten(nx,ny,nz,i)] = 0.0;

        // High nx block: stored entries, then zero the rest of each row.
        for (int nx=Nx_-Nxd; nx<Nx_; ++nx) {
          for (int nz=0; nz<=Nzd; ++nz)
            read(is, rdata_[flatten(nx,ny,nz,i)]);
          for (int nz=Nzd+1; nz<Nzpad_; ++nz)
            rdata_[flatten(nx,ny,nz,i)] = 0.0;
        }
      }
    }
  }
  else {
    // Full (non-dealiased) storage: the file holds the whole padded array.
    int N = Nd_*Ny_*Nx_*Nzpad_;
    for (int i=0; i<N; ++i)
      read(is, rdata_[i]);
  }
}

// Returns a length-Nx_ vector whose nth entry is x(n).
Vector FlowField::xgridpts() const {
  Vector grid(Nx_);
  for (int n=0; n<Nx_; ++n) {
    grid[n] = x(n);
  }
  return grid;
}
// Returns the Ny_ y-gridpoints center + radius*cos(pi*n/(Ny_-1)), where
// center and radius are the midpoint and half-width of [a_, b_].
Vector FlowField::ygridpts() const {
  Vector grid(Ny_);
  const Real center = 0.5*(b_+a_);
  const Real radius = 0.5*(b_-a_);
  const Real dtheta = pi/(Ny_-1);
  for (int n=0; n<Ny_; ++n) {
    grid[n] = center + radius*cos(dtheta*n);
  }
  return grid;
}
// Returns a length-Nz_ vector whose nth entry is z(n).
Vector FlowField::zgridpts() const {
  Vector grid(Nz_);
  for (int n=0; n<Nz_; ++n) {
    grid[n] = z(n);
  }
  return grid;
}

// Copy assignment: resize to f's dimensions and domain, adopt its states and
// dealiasIO flag, then copy the padded real data array.
FlowField& FlowField::operator=(const FlowField& f) {
  // Self-assignment must be a no-op: resize() frees and zero-fills rdata_,
  // so copying "from f" afterwards would wipe the field's own data.
  if (this == &f)
    return *this;

  resize(f.Nx_, f.Ny_, f.Nz_, f.Nd_, f.Lx_, f.Lz_, f.a_, f.b_);
  setState(f.xzstate_, f.ystate_);
  dealiasIO_ = f.dealiasIO_;
  int Ntotal = Nx_ * Ny_ * Nzpad_ * Nd_;
  for (int i=0; i<Ntotal; ++i)
    rdata_[i] = f.rdata_[i];
  return *this;
}

// Destructor: releases the lookup tables, scratch buffers and data array, and
// destroys whichever FFTW plans exist. cdata_ is only an alias of rdata_
// (see resize) and is therefore not freed separately.
FlowField::~FlowField() {
  delete[] cos_table_;
  delete[] sin_table_;
  delete[] Q_;
  delete[] q_;
  delete[] scratch_;
  delete[] rdata_;

  // Plans are null for default-constructed or zero-sized fields; guard the
  // destroy calls the same way resize() does.
  if (y_plan_)   rfftwnd_destroy_plan(y_plan_);
  if (y_iplan_)  rfftwnd_destroy_plan(y_iplan_);
  if (xz_plan_)  rfftwnd_destroy_plan(xz_plan_);
  if (xz_iplan_) rfftwnd_destroy_plan(xz_iplan_);
}

// Discard all current storage and FFTW plans, then re-create the field with
// the given grid sizes (Nx, Ny, Nz), number of components Nd, box lengths
// (Lx, Lz) and y-interval [a, b]. The new data array is zero-filled.
// fftw_flags is forwarded to fftw_initialize(). The xz/y states are left
// unchanged.
void FlowField::resize(int Nx, int Ny, int Nz, int Nd, Real Lx, Real Lz,
                       Real a, Real b, int fftw_flags) {
  delete[] cos_table_;
  delete[] sin_table_;
  delete[] Q_;
  delete[] q_;
  delete[] scratch_;
  delete[] rdata_;

  // Destroy existing plans and reset the pointers. fftw_initialize() only
  // creates plans for non-zero sizes, so without the reset a resize to a
  // zero-sized grid would leave dangling plan pointers that get destroyed a
  // second time later.
  if (y_plan_)   { rfftwnd_destroy_plan(y_plan_);   y_plan_ = 0; }
  if (y_iplan_)  { rfftwnd_destroy_plan(y_iplan_);  y_iplan_ = 0; }
  if (xz_plan_)  { rfftwnd_destroy_plan(xz_plan_);  xz_plan_ = 0; }
  if (xz_iplan_) { rfftwnd_destroy_plan(xz_iplan_); xz_iplan_ = 0; }

  Nx_ = Nx;
  Ny_ = Ny;
  Nz_ = Nz;
  Nd_ = Nd;
  Lx_ = Lx;
  Lz_ = Lz;
  a_ = a;
  b_ = b;

  assert(Nx_>=0);
  assert(Ny_>=0);
  assert(Nz_>=0);
  assert(Nd_>=0);
  assert(Lx_>=0);
  assert(Lz_>=0);
  assert(b_ >= a_);

  // z rows are padded to 2*(Nz/2+1) reals, i.e. Nz/2+1 complex numbers, so
  // the same storage can be viewed as real (rdata_) or complex (cdata_).
  Nzpad_ = 2*(Nz_/2+1);
  Nzpad2_ = Nz_/2+1;

  int N = Nd_*Ny_*Nx_*Nzpad_;
  rdata_ = new Real[N];
  cdata_ = (Complex*)rdata_;

  for (int i=0; i<N; ++i)
    rdata_[i] = 0.0;

  // Work arrays for the y transform, sized from Ny_-1 (clamped at 0).
  int Nybar = (Ny_ > 0) ? Ny_-1 : 0;
  q_ = new Real[Nybar];
  Q_ = new fftw_complex[Nybar/2 +1];
  scratch_ = new Real[Ny_];
  sin_table_ = new Real[Nybar];
  cos_table_ = new Real[Nybar];

  fftw_initialize(fftw_flags);
}
  
void FlowField::fftw_initialize(int fftw_flags) {
  int Ny1 = Ny_-1;
  Real piNy1 = pi/Ny1;
  for (int j=0; j<Ny1; ++j) {
    sin_table_[j] = sin(j*piNy1);
    cos_table_[j] = cos(j*piNy1);
  }
  int xz_flags = fftw_flags | FFTW_IN_PLACE;
  int y_flags = fftw_flags;
  
  if (Nx_ !=0 && Nz_ !=0) {
    xz_plan_ = rfftw2d_create_plan(Nx_, Nz_, FFTW_REAL_TO_COMPLEX,  xz_flags);
    xz_iplan_ = rfftw2d_create_plan(Nx_, Nz_, FFTW_COMPLEX_TO_REAL, xz_flags);
  }
  if (Ny_ != 0) {
    int rank=1;
    int n[1];
    n[0] = Ny1;
    y_plan_  = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, y_flags);
    y_iplan_ = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, y_flags);
  }
}

// Change the box lengths to (Lx, Lz), multiplying component 1 of every mode
// by Lx_/Lx and component 2 by (Lx*Lz_)/(Lz*Lx_). Component 0 is left as is.
// Requires the field to be Spectral in all directions.
void FlowField::rescale(Real Lx, Real Lz) {
  assertState(Spectral, Spectral);
  const Real factor1 = Lx_/Lx;
  const Real factor2 = (Lx*Lz_)/(Lz*Lx_);

  for (int ny=0; ny<numYmodes(); ++ny) {
    for (int nx=0; nx<numXmodes(); ++nx) {
      for (int nz=0; nz<numZmodes(); ++nz) {
        cmplx(nx,ny,nz,1) *= factor1;
        cmplx(nx,ny,nz,2) *= factor2;
      }
    }
  }

  Lx_ = Lx;
  Lz_ = Lz;
}

void FlowField::interpolate(const FlowField& u) {
  assert(u.Lx() == Lx_ && u.Lz() == Lz_);
  assert(a_>=u.a() && b_<=u.b());
  assert(Nd_ == u.Nd_);
  u.assertState(Spectral, Spectral);

  int kxmn = Greater(kxmin(), u.kxmin());
  int kxmx = lesser(kxmax(), u.kxmax());
  int kzmn = Greater(kzmin(), u.kzmin()); // == 0
  int kzmx = lesser(kzmax(), u.kzmax());
  int uNy = u.Ny();

  setState(Spectral, Spectral);
  setToZero();

  // chebyshev discretization is the same, copy coeffs
  if (Ny_ == uNy &&  a_ == u.a_ && b_ == u.b_) {
    for (int i=0; i<Nd_; ++i) {
      for (int kx=kxmn; kx<=kxmx; ++kx) {
	int umx = u.mx(kx);
	int tmx = mx(kx);
	for (int kz=kzmn; kz<=kzmx; ++kz) {
	  int umz = u.mz(kz);
	  int tmz = mz(kz); 
	  for (int ny=0; ny<uNy; ++ny) 
	    cmplx(tmx, ny, tmz, i) = u.cmplx(umx, ny, umz, i);
	}
      }
    }
  }  
  else {
    // chebyshev discretization is the different, interpolate coeffs
    ComplexChebyCoeff uprof(u.Ny(), u.a(), u.b(), Spectral);
    ComplexChebyCoeff tprof(Ny_, a_, b_, Physical);
    ChebyTransform trans(Ny_);
    
    for (int i=0; i<Nd_; ++i) {
      for (int kx=kxmn; kx<=kxmx; ++kx) {
	int umx = u.mx(kx);
	int tmx = mx(kx);
	for (int kz=kzmn; kz<=kzmx; ++kz) {
	  int umz = u.mz(kz);
	  int tmz = mz(kz); 
	  for (int uny=0; uny<uNy; ++uny) 
	    uprof.set(uny, u.cmplx(umx, uny, umz, i));
	  tprof.interpolate(uprof);
	  tprof.makeSpectral(trans);
	  for (int tny=0; tny<Ny_; ++tny) 
	    cmplx(tmx, tny, tmz, i) = tprof[tny];
	}
      }
    }
  }
  //makeSpectral();
}

/*****************************************************
void FlowField::reflect(const FlowField& u) {
  assert(u.Lx() == Lx_ && u.Lz() == Lz_);
  assert((a_+b_)/2 == u.a() && b_ <= u.b() && b_ > u.a());
  assert(Nd_ == u.Nd_);
  u.assertState(Spectral, Spectral);

  setState(Spectral, Physical);
  setToZero();
  
  ComplexChebyCoeff uprof(u.Ny(), u.a(), u.b(), Spectral);
  ComplexChebyCoeff tprof(Ny_, a_, b_, Physical);

  int kxmn = Greater(kxmin(), u.kxmin());
  int kxmx = lesser(kxmax(), u.kxmax());
  int kzmn = Greater(kzmin(), u.kzmin()); // == 0
  int kzmx = lesser(kzmax(), u.kzmax());
  int uNy = u.Ny();

  for (int i=0; i<Nd_; ++i) {
    // Copy kx==0 coeffs.
    parity p = (i==1) ? Even : Odd;
    for (int kx=kxmn; kx<=kxmx; ++kx) {
      int unx = u.nx(kx);
      int tnx = nx(kx);
      for (int kz=kzmn; kz<=kzmx; ++kz) {
	int unz = u.nz(kz);
	int tnz = nz(kz);
	for (int uny=0; uny<uNy; ++uny) 
	  uprof.set(uny, u.cmplx(unx, uny, unz, i));
	tprof.reflect(uprof,p);
	for (int tny=0; tny<Ny_; ++tny) 
	  cmplx(tnx, tny, tnz, i) = tprof[tny];
      }
    }
  }
  //makeSpectral();
}
********************************************/

/*****************************************************************
void FlowField::interpolate(const FlowField& U) {
  U.assertState(Spectral, Spectral);
  assert(congruent(U));
  setToZero();
  // Interpolation of (x,z) variation is done by simply copying the common
  // Spectral modes. The Spectral mode coefficients can be copied, too,
  // if we're increasing the number of Spectral modes. But if we're 
  // truncating the expansion, we have to use interpolation in order to 
  // meet the boundary conditions at y=+-1.
  int kxm = (kxmax() < U.kxmax()) ? kxmax()-1 : U.kxmax(); 
  int kzm = lesser(kzmax(), U.kzmax());

  //char* t = " x ";
  //cout << "FlowField::interpolate() {" << endl;
  //cout << "from " << U.Nx_ << t << U.Ny_ << t << U.Nz_ << t << U.Nd_ << endl;
  //cout << "  to " << Nx_   << t <<   Ny_ << t <<   Nz_ << t <<   Nd_ << endl;
  //cout << "kxm,kzm == " << kxm << ',' << kzm << endl;
  // Rescale velocities to keep div==0 and u velocities unchanged.

  setState(Spectral, Spectral);
  for (int i=0; i<Nd_; ++i) {
    // Copy kx==0 coeffs.
    for (int kz=0; kz<=kzm; ++kz) 
      for (int ny=0; ny<Ny_; ++ny) 
	cmplx(0, ny, kz, i) = U.cmplx(0, ny, kz, i);
    // Copy kx>0 coeffs, which have negative counterparts.
    for (int kx=1; kx<=kxm; ++kx) {
      for (int kz=0; kz<=kzm; ++kz) 
	for (int ny=0; ny<Ny_; ++ny) {
	  cmplx(kx, ny, kz, i) = U.cmplx(kx, ny, kz, i);
	  cmplx(Nx_-kx, ny, kz, i) = U.cmplx(U.Nx_-kx, ny, kz,i);
	}
    }
  }
  return;
}
****************************************************************/

// Replace the current FFTW plans with plans created using
// FFTW_MEASURE | FFTW_USE_WISDOM (plus FFTW_IN_PLACE for the xz plans).
void FlowField::optimizeFFTW() {
  // Drop the existing plans first.
  rfftwnd_destroy_plan(xz_plan_);
  rfftwnd_destroy_plan(xz_iplan_);
  rfftwnd_destroy_plan(y_plan_);
  rfftwnd_destroy_plan(y_iplan_);

  const int flags_xz = FFTW_MEASURE | FFTW_IN_PLACE | FFTW_USE_WISDOM;
  const int flags_y  = FFTW_MEASURE | FFTW_USE_WISDOM;

  int dims[1];
  dims[0] = Ny_-1;
  const int rank = 1;

  xz_plan_ = rfftw2d_create_plan(Nx_, Nz_, FFTW_REAL_TO_COMPLEX,  flags_xz);
  xz_iplan_ = rfftw2d_create_plan(Nx_, Nz_, FFTW_COMPLEX_TO_REAL, flags_xz);
  y_plan_  = rfftwnd_create_plan(rank, dims, FFTW_REAL_TO_COMPLEX, flags_y);
  y_iplan_ = rfftwnd_create_plan(rank, dims, FFTW_COMPLEX_TO_REAL, flags_y);
}

// Extract the y-profile of component i at Fourier mode indices (mx, mz).
// The returned profile carries this field's y-interval and y-state.
// Requires xz state Spectral.
ComplexChebyCoeff FlowField::profile(int mx, int mz, int i) const {
  assert(xzstate_== Spectral);
  ComplexChebyCoeff column(Ny_, a_, b_, ystate_);
  for (int row=0; row<Ny_; ++row) {
    column.set(row, cmplx(mx, row, mz, i));
  }
  return column;
}

void FlowField::addProfile(const ChebyCoeff& profile, int i) {
  assert(xzstate_== Spectral);
  assert(ystate_ == profile.state());
  int kx=0;
  int kz=0;
  int m_x = mx(kx);
  int m_z = mz(kz);
  for (int ny=0; ny<Ny_; ++ny) 
    cmplx(m_x, ny, m_z, i) += Complex(profile[ny], 0.0);
}

void FlowField::addProfile(const ComplexChebyCoeff& profile, 
			   int m_x, int m_z, int i, bool addconj) {
  assert(xzstate_== Spectral);
  assert(ystate_ == profile.state());
  int k_x = kx(m_x);
  int k_z = kz(m_z);
  for (int ny=0; ny<Ny_; ++ny) 
    cmplx(m_x, ny, m_z, i) += profile[ny];
  if (addconj && k_x !=0 && k_z ==0) {
    m_x = mx(-k_x);
    for (int ny=0; ny<Ny_; ++ny) 
      cmplx(m_x,ny,m_z, i) += conjugate(profile[ny]);
  }
}

// Extract the mode at indices (mx, mz) as a BasisFunc carrying the matching
// wavenumbers, box lengths, y-interval and y-state. At most the first three
// components of the field are copied. Requires xz state Spectral.
BasisFunc FlowField::profile(int mx, int mz) const {
  assert(xzstate_== Spectral);
  const int wave_x = kx(mx);
  const int wave_z = kz(mz);
  BasisFunc mode(Ny_, wave_x, wave_z, Lx_, Lz_, a_, b_, ystate_);

  const int ncomp = lesser(Nd_, 3);
  for (int comp=0; comp<ncomp; ++comp) {
    for (int row=0; row<Ny_; ++row) {
      mode[comp].set(row, cmplx(mx, row, mz, comp));
    }
  }
  return mode;
}

void FlowField::addProfile(const BasisFunc& profile, bool addconj) {
  assert(xzstate_== Spectral);
  assert(ystate_ == profile.state());
  assert(Nd_ == 3);
  int m_x = mx(profile.kx());
  int m_z = mz(profile.kz());
  for (int i=0; i<Nd_; ++i)
    for (int ny=0; ny<Ny_; ++ny) 
      cmplx(m_x, ny, m_z, i) += profile[i][ny];

  if (addconj && profile.kx() !=0 && profile.kz() ==0) {
    m_x = mx(-profile.kx());
    for (int i=0; i<Nd_; ++i)
      for (int ny=0; ny<Ny_; ++ny) 
	cmplx(m_x, ny, m_z, i) += conjugate(profile[i][ny]);
  }
}

// True when v has the same grid sizes (Nx, Ny, Nz), box lengths and
// y-interval as this field. Nd and the transform states are not compared.
bool FlowField::geomCongruent(const FlowField& v) const {
  if (Nx_ != v.Nx_) return false;
  if (Ny_ != v.Ny_) return false;
  if (Nz_ != v.Nz_) return false;
  if (!(Lx_ == v.Lx_)) return false;
  if (!(Lz_ == v.Lz_)) return false;
  if (!(a_ == v.a_)) return false;
  return (b_ == v.b_);
}

// True when v matches this field in grid sizes, number of components, box
// lengths, y-interval and both transform states.
bool FlowField::congruent(const FlowField& v) const {
  if (Nx_ != v.Nx_) return false;
  if (Ny_ != v.Ny_) return false;
  if (Nz_ != v.Nz_) return false;
  if (Nd_ != v.Nd_) return false;
  if (!(Lx_ == v.Lx_)) return false;
  if (!(Lz_ == v.Lz_)) return false;
  if (!(a_ == v.a_)) return false;
  if (!(b_ == v.b_)) return false;
  if (!(xzstate_ == v.xzstate_)) return false;
  return (ystate_ == v.ystate_);
}

// True when phi matches this field in Ny, box lengths, y-interval and
// y-state.
bool FlowField::congruent(const BasisFunc& phi) const {
  if (!(Ny_==phi.Ny())) return false;
  if (!(Lx_==phi.Lx())) return false;
  if (!(Lz_==phi.Lz())) return false;
  if (!(a_==phi.a())) return false;
  if (!(b_==phi.b())) return false;
  return (ystate_==phi.state());
}


// Scale every entry of the padded real data array by x.
FlowField& FlowField::operator*=(Real x) {
  const int count = Nx_ * Ny_ * Nzpad_ * Nd_;
  for (int idx=0; idx<count; ++idx) {
    rdata_[idx] *= x;
  }
  return *this;
}

// Multiply every complex coefficient by z. Requires xz state Spectral.
FlowField& FlowField::operator*=(Complex z) {
  assert(xzstate_ == Spectral);
  const int count = Nx_ * Ny_ * Nzpad2_ * Nd_;
  for (int idx=0; idx<count; ++idx) {
    cdata_[idx] *= z;
  }
  return *this;
}

// Add the real profile U to component 0 at mode indices (0, 0).
// Requires xz state Spectral and matching y-state.
FlowField& FlowField::operator+=(const ChebyCoeff& U) {
  assert(xzstate_== Spectral);
  assert(ystate_ == U.state());
  for (int row=0; row<Ny_; ++row) {
    cmplx(0, row, 0, 0) += Complex(U[row], 0.0);
  }
  return *this;
}

// Subtract the real profile U from component 0 at mode indices (0, 0).
// Requires xz state Spectral and matching y-state.
FlowField& FlowField::operator-=(const ChebyCoeff& U) {
  assert(xzstate_== Spectral);
  assert(ystate_ == U.state());
  for (int row=0; row<Ny_; ++row) {
    cmplx(0, row, 0, 0) -= Complex(U[row], 0.0);
  }
  return *this;
}

// Add the complex profile U to component 0 at mode indices (0, 0).
// Requires xz state Spectral and matching y-state.
FlowField& FlowField::operator+=(const ComplexChebyCoeff& U) {
  assert(xzstate_== Spectral);
  assert(ystate_ == U.state());
  for (int row=0; row<Ny_; ++row) {
    cmplx(0, row, 0, 0) += U[row];
  }
  return *this;
}

// Subtract the complex profile U from component 0 at mode indices (0, 0).
// Requires xz state Spectral and matching y-state.
FlowField& FlowField::operator-=(const ComplexChebyCoeff& U) {
  assert(xzstate_== Spectral);
  assert(ystate_ == U.state());
  for (int row=0; row<Ny_; ++row) {
    cmplx(0, row, 0, 0) -= U[row];
  }
  return *this;
}

// Add a BasisFunc to this field; forwards to addProfile().
FlowField& FlowField::operator += (const BasisFunc& profile)
{
  addProfile(profile);
  return *this;
}
// Subtract a BasisFunc: a copy of it multiplied by -1 is passed to
// addProfile().
FlowField& FlowField::operator -= (const BasisFunc& profile)
{
  BasisFunc negated(profile);
  negated *= -1;
  addProfile(negated);
  return *this;
}
// Entry-wise addition of a congruent field (asserted).
FlowField& FlowField::operator+=(const FlowField& U) {
  assert(congruent(U));
  const int count = Nx_ * Ny_ * Nzpad_ * Nd_;
  for (int idx=0; idx<count; ++idx) {
    rdata_[idx] += U.rdata_[idx];
  }
  return *this;
}
// Entry-wise subtraction of a congruent field (asserted).
FlowField& FlowField::operator -= (const FlowField& U) {
  assert(congruent(U));
  const int count = Nx_ * Ny_ * Nzpad_ * Nd_;
  for (int idx=0; idx<count; ++idx) {
    rdata_[idx] -= U.rdata_[idx];
  }
  return *this;
}
// Entry-wise product with a congruent field (asserted). In the Spectral xz
// state the entries are multiplied as complex numbers, otherwise as reals.
FlowField& FlowField::operator *= (const FlowField& U) {
  assert(congruent(U));

  if (xzstate_ == Spectral) {
    const int ncomplex = Nx_ * Ny_ * Nzpad2_ * Nd_;
    for (int idx=0; idx<ncomplex; ++idx) {
      cdata_[idx] *= U.cdata_[idx];
    }
    return *this;
  }

  const int nreal = Nx_ * Ny_ * Nzpad_ * Nd_;
  for (int idx=0; idx<nreal; ++idx) {
    rdata_[idx] *= U.rdata_[idx];
  }
  return *this;
}
  
void FlowField::realfft_xz() {
  assert (xzstate_ == Physical);
  // args are (plan, howmany, in, istride, idist, ostride, odist)
  rfftwnd_real_to_complex(xz_plan_, Nd_*Ny_, rdata_, 1, Nx_*Nzpad_, 0,0,0);
  int Ntotal = Nd_* Nx_ * Ny_ * Nzpad_;
  Real scale = 1.0/(Nx_*Nz_);
  for (int i=0; i<Ntotal; ++i)
    rdata_[i] *= scale;
  xzstate_ = Spectral;
}

// Inverse xz transform, Spectral -> Physical. Runs the in-place
// complex-to-real plan over all Nd_*Ny_ xz-planes; no scaling is applied.
void FlowField::irealfft_xz() {
  assert(xzstate_ == Spectral);

  const int nplanes = Nd_*Ny_;
  const int plane_dist = Nx_*Nzpad_/2;   // distance counted in complex entries
  rfftwnd_complex_to_real(xz_iplan_, nplanes, (fftw_complex*)rdata_, 1, plane_dist, 0,0,0);

  xzstate_ = Physical;
}

// Forward y transform, Physical -> Spectral, applied in place to every
// y-column of the data array (one column per (i, nx, nz), with nz running
// over the full padded row). Each column is copied into scratch_, scaled by
// 2/(Ny_-1), transformed using a length-(Ny_-1) real FFT plus the
// sin/cos lookup tables, and written back with the 0th coefficient halved.
// Asserts that the y state is Physical on entry; sets it to Spectral on exit.
void FlowField::chebyfft_y() {
  assert(ystate_ == Physical);
  //int rank =1;
  int Ny1 = Ny_-1;
  // NOTE(review): n is filled here but never read in this function.
  int n[1];
  n[0] = Ny1;

  // Scale factor applied to the input data before the transform.
  Real c = 2.0/(Ny_-1);

  for (int i=0; i<Nd_; ++i) 
    for (int nx=0; nx<Nx_; ++nx) 
      for (int nz=0; nz<Nzpad_; ++nz) {
	Real a;
	Real b;

	// Copy data spread through memory into a stride-1 scratch array.
	int j; // MSVC++ FOR-SCOPE BUG
	for (j=0; j<Ny_; ++j) 
	  scratch_[j] = c*rdata_[flatten(nx, j, nz, i)];
	
	// Transform data according to NR formula 
	// (combine entries j and Ny1-j into the auxiliary array q_)
	for (j=0; j<Ny1; ++j) {
	  a = scratch_[j];
	  b = scratch_[Ny1-j];
	  q_[j] = 0.5*(a+b) - sin_table_[j]*(a-b);
	}

	// Real FFT of the auxiliary array: q_ -> Q_.
	rfftwnd_one_real_to_complex(y_plan_, q_, Q_);

	// Initialize recursion for unpacking transform
	// (sum must be formed from the original scratch_ values, so
	// scratch_[0] and scratch_[Ny1] are read before being overwritten)
	Real sum = 0.5*(scratch_[0] - scratch_[Ny1]);
	scratch_[0] = Q_[0].re;

	for (j=1; j<Ny1; ++j)
	  sum += scratch_[j]*cos_table_[j];
	scratch_[1] = sum;
	
	// Even outputs come from Re(Q_), odd outputs from the running sum.
	for (int k=1; k<Ny1/2; ++k) {      // N is even 
	  scratch_[2*k]   = Q_[k].re;
	  scratch_[2*k+1] = (sum -= Q_[k].im); 
	}
	scratch_[Ny1] = Q_[Ny1/2].re;

	// Copy transformed data back into main data array

	// 0th elem is different due to reln between cheby and cos transforms.
	rdata_[flatten(nx, 0, nz, i)] = 0.5*scratch_[0];
	for (j=1; j<Ny_; ++j) 
	  rdata_[flatten(nx, j, nz, i)] = scratch_[j];
      }
  ystate_ = Spectral;
  return;
}


// Inverse y transform, Spectral -> Physical, applied in place to every
// y-column of the data array (one column per (i, nx, nz), with nz running
// over the full padded row). Uses the same real-FFT procedure as
// chebyfft_y(), but the 0th coefficient is doubled on input and no overall
// scale factor or output halving is applied.
// Asserts that the y state is Spectral on entry; sets it to Physical on exit.
void FlowField::ichebyfft_y() {
  assert(ystate_ == Spectral);

  //int rank =1;
  int Ny1 = Ny_-1;
  // NOTE(review): n is filled here but never read in this function.
  int n[1];
  n[0] = Ny1;

  for (int i=0; i<Nd_; ++i) 
    for (int nx=0; nx<Nx_; ++nx) 
      for (int nz=0; nz<Nzpad_; ++nz) {
	Real a;
	Real b;

	// Copy data spread through memory into a stride-1 scratch array.
	// 0th elem is different due to reln between cheby and cos transforms.
	scratch_[0] = 2*rdata_[flatten(nx, 0, nz, i)];
	int j; // MSVC++ FOR-SCOPE BUG
	for (j=1; j<Ny_; ++j) 
	  scratch_[j] = rdata_[flatten(nx, j, nz, i)];
	
	// Transform data according to NR formula 
	// (combine entries j and Ny1-j into the auxiliary array q_)
	for (j=0; j<Ny1; ++j) {
	  a = scratch_[j];
	  b = scratch_[Ny1-j];
	  q_[j] = 0.5*(a+b) - sin_table_[j]*(a-b);
	}

	// Real FFT of the auxiliary array: q_ -> Q_.
	rfftwnd_one_real_to_complex(y_plan_, q_, Q_);

	// Initialize recursion for unpacking transform
	// (sum must be formed from the original scratch_ values, so
	// scratch_[0] and scratch_[Ny1] are read before being overwritten)
	Real sum = 0.5*(scratch_[0] - scratch_[Ny1]);
	scratch_[0] = Q_[0].re;

	for (j=1; j<Ny1; ++j)
	  sum += scratch_[j]*cos_table_[j];
	scratch_[1] = sum;
	
	// Even outputs come from Re(Q_), odd outputs from the running sum.
	for (int k=1; k<Ny1/2; ++k) {      // N is even 
	  scratch_[2*k]   = Q_[k].re;
	  scratch_[2*k+1] = (sum -= Q_[k].im); 
	}
	scratch_[Ny1] = Q_[Ny1/2].re;

	// Copy transformed data back into main data array
	for (j=0; j<Ny_; ++j) 
	  rdata_[flatten(nx, j, nz, i)] = scratch_[j];
	}
  ystate_ = Physical;
  return;
}

// Transform xz to Spectral if it is currently Physical; otherwise a no-op.
void FlowField::makeSpectral_xz() {
  if (xzstate_==Physical) {
    realfft_xz();
  }
}
// Transform xz to Physical if it is currently Spectral; otherwise a no-op.
void FlowField::makePhysical_xz() {
  if (xzstate_==Spectral) {
    irealfft_xz();
  }
}
// Transform y to Spectral if it is currently Physical; otherwise a no-op.
void FlowField::makeSpectral_y() {
  if (ystate_==Physical) {
    chebyfft_y();
  }
}
// Transform y to Physical if it is currently Spectral; otherwise a no-op.
void FlowField::makePhysical_y() {
  if (ystate_==Spectral) {
    ichebyfft_y();
  }
}
// Bring the field to Spectral in all directions: xz first, then y.
void FlowField::makeSpectral() {
  makeSpectral_xz();
  makeSpectral_y();
}
// Bring the field to Physical in all directions: y first, then xz.
void FlowField::makePhysical() {
  makePhysical_y();
  makePhysical_xz();
}
// Transform the field as needed so that it ends up in the requested xz and
// y states. The xz direction is handled first, then y.
void FlowField::makeState(fieldstate xzstate, fieldstate ystate)  {
  if (xzstate == Physical)
    makePhysical_xz();
  else
    makeSpectral_xz();

  if (ystate == Physical)
    makePhysical_y();
  else
    makeSpectral_y();
}

// Add the parabolic profile Ucenter*(1 - y^2) to component 0 at mode indices
// (0, 0). The profile is sampled at the y gridpoints, transformed to
// Chebyshev coefficients, and then added. Requires a fully Spectral field.
void FlowField::addPoisseuille(Real Ucenter) {
  assertState(Spectral, Spectral);
  ChebyTransform trans(Ny_);

  // Sample the parabola in physical space.
  ChebyCoeff U(Ny_, a_, b_, Physical);
  int row; // shared by both loops (MSVC++ for-scope workaround)
  for (row=0; row<Ny_; ++row) {
    const Real yval = y(row);
    const Real ysq = square(yval);
    U[row] = Ucenter*(1.0  - ysq);
  }
  trans.chebyfft(U);

  for (row=0; row<Ny_; ++row) {
    cmplx(0, row, 0, 0) += Complex(U[row]);
  }
}

// Add a random perturbation, generated by randomProfile(), to components
// 0, 1, 2 of the Fourier mode (kx, kz). For modes with kz == 0 and kx != 0
// the complex conjugate is also added, once, to the mode (-kx, 0).
// Does nothing when mag == 0. Requires a fully Spectral field.
void FlowField::addPerturbation(int kx, int kz, Real mag, Real decay) {
  assertState(Spectral, Spectral);
  if (mag == 0.0)
    return;

  // Add a div-free perturbation to the base flow.
  ComplexChebyCoeff u(Ny_, a_, b_, Spectral);
  ComplexChebyCoeff v(Ny_, a_, b_, Spectral);
  ComplexChebyCoeff w(Ny_, a_, b_, Spectral);
  randomProfile(u,v,w, kx, kz, Lx_, Lz_, mag, decay);
  int m_x = mx(kx);
  int m_z = mz(kz);
  for (int ny=0; ny<Ny_; ++ny) {
    cmplx(m_x, ny, m_z, 0) += u[ny];
    cmplx(m_x, ny, m_z, 1) += v[ny];
    cmplx(m_x, ny, m_z, 2) += w[ny];
  }
  if (kz==0 && kx!=0) {
    // Conjugate partner at (-kx, 0). This used to sit inside an extra
    // "for i<Nd_" loop whose index was unused, so the conjugate was added
    // Nd_ times instead of once.
    int c_mx = mx(-kx);
    int c_mz = mz(0); // -kz=0
    for (int ny=0; ny<Ny_; ++ny) {
      cmplx(c_mx, ny, c_mz, 0) += conjugate(u[ny]);
      cmplx(c_mx, ny, c_mz, 1) += conjugate(v[ny]);
      cmplx(c_mx, ny, c_mz, 2) += conjugate(w[ny]);
    }
  }
  return;
}

void FlowField::addPerturbations(int kxmx, int kzmx, Real mag, Real decay) {
  assertState(Spectral, Spectral);
  if (mag == 0.0)
    return;

  int kxmn = Greater(-kxmx, kxmin());
  kxmx = lesser(kxmx, kxmax());
  kzmx = lesser(kzmx, kzmax());
  // Add a div-free perturbation to the base flow.
  for (int kx=kxmn; kx<=kxmx; ++kx) 
    for (int kz=0; kz<=kzmx; ++kz) {
      //Real norm = pow(10.0, -(abs(2*pi*kx/Lx_) + abs(2*pi*kz/Lz_)));
      Real norm = pow(decay, 2*(abs(kx) + abs(kz)));
      if (!(kx==0 && kz==0))
	addPerturbation(kx, kz, mag*norm, decay);
    }
  makePhysical();
  makeSpectral();
  return;
}

// Add perturbations to every mode the field holds, with magnitude
// mag * decay^(2*(|kx|+|kz|)), then transform to physical space and back.
// Does nothing when mag == 0.
void FlowField::addPerturbations(Real mag, Real decay) {
  assertState(Spectral, Spectral);
  if (mag == 0.0)
    return;

  // Add a div-free perturbation to the base flow.
  for (int ix=0; ix<numXmodes(); ++ix) {
    const int wave_x = kx(ix);
    for (int iz=0; iz<numZmodes(); ++iz) {
      const int wave_z = kz(iz);
      Real norm = pow(decay, 2*(abs(wave_x) + abs(wave_z)));
      addPerturbation(wave_x, wave_z, mag*norm, decay);
    }
  }

  makePhysical();
  makeSpectral();
  return;
}
// Set the flag that selects dealiased (reduced) binary file storage.
void FlowField::dealiasIO(bool b) {
  dealiasIO_ = b;
}

// Zero every entry of the padded real data array.
void FlowField::setToZero() {
  const int count = Nx_*Ny_*Nzpad_*Nd_;
  for (int idx=0; idx<count; ++idx) {
    rdata_[idx] = 0.0;
  }
}

void FlowField::print() const {
  cout << Nx_ << " x " << Ny_ << " x " << Nz_ << endl;
  cout << "[0, " << Lx_ << "] x [-1, 1] x [0, " << Lz_ << "]" << endl;
  cout << xzstate_ << " x " << ystate_ << " x " << xzstate_ << endl;
  cout << xzstate_ << " x " << ystate_ << " x " << xzstate_ << endl;
  if (xzstate_ == Spectral) {
  cout << "FlowField::print() real view " << endl;
    for (int i=0; i<Nd_; ++i) {
      for (int ny=0; ny<Ny_; ++ny) {
	for (int nx=0; nx<Nx_; ++nx) {
 	  cout << "i=" << i << " ny=" << ny << " nx= " << nx << ' ';
 	  int nz; // MSVC++ FOR-SCOPE BUG
	  for (nz=0; nz<Nz_; ++nz) 
	    cout << rdata_[flatten(nx,ny,nz,i)] << ' ';
	  cout << " pad : ";
	  for (nz=Nz_; nz<Nzpad_; ++nz) 
	    cout << rdata_[flatten(nx,ny,nz,i)] << ' ';
	  cout << endl;
	}
      }
    }
  }
  else {
    cout << "complex view " << endl;
    for (int i=0; i<Nd_; ++i) {
       for (int ny=0; ny<Ny_; ++ny) {
	 for (int nx=0; nx<Nx_; ++nx) {
	   cout << "i=" << i << " ny=" << ny << " nx= " << nx << ' ';
	   for (int nz=0; nz<Nz_/2; ++nz) 
	     cout << cdata_[complex_flatten(nx,ny,nz,i)] << ' ';
	   cout << endl;
	 }
       }    
    }
  }
}

// k == direction of normal    (e.g. k=0 means a x-normal slice in yz plane.
// i == component of FlowField (e.g. i=0 means u-component)
// n == nth gridpoint along k direction
void FlowField::saveSlice(int k, int i, int nk, const string& filebase) const {
  assert(k>=0 && k<3);
  assert(i>=0 && i<Nd_);

  string filename(filebase);
  filename += string(".asc");
  ofstream os(filename.c_str());

  FlowField& u = (FlowField&) *this;  // cast away constness
  fieldstate xzstate = xzstate_;
  fieldstate ystate = ystate_;

  u.makePhysical();

  switch (k) {
  case 0: 
    os << "% yz slice\n";
    os << "% (i,j)th elem is field at (x_n, y_i, z_j)\n";
    for (int ny=0; ny<Ny_; ++ny) {
      for (int nz=0; nz<Nz_; ++nz) 
	os << u(nk, ny, nz, i) << ' ';
      os << '\n';
    }
    break;
  case 1: // xz slice
    os << "% xz slice\n";
    os << "% (i,j)th elem is field at (x_j, y_n, z_i)\n";
    for (int nz=0; nz<Nz_; ++nz) {
      for (int nx=0; nx<Nx_; ++nx) 
	os << (*this)(nx, nk, nz, i) << ' ';
      os << '\n';
    }
    break;
  case 2: 
    os << "% yz slice\n";
    os << "% (i,j)th elem is field at (x_j, y_i, z_n)\n";
    for (int ny=0; ny<Ny_; ++ny) {
      for (int nx=0; nx<Nx_; ++nx) 
	os << (*this)(nx, ny, nk, i) << ' ';
      os << '\n';
    }
    break;
  }
  u.makeState(xzstate, ystate);
}

void FlowField::saveProfile(int mx, int mz, const string& filebase) const {
  ChebyTransform trans(Ny_);
  saveProfile(mx,mz,filebase, trans);
}

void FlowField::saveProfile(int mx, int mz, const string& filebase, const ChebyTransform& trans) const {
  assert(xzstate_ == Spectral);
  string filename(filebase);
  if (Nd_ == 3)
    filename += string(".bf");  // this convention is unfortunate, need to fix
  else 
    filename += string(".asc");

  ofstream os(filename.c_str());
  os << setprecision(REAL_DIGITS);

  if (ystate_ == Physical) {
    for (int ny=0; ny<Ny_; ++ny) {
      for (int i=0; i<Nd_; ++i) {
	Complex c = (*this).cmplx(mx, ny, mz, i);
	os << Re(c) << ' ' << Im(c) << ' ';
      }
      os << '\n';
    }
  }
  else {
    ComplexChebyCoeff* f = new ComplexChebyCoeff[Nd_];
    for (int i=0; i<Nd_; ++i) {
      f[i] = ComplexChebyCoeff(Ny_, a_, b_, Spectral);
      for (int ny=0; ny<Ny_; ++ny) 
	f[i].set(ny, (*this).cmplx(mx, ny, mz, i));
      f[i].makePhysical(trans);
    }
    for (int ny=0; ny<Ny_; ++ny) {
      for (int i=0; i<Nd_; ++i) 
	os << f[i].re[ny] << ' ' << f[i].im[ny] << ' ';
      os << '\n';
    }
  }
}
void FlowField::saveSpectrum(const string& filebase, int i, int ny) const {
  string filename(filebase);
  filename += string(".asc");
  ofstream os(filename.c_str());

  bool sum = (ny == -1) ? true : false;
  assert(xzstate_ == Spectral);
  
  for (int mx=0; mx<Mx(); ++mx) {
    for (int mz=0; mz<Mz(); ++mz) {
      if (sum) 
	os << sqrt(energy(mx,mz)) << ' ';
      else {
	Complex f = this->cmplx(mx, ny, mz, i);
	os << Re(f) << ' ' << Im(f) << ' ';
      }
    }
    os << endl;
  }
  os.close();
}

void FlowField::saveSpectrum(const string& filebase) const {
  string filename(filebase);
  filename += string(".asc");
  ofstream os(filename.c_str());

  assert(xzstate_ == Spectral && ystate_ == Spectral);
 
  int M_x = Mx();
  int M_z = Mz();
  ComplexChebyCoeff u(Ny_,a_,b_,Spectral);

  for (int mx=0; mx<M_x; ++mx) {
    for (int mz=0; mz<M_z; ++mz) {
      Real e = 0.0;
      for (int i=0; i<Nd_; ++i) {
	for (int ny=0; ny<Ny_; ++ny) 
	  u.set(ny,this->cmplx(mx,ny,mz,i));
	e += L2Norm2(u);
      }
      os << e << ' ';
    }
    os << endl;
  }
  os.close();
}

/*******************************************************************
void FlowField::saveDissSpectrum(const string& filebase, Real nu) const {
  string filename(filebase);
  filename += string(".asc");
  ofstream os(filename.c_str());

  assert(xzstate_ == Spectral && ystate_ == Spectral);
  BasisFunc prof(Ny_, 0, 0, Lx_, Lz_, a_, b_, Spectral);
  for (int mx=0; mx<Mx(); ++mx) {
    for (int mz=0; mz<Mz(); ++mz) {
      os << dissipation(mx,mz,nu) << ' ';
    }
    os << endl;
  }
  os.close();
}
*******************************************************************/

void FlowField::saveDivSpectrum(const string& filebase) const {
  string filename(filebase);
  filename += string(".asc");
  ofstream os(filename.c_str());

  assert(xzstate_ == Spectral && ystate_ == Spectral);
  
  //assert(congruent(div));
  assert(Nd_ >= 3);

  Complex cz = 2*pi*I/Lz_;
  Complex cx = 2*pi*I/Lx_;

  int Nx = numXmodes();
  int Ny = numYmodes();
  int Nz = numZmodes();

  ComplexChebyCoeff v(Ny, a_, b_, Spectral);
  ComplexChebyCoeff vy(Ny, a_, b_, Spectral);

  for (int nx=0; nx<Nx; ++nx) {
    int kx_=kx(nx);
    for (int nz=0; nz<Nz; ++nz) {
      int kz_=kz(nz);
      for (int ny=0; ny<Ny; ++ny) 
	v.set(ny, cmplx(nx,ny,nz,1));
      diff(v,vy);
      Real div = 0.0;
      for (int ny=0; ny<Ny; ++ny) {
	Complex ux = cx*Real(kx_)*cmplx(nx,ny,nz,0);
	Complex wz = cz*Real(kz_)*cmplx(nx,ny,nz,2);
	div += norm2(ux + vy[ny] + wz);
      }
      os << sqrt(div) << ' ';
    }
    os << endl;
  }
  os.close();
}

// Write the field as text to "<filebase>.aff": one header line with the
// sizes, states, box lengths and y-interval, then every entry of the padded
// real data array on its own line.
void FlowField::asciiSave(const string& filebase) const {
  string filename(filebase);
  filename += string(".aff"); // "ascii flow field"
  ofstream os(filename.c_str());

  os << setprecision(REAL_DIGITS);

  // Header line.
  os << Nx_ << ' ' << Ny_ << ' ' << Nz_ << ' ' << Nd_ << ' ';
  os << xzstate_ << ystate_ << ' ';
  os << Lx_ << ' ' << Lz_ << ' ' << a_ << ' ' << b_ << endl;

  // Data, one value per line.
  const int count = Nd_* Nx_ * Ny_ * Nzpad_;
  for (int idx=0; idx<count; ++idx) {
    os << rdata_[idx] << '\n';
  }
}

// Write the field to "<filebase>.ff" using write(): the header (Nx, Ny, Nz,
// Nd, xz-state, y-state, Lx, Lz, a, b, dealiasIO) followed by the data.
// When dealiasIO_ is set and the xz state is Spectral only a reduced set of
// entries is written; otherwise the whole padded data array is written.
// Aborts if the file cannot be opened.
void FlowField::binarySave(const string& filebase) const {
  string filename(filebase);
  filename += string(".ff"); // "flow field"
  ofstream os(filename.c_str());
  if (!os.good()) {
    cerr << "FlowField::binarySave(filebase) : can't open file " << filename << endl;
    abort();
  }

  // Header.
  write(os, Nx_);
  write(os, Ny_);
  write(os, Nz_);
  write(os, Nd_);
  write(os, xzstate_);
  write(os, ystate_);
  write(os, Lx_);
  write(os, Lz_);
  write(os, a_);
  write(os, b_);
  write(os, dealiasIO_);

  const bool reduced = (dealiasIO_ && xzstate_ == Spectral);
  if (!reduced) {
    // Full storage: dump the whole array.
    const int count = Nd_* Nx_ * Ny_ * Nzpad_;
    for (int idx=0; idx<count; ++idx) {
      write(os, rdata_[idx]);
    }
    return;
  }

  // Write data only for non-aliased modes.
  const int Nxd=2*(Nx_/6);
  const int Nzd=2*(Nz_/3)+1;

  for (int i=0; i<Nd_; ++i) {
    for (int ny=0; ny<Ny_; ++ny) {
      // Low nx block.
      for (int nx=0; nx<=Nxd; ++nx) {
        for (int nz=0; nz<=Nzd; ++nz) {
          write(os, rdata_[flatten(nx,ny,nz,i)]);
        }
      }
      // High nx block.
      for (int nx=Nx_-Nxd; nx<Nx_; ++nx) {
        for (int nz=0; nz<=Nzd; ++nz) {
          write(os, rdata_[flatten(nx,ny,nz,i)]);
        }
      }
    }
  }
}

// Print every element of rdata_ (Nx_*Ny_*Nzpad_*Nd_ values) to cout on a
// single line, each followed by a space, then end the line.
void FlowField::dump() const {
  const int Ntotal = Nx_ * Ny_ * Nzpad_ * Nd_;
  for (int n=0; n<Ntotal; ++n)
    cout << rdata_[n] << ' ';
  cout << endl;
}

// Energy of the whole field: L2Norm2 of *this. Requires a fully spectral
// field (asserted).
Real FlowField::energy(bool normalize) const {
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  const Real e = L2Norm2(*this, normalize);
  return e;
}
// Energy of the single Fourier mode (mx,mz): the sum over the Nd_ vector
// components of L2Norm2 of that mode's y-profile. When normalize is false
// the sum is multiplied by Lx_*Lz_. Requires a fully spectral field.
Real FlowField::energy(int mx, int mz, bool normalize) const {
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  ComplexChebyCoeff column(Ny_,a_,b_,Spectral);
  Real total = 0.0;
  for (int i=0; i<Nd_; ++i) {
    // Copy component i of mode (mx,mz) into the scratch profile.
    for (int ny=0; ny<Ny_; ++ny)
      column.set(ny, this->cmplx(mx,ny,mz,i));
    total += L2Norm2(column, normalize);
  }
  if (normalize)
    return total;
  total *= Lx_*Lz_;
  return total;
}

// Returns eval_a() of the y-derivative of Re(u) for the profile(0,0) mode
// (presumably du/dy of the mean flow at the y=a wall).
// Requires ystate_ == Spectral (asserted).
Real FlowField::dudy_a() const {
  assert(ystate_ == Spectral);
  BasisFunc meanMode = profile(0,0);
  ChebyCoeff duMean_dy = diff(Re(meanMode.u()));
  const Real slope = duMean_dy.eval_a();
  return slope;
}
// Returns eval_b() of the y-derivative of Re(u) for the profile(0,0) mode
// (presumably du/dy of the mean flow at the y=b wall).
// Requires ystate_ == Spectral (asserted).
Real FlowField::dudy_b() const {
  assert(ystate_ == Spectral);
  BasisFunc meanMode = profile(0,0);
  ChebyCoeff duMean_dy = diff(Re(meanMode.u()));
  const Real slope = duMean_dy.eval_b();
  return slope;
}

// Returns the maximum over all gridpoints of |u|/dx + |v|/dy + |w|/dz.
// dx and dz are the uniform spacings Lx_/Nx_ and Lz_/Nz_; dy is derived
// from the chebypoints() grid. Although declared const, this temporarily
// transforms *this to physical space and restores the saved state on exit.
Real FlowField::CFLfactor() const {
  // Cast away constness of *this for the transforms.
  FlowField& self = (FlowField&) *this;
  fieldstate savedXZ = xzstate_;
  fieldstate savedY = ystate_;
  self.makePhysical();

  Vector ypts = chebypoints(Ny_, a_, b_);
  Real dx = Lx_/Nx_;
  Real dz = Lz_/Nz_;
  Real maxRate = 0.0;

  for (int ny=0; ny<Ny_; ++ny) {
    // First and last rows use the first grid interval; interior rows use
    // half the distance between the two neighbouring points.
    Real dy;
    if (ny==0 || ny==Ny_-1)
      dy = ypts[0]-ypts[1];
    else
      dy = (ypts[ny-1]-ypts[ny+1])/2.0;

    for (int nx=0; nx<Nx_; ++nx) {
      for (int nz=0; nz<Nz_; ++nz) {
        Real u = self(nx,ny,nz, 0);
        Real v = self(nx,ny,nz, 1);
        Real w = self(nx,ny,nz, 2);
        maxRate = Greater(maxRate, abs(u)/dx + abs(v)/dy + abs(w)/dz);
      }
    }
  }

  // Put the field back into the state it had on entry.
  self.makeState(savedXZ, savedY);
  return maxRate;
}

/********************************************************
Real FlowField::dissipation(int mx, int mz, Real nu) const {
  assert(xzstate_ == Spectral && ystate_== Spectral);
  BasisFunc prof;
  Real d=0.0;
  for (int _=0; _<Nd_; ++_) {
    prof = profile(mx, mz);
    d += nu*Re(L2InnerProduct(prof, ::laplacian(prof), false));
  }
  return d;
}
***********************************************************/

// Overwrite the stored state flags. Only the two flags are assigned here;
// the data array is not touched.
void FlowField::setState(fieldstate xz, fieldstate y) {
  ystate_ = y;
  xzstate_ = xz;
}
// Debug check: assert that the stored state flags equal (xz, y).
void FlowField::assertState(fieldstate xz, fieldstate y) const {
  assert(xzstate_ == xz && ystate_ == y);
}

// Exchange the data arrays of two congruent fields by swapping pointers
// (no copying). Only rdata_ and cdata_ are exchanged; no other member of
// either field is modified here.
void swap(FlowField& f, FlowField& g) {
  assert(f.congruent(g));
  Real* const fdata = f.rdata_;
  Real* const gdata = g.rdata_;

  f.rdata_ = gdata;
  g.rdata_ = fdata;

  // cdata_ is the complex view of the same storage as rdata_.
  f.cdata_ = (Complex*)f.rdata_;
  g.cdata_ = (Complex*)g.rdata_;
}
  
/**************************************
Real bcL1Norm(const FlowField& u) {
  Real sum = 0.0;
  FlowField& v = (FlowField&) u; // cast away const-ness
  bool xztransform=false;
  bool ytransform=false;
  if (u.xzstate()==Spectral) {
    v.irealfft_xz();
    xztransform=true;
  }
  if (u.ystate()==Spectral) {
    v.ichebyfft_y();
    ytransform=true;
  }
  int Nyb=u.numYgridpts()-1;
  for (int i=0; i<v.vectorDim(); ++i) {
    int nx; // MSVC++ FOR-SCOPE BUG
    for (nx=0; nx<u.numXgridpts(); ++nx) 
      for (int nz=0; nz<u.numZgridpts(); ++nz) 
	sum += fabs(u(nx,0,nz,i));

    for (nx=0; nx<u.numXgridpts(); ++nx) 
      for (int nz=0; nz<u.numZgridpts(); ++nz) 
	sum += fabs(u(nx,Nyb,nz,i));
  }
  if (xztransform) 
    v.realfft_xz();
  if (ytransform) 
    v.chebyfft_y();
    
  return sum; ///(2*Nd_*Nx_*Nz_);
}

Real L1Norm(const FlowField& u) {
  Real sum = 0.0;
  if (u.xzstate()==Spectral && u.ystate()==Spectral) {
    for (int i=0; i<u.vectorDim(); ++i)
      for (int ny=0; ny<u.numYmodes(); ++ny) 
	for (int nx=0; nx<u.numXmodes(); ++nx) 
	  for (int nz=0; nz<u.numZmodes(); ++nz) {
	    Complex d = u.cmplx(nx,ny,nz,i);
	    sum += fabs(Re(d)) + fabs(Im(d));
	  }
  }
  else if (u.xzstate()==Physical && u.ystate()==Physical) {
    for (int i=0; i<u.vectorDim(); ++i)
      for (int ny=0; ny<u.numYgridpts(); ++ny) 
	for (int nx=0; nx<u.numXgridpts(); ++nx) 
	  for (int nz=0; nz<u.numZgridpts(); ++nz) 
	    sum += fabs(u(nx,ny,nz,i));
  }
  else {
    cerr << "L1Norm(const FlowField& u) : u must be pure spectral or pure physical" << endl;
    assert(false);
  }
  return sum;
}

Real L1Dist(const FlowField& u, const FlowField& v, bool normalize) {
  Real sum = 0.0;
  assert(u.congruent(v));
  if (u.xzstate()==Spectral && u.ystate()==Spectral) {
    for (int i=0; i<u.vectorDim(); ++i)
      for (int ny=0; ny<u.numYmodes(); ++ny) 
	for (int nx=0; nx<u.numXmodes(); ++nx) 
	  for (int nz=0; nz<u.numZmodes(); ++nz) {
	    Complex d = u.cmplx(nx,ny,nz,i)-v.cmplx(nx,ny,nz,i);
	    sum += fabs(Re(d)) + fabs(Im(d));
	  }
  }
  else if (u.xzstate()==Physical && u.ystate()==Physical) { 
    for (int i=0; i<u.vectorDim(); ++i)
      for (int ny=0; ny<u.numYgridpts(); ++ny) 
	for (int nx=0; nx<u.numXgridpts(); ++nx) 
	  for (int nz=0; nz<u.numZgridpts(); ++nz) 
	    sum += fabs(u(nx,ny,nz,i) - v(nx,ny,nz,i));
  }
  else {
    cerr << "L1Dist(FlowField u, FlowField v) : u and v must be pure spectral or pure physical" << endl;
    assert(false);
  }
  return sum;

}
***********************************************/


// Sum, over all vector components and all (mx,mz) Fourier modes, of
// norm2 of the mode's y-profile evaluated with eval_a() and eval_b().
// When normalize is false the sum is multiplied by Lx*Lz.
// Requires f.xzstate() == Spectral (asserted).
Real bcNorm2(const FlowField& f, bool normalize) {
  assert(f.xzstate() == Spectral);
  const int Mx = f.Mx();
  const int My = f.My();
  const int Mz = f.Mz();
  ComplexChebyCoeff column(f.Ny(), f.a(), f.b(), f.ystate());

  Real total = 0.0;
  for (int i=0; i<f.Nd(); ++i) {
    for (int mx=0; mx<Mx; ++mx) {
      for (int mz=0; mz<Mz; ++mz) {
        // Gather the y-profile of component i, mode (mx,mz).
        for (int my=0; my<My; ++my)
          column.set(my, f.cmplx(mx,my,mz,i));
        total += norm2(column.eval_a());
        total += norm2(column.eval_b());
      }
    }
  }
  if (normalize)
    return total;
  total *= f.Lx()*f.Lz();
  return total;
}

// Same quantity as bcNorm2, but computed on the difference f - g:
// for every component and (mx,mz) mode, norm2 of the difference profile
// evaluated with eval_a() and eval_b() is accumulated. When normalize is
// false the sum is multiplied by Lx*Lz. f and g must be congruent and
// xz-Spectral (asserted); the y-state is not checked.
Real bcDist2(const FlowField& f, const FlowField& g, bool normalize) {
  assert(f.congruent(g));
  assert(f.xzstate() == Spectral && g.xzstate() == Spectral);
  //assert(f.ystate() == Spectral && g.ystate() == Spectral);

  const int Nd = f.Nd();
  const int Mx = f.Mx();
  const int My = f.My();
  const int Mz = f.Mz();
  ComplexChebyCoeff delta(My, f.a(), f.b(), f.ystate());

  Real total = 0.0;
  for (int i=0; i<Nd; ++i) {
    for (int mx=0; mx<Mx; ++mx) {
      for (int mz=0; mz<Mz; ++mz) {
        for (int my=0; my<My; ++my)
          delta.set(my, f.cmplx(mx,my,mz,i)-g.cmplx(mx,my,mz,i));
        total += norm2(delta.eval_a());
        total += norm2(delta.eval_b());
      }
    }
  }
  if (normalize)
    return total;
  total *= f.Lx()*f.Lz();
  return total;
}


// Square root of bcNorm2(f, normalize).
Real bcNorm(const FlowField& f, bool normalize) {
  const Real squared = bcNorm2(f, normalize);
  return sqrt(squared);
}

// Square root of bcDist2(f, g, normalize).
Real bcDist(const FlowField& f, const FlowField& g, bool normalize) {
  const Real squared = bcDist2(f,g, normalize);
  return sqrt(squared);
}

// Square root of divNorm2(f, normalize).
Real divNorm(const FlowField& f, bool normalize)  {
  const Real squared = divNorm2(f,normalize);
  return sqrt(squared);
}

// Sum over all (mx,mz) Fourier modes of divNorm2 of the mode's profile
// (the BasisFunc overload). Requires a fully spectral, 3-component field
// (asserted).
Real divNorm2(const FlowField& f, bool normalize)  {
  assert(f.xzstate() == Spectral && f.ystate() == Spectral);
  assert(f.Nd() == 3);

  Real div2=0.0;
  int Mx = f.Mx();
  int Mz = f.Mz();
  // Each profile is used straight from f.profile(); no scratch BasisFunc
  // is needed.
  for (int mx=0; mx<Mx; ++mx) 
    for (int mz=0; mz<Mz; ++mz) 
      div2 += divNorm2(f.profile(mx,mz), normalize);
  return div2;
}
// Sum over all (mx,mz) Fourier modes of divDist2 between the matching
// profiles of f and g (the BasisFunc overload). f must be fully spectral,
// congruent with g, and have 3 components (asserted).
Real divDist2(const FlowField& f, const FlowField& g, bool normalize)  {
  assert(f.xzstate() == Spectral && f.ystate() == Spectral);
  assert(f.congruent(g));
  assert(f.Nd() == 3);

  const int Mx = f.Mx();
  const int Mz = f.Mz();
  Real total = 0.0;
  for (int mx=0; mx<Mx; ++mx) {
    for (int mz=0; mz<Mz; ++mz) {
      total += divDist2(f.profile(mx,mz), g.profile(mx,mz), normalize);
    }
  }
  return total;
}

// Square root of divDist2(f, g, normalize).
Real divDist(const FlowField& f, const FlowField& g, bool normalize) {
  const Real squared = divDist2(f,g,normalize);
  return sqrt(squared);
}

// Square root of L2Norm2(u, normalize).
Real L2Norm(const FlowField& u, bool normalize) {
  const Real squared = L2Norm2(u, normalize);
  return sqrt(squared);
}

// Squared L2 norm of a field that is spectral in y (asserted).
//  - xz Spectral: sum of L2Norm2 of every mode's y-profile, with modes
//    nz > 0 weighted by 2 to take account of the kz < 0 ghost modes.
//  - otherwise: L2Norm2 of the y-profile at each (nx,nz) gridpoint,
//    averaged over the Nx*Nz gridpoints.
// When normalize is false the result is multiplied by Lx*Lz.
Real L2Norm2(const FlowField& u, bool normalize) {
  assert(u.ystate() == Spectral);
  Real total = 0.0;

  if (u.xzstate()!=Spectral) {
    ChebyCoeff column(u.Ny(), u.a(), u.b(), Spectral);
    for (int i=0; i<u.vectorDim(); ++i) {
      for (int nx=0; nx<u.numXgridpts(); ++nx) {
        for (int nz=0; nz<u.numZgridpts(); ++nz) {
          for (int ny=0; ny<u.numYmodes(); ++ny)
            column[ny] = u(nx,ny,nz,i);
          total += L2Norm2(column, normalize);
        }
      }
    }
    total /= u.numXgridpts()*u.numZgridpts();
  }
  else {
    ComplexChebyCoeff column(u.Ny(), u.a(), u.b(), Spectral);
    for (int i=0; i<u.vectorDim(); ++i) {
      for (int nx=0; nx<u.numXmodes(); ++nx) {
        for (int nz=0; nz<u.numZmodes(); ++nz) {
          // weight = 2 for nz>0 to take account of kz<0 ghost modes
          const Real weight = (nz == 0) ? 1.0 : 2.0;
          for (int ny=0; ny<u.numYmodes(); ++ny)
            column.set(ny, u.cmplx(nx,ny,nz,i));
          total += weight*L2Norm2(column, normalize);
        }
      }
    }
  }

  if (normalize)
    return total;
  total *= u.Lx()*u.Lz();
  return total;
}

// Square root of L2Dist2(u, v, normalize).
Real L2Dist(const FlowField& u,const FlowField& v, bool normalize) {
  const Real squared = L2Dist2(u,v,normalize);
  return sqrt(squared);
}

// Squared L2 distance between two congruent, fully spectral fields
// (asserted): the sum over components and modes of L2Norm2 of the
// difference of the y-profiles, with modes nz > 0 weighted by 2.
// When normalize is false the result is multiplied by Lx*Lz.
Real L2Dist2(const FlowField& u, const FlowField& v, bool normalize) {
  assert(u.congruent(v));
  assert(u.xzstate()==Spectral && u.ystate()==Spectral);

  ComplexChebyCoeff delta(u.Ny(), u.a(), u.b(), Spectral);
  Real total = 0.0;
  for (int i=0; i<u.vectorDim(); ++i) {
    for (int nx=0; nx<u.numXmodes(); ++nx) {
      for (int nz=0; nz<u.numZmodes(); ++nz) {
        const int weight = (nz == 0) ? 1 : 2;
        for (int ny=0; ny<u.numYmodes(); ++ny)
          delta.set(ny, u.cmplx(nx,ny,nz,i)-v.cmplx(nx,ny,nz,i));
        total += weight*L2Norm2(delta, normalize);
      }
    }
  }
  if (normalize)
    return total;
  total *= u.Lx()*u.Lz();
  return total;
}

// Squared L2 norm restricted to modes with |kx| <= kxmax and |kz| <= kzmax.
// Requires a fully spectral field (asserted). Within each nx, the first
// z-mode that passes the filter has weight 1 and every later one weight 2
// (to take account of kz < 0 ghost modes). When normalize is false the
// result is multiplied by Lx*Lz.
Real L2Norm2(const FlowField& u, int kxmax, int kzmax, bool normalize) {
  assert(u.ystate()==Spectral);
  assert(u.xzstate()==Spectral);

  // EFFICIENCY: would be better with smarter looping, but prob not worth trouble.
  ComplexChebyCoeff column(u.Ny(), u.a(), u.b(), Spectral);
  Real total = 0.0;
  for (int i=0; i<u.vectorDim(); ++i) {
    for (int nx=0; nx<u.numXmodes(); ++nx) {
      if (abs(u.kx(nx)) <= kxmax) {
        Real weight = 1.0;
        for (int nz=0; nz<u.numZmodes(); ++nz) {
          if (abs(u.kz(nz)) <= kzmax) {
            for (int ny=0; ny<u.numYmodes(); ++ny)
              column.set(ny, u.cmplx(nx,ny,nz,i));
            total += weight*L2Norm2(column, normalize);
            weight = 2.0;
          }
        }
      }
    }
  }
  if (normalize)
    return total;
  total *= u.Lx()*u.Lz();
  return total;
}

// Square root of the band-limited L2Norm2(u, kxmax, kzmax, normalize).
Real L2Norm(const FlowField& u, int kxmax, int kzmax, bool normalize) {
  const Real squared = L2Norm2(u, kxmax, kzmax, normalize);
  return sqrt(squared);
}
// Square root of the band-limited L2Dist2(u, v, kxmax, kzmax, normalize).
Real L2Dist(const FlowField& u, const FlowField& v, int kxmax, int kzmax, bool normalize) {
  const Real squared = L2Dist2(u, v, kxmax, kzmax, normalize);
  return sqrt(squared);
}

// Squared L2 distance restricted to modes with |kx| <= kxmax and
// |kz| <= kzmax. u and v must be congruent and u fully spectral (asserted).
// Within each nx, the first z-mode that passes the filter has weight 1 and
// every later one weight 2. When normalize is false the result is
// multiplied by Lx*Lz.
Real L2Dist2(const FlowField& u, const FlowField& v, int kxmax, int kzmax, bool normalize) {
  assert(u.congruent(v));
  assert(u.xzstate()==Spectral && u.ystate()==Spectral);

  ComplexChebyCoeff delta(u.Ny(), u.a(), u.b(), Spectral);
  Real total = 0.0;
  for (int i=0; i<u.vectorDim(); ++i) {
    for (int nx=0; nx<u.numXmodes(); ++nx) {
      if (abs(u.kx(nx)) <= kxmax) {
        int weight = 1;
        for (int nz=0; nz<u.numZmodes(); ++nz) {
          if (abs(u.kz(nz)) <= kzmax) {
            for (int ny=0; ny<u.numYmodes(); ++ny)
              delta.set(ny, u.cmplx(nx,ny,nz,i)-v.cmplx(nx,ny,nz,i));
            total += weight*L2Norm2(delta, normalize);
            weight = 2;
          }
        }
      }
    }
  }
  if (normalize)
    return total;
  total *= u.Lx()*u.Lz();
  return total;
}

// Inner product of a field with a single basis function: the (kx,kz)
// Fourier mode of u matching phi is copied into a BasisFunc (components
// 0,1,2 -> u,v,w) and passed to the BasisFunc overload of L2InnerProduct.
// u must be fully spectral and congruent with phi (asserted).
Complex L2InnerProduct(const FlowField& u, const BasisFunc& phi,
		       bool normalize) {
  assert(u.xzstate()==Spectral && u.ystate()==Spectral);
  assert(u.congruent(phi));

  const int kx = phi.kx();
  const int kz = phi.kz();
  // Array indices of the modes with wavenumbers kx and kz.
  const int mx = u.mx(kx);
  const int mz = u.mz(kz);
  const int Ny = u.numYgridpts();

  BasisFunc mode(Ny, kx, kz, u.Lx(), u.Lz(), u.a(), u.b(), Spectral);
  for (int ny=0; ny<Ny; ++ny) {
    mode.u().set(ny, u.cmplx(mx,ny,mz, 0));
    mode.v().set(ny, u.cmplx(mx,ny,mz, 1));
    mode.w().set(ny, u.cmplx(mx,ny,mz, 2));
  }
  const Complex result = L2InnerProduct(mode, phi, normalize);
  return result;
}

/******************
Complex innerProduct(const FlowField& u, const FlowField& v, bool normalize) {
  Complex sum = 0.0;
  assert(u.congruent(v));
  assert(u.xzstate()==Spectral && u.ystate()==Spectral);
  for (int i=0; i<u.vectorDim(); ++i)
    for (int ny=0; ny<u.numYmodes(); ++ny) 
      for (int nx=0; nx<u.numXmodes(); ++nx) 
	for (int nz=0; nz<u.numZmodes(); ++nz) 
	  sum += u.cmplx(nx,ny,nz,i) * conjugate(v.cmplx(nx,ny,nz,i));
  return sum;

}

Vector vectorL1Dist(const FlowField& u, const FlowField& v) {
  Vector sum(u.vectorDim()); 
  assert(u.congruent(v));
  assert(u.xzstate()==Spectral && u.ystate()==Spectral);
  for (int i=0; i<u.vectorDim(); ++i)
    for (int ny=0; ny<u.numYmodes(); ++ny) 
      for (int nx=0; nx<u.numXmodes(); ++nx) 
	for (int nz=0; nz<u.numZmodes(); ++nz) 
	  sum[i] += true_norm(u.cmplx(nx,ny,nz,i)-v.cmplx(nx,ny,nz,i));
  cferror("L1Norm(const FlowField& u) : u must be pure spectral or pure physical");
  return sum;
}
******************/

/*********************************************************
// Takes U=FCF, returns omega=FCF
Real FlowField::bcnorm2() const {
  assert(xzstate_ == Spectral);
  Real bc2 = 0.0;
  int _Mx = Mx();
  int _Mz = Mz();
  ComplexChebyCoeff prof(Ny_, a_, b_, ystate_);
  for (int i=0; i<Nd_; ++i) 
    for (int mx=0; mx<_Mx; ++mx) {
      for (int mz=0; mz<_Mz; ++mz) {
	for (int ny=0; ny<Ny_; ++ny)
	  prof.set(ny, this->cmplx(mx,ny,mz,i));
	bc2 += norm2(prof.eval_a());
	bc2 += norm2(prof.eval_b());
      }
    }
  return bc2;
}
Real FlowField::bcnorm() const {return sqrt(bcnorm2());}
***************************************************************/

// Takes U=FCF, returns omega=FCF

// Fill u with random spectral coefficients whose magnitude decays
// geometrically: coefficient n is mag*decay^n * randomComplex(). The T0
// and T1 coefficients are then adjusted so that u vanishes at both
// endpoints (checked against EPSILON by the asserts).
//   u     - profile to overwrite; its state is set to Spectral
//   mag   - scale of the first coefficient
//   decay - factor applied to the scale after each coefficient
void randomUprofile(ComplexChebyCoeff& u, Real mag, Real decay) {
  // Set a random u(y)
  int N = u.length();
  u.setState(Spectral);
  for (int n=0; n<N; ++n) {
    u.set(n, mag*randomComplex());
    mag *= decay;
  }

  // Adjust u(y) so that u(+-1) == 0. T0 is 1 at both ends and T1 is -1/+1,
  // so removing the mean of the end values from T0 and half their
  // difference from T1 zeroes both ends.
  Complex u0 = (u.eval_b() + u.eval_a())/2.0;
  Complex u1 = (u.eval_b() - u.eval_a())/2.0;
  u.sub(0, u0);
  u.sub(1, u1);

  assert(true_norm(u.eval_b()) < EPSILON);
  assert(true_norm(u.eval_a()) < EPSILON);
}

// Fill v with random spectral coefficients and adjust it so that both v
// and its derivative vanish at both endpoints.
// Modes 0..3 and the last two modes start at zero; modes 4..N-3 get
// mag*decay^(n-4) * randomComplex(). A cubic s0 T0 + s1 T1 + s2 T2 + s3 T3
// carrying the same end values and end derivatives is then subtracted.
//   v     - profile to overwrite; its state is set to Spectral
//   mag   - scale of the first random coefficient
//   decay - factor applied to the scale after each random coefficient
void randomVprofile(ComplexChebyCoeff& v, Real mag, Real decay) {
  int N = v.length();
  v.setState(Spectral);

  // Assign a random v(y).
  int n; // MSVC++ FOR-SCOPE BUG
  for (n=0; n<4; ++n) 
    v.set(n, 0.0);
  for (n=4; n<N-2; ++n) {
    v.set(n, mag*randomComplex());
    mag *= decay;
  }
  for (n=Greater(N-2, 0); n<N; ++n) 
    v.set(n, 0.0);

  // Adjust v so that v(+-1) == v'(+/-1) == 0, by subtracting off 
  // s0 T0(x) + s1 T1(x) + s2 T2(x) + s3 T3(x), with s's chosen to
  // have same BCs as v. 
  ComplexChebyCoeff vy = diff(v);

  Complex a = v.eval_a();
  Complex b = v.eval_b(); 
  Complex c = vy.eval_a();
  Complex d = vy.eval_b(); 
    
  // The numerical coeffs are the inverse of the matrix (values found with Maple)
  // T0(-1)  T1(-1)  T2(-1)  T3(-1)     s0      a
  // T0(1)   T1(1)   T2(1)   T3(1)      s1      b
  // T0'(-1) T1'(-1) T2'(-1) T3'(-1)    s2  ==  c
  // T0'(1)  T1'(1)  T2'(1)  T3'(1)     s3      d
  // NOTE(review): the derivative rows are d/dx on [-1,1]; if diff() returns
  // d/dy on a general [a,b] these rows would need rescaling -- confirm.

  Complex s0 = 0.5*(a + b) + 0.125*(c - d);
  Complex s1 = 0.5625*(b - a) - 0.0625*(c + d);
  Complex s2 = 0.125*(d - c);
  Complex s3 = 0.0625*(a - b + c + d);

  // Subtract off the coeffs 
  v.sub(0, s0);
  v.sub(1, s1);
  v.sub(2, s2);
  v.sub(3, s3);
}


// Set u to a single Chebyshev mode n with magnitude decay^n and phase
// angle pi*randomReal(), then adjust the T0 and T1 coefficients so that u
// vanishes at both endpoints (checked against EPSILON by the asserts).
//   u     - profile to overwrite; zeroed and set to Spectral state
//   n     - index of the Chebyshev coefficient to excite
//   decay - base of the magnitude decay^n
void chebyUprofile(ComplexChebyCoeff& u, int n, Real decay) {
  u.setToZero();
  u.setState(Spectral);
  Real theta = pi*randomReal();
  u.set(n, (cos(theta) + I*sin(theta))*pow(decay,n));
  
  // Adjust u(y) so that u(+-1) == 0
  Complex u0 = (u.eval_b() + u.eval_a())/2.0;  // 2.0 is correct for genl a,b
  Complex u1 = (u.eval_b() - u.eval_a())/2.0;  // 2.0 is correct for genl a,b
  u.sub(0, u0);
  u.sub(1, u1);

  assert(true_norm(u.eval_b()) < EPSILON);
  assert(true_norm(u.eval_a()) < EPSILON);
}

// Set v to a single Chebyshev mode n with magnitude decay^n and phase
// angle pi*randomReal(), then subtract a cubic so that v and its
// derivative match zero at both endpoints.
//   v     - profile to overwrite; zeroed and set to Spectral state
//   n     - index of the Chebyshev coefficient to excite
//   decay - base of the magnitude decay^n
void chebyVprofile(ComplexChebyCoeff& v, int n, Real decay) {
  v.setToZero();
  v.setState(Spectral);
  Real theta = pi*randomReal();
  v.set(n, (cos(theta) + I*sin(theta))*pow(decay,n));

  // The correction is c0 T0(x) + c1 T1(x) + c2 T2(x) + c3 T3(x), chosen
  // to have the same end values and end derivatives as v.
  ComplexChebyCoeff dvdy = diff(v);

  Complex va  = v.eval_a();     // value at a
  Complex vb  = v.eval_b();     // value at b
  Complex dva = dvdy.eval_a();  // derivative at a
  Complex dvb = dvdy.eval_b();  // derivative at b

  // The coefficients solve (inverse found with Maple)
  //
  //   T0(-1)  T1(-1)  T2(-1)  T3(-1)     c0      va
  //   T0(1)   T1(1)   T2(1)   T3(1)      c1      vb
  //   T0'(-1) T1'(-1) T2'(-1) T3'(-1)    c2  ==  dva
  //   T0'(1)  T1'(1)  T2'(1)  T3'(1)     c3      dvb
  //
  // where the matrix is
  //   1  -1   1  -1
  //   1   1   1   1
  //   0   1  -4   9
  //   0   1   4   9
  // NOTE(review): the derivative rows are d/dx on [-1,1]; if diff() returns
  // d/dy on a general [a,b] these rows would need rescaling -- confirm.
  Complex c0 = 0.5*(va + vb) + 0.125*(dva - dvb);
  Complex c1 = 0.5625*(vb - va) - 0.0625*(dva + dvb);
  Complex c2 = 0.125*(dvb - dva);
  Complex c3 = 0.0625*(va - vb + dva + dvb);

  // Remove the correction from the lowest four coefficients.
  v.sub(0, c0);
  v.sub(1, c1);
  v.sub(2, c2);
  v.sub(3, c3);
}


// Build a random (u,v,w) profile for the Fourier mode (kx,kz) that is
// divergence-free and vanishes at both endpoints; both properties are
// checked against EPSILON by the asserts at the end.
//   u,v,w  - output profiles, set to Spectral state
//   kx,kz  - wavenumbers of the mode
//   Lx,Lz  - domain lengths, used for the x and z derivative factors
//   mag    - scale passed to randomUprofile/randomVprofile
//   decay  - decay factor passed to randomUprofile/randomVprofile
void randomProfile(ComplexChebyCoeff& u, ComplexChebyCoeff& v, ComplexChebyCoeff& w, int kx, int kz, Real Lx, Real Lz, Real mag, Real decay) {
  
  u.setState(Spectral);
  v.setState(Spectral);
  w.setState(Spectral);
  if (kx == 0 && kz == 0) {
    // kx == kz == 0: random u and w with the imaginary part zeroed, v == 0.
    
    randomUprofile(w, mag, decay);
    w.im.setToZero();

    randomUprofile(u, mag, decay);
    u.im.setToZero();

    v.setToZero();

  }
  else {
    // Other kx,kz cases are based on a random v(y). 

    randomVprofile(v, mag, decay);
    ComplexChebyCoeff vy = diff(v);

    if (kx == 0) {
      // ux == 0, so div == vy + wz == 0 fixes w; u is free.
      randomUprofile(u, mag, decay);
      u.im.setToZero();
      w = vy;
      w *= -Lz/((2*pi*kz)*I);
    }
    else if (kz == 0) {
      // wz == 0, so div == ux + vy == 0 fixes u; w is free.
      randomUprofile(w, mag, decay);
      w.im.setToZero();
      u = vy;
      u *= -Lx/((2*pi*kx)*I);
    }
    else {
      // Finally, the general case, where kx, kz != 0 and u,v,w are nonzero.
      // The result is the sum of two divergence-free fields (u0,v0,w0)
      // and (u1,v1,w1).

      ComplexChebyCoeff v0(v);
      ComplexChebyCoeff v1(v);
      randomVprofile(v0, mag, decay);
      randomVprofile(v1, mag, decay);

      ComplexChebyCoeff v0y = diff(v0);
      ComplexChebyCoeff v1y = diff(v1);

      ComplexChebyCoeff u0(v.numModes(), v.a(), v.b(), Spectral);
      ComplexChebyCoeff w0(v.numModes(), v.a(), v.b(), Spectral);
      ComplexChebyCoeff u1(v.numModes(), v.a(), v.b(), Spectral);
      ComplexChebyCoeff w1(v.numModes(), v.a(), v.b(), Spectral);
      
      // Set a random u0(y)
      randomUprofile(u0, mag, decay);

      // Calculate w0 from div u0 == u0x + v0y + w0z == 0.
      ComplexChebyCoeff u0x(u0);
      u0x *= (2*pi*kx/Lx)*I;
      w0 = v0y;
      w0 += u0x;
      w0 *= -Lz/((2*pi*kz)*I);       // Set w0 = -Lz/(2*pi*I*kz) * (u0x + v0y);


      // w1 is left as constructed (the random assignment below is disabled).
      //randomUprofile(w1, mag, decay);

      // Calculate u1 from div u1 == u1x + v1y + w1z == 0.
      ComplexChebyCoeff w1z(w1);
      w1z *= (2*pi*kz/Lz)*I;
      u1 = v1y;
      u1 += w1z;
      u1 *= -Lx/((2*pi*kx)*I);       // Set u1 = -Lx/(2*pi*I*kx) * (v1y + w1z);

      u = u0;
      v = v0;
      w = w0;
      u += u1;
      v += v1;
      w += w1;
    }
  }

  // Check divergence
  ComplexChebyCoeff ux(u);
  ux *= (2*pi*kx/Lx)*I;
  ComplexChebyCoeff wz(w);
  wz *= (2*pi*kz/Lz)*I;
  ComplexChebyCoeff vy = diff(v);
  
  ComplexChebyCoeff div(ux);
  div += vy;
  div += wz;

  Real divNorm = L2Norm(div);
  Real ubcNorm = true_norm(u.eval_a()) + true_norm(u.eval_b());
  Real vbcNorm = true_norm(v.eval_a()) + true_norm(v.eval_b());
  Real wbcNorm = true_norm(w.eval_a()) + true_norm(w.eval_b());
  assert(divNorm < EPSILON);
  assert(ubcNorm < EPSILON);
  assert(vbcNorm < EPSILON);
  assert(wbcNorm < EPSILON);
  // suppress unused-variable compiler warnings...
  wbcNorm += divNorm + ubcNorm + vbcNorm;

}

// Build a (u,v,w) profile for the Fourier mode (kx,kz) from single
// Chebyshev modes (via chebyUprofile/chebyVprofile) that is
// divergence-free and vanishes at both endpoints; both properties are
// checked against EPSILON by the asserts at the end.
//   u,v,w  - output profiles, set to Spectral state
//   un,vn  - Chebyshev mode indices passed to chebyUprofile/chebyVprofile
//   kx,kz  - wavenumbers of the mode
//   Lx,Lz  - domain lengths, used for the x and z derivative factors
//   decay  - decay base passed to chebyUprofile/chebyVprofile
void chebyProfile(ComplexChebyCoeff& u, ComplexChebyCoeff& v, ComplexChebyCoeff& w, int un, int vn, int kx, int kz, Real Lx, Real Lz, Real decay) {
  
  u.setState(Spectral);
  v.setState(Spectral);
  w.setState(Spectral);
  if (kx == 0 && kz == 0) {
    // kx == kz == 0: u and w with the imaginary part zeroed, v == 0.
    chebyUprofile(u, un, decay);
    chebyUprofile(w, vn, decay); // yes, vn
    u.im.setToZero();
    w.im.setToZero();
    v.setToZero();
  }
  else {
    // Other kx,kz cases are based on a v(y) from chebyVprofile. 

    chebyVprofile(v, vn, decay);
    ComplexChebyCoeff vy = diff(v);

    if (kx == 0) {
      // ux == 0, so div == vy + wz == 0 fixes w; u is free.
      chebyUprofile(u, un, decay);
      u.im.setToZero();
      w = vy;
      w *= -Lz/((2*pi*kz)*I);
    }
    else if (kz == 0) {
      // wz == 0, so div == ux + vy == 0 fixes u; w is free.
      chebyUprofile(w, un, decay); // yes, un
      w.im.setToZero();
      u = vy;
      u *= -Lx/((2*pi*kx)*I);
    }
    else {
      // Finally, the general case, where kx, kz != 0 and u,v,w are nonzero.
      // The result is the sum of two divergence-free fields (u0,v0,w0)
      // and (u1,v1,w1).

      ComplexChebyCoeff v0(v);
      ComplexChebyCoeff v1(v);
      chebyVprofile(v0, vn, decay);
      chebyVprofile(v1, vn, decay);

      ComplexChebyCoeff v0y = diff(v0);
      ComplexChebyCoeff v1y = diff(v1);

      ComplexChebyCoeff u0(v.numModes(), v.a(), v.b(), Spectral);
      ComplexChebyCoeff w0(v.numModes(), v.a(), v.b(), Spectral);
      ComplexChebyCoeff u1(v.numModes(), v.a(), v.b(), Spectral);
      ComplexChebyCoeff w1(v.numModes(), v.a(), v.b(), Spectral);
      
      // Set u0(y) to Chebyshev mode un
      chebyUprofile(u0, un, decay);

      // Calculate w0 from div u0 == u0x + v0y + w0z == 0.
      ComplexChebyCoeff u0x(u0);
      u0x *= (2*pi*kx/Lx)*I;
      w0 = v0y;
      w0 += u0x;
      w0 *= -Lz/((2*pi*kz)*I);       // Set w0 = -Lz/(2*pi*I*kz) * (u0x + v0y);


      // w1 is left as constructed (the random assignment below is disabled).
      //randomUprofile(w1, mag, decay);

      // Calculate u1 from div u1 == u1x + v1y + w1z == 0.
      ComplexChebyCoeff w1z(w1);
      w1z *= (2*pi*kz/Lz)*I;
      u1 = v1y;
      u1 += w1z;
      u1 *= -Lx/((2*pi*kx)*I);       // Set u1 = -Lx/(2*pi*I*kx) * (v1y + w1z);

      u = u0;
      v = v0;
      w = w0;
      u += u1;
      v += v1;
      w += w1;
    }
  }

  // Check divergence
  ComplexChebyCoeff ux(u);
  ux *= (2*pi*kx/Lx)*I;
  ComplexChebyCoeff wz(w);
  wz *= (2*pi*kz/Lz)*I;
  ComplexChebyCoeff vy = diff(v);
  
  ComplexChebyCoeff div(ux);
  div += vy;
  div += wz;

  Real divNorm = L2Norm(div);
  Real ubcNorm = true_norm(u.eval_a()) + true_norm(u.eval_b());
  Real vbcNorm = true_norm(v.eval_a()) + true_norm(v.eval_b());
  Real wbcNorm = true_norm(w.eval_a()) + true_norm(w.eval_b());
  assert(divNorm < EPSILON);
  assert(ubcNorm < EPSILON);
  assert(vbcNorm < EPSILON);
  assert(wbcNorm < EPSILON);
  // suppress unused-variable compiler warnings...
  wbcNorm += divNorm + ubcNorm + vbcNorm;

}


// Fill u and P from an Orr-Sommerfeld eigenfunction at time t.
//   u        - receives 1-y^2 in the (0,0) mode of component 0 plus the
//              perturbation exp(-i*omega*t)*(ueig,veig) in the mx==1 mode
//              and its conjugate in the last x mode
//   P        - receives the pressure perturbation from peig plus
//              1/2 |u|^2 evaluated gridpoint by gridpoint
//   omega    - complex frequency of the eigenfunction
//   ueig, veig, peig - eigenfunction profiles, indexed by y gridpoint
// Reynolds is not used in this function.
// Both fields are left xz-spectral with chebyfft_y() applied.
void assignOrrSommField(FlowField& u, FlowField& P,
			Real t, Real Reynolds, Complex omega,
			const ComplexChebyCoeff& ueig, 
			const ComplexChebyCoeff& veig,
			const ComplexChebyCoeff& peig) {

  const int Ny = u.numYgridpts();

  // Reconstruct velocity field (Poiseuille plus OS perturbation) from 
  // y-profile of (kx,kz) == (1,0) Spectral mode of perturbation (ueig & veig).
  u.setState(Spectral, Physical);
  u.setToZero();
  const Complex phase = exp((-t*omega)*I);
  const int mxLast = u.numXmodes()-1;   // index of the conjugate mode

  int ny; // MSVC++ FOR-SCOPE BUG
  for (ny=0; ny<Ny; ++ny) {
    const Complex uPert = phase*ueig[ny];
    const Complex vPert = phase*veig[ny];
    u.cmplx(0, ny, 0, 0) = Complex(1.0-square(u.y(ny)));
    u.cmplx(1, ny, 0, 0) = uPert;
    u.cmplx(1, ny, 0, 1) = vPert;
    u.cmplx(mxLast, ny, 0, 0) = conjugate(uPert);
    u.cmplx(mxLast, ny, 0, 1) = conjugate(vPert);
  }

  // Assign pressure perturbation p to P field.
  P.setState(Spectral, Physical);
  P.setToZero();
  for (ny=0; ny<Ny; ++ny) {
    const Complex pPert = phase*peig[ny];
    P.cmplx(1, ny, 0, 0) = pPert;
    P.cmplx(mxLast, ny, 0, 0) = conjugate(pPert);
  }

  // Add velocity contrib to get modified pressure P = p + 1/2 |u|^2,
  // which has to be formed in physical space.
  u.irealfft_xz();
  P.irealfft_xz();

  for (ny=0; ny<Ny; ++ny) {
    for (int nx=0; nx<u.numXgridpts(); ++nx) {
      for (int nz=0; nz<u.numZgridpts(); ++nz) {
        P(nx,ny,nz,0) += 0.5*(square(u(nx,ny,nz,0)) +
                              square(u(nx,ny,nz,1)) +
                              square(u(nx,ny,nz,2)));
      }
    }
  }

  // Back to spectral in xz, then transform in y.
  u.realfft_xz();
  P.realfft_xz();
  u.chebyfft_y();
  P.chebyfft_y();
}


/**********************************************************************
// Old code kept around for a few revisions

//void FlowField::vorticity2(FlowField& omega, const ChebyCoeff& UbaseyT) const {
void FlowField::vorticity2(FlowField& omega) const {
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  //assert(UbaseyT.state() == Spectral);
  assert(Nd_ >= 3);

  omega.setState(Spectral, Spectral);
  omega.setToZero();

  Complex cz = (2*pi/Lz_)*I;
  Complex cx = (2*pi/Lx_)*I;

  int Nx = numXmodes();
  int Ny = numYmodes();
  int Nz = numZmodes();

  // Set omega to the perturbation vorticity
  for (int ny=0; ny<Ny; ++ny) {
    for (int nx=0; nx<Nx; ++nx) 
      for (int nz=0; nz<Nz; ++nz) {
	// omega(0) = -dv/dz + another term yet to come
	omega.cmplx(nx,ny,nz,0) = -1.0*cz*Real(kz(nz))*cmplx(nx,ny,nz,1);

	// omega(1) =  du/dz - dw/dx
	omega.cmplx(nx,ny,nz,1) = cz*Real(kz(nz))*cmplx(nx,ny,nz,0) 
	  - cx*Real(kx(nx))*cmplx(nx,ny,nz,2);

	// omega(2) =  dv/dx + another term yet to come
	omega.cmplx(nx,ny,nz,2) =  cx*Real(kx(nx))*cmplx(nx,ny,nz,1);
      }
  }

  // Add in the y derivatives. The inner y-loop makes this function inefficient
  ComplexChebyCoeff u(Ny, a_, b_, Spectral);
  ComplexChebyCoeff w(Ny, a_, b_, Spectral);
  ComplexChebyCoeff uy(Ny, a_, b_, Spectral);
  ComplexChebyCoeff wy(Ny, a_, b_, Spectral);
	
  for (int nx=0; nx<Nx; ++nx)
    for (int nz=0; nz<Nz; ++nz) { 
      int ny; // MSVC++ FOR-SCOPE BUG
      for (ny=0; ny<Ny; ++ny) {
	u.set(ny, cmplx(nx,ny,nz,0));
	w.set(ny, cmplx(nx,ny,nz,2));
      }
      diff(u,uy);
      diff(w,wy);
      for (ny=0; ny<Ny; ++ny) {
	omega.cmplx(nx,ny,nz,0) += wy[ny];
	omega.cmplx(nx,ny,nz,2) -= uy[ny];
      }
    }
  
  // Lastly, add the base flow vorticity term -dU/dy e_z
  //if (UbaseyT.length() > 0)
  //for (int ny=0; ny<Ny; ++ny) 
  //omega.cmplx(0,ny,0,2) -= UbaseyT[ny];
    
}

// Takes U=FTF, omega=FTF, returns f=FTF
void FlowField::nonlinearityRotational2(const ChebyCoeff& Ubase,
					const ChebyCoeff& Ubasey,
					const ChebyCoeff& UbaseyT, 
					FlowField& omega, 
					FlowField& f) const {
  
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  assert(Nd_ >= 3);
  assert(Ubase.state() == Physical);

  //vorticity2(omega, UbaseyT);
  vorticity2(omega);
  f.setToZero();
  // Cast away constness of *this for IFFT and FFT.
  FlowField& velocity = (FlowField&) *this;
  // Transform *this and omega to physcial space
  omega.ichebyfft_y(); 
  omega.irealfft_xz();
  velocity.ichebyfft_y(); 
  velocity.irealfft_xz();

  f.setState(Physical, Physical);

  Real omega_x;
  Real omega_y;
  Real omega_z;
  Real u;
  Real v;
  Real w;

  // Loop should be efficient if we get 9 cache lines. 
  // Assign f = (Omega + omega) x (u + U)
  for (int ny=0; ny<Ny_; ++ny) { 
    Real U = (Ubase.length() == 0) ? 0.0 : Ubase[ny];
    Real Uy = (Ubasey.length() == 0) ? 0.0 : Ubasey[ny];
    for (int nx=0; nx<Nx_; ++nx) 
      for (int nz=0; nz<Nz_; ++nz) {
	u = velocity(nx,ny,nz, 0) + U;
	v = velocity(nx,ny,nz, 1);
	w = velocity(nx,ny,nz, 2);
	omega_x = omega(nx,ny,nz, 0);
	omega_y = omega(nx,ny,nz, 1);
	omega_z = omega(nx,ny,nz, 2) - Uy;

	f(nx,ny,nz, 0) = omega_y*w - omega_z*v;
	f(nx,ny,nz, 1) = omega_z*u - omega_x*w;
	f(nx,ny,nz, 2) = omega_x*v - omega_y*u;
      }
  }
  omega.chebyfft_y(); 
  omega.realfft_xz();
  velocity.chebyfft_y(); 
  velocity.realfft_xz();
  f.chebyfft_y(); 
  f.realfft_xz();
  
  return;
}


// Takes U=FTF, omega=FTF, returns f=FTF
void FlowField::nonlinearityRotational(const ChebyCoeff& Ubase,
				       const ChebyCoeff& Ubasey,
				       const ChebyCoeff& UbaseyT, 
				       FlowField& omega, 
				       FlowField& f) const {

  // First assign Ubase part of nonlinearity: U du/dx + Uy v e_x
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  assert(Nd_ >= 3);
  assert(Ubase.state() == Physical);
  assert(Ubasey.state() == Physical);

  vorticity2(omega);

  // Cast away constness of *this for IFFT and FFT.
  FlowField& velocity = (FlowField&) *this;
  velocity.ichebyfft_y(); 

  f.setToZero();
  f.setState(Spectral, Physical);

  Complex uc;
  Complex vc;
  Complex wc;

  int Mx=velocity.numXmodes();
  int Mz=velocity.numZmodes();

  // Loop should be efficient if we get 9 cache lines. 
  // Assign f = omega x U + Omega x u
  for (int ny=0; ny<Ny_; ++ny) { 
    Real U = Ubase[ny];
    Real Uy = Ubasey[ny];
    for (int mx=0; mx<Mx; ++mx) {
      Complex d_dx = I*(2*pi*kx(mx)/Lx_);
      for (int mz=0; mz<Mz; ++mz) {
	uc = velocity.cmplx(mx,ny,mz, 0);
	vc = velocity.cmplx(mx,ny,mz, 1);
	wc = velocity.cmplx(mx,ny,mz, 2);

	f.cmplx(mx,ny,mz, 0) = U*d_dx*uc + vc*Uy;
	f.cmplx(mx,ny,mz, 1) = U*d_dx*vc;  
	f.cmplx(mx,ny,mz, 2) = U*d_dx*wc;
      }
    }
  }

  f.makePhysical();
  omega.makePhysical();
  velocity.makePhysical();

  Real omega_x;
  Real omega_y;
  Real omega_z;
  Real u;
  Real v;
  Real w;

  // Loop should be efficient if we get 9 cache lines. 
  // Add in omega x u so that 
  // f = omega x U + Omega x u + omega x u
  for (int ny=0; ny<Ny_; ++ny) { 
    //Real Uy = Ubasey[ny];
    for (int nx=0; nx<Nx_; ++nx) {
      for (int nz=0; nz<Nz_; ++nz) {
	u = velocity(nx,ny,nz, 0);
	v = velocity(nx,ny,nz, 1);
	w = velocity(nx,ny,nz, 2);
	omega_x = omega(nx,ny,nz, 0);
	omega_y = omega(nx,ny,nz, 1);
	omega_z = omega(nx,ny,nz, 2);

	f(nx,ny,nz, 0) += omega_y*w - omega_z*v;
	f(nx,ny,nz, 1) += omega_z*u - omega_x*w;
	f(nx,ny,nz, 2) += omega_x*v - omega_y*u;
      }
    }
  }
  //omega.makeSpectral();
  velocity.makeSpectral();
  f.makeSpectral();
  
  return;
}

// Takes U=FTF, omega=FTF, returns f=FTF
void FlowField::nonlinearityLinearized(const ChebyCoeff& Ubase,
				       const ChebyCoeff& Ubasey,
				       const ChebyCoeff& UbaseyT,
				       FlowField& omega,
				       FlowField& f) const {
  
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  assert(Nd_ >= 3);
  assert(Ubase.state() == Physical);
  assert(Ubasey.state() == Physical);

  // Cast away constness of *this for IFFT and FFT.
  FlowField& velocity = (FlowField&) *this;
  velocity.ichebyfft_y(); 

  f.setToZero();
  f.setState(Spectral, Physical);

  Complex u;
  Complex v;
  Complex w;

  int Mx=velocity.numXmodes();
  int Mz=velocity.numZmodes();

  // Loop should be efficient if we get 9 cache lines. 
  // Assign f = omega x U + Omega x u
  for (int ny=0; ny<Ny_; ++ny) { 
    //Real dy = (ny==0 || ny==Ny_-1) ? y[0]-y[1] : (y[ny-1]-y[ny+1])/2.0;
    //Real U = (Ubase.length() == 0) ? 0.0 : Ubase[ny];
    for (int mx=0; mx<Mx; ++mx) {
      Complex d_dx = I*(2*pi*kx(mx)/Lx_);

      for (int mz=0; mz<Mz; ++mz) {
	u = velocity.cmplx(mx,ny,mz, 0);
	v = velocity.cmplx(mx,ny,mz, 1);
	w = velocity.cmplx(mx,ny,mz, 2);

	f.cmplx(mx,ny,mz, 0) = Ubase[ny]*d_dx*u + v*Ubasey[ny];
	f.cmplx(mx,ny,mz, 1) = Ubase[ny]*d_dx*v;  
	f.cmplx(mx,ny,mz, 2) = Ubase[ny]*d_dx*w;

      }
    }
  }
  velocity.chebyfft_y(); 
  f.chebyfft_y(); 
  
  return;
}

// Takes U=FTF, omega=FTF, returns f=FTF
void FlowField::nonlinearityLinearizedDealiased(const ChebyCoeff& Ubase,
						const ChebyCoeff& Ubasey,
						const ChebyCoeff& UbaseyT,
						FlowField& omega, 
						FlowField& f) const {
  
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  assert(Nd_ >= 3);
  assert(Ubase.state() == Physical);

  //vorticity2(omega, UbaseyT);
  vorticity2(omega);
  f.setToZero();
  // Cast away constness of *this for IFFT and FFT.
  FlowField& velocity = (FlowField&) *this;
  // Transform *this and omega to physcial space
  omega.ichebyfft_y(); 
  omega.irealfft_xz();
  velocity.ichebyfft_y(); 
  velocity.irealfft_xz();

  f.setState(Physical, Physical);

  Real omega_x;
  Real omega_y;
  Real omega_z;
  Real Omega_z;
  Real u;
  Real v;
  Real w;

  // Loop should be efficient if we get 9 cache lines. 
  // Assign f = omega x (u + U);
  for (int ny=0; ny<Ny_; ++ny) { 
    Real U = (Ubase.length() == 0) ? 0.0 : Ubase[ny];
    for (int nx=0; nx<Nx_; ++nx) 
      for (int nz=0; nz<Nz_; ++nz) {
	u = velocity(nx,ny,nz, 0);
	v = velocity(nx,ny,nz, 1);
	w = velocity(nx,ny,nz, 2);
	omega_x = omega(nx,ny,nz, 0);
	omega_y = omega(nx,ny,nz, 1);
	omega_z = omega(nx,ny,nz, 2);
	Omega_z = -1*Ubasey[ny];

	f(nx,ny,nz, 0) = -1*Omega_z*v;
	f(nx,ny,nz, 1) = omega_z*U + Omega_z*u;
	f(nx,ny,nz, 2) = -1*omega_y*U;
      }
  }
  omega.chebyfft_y(); 
  omega.realfft_xz();
  velocity.chebyfft_y(); 
  velocity.realfft_xz();
  f.chebyfft_y(); 
  f.realfft_xz();
  
  return;
}

void FlowField::nonlinearityDealiased(const ChebyCoeff& Ubase, 
				      const ChebyCoeff& Ubasey,
				      const ChebyCoeff& UbaseyT, 
				      FlowField& omeg, 
				      FlowField& f) const {

  assert(xzstate_ == Spectral && ystate_ == Spectral);
  
  assert(Nd_ >= 3);

  //vorticity2(omeg, UbaseyT);
  vorticity2(omeg);
  f.setToZero();

  // Cast away constness of *this for IFFT and FFT.
  FlowField& velocity = (FlowField&) *this;
  // Transform *this and omega to from FTF to PTP
  FlowField& omega = (FlowField&) omeg;
  omega.irealfft_xz();
  velocity.irealfft_xz();

  int Nypad = (3*Ny_)/2;
  ChebyTransform transPad(Nypad);  // make this a member later for efficiency

  ChebyCoeff u(Nypad, a_, b_, Spectral);
  ChebyCoeff v(Nypad, a_, b_, Spectral);
  ChebyCoeff w(Nypad, a_, b_, Spectral);
  ChebyCoeff omega_x(Nypad, a_, b_, Spectral);
  ChebyCoeff omega_y(Nypad, a_, b_, Spectral);
  ChebyCoeff omega_z(Nypad, a_, b_, Spectral);
  ChebyCoeff fx(Nypad, a_, b_, Spectral);
  ChebyCoeff fy(Nypad, a_, b_, Spectral);
  ChebyCoeff fz(Nypad, a_, b_, Spectral);

  // The next twenty lines or so should go somewhere else!
  // Not to be redone for each call to nonlinearity2!
  ChebyCoeff UbasePad(Nypad, a_, b_, Spectral);
  ChebyCoeff UbaseyPad(Nypad, a_, b_, Spectral);
  if (Ubase.length() == Ny_) {
    ChebyCoeff UbaseCopy(Ubase); // Remove this!
    ChebyCoeff UbaseyCopy(Ubasey); // Remove this!
    ChebyTransform trans(Ny_);
    trans.makeSpectral(UbaseCopy);
    trans.makeSpectral(UbaseyCopy);
    for (int ny=0; ny<Ny_; ++ny) {
      UbasePad[ny] = UbaseCopy[ny];
      UbaseyPad[ny] = UbaseyCopy[ny];
    }
  }
  transPad.makePhysical(UbasePad);
  transPad.makePhysical(UbaseyPad);
  
  f.setState(Physical, Spectral);

  for (int nx=0; nx<Nx_; ++nx) {
    for (int nz=0; nz<Nz_; ++nz) {
      u.setState(Spectral);
      v.setState(Spectral);
      w.setState(Spectral);
      omega_x.setState(Spectral);
      omega_y.setState(Spectral);
      omega_z.setState(Spectral);

      // Copy cheby coeffs into padded vectors for padded ichebyfft
      int ny; // MSVC++ FOR-SCOPE BUG
      for (ny=0; ny<Ny_; ++ny) {
	u[ny] = velocity(nx,ny,nz, 0) + UbasePad[ny];
	v[ny] = velocity(nx,ny,nz, 1);
	w[ny] = velocity(nx,ny,nz, 2);
	omega_x[ny] = omega(nx,ny,nz, 0);
	omega_y[ny] = omega(nx,ny,nz, 1);
	omega_z[ny] = omega(nx,ny,nz, 2) - UbaseyPad[ny];
      }
      for (ny=Ny_; ny<Nypad; ++ny) {
      	u[ny] = 0.0;
	v[ny] = 0.0;
	w[ny] = 0.0;
	omega_x[ny] = 0.0;
	omega_y[ny] = 0.0;
	omega_z[ny] = 0.0;
      }

      // Transform padded velocity and vorticity cheby coeff to physical 
      transPad.ichebyfft(u);
      transPad.ichebyfft(v);
      transPad.ichebyfft(w);
      transPad.ichebyfft(omega_x);
      transPad.ichebyfft(omega_y);
      transPad.ichebyfft(omega_z);

      // Calculate nonlinearity on padded grid.
      fx.setState(Physical);
      fy.setState(Physical);
      fz.setState(Physical);
      for (ny=0; ny<Nypad; ++ny) {
	fx[ny] = omega_y[ny]*w[ny] - omega_z[ny]*v[ny];
	fy[ny] = omega_z[ny]*(u[ny]+UbasePad[ny]) - omega_x[ny]*w[ny];
	fz[ny] = omega_x[ny]*v[ny] - omega_y[ny]*(u[ny]+UbasePad[ny]);
      }
      // Transform padded nonlinearity back to chebyshev coeffs
      transPad.chebyfft(fx);
      transPad.chebyfft(fy);
      transPad.chebyfft(fz);

      // Copy first Ny_ cheby coeffs back into flowfield
      for (ny=0; ny<Ny_; ++ny) {
	f(nx,ny,nz, 0) = fx[ny];
	f(nx,ny,nz, 1) = fy[ny];
	f(nx,ny,nz, 2) = fz[nz];
      }
    }
  }  
  velocity.realfft_xz();
  f.realfft_xz();
  omega.realfft_xz();

  return;
}


// Thesis notes 4/22/01, 12/01/03
// Compute nonlinearity as 1/2 [u grad u + div (uu)]
// remembering that u is really u+U(y) e_x, which ends up as
// 1/2 [u grad u + U du/dx + v Uy e_x + div ((u + U e_x)(u + U e_x))]
void FlowField::nonlinearitySkewSymmetric(const ChebyCoeff& Ubase, 
					  const ChebyCoeff& Ubasey,
					  const ChebyCoeff& UbaseyT,
					  FlowField& tmp, 
					  FlowField& f) const {

  assert(Nd_ == 3);
  assert(congruent(f)); 
  assert(tmp.vectorDim() == 9);
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  FlowField& u = (FlowField&) *this;

  ComplexChebyCoeff tmpProfile(Ny_,a_,b_, Spectral);
  ComplexChebyCoeff tmpProfile_y(Ny_,a_,b_, Spectral);

  tmp.setToZero();
  f.setToZero();

  // u is Spectral,Spectral
  tmp.setState(Spectral,Spectral);
  f.setState(Physical, Physical);

  // ====================================================================
  // I. Compute u dotgrad u.
  
  // -----------------------------------------------------
  // Ia. Compute (u d/dx + v d/dy + w d/dz) u, assign to f(*,0)
  // y,x,z loop order is most efficient, but not possible for y derivatives.
  // So use x,z,y loop for d/dy and y,x,z for d/dx and d/dz.

  // Set tmp_ij = du_i/dx_j, as FTF, with storage for tmp_ij = tmp(3*j+i);
  for (int i=0; i<3; ++i) {
    for (int mx=0; mx<Nx_; ++mx)     // this loop does tmp(*,1) = du_i/dy
      for (int mz=0; mz<Nzpad2_; ++mz) {
	for (int my=0; my<Ny_; ++my)
	  tmpProfile.set(my, u.cmplx(mx,my,mz,i));
	diff(tmpProfile, tmpProfile_y);
	for (int my=0; my<Ny_; ++my)
	  tmp.cmplx(mx,my,mz, 3+i) = tmpProfile_y[my];
      }
    for (int my=0; my<Ny_; ++my)     // tmp(*,0)=du_i/dx and tmp(*,2)=du_i/dz
      for (int mx=0; mx<Nx_; ++mx) {
	Complex d_dx = (2*pi*kx(mx)/Lx_)*I;
	for (int mz=0; mz<Nzpad2_; ++mz) {
	  Complex d_dz = (2*pi*kz(mz)/Lz_)*I;
	  tmp.cmplx(mx,my,mz,i)   = d_dx*u.cmplx(mx,my,mz,i);
	  tmp.cmplx(mx,my,mz,6+i) = d_dz*u.cmplx(mx,my,mz,i);
	}
      }
  }

  u.makePhysical();

  // Ia2. Transform tmp_ij = du_i/dx_j to PPP.
  tmp.makePhysical();

  // Then add in u_j du_i/dx_j + U du_i/dx_0
  for (int i=0; i<3; ++i) {
    for (int ny=0; ny<Ny_; ++ny){
      Real U = Ubase(ny);
      for (int nx=0; nx<Nx_; ++nx) 
	for (int nz=0; nz<Nz_; ++nz) {
	  f(nx,ny,nz,i) +=
	    u(nx,ny,nz,0)*tmp(nx,ny,nz,i) +    
	    u(nx,ny,nz,1)*tmp(nx,ny,nz,3+i) +  
	    u(nx,ny,nz,2)*tmp(nx,ny,nz,6+i) +
	    U*tmp(nx,ny,nz,i);
	  
	}
    }
  }
  // Add in v dU/dy e_x
  for (int ny=0; ny<Ny_; ++ny) {
    Real Uy = Ubasey(ny);
    for (int nx=0; nx<Nx_; ++nx) 
      for (int nz=0; nz<Nz_; ++nz) 
	f(nx,ny,nz,0) += Uy*u(nx,ny,nz,1);
  }

  // ================================================================
  // II. Add grad dot (u u) to f.

  // Differentiation is done *after* multiplication here, so results are
  // in spectral. Convert f to spectral so results can be added w/o transform.

  tmp.setToZero();
  tmp.setState(Physical, Physical);

  // Set tmp_ij = u_i u_j
  // The i-loop is unrooled in the innermost loop because of the need to
  // add the mean flow U to the i=0 component. Would it be more efficient 
  // to unroll it externally or find a clever way to add U prior? 

  for (int ny=0; ny<Ny_; ++ny) {
    Real U = Ubase(ny);
    for (int nx=0; nx<Nx_; ++nx) 
      for (int nz=0; nz<Nz_; ++nz) {
	Real u0 = u(nx,ny,nz,0) + U;
	Real u1 = u(nx,ny,nz,1);
	Real u2 = u(nx,ny,nz,2);
	tmp(nx,ny,nz,0) = u0*u0;
	tmp(nx,ny,nz,1) = tmp(nx,ny,nz,3) = u0*u1;
	tmp(nx,ny,nz,2) = tmp(nx,ny,nz,6) = u0*u2;
	tmp(nx,ny,nz,4) = u1*u1;
	tmp(nx,ny,nz,5) = tmp(nx,ny,nz,7) = u1*u2;
	tmp(nx,ny,nz,8) = u2*u2;
      }
  }
  tmp.makeSpectral();
  f.makeSpectral();

  // Now set f_i = d/dx_j (u_i u_j)
  for (int i=0; i<3; ++i) {

    // Add in du_i/dx and du_i/dz, that is, d/dx_j (u_i u_j) for j=0,2
    for (int my=0; my<Ny_; ++my)     
      for (int mx=0; mx<Nx_; ++mx) {
	Complex d_dx = (2*pi*kx(mx)/Lx_)*I;
	for (int mz=0; mz<Nzpad2_; ++mz) {
	  Complex d_dz = (2*pi*kz(mz)/Lz_)*I;
	  f.cmplx(mx,my,mz,i) 
	    += d_dx*tmp.cmplx(mx,my,mz,i)+d_dz*tmp.cmplx(mx,my,mz,6+i);
	}
      }
    // Add in du_i/dy, that is d/dx_j (u_i u_j) for j=1
    for (int mx=0; mx<Nx_; ++mx) 
      for (int mz=0; mz<Nzpad2_; ++mz) {
	for (int my=0; my<Ny_; ++my)    
	  tmpProfile.set(my, tmp.cmplx(mx,my,mz,3+i));
	diff(tmpProfile, tmpProfile_y);
	for (int my=0; my<Ny_; ++my) 
	  f.cmplx(mx,my,mz,i) += tmpProfile_y[my]; // j=1
      }
  }

  f *= 0.5;
  u.makeSpectral(); // put *this back to original state.
}


// Thesis notes 4/22/01, 12/01/03
// Compute nonlinearity as f = u grad u + U du/dx + v Uy e_x 
void FlowField::nonlinearityConvection(const ChebyCoeff& Ubase, 
				       const ChebyCoeff& Ubasey,
				       const ChebyCoeff& UbaseyT,
				       FlowField& tmp, 
				       FlowField& f) const {
  assert(Nd_ == 3);
  assert(congruent(f)); 
  assert(tmp.vectorDim() == 9);
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  FlowField& u = (FlowField&) *this;

  ComplexChebyCoeff tmpProfile(Ny_,a_,b_, Spectral);
  ComplexChebyCoeff tmpProfile_y(Ny_,a_,b_, Spectral);

  tmp.setToZero();
  f.setToZero();

  // u is Spectral,Spectral
  tmp.setState(Spectral,Spectral);
  f.setState(Physical, Physical);

  // ====================================================================
  // I. Compute u dotgrad u.
  
  // -----------------------------------------------------
  // Ia. Compute (u d/dx + v d/dy + w d/dz) u, assign to f(*,0)
  // y,x,z loop order is most efficient, but not possible for y derivatives.
  // So use x,z,y loop for d/dy and y,x,z for d/dx and d/dz.

  // Set tmp_ij = du_i/dx_j, as FTF, with storage for tmp_ij = tmp(3*j+i);
  for (int i=0; i<3; ++i) {
    for (int mx=0; mx<Nx_; ++mx)     // this loop does tmp(*,1) = du_i/dy
      for (int mz=0; mz<Nzpad2_; ++mz) {
	for (int my=0; my<Ny_; ++my)
	  tmpProfile.set(my, u.cmplx(mx,my,mz,i));
	diff(tmpProfile, tmpProfile_y);
	for (int my=0; my<Ny_; ++my)
	  tmp.cmplx(mx,my,mz, 3+i) = tmpProfile_y[my];
      }
    for (int my=0; my<Ny_; ++my)     // tmp(*,0)=du_i/dx and tmp(*,2)=du_i/dz
      for (int mx=0; mx<Nx_; ++mx) {
	Complex d_dx = (2*pi*kx(mx)/Lx_)*I;
	for (int mz=0; mz<Nzpad2_; ++mz) {
	  Complex d_dz = (2*pi*kz(mz)/Lz_)*I;
	  tmp.cmplx(mx,my,mz,i)   = d_dx*u.cmplx(mx,my,mz,i);
	  tmp.cmplx(mx,my,mz,6+i) = d_dz*u.cmplx(mx,my,mz,i);
	}
      }
  }

  u.makePhysical();

  // Ia2. Transform tmp_ij = du_i/dx_j to PPP.
  tmp.makePhysical();

  // Add in v dU/dy e_x
  for (int ny=0; ny<Ny_; ++ny) {
    Real Uy = Ubasey(ny);
    for (int nx=0; nx<Nx_; ++nx) 
      for (int nz=0; nz<Nz_; ++nz) 
	f(nx,ny,nz,0) += Uy*u(nx,ny,nz,1);
  }

  // Then add in u_j du_i/dx_j + U du_i/dx_0
  for (int i=0; i<3; ++i) {
    for (int ny=0; ny<Ny_; ++ny) {
      Real U = Ubase(ny);
      for (int nx=0; nx<Nx_; ++nx) 
	for (int nz=0; nz<Nz_; ++nz) 
	  f(nx,ny,nz,i) +=
	    u(nx,ny,nz,0)*tmp(nx,ny,nz,i) +    
	    u(nx,ny,nz,1)*tmp(nx,ny,nz,3+i) +  
	    u(nx,ny,nz,2)*tmp(nx,ny,nz,6+i) +
	    U*tmp(nx,ny,nz,i);
    }
  }
  u.makeSpectral();
  f.makeSpectral();
}

// Compute nonlinearity as f =  div ((u + U e_x)(u + U e_x))
void FlowField::nonlinearityDivergence(const ChebyCoeff& Ubase, 
				       const ChebyCoeff& Ubasey,
				       const ChebyCoeff& UbaseyT,
				       FlowField& tmp, 
				       FlowField& f) const {

  assert(Nd_ == 3);
  assert(congruent(f)); 
  assert(tmp.vectorDim() == 9);
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  FlowField& u = (FlowField&) *this;

  ComplexChebyCoeff tmpProfile(Ny_,a_,b_, Spectral);
  ComplexChebyCoeff tmpProfile_y(Ny_,a_,b_, Spectral);

  // ================================================================
  // II. Add grad dot (u u) to f.

  // Differentiation is done *after* multiplication here, so results are
  // in spectral. Convert f to spectral so results can be added w/o transform.
  
  u.makePhysical();
  tmp.setToZero();
  tmp.setState(Physical, Physical);

  // Set tmp_ij = u_i u_j
  // The i-loop is unrooled in the innermost loop because of the need to
  // add the mean flow U to the i=0 component. Would it be more efficient 
  // to unroll it externally or find a clever way to add U prior? 

  for (int ny=0; ny<Ny_; ++ny) {
    Real U = Ubase(ny);
    for (int nx=0; nx<Nx_; ++nx) 
      for (int nz=0; nz<Nz_; ++nz) {
	Real u0 = u(nx,ny,nz,0) + U;
	Real u1 = u(nx,ny,nz,1);
	Real u2 = u(nx,ny,nz,2);
	tmp(nx,ny,nz,0) = u0*u0;
	tmp(nx,ny,nz,1) = tmp(nx,ny,nz,3) = u0*u1;
	tmp(nx,ny,nz,2) = tmp(nx,ny,nz,6) = u0*u2;
	tmp(nx,ny,nz,4) = u1*u1;
	tmp(nx,ny,nz,5) = tmp(nx,ny,nz,7) = u1*u2;
	tmp(nx,ny,nz,8) = u2*u2;
      }
  }
  tmp.makeSpectral();
  u.makeSpectral();   // put *this back to original state.
  f.setState(Spectral,Spectral);
  f.setToZero();

  // Now set f_i = d/dx_j (u_i u_j)
  for (int i=0; i<3; ++i) {

    // Add in du_i/dx and du_i/dz, that is, d/dx_j (u_i u_j) for j=0,2
    for (int my=0; my<Ny_; ++my)     
      for (int mx=0; mx<Nx_; ++mx) {
	Complex d_dx = (2*pi*kx(mx)/Lx_)*I;
	for (int mz=0; mz<Nzpad2_; ++mz) {
	  Complex d_dz = (2*pi*kz(mz)/Lz_)*I;
	  f.cmplx(mx,my,mz,i) 
	    += d_dx*tmp.cmplx(mx,my,mz,i)+d_dz*tmp.cmplx(mx,my,mz,6+i);
	}
      }
    // Add in du_i/dy, that is d/dx_j (u_i u_j) for j=1
    for (int mx=0; mx<Nx_; ++mx) 
      for (int mz=0; mz<Nzpad2_; ++mz) {
	for (int my=0; my<Ny_; ++my)    
	  tmpProfile.set(my, tmp.cmplx(mx,my,mz,3+i));
	diff(tmpProfile, tmpProfile_y);
	for (int my=0; my<Ny_; ++my) 
	  f.cmplx(mx,my,mz,i) += tmpProfile_y[my]; // j=1
      }
  }
}

void FlowField::saveProfile(int nx, int nz2, int i, const string& filename, const ChebyTransform& t) const {
  assert(xzstate_ == Spectral);
  
  ComplexChebyCoeff profile(Ny_, a_, b_, Spectral);
  for (int ny=0; ny<Ny_; ++ny) 
    profile.set(ny, (*this).cmplx(nx, ny, nz2, i));

   string cfile(filename);
   cfile += string("c");

  if (ystate_ == Spectral) {
    profile.save(cfile.c_str());
    t.ichebyfft(profile);
    profile.save(filename);
  }
  else { 
    profile.save(filename);
    t.chebyfft(profile);
    profile.save(cfile.c_str());
  }
}

void FlowField::saveDivProfile(int nx, int nz2, const string& filename, const ChebyTransform& t) const {
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  
  ComplexChebyCoeff v(Ny_, a_, b_, Spectral);
  for (int ny=0; ny<Ny_; ++ny) 
    v.set(ny, (*this).cmplx(nx, ny, nz2, 1));
  
  ComplexChebyCoeff vy(Ny_, a_, b_, Spectral);
  diff(v,vy);
  t.chebyfft(vy);
  
  ComplexChebyCoeff profile(Ny_, a_, b_, Spectral);
  Complex cx = 2*pi*I*kx(nx)/Lx_;
  Complex cz = 2*pi*I*kz(nz2)/Lz_;
  for (int ny=0; ny<Ny_; ++ny) 
    profile.set(ny, vy[ny] 
		+ cx*(*this).cmplx(nx, ny, nz2, 0)
		+ cz*(*this).cmplx(nx, ny, nz2, 2));
  
  profile.save(filename);
}

// Historical note from the original author:
//   "Broken for quite a while, commented out 4/21/01."
//
// Compute omega = curl u = (w_y - v_z, u_z - w_x, v_x - u_y) from the
// spectral coefficients of *this.
//
// *this is asserted (Spectral, Spectral) with at least 3 components. omega is
// tagged (Spectral, Spectral), zeroed and filled.
//
// The y derivatives use the backward Chebyshev derivative recurrence
//   b_N = 0,  b_{N-1} = 2 N a_N,
//   b_n = b_{n+2} + 2 (n+1) a_{n+1}   for n = N-2 .. 1,
//   b_0 = (b_2 + 2 a_1)/2,
// applied to every (nx,nz) column at once, with N = Ny_-1.
//
// Fix relative to the earlier version: the b_{n+2} term was read from the
// other output component (W while building component U, and U while building
// component W), so the recurrence never used the derivative being built.
// Each recurrence now reads back the component it is writing.
//
// NOTE(review): the recurrence carries no scaling for the y interval
// [a_, b_]; confirm against diff() whether a factor is needed when the
// interval is not [-1, 1].
void FlowField::vorticity(FlowField& omega) const {
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  assert(Nd_ >= 3);
  // The ny==0 step reads coefficient index 2 and the Nyb-1 step writes
  // index Ny_-2, so at least three y modes are required.
  assert(Ny_ >= 3);

  omega.setState(Spectral, Spectral);
  omega.setToZero();

  int Nyb = Ny_-1;
  const int U=0;
  const int V=1;
  const int W=2;

  // -------------------------------------------------------------
  // Put dw/dy in 0-comp of omega with a cheby differentiation vectorized 
  // over x and z.  The recurrence is broken into four parts.

  // Case ny==Nyb
  for (int nx=0; nx<Nx_; ++nx) 
    for (int nz=0; nz<Nzpad2_; ++nz) 
      omega.cmplx(nx,Nyb,   nz, U) = 0.0;
  // Case ny==Nyb-1
  for (int nx=0; nx<Nx_; ++nx) 
    for (int nz=0; nz<Nzpad2_; ++nz) 
      omega.cmplx(nx,Nyb-1, nz, U) = 2*Nyb*cmplx(nx,Nyb,nz, W);
  // Case ny in Nyb-2 to 1
  for (int ny=Nyb-2; ny>=1; --ny) 
    for (int nx=0; nx<Nx_; ++nx) 
      for (int nz=0; nz<Nzpad2_; ++nz) 
        omega.cmplx(nx, ny, nz, U) =  omega.cmplx(nx,ny+2,nz, U) + 2*(ny+1)*cmplx(nx,ny+1,nz, W);
  // Case ny==0
  for (int nx=0; nx<Nx_; ++nx) 
    for (int nz=0; nz<Nzpad2_; ++nz) 
      omega.cmplx(nx,  0, nz, U) = 0.5 *(omega.cmplx(nx,2,nz, U) + 2*cmplx(nx,1,nz, W));

  // omega = (w_y, 0, 0)
  // -------------------------------------------------------------
  // Put du/dy in 2-comp of omega with the same algorithm, then negate it.

  // Case ny==Nyb
  for (int nx=0; nx<Nx_; ++nx) 
    for (int nz=0; nz<Nzpad2_; ++nz) 
      omega.cmplx(nx,  Nyb,  nz, W) = 0.0;
  // Case ny==Nyb-1
  for (int nx=0; nx<Nx_; ++nx) 
    for (int nz=0; nz<Nzpad2_; ++nz) 
      omega.cmplx(nx, Nyb-1, nz, W) = 2*Nyb*cmplx(nx,Nyb,nz, U);
  // Case ny in Nyb-2 to 1
  for (int ny=Nyb-2; ny>=1; --ny) 
    for (int nx=0; nx<Nx_; ++nx) 
      for (int nz=0; nz<Nzpad2_; ++nz) 
        omega.cmplx(nx, ny,  nz, W) =  omega.cmplx(nx,ny+2,nz, W) + 2*(ny+1)*cmplx(nx,ny+1,nz, U);

  // Case ny==0
  for (int nx=0; nx<Nx_; ++nx) 
    for (int nz=0; nz<Nzpad2_; ++nz) 
      omega.cmplx(nx,0, nz, W) = 0.5 *(omega.cmplx(nx,2,nz, W) + 2*cmplx(nx,1,nz, U));

  // Flip the sign: component W now holds -u_y.
  for (int ny=Nyb; ny>=0; --ny) 
    for (int nx=0; nx<Nx_; ++nx) 
      for (int nz=0; nz<Nzpad2_; ++nz) 
        omega.cmplx(nx,ny,nz, W) *= -1.0;

  // omega = (w_y, 0, -u_y)
  // -------------------------------------------------------------
  // Add        - v_z to 0 comp of omega
  // Assign u_z - w_x to 1 comp of omega
  // Add          v_x to 2 comp of omega
  Complex pi2i = 2*pi*I;
  for (int ny=0; ny<Ny_; ++ny)
    for (int nx=0; nx<Nx_; ++nx) {
      Complex pi2ikxLx = kx(nx)*pi2i/Lx_;
      for (int nz=0; nz<Nzpad2_; ++nz) {
        Complex pi2ikzLz = kz(nz)*pi2i/Lz_;
        omega.cmplx(nx,ny,nz, U) -= pi2ikzLz*cmplx(nx,ny,nz, V);
        omega.cmplx(nx,ny,nz, V)  = 
          pi2ikzLz*cmplx(nx,ny,nz, U) - pi2ikxLx*cmplx(nx,ny,nz, W);
        omega.cmplx(nx,ny,nz, W) += pi2ikxLx*cmplx(nx,ny,nz, V);
      }
    }
  // Now omega = (wy-vz, uz-wx, vx-uy)
  return;
}  

// Scalar measure of the divergence of *this.
//
// For every Fourier mode (nx,nz) and Chebyshev index ny this accumulates
//   div     += true_norm(ux + vy + wz)
//   divnorm += true_norm(ux) + true_norm(vy) + true_norm(wz)
// where ux and wz are components 0 and 2 multiplied by 2 pi i kx/Lx and
// 2 pi i kz/Lz, and vy is diff() of component 1. true_norm is defined
// elsewhere; presumably it is a magnitude measure of a Complex value.
//
// normalize : when true and divnorm is positive, return div/divnorm;
//             otherwise return div.
//
// Fix relative to the earlier version: the return statement was
// `(normalize) ? div : div`, so the flag had no effect and divnorm was
// computed but never used.
// NOTE(review): if that was a deliberate temporary disable, callers passing
// normalize=true will now see the ratio instead of the raw sum.
//
// *this is asserted (Spectral, Spectral) with at least 3 components.
Real FlowField::divergence(bool normalize) const {
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  //assert(congruent(div));
  assert(Nd_ >= 3);

  Real div=0.0;
  Real divnorm=0.0;

  Complex cz = 2*pi*I/Lz_;
  Complex cx = 2*pi*I/Lx_;

  int Nxm = numXmodes();
  int Nym = numYmodes();
  int Nzm = numZmodes();

  ComplexChebyCoeff v(Nym, a_, b_, Spectral);
  ComplexChebyCoeff vy(Nym, a_, b_, Spectral);
  for (int nx=0; nx<Nxm; ++nx) {
    int kx_=kx(nx);
    for (int nz=0; nz<Nzm; ++nz) {
      int kz_=kz(nz);
      // Differentiate the v column of this mode in y.
      for (int ny=0; ny<Nym; ++ny)
        v.set(ny, cmplx(nx,ny,nz,1));
      diff(v,vy);

      // Per-mode partial sums, accumulated from the highest index down.
      Real subdiv = 0.0;
      Real subdivnorm = 0.0;
      for (int ny=Nym-1; ny>=0; --ny) {
        Complex ux = cx*kx_*cmplx(nx,ny,nz,0);
        Complex wz = cz*kz_*cmplx(nx,ny,nz,2);
        subdiv += true_norm(ux + vy[ny] + wz);
        subdivnorm += true_norm(ux) + true_norm(vy[ny]) + true_norm(wz);
      }
      div += subdiv;
      divnorm += subdivnorm;
    }
  }

  // Guard the ratio so an identically zero field does not divide by zero.
  if (normalize && divnorm > 0.0)
    return div/divnorm;
  return div;
}

// Store du/dx + dv/dy + dw/dz in component 0 of div, as spectral
// coefficients.
//
// *this is asserted (Spectral, Spectral) with at least 3 components. div is
// tagged (Spectral, Spectral) and zeroed; only its component 0 is written.
//
// Fix relative to the earlier version: the x and z derivative factors were
// 2 pi/Lx and 2 pi/Lz without the imaginary unit, whereas the other
// divergence routines in this file multiply by I. The Fourier derivative is
// multiplication by 2 pi i k/L, so the factor I is now included.
void FlowField::divergence(FlowField& div) const {
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  //assert(congruent(div));
  assert(Nd_ >= 3);

  div.setState(Spectral, Spectral);
  div.setToZero();

  Complex cz = (2*pi/Lz_)*I;
  Complex cx = (2*pi/Lx_)*I;

  int Nx = numXmodes();
  int Ny = numYmodes();
  int Nz = numZmodes();

  // x and z derivatives: du/dx + dw/dz.
  for (int ny=0; ny<Ny; ++ny) 
    for (int nx=0; nx<Nx; ++nx) 
      for (int nz=0; nz<Nz; ++nz) {
        div.cmplx(nx,ny,nz,0) = cx*Real(kx(nx))*cmplx(nx,ny,nz,0);
        div.cmplx(nx,ny,nz,0) += cz*Real(kz(nz))*cmplx(nx,ny,nz,2);
      }


  // Add in the y derivatives. The inner y-loop makes this function inefficient
  ComplexChebyCoeff v(Ny, a_, b_, Spectral);
  ComplexChebyCoeff vy(Ny, a_, b_, Spectral);

  for (int nx=0; nx<Nx; ++nx)
    for (int nz=0; nz<Nz; ++nz) { 
      int ny; // MSVC++ FOR-SCOPE BUG
      for (ny=0; ny<Ny; ++ny) 
        v.set(ny, cmplx(nx,ny,nz,1));
      diff(v,vy);
      for (ny=0; ny<Ny; ++ny)
        div.cmplx(nx,ny,nz,0) += vy[ny];
    }
}  

// Store the three terms of the divergence separately in div:
//   component 0 = du/dx,  component 1 = dv/dy,  component 2 = dw/dz,
// all as spectral coefficients.
//
// *this is asserted (Spectral, Spectral) with at least 3 components. div is
// tagged (Spectral, Spectral) and zeroed before being filled.
//
// Fix relative to the earlier version: dv/dy was written to component 2,
// overwriting dw/dz from the first loop and leaving component 1 at zero.
// NOTE(review): the ordering (ux, vy, wz) is chosen so that component d
// holds the derivative of velocity component d; confirm that callers do not
// expect a different layout.
void FlowField::divComps(FlowField& div) const {
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  //assert(congruent(div));
  assert(Nd_ >= 3);

  div.setState(Spectral, Spectral);
  div.setToZero();

  Complex cz = (2*pi/Lz_)*I;
  Complex cx = (2*pi/Lx_)*I;

  int Nx = numXmodes();
  int Ny = numYmodes();
  int Nz = numZmodes();

  // x and z derivatives.
  for (int ny=0; ny<Ny; ++ny) 
    for (int nx=0; nx<Nx; ++nx) 
      for (int nz=0; nz<Nz; ++nz) {
        div.cmplx(nx,ny,nz,0) = cx*Real(kx(nx))*cmplx(nx,ny,nz,0);
        div.cmplx(nx,ny,nz,2) = cz*Real(kz(nz))*cmplx(nx,ny,nz,2);
      }


  // y derivative. The inner y-loop makes this function inefficient
  ComplexChebyCoeff v(Ny, a_, b_, Spectral);
  ComplexChebyCoeff vy(Ny, a_, b_, Spectral);

  for (int nx=0; nx<Nx; ++nx)
    for (int nz=0; nz<Nz; ++nz) { 
      int ny; // MSVC++ FOR-SCOPE BUG
      for (ny=0; ny<Ny; ++ny) 
        v.set(ny, cmplx(nx,ny,nz,1));
      diff(v,vy);
      for (ny=0; ny<Ny; ++ny)
        div.cmplx(nx,ny,nz,1) = vy[ny];
    }
}  


// Component-wise Laplacian of a 3-component field:
//   lap_i = (d2/dx2 + d2/dy2 + d2/dz2) u_i.
//
// *this is asserted (Spectral, Spectral) with exactly 3 components. lap is
// tagged (Spectral, Spectral) and every coefficient addressed by the loops is
// overwritten.
//
// The y part uses diff2() on one (nx,nz) column at a time; the x,z part
// multiplies each coefficient by -(2 pi kx/Lx)^2 - (2 pi kz/Lz)^2.
// Not efficient, due to the loop order of the y part. For debugging only.
void FlowField::laplacian(FlowField& lap) const {
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  assert(Nd_ == 3);

  lap.setState(Spectral, Spectral);

  const int U=0;
  const int V=1;
  const int W=2;

  // One column and its second y-derivative per velocity component.
  ComplexChebyCoeff ucol(Ny_, a_, b_, Spectral);
  ComplexChebyCoeff vcol(Ny_, a_, b_, Spectral);
  ComplexChebyCoeff wcol(Ny_, a_, b_, Spectral);
  ComplexChebyCoeff ucol_yy(Ny_, a_, b_, Spectral);
  ComplexChebyCoeff vcol_yy(Ny_, a_, b_, Spectral);
  ComplexChebyCoeff wcol_yy(Ny_, a_, b_, Spectral);

  // lap = d2/dy2 (u,v,w)
  for (int nx=0; nx<Nx_; ++nx) {
    for (int nz=0; nz<Nzpad2_; ++nz) {
      for (int ny=0; ny<Ny_; ++ny) {
        ucol.set(ny, cmplx(nx,ny,nz,U));
        vcol.set(ny, cmplx(nx,ny,nz,V));
        wcol.set(ny, cmplx(nx,ny,nz,W));
      }

      diff2(ucol, ucol_yy);
      diff2(vcol, vcol_yy);
      diff2(wcol, wcol_yy);

      for (int ny=0; ny<Ny_; ++ny) {
        lap.cmplx(nx,ny,nz,U) = ucol_yy[ny];
        lap.cmplx(nx,ny,nz,V) = vcol_yy[ny];
        lap.cmplx(nx,ny,nz,W) = wcol_yy[ny];
      }
    }
  }

  // lap += (d2/dx2 + d2/dz2) (u,v,w)
  for (int ny=0; ny<Ny_; ++ny) {
    for (int nx=0; nx<Nx_; ++nx) {
      const Real cx = -square(2*pi*kx(nx)/Lx_);
      for (int nz=0; nz<Nzpad2_; ++nz) {
        const Real cz = -square(2*pi*kz(nz)/Lz_);
        const Real cx_cz = cx + cz;
        lap.cmplx(nx,ny,nz,U) += cx_cz*cmplx(nx,ny,nz,U);
        lap.cmplx(nx,ny,nz,V) += cx_cz*cmplx(nx,ny,nz,V);
        lap.cmplx(nx,ny,nz,W) += cx_cz*cmplx(nx,ny,nz,W);
      }
    }
  }
  return;
}

// Pointwise squared magnitude: at every physical gridpoint, sum the squares
// of all Nd_ components of *this and store the result in component 0 of u2.
//
// *this is asserted (Physical, Physical); u2 is tagged (Physical, Physical).
void FlowField::normsquared(FlowField& u2) const {
  assert(xzstate_ == Physical && ystate_ == Physical);

  u2.setState(Physical, Physical);

  const FlowField& self = *this;

  for (int nx=0; nx<Nx_; ++nx) {
    for (int nz=0; nz<Nz_; ++nz) {
      for (int ny=0; ny<Ny_; ++ny) {
        Real magsq = 0.0;
        for (int comp=0; comp<Nd_; ++comp) {
          magsq += square(self(nx,ny,nz,comp));
        }
        u2(nx,ny,nz,0) = magsq;
      }
    }
  }
}

// Gradient of a scalar field P: grad = (dP/dx, dP/dy, dP/dz), all as
// spectral coefficients.
//
// *this is asserted (Spectral, Spectral) with exactly 1 component. grad is
// tagged (Spectral, Spectral); its components 0, 1 and 2 are written, so it
// presumably needs at least 3 components (not checked here).
void FlowField::gradient(FlowField& grad) const {
  assert(xzstate_ == Spectral && ystate_ == Spectral);
  assert(Nd_ == 1);

  grad.setState(Spectral, Spectral);

  // One (nx,nz) column of P and its y-derivative.
  ComplexChebyCoeff pcol(Ny_, a_, b_, Spectral);
  ComplexChebyCoeff pcol_y(Ny_, a_, b_, Spectral);

  // Component 1: dP/dy via diff(), one column at a time.
  for (int nx=0; nx<Nx_; ++nx) {
    for (int nz=0; nz<Nzpad2_; ++nz) {
      for (int ny=0; ny<Ny_; ++ny) {
        pcol.set(ny, cmplx(nx,ny,nz,0));
      }
      diff(pcol, pcol_y);
      for (int ny=0; ny<Ny_; ++ny) {
        grad.cmplx(nx,ny,nz,1) = pcol_y[ny];
      }
    }
  }

  // Components 0 and 2: dP/dx and dP/dz by multiplication with
  // 2 pi i kx/Lx and 2 pi i kz/Lz.
  for (int ny=0; ny<Ny_; ++ny) {
    for (int nx=0; nx<Nx_; ++nx) {
      const Complex ikx = (2*pi*kx(nx)/Lx_)*I;
      for (int nz=0; nz<Nzpad2_; ++nz) {
        const Complex ikz = (2*pi*kz(nz)/Lz_)*I;
        grad.cmplx(nx,ny,nz,0) = ikx*cmplx(nx,ny,nz,0);
        grad.cmplx(nx,ny,nz,2) = ikz*cmplx(nx,ny,nz,0);
      }
    }
  }
  return;
}
********************************/
