// -*- C++ -*-
//
// Copyright (C) 1998, 1999, 2000, 2002  Los Alamos National Laboratory,
// Copyright (C) 1998, 1999, 2000, 2002  CodeSourcery, LLC
//
// This file is part of FreePOOMA.
//
// FreePOOMA is free software; you can redistribute it and/or modify it
// under the terms of the Expat license.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Expat
// license for more details.
//
// You should have received a copy of the Expat license along with
// FreePOOMA; see the file LICENSE.
//

//-----------------------------------------------------------------------------
// Classes Doof2dCppTran, Doof2dP2, DoofNinePt, Doof2dP2Opt
// Implementation Classes Doof2dStorage, Doof2dBase
//-----------------------------------------------------------------------------

#ifndef POOMA_BENCHMARKS_DOOF2D_H
#define POOMA_BENCHMARKS_DOOF2D_H

// Include files

#include "Pooma/Arrays.h"
#include "Pooma/Fields.h"
#include "Utilities/Benchmark.h"

#include <cstdlib>
#include <typeinfo>
#include <string>

// Layout tag passed to UniformGridLayout by the multi-patch storage classes
// in this file: DistributedTag when POOMA_CHEETAH evaluates to nonzero,
// ReplicatedTag otherwise.

#if POOMA_CHEETAH
  typedef DistributedTag LayoutTag_t;
#else
  typedef ReplicatedTag LayoutTag_t;
#endif

//-----------------------------------------------------------------------------
// PatchString class definitions.  These return a string notation of
// the engine type.
//-----------------------------------------------------------------------------

template<class ETag>
struct PatchString
{
  static
  std::string get()
  {
    if (typeid(ETag) == typeid(Brick))
      return "Bk";
    else
      return "CmBk";
  }
};

template<class ETag>
struct PatchString<Remote<ETag> >
{
  // Label for a Remote engine: the wrapped tag's label prefixed with "r".

  static
  std::string get()
  {
    std::string label("r");
    label += PatchString<ETag>::get();
    return label;
  }
};

template<class LTag, class ETag>
struct PatchString<MultiPatch<LTag, ETag> >
{
  // Label for a MultiPatch engine: the patch engine's label followed by
  // "MP".  The layout tag LTag does not contribute to the text.

  static
  std::string get()
  {
    std::string label = PatchString<ETag>::get();
    label += "MP";
    return label;
  }
};

template<int D, class T, class ETag>
std::string qualification(const Array<D, T, ETag> &)
{
  return PatchString<ETag>::get();
}

// Label describing the engine of a Field: the engine label prefixed with
// "Fd".  The argument is used only for its type.
template<class Mesh, class T, class ETag>
std::string qualification(const Field<Mesh, T, ETag> &)
{
  std::string label("Fd");
  label += PatchString<ETag>::get();
  return label;
}

//-----------------------------------------------------------------------------
// Doof2dStorage class definitions.  These classes allocate the array
// or field and initialize it.  We define this as a partially
// specialized class so it is easy to make subclasses work for Uniform
// MultiPatch (UMP) or not.
//-----------------------------------------------------------------------------

// Primary template: empty.  Only the partial specializations in this file
// (Array or Field storage, UMP true or false) provide a_m, b_m and
// initializeStorage().
template<class Storage, bool UMP>
class Doof2dStorage
{
};

// Single-patch Array storage.
template<class ETag>
class Doof2dStorage<Array<2, double, ETag>, false>
{
public:

  // The two work arrays shared by the benchmark kernels.

  Array<2, double, ETag> a_m, b_m;

  // Sizes both arrays to the square index domain [1,n] x [1,n].
  // np and ng are part of the signature shared by all specializations;
  // this version reads neither of them and leaves n unchanged.

  void initializeStorage(int &n, int np, int ng) 
  {
    Interval<1> side(1, n);
    Interval<2> square(side, side);

    a_m.initialize(square);
    b_m.initialize(square);
  }
};

// Single-patch Field storage.
template<class Mesh, class ETag>
class Doof2dStorage<Field<Mesh, double, ETag>, false>
{
public:

  // The two work fields shared by the benchmark kernels.

  Field<Mesh, double, ETag> a_m, b_m;

  // Builds a layout and mesh over the square index domain [1,n] x [1,n]
  // and initializes both fields on it with a vertex centering.
  // np and ng are part of the signature shared by all specializations;
  // this version reads neither of them and leaves n unchanged.

  void initializeStorage(int &n, int np, int ng) 
  {
    Interval<1> side(1, n);
    Interval<2> square(side, side);

    DomainLayout<2> domainLayout(square);

    Mesh mesh(domainLayout, Vector<2>(0.0), Vector<2>(1.0, 1.0));

    Centering<2> vert(canonicalCentering<2>(VertexType, Continuous, AllDim));

    a_m.initialize(vert, domainLayout, mesh);
    b_m.initialize(vert, domainLayout, mesh);
  }
};

// Uniform multi-patch (UMP) Array storage.
template<class ETag>
class Doof2dStorage<Array<2, double, ETag>, true>
{
public:

  // The two work arrays shared by the benchmark kernels.

  Array<2, double, ETag> a_m, b_m;

  // Lays both arrays out on an np x np uniform grid of patches.
  // n is an in/out argument: it is first rounded down to a multiple of np
  // and the caller sees the adjusted value.  ng is the internal guard
  // layer width.

  void initializeStorage(int &n, int np, int ng) 
  {
    // Drop the remainder so that np divides n.

    n -= n % np;

    // Square index domain [1,n] x [1,n].

    Interval<1> side(1, n);
    Interval<2> square(side, side);

    // np blocks per direction.  Internal guard layers have width ng; no
    // external guard layers are requested because all the problem
    // implementations loop from 2 to n-1.

    Loc<2> blockCounts(np, np);
    GuardLayers<2> internalGuards(ng);
    GuardLayers<2> externalGuards(0);
    UniformGridPartition<2> partitioner(blockCounts,
                                        internalGuards,
                                        externalGuards);

    UniformGridLayout<2> gridLayout(square, partitioner, LayoutTag_t());

    a_m.initialize(gridLayout);
    b_m.initialize(gridLayout);
  }
};

// Uniform multi-patch (UMP) Field storage.
template<class Mesh, class ETag>
class Doof2dStorage<Field<Mesh, double, ETag>, true>
{
public:

  // The two work fields shared by the benchmark kernels.

  Field<Mesh, double, ETag> a_m, b_m;

  // Lays both fields out on an np x np uniform grid of patches with a
  // vertex centering.  n is an in/out argument: it is first rounded down
  // to a multiple of np and the caller sees the adjusted value.  ng is the
  // internal guard layer width.

  void initializeStorage(int &n, int np, int ng) 
  {
    // Drop the remainder so that np divides n.

    n -= n % np;

    // Square index domain [1,n] x [1,n].

    Interval<1> side(1, n);
    Interval<2> square(side, side);

    // np blocks per direction.  Internal guard layers have width ng; no
    // external guard layers are requested because all the problem
    // implementations loop from 2 to n-1.

    Loc<2> blockCounts(np, np);
    GuardLayers<2> internalGuards(ng);
    GuardLayers<2> externalGuards(0);
    UniformGridPartition<2> partitioner(blockCounts,
                                        internalGuards,
                                        externalGuards);

    UniformGridLayout<2> gridLayout(square, partitioner, LayoutTag_t());

    Mesh mesh(gridLayout, Vector<2>(0.0), Vector<2>(1.0, 1.0));

    Centering<2> vert(canonicalCentering<2>(VertexType, Continuous, AllDim));

    a_m.initialize(vert, gridLayout, mesh);
    b_m.initialize(vert, gridLayout, mesh);
  }
};

// Compile-time facts about a storage type, used to pick the matching
// Doof2dStorage specialization.
template<class Store>
struct StoreTraits
{
  // Engine type exported by the storage type.

  typedef typename Store::Engine_t Engine_t;

  // The engine's own multiPatch flag, converted to bool.

  static const bool multiPatch = Store::Engine_t::multiPatch;
};


//-----------------------------------------------------------------------------
// Doof2dBase class definitions.  We define this as a partially specialized
// class so it is easy to make subclasses work for Uniform MultiPatch
// (UMP) or not.
//-----------------------------------------------------------------------------

// Common base of the Doof2d benchmark implementations.  It combines the
// Implementation interface with the Doof2dStorage specialization chosen
// from the storage type, and keeps the problem size, patch count, guard
// width, stencil domains and check value that the kernels share.
template<class Store>
class Doof2dBase :
  public Implementation,
  public Doof2dStorage<Store, StoreTraits<Store>::multiPatch>
{
public:

  using Doof2dStorage<Store, StoreTraits<Store>::multiPatch>::a_m;
  using Doof2dStorage<Store, StoreTraits<Store>::multiPatch>::b_m;

  // Constructor.  np is the number of patches per direction and ng the
  // guard layer width; both are only stored here and handed to
  // initializeStorage() by initialize().  check_m and n_m start at zero so
  // that resultCheck() and opCount() never read an indeterminate value
  // when they are called before initialize()/run().
  
  Doof2dBase(int np = 1, int ng = 0) 
  : check_m(0.0), n_m(0), np_m(np), ng_m(ng)
  { }
  
  // Initialize function gets the size and adjusts the arrays.
    
  void initialize(int n) 
  {
    // Save the problem size.
    
    n_m = n;

    // This call can decrease n_m to an integral multiple of np_m.

    this->initializeStorage(n_m, np_m, ng_m);

    // Set up domains for the internal cells: indices 2 .. n_m-1 in each
    // direction, i.e. everything except the outermost ring.
    
    I = Interval<1>(2,n_m-1);
    J = Interval<1>(2,n_m-1);

    // Ensure that all memory is paged in.

    a_m = 0.0;
    b_m = 0.0;
    Pooma::blockAndEvaluate();
  }

  // Return value for checking result of benchmark run.  The derived
  // classes store it at the end of run(); it is 0 until then.

  double resultCheck() const { return check_m; }

  // Return number of flops in this kernel: 90 per interior point, which
  // matches the kernels in this file (5 iterations x 2 sweeps x 9 flops).

  double opCount() const { return ( 90 * (double(n_m) - 2) * (double(n_m) - 2) ); }

protected:

  // Problem check value.

  double check_m;

  // Problem size/number of patches.

  int n_m, np_m;
  
  // Guard layers.

  int ng_m;

  // Domains for stencil.
  
  Interval<1> I, J;

};


//-----------------------------------------------------------------------------
// Doof2dCppTran class definition, executing a C++ version of ForTran code.
//-----------------------------------------------------------------------------

// Benchmark implementation that runs the nine-point averaging kernel with
// explicit element-by-element loops.
template<class Store>
class Doof2dCppTran :
  public Doof2dBase<Store>
{
public:

  // Constructor allows us to specify the number of patches for each direction.
  
  Doof2dCppTran(int np = 1)
    : Doof2dBase<Store>(np)
  { }

  // This is a C++Tran benchmark, perhaps using UMP.

  const char* type() const { return Doof2dBase<Store>::CppTranType(); }

  // Returns the storage label produced by the free ::qualification()
  // function.  The text is kept in qual_m so the returned pointer remains
  // valid after this function returns (until the next call on this object
  // or its destruction).  Returning c_str() of the temporary string
  // directly would hand the caller a dangling pointer.

  const char* qualification() const
  {
    qual_m = ::qualification(this->a_m);
    return qual_m.c_str();
  }

  // Resets the data, then performs 5 iterations of two sweeps each
  // (b -> a, then a -> b) over the interior points 2 .. n_m-1, and stores
  // the centre value of b_m as the check value.

  void run() 
  {
    const double fact = 1.0 / 9.0;

    // Run setup.

    runSetup();
    
    // Run kernel.
    
    for (int k = 0; k < 5; ++k)
    {
      for (int j = 2; j <= this->n_m - 1; j++) 
      {
        for (int i = 2; i <= this->n_m - 1; i++) 
        {
          this->a_m(i,j) = fact *
            (this->b_m(i+1,j+1) + this->b_m(i+1,j  ) + this->b_m(i+1,j-1) +
             this->b_m(i  ,j+1) + this->b_m(i  ,j  ) + this->b_m(i  ,j-1) +
             this->b_m(i-1,j+1) + this->b_m(i-1,j  ) + this->b_m(i-1,j-1));
        }
      }
      for (int j = 2; j <= this->n_m - 1; j++) 
      {
        for (int i = 2; i <= this->n_m - 1; i++) 
        {
          this->b_m(i,j) = fact *
            (this->a_m(i+1,j+1) + this->a_m(i+1,j  ) + this->a_m(i+1,j-1) +
             this->a_m(i  ,j+1) + this->a_m(i  ,j  ) + this->a_m(i  ,j-1) +
             this->a_m(i-1,j+1) + this->a_m(i-1,j  ) + this->a_m(i-1,j-1));
        }
      }
    }
     
    // Save result for checking.
    
    this->check_m = this->b_m(this->n_m / 2, this->n_m / 2);
  }

  // Zeroes every element of both arrays, then places a single spike of
  // 1000.0 at the centre of b_m.

  void runSetup()
  {
    for (int j = 1; j <= this->n_m; j++) 
    {
      for (int i = 1; i <= this->n_m; i++) 
      {
        this->a_m(i,j) = 0.0;
        this->b_m(i,j) = 0.0;
      }
    }
    this->b_m(this->n_m/2,this->n_m/2) = 1000.0;
  }

private:

  // Backing storage for the pointer returned by qualification(); mutable
  // because that function is const.

  mutable std::string qual_m;
};


//-----------------------------------------------------------------------------
// Doof2dP2 class definition, executing Pooma data-parallel code.
//-----------------------------------------------------------------------------

// Benchmark implementation that writes the nine-point averaging kernel as
// data-parallel expressions over the interior domains I and J.
template<class Store>
class Doof2dP2
  : public Doof2dBase<Store>
{
public:

  // Constructor allows us to specify the number of patches for each direction.
  // ng is the guard layer width; any ng > 0 marks the run as guarded.
  
  Doof2dP2(int np = 1, int ng = 0)
    : Doof2dBase<Store>(np,ng),
      guarded_m(ng > 0)
  { }

  // This is a P2 benchmark, perhaps using UMP.

  const char* type() const { return Doof2dBase<Store>::P2Type(); }

  // Returns the storage label produced by the free ::qualification()
  // function, prefixed with "GC" when guard layers are in use.  The text
  // is kept in qual_m so the returned pointer remains valid after this
  // function returns (until the next call on this object or its
  // destruction).  Returning c_str() of a local or temporary string would
  // hand the caller a dangling pointer.

  const char* qualification() const
  {
    typedef typename Store::Engine_t Engine_t;

    qual_m = ::qualification(this->a_m);

    if (guarded_m)
    {
      // Guard layers are only set up by the multi-patch storage classes.

      PAssert(Engine_t::multiPatch);
      qual_m = "GC" + qual_m;
    }

    return qual_m.c_str();
  }

  // Resets the data, then performs 5 iterations of two sweeps each
  // (b -> a, then a -> b), and stores the centre value of b_m as the
  // check value.

  void run() 
  {
    const double fact = 1.0 / 9.0;

    // Run setup.

    runSetup();

    // Run kernel.
    
    for (int k = 0; k < 5; ++k)
    {
      this->a_m(this->I,this->J) = fact *
        (this->b_m(this->I+1,this->J+1) + this->b_m(this->I+1,this->J  ) + this->b_m(this->I+1,this->J-1) +
         this->b_m(this->I  ,this->J+1) + this->b_m(this->I  ,this->J  ) + this->b_m(this->I  ,this->J-1) +
         this->b_m(this->I-1,this->J+1) + this->b_m(this->I-1,this->J  ) + this->b_m(this->I-1,this->J-1));
      this->b_m(this->I,this->J) = fact *
        (this->a_m(this->I+1,this->J+1) + this->a_m(this->I+1,this->J  ) + this->a_m(this->I+1,this->J-1) +
         this->a_m(this->I  ,this->J+1) + this->a_m(this->I  ,this->J  ) + this->a_m(this->I  ,this->J-1) +
         this->a_m(this->I-1,this->J+1) + this->a_m(this->I-1,this->J  ) + this->a_m(this->I-1,this->J-1));
    }

    // Block before the element read below.

    Pooma::blockAndEvaluate();

    // Save result for checking.
    
    this->check_m = this->b_m(this->n_m / 2, this->n_m / 2);
  }

  // Zeroes both containers, blocks, then places a single spike of 1000.0
  // at the centre of b_m.

  void runSetup()
  {
    this->a_m = 0.0;
    this->b_m = 0.0;
    Pooma::blockAndEvaluate();
    this->b_m(this->n_m/2,this->n_m/2) = 1000.0;
  }

private:

  // True when the constructor was given ng > 0.

  bool guarded_m;

  // Backing storage for the pointer returned by qualification(); mutable
  // because that function is const.

  mutable std::string qual_m;
};

//-----------------------------------------------------------------------------
// Stencil DoofNinePt
//-----------------------------------------------------------------------------

// Nine-point averaging stencil functor.
class DoofNinePt
{
public:

  // Returns one ninth of the sum of the 3x3 block of x centred on (i,j).
  // Values are fetched through x.read() and added in the fixed order
  // row i+1, row i, row i-1, each from column j+1 down to j-1.

  template <class A>
  typename A::Element_t
  operator()(const A& x, int i, int j) const
  {
    const double ninth = 1.0 / 9.0;

    const int iLo = i - 1, iHi = i + 1;
    const int jLo = j - 1, jHi = j + 1;

    return ( ninth *
             ( x.read(iHi,jHi) + x.read(iHi,j) + x.read(iHi,jLo) +
               x.read(i  ,jHi) + x.read(i  ,j) + x.read(i  ,jLo) +
               x.read(iLo,jHi) + x.read(iLo,j) + x.read(iLo,jLo) ) );
  }

  // The stencil reaches one point below and one point above the centre
  // in every dimension; the dimension argument is ignored.

  int lowerExtent(int) const { return 1; }
  int upperExtent(int) const { return 1; }

};

//-----------------------------------------------------------------------------
// Doof2dP2Opt class definition, using a stencil for computation.
//-----------------------------------------------------------------------------

// Benchmark implementation that applies the nine-point averaging kernel
// through a Stencil<DoofNinePt> object.
template<class Store>
class Doof2dP2Opt
  : public Doof2dBase<Store>
{
public:

  // Constructor allows us to specify the number of patches for each direction.
  // ng is the guard layer width; any ng > 0 marks the run as guarded.
  
  Doof2dP2Opt(int np = 1, int ng = 0)
    : Doof2dBase<Store>(np,ng),
      guarded_m(ng > 0)
  { }

  // This is a P2Opt benchmark, perhaps using UMP.

  const char* type() const { return "P2Opt"; }

  // Returns the storage label produced by the free ::qualification()
  // function, prefixed with "GC" when guard layers are in use.  The text
  // is kept in qual_m so the returned pointer remains valid after this
  // function returns (until the next call on this object or its
  // destruction).  Returning c_str() of a local or temporary string would
  // hand the caller a dangling pointer.

  const char* qualification() const
  {
    typedef typename Store::Engine_t Engine_t;

    qual_m = ::qualification(this->a_m);

    if (guarded_m)
    {
      // Guard layers are only set up by the multi-patch storage classes.

      PAssert(Engine_t::multiPatch);
      qual_m = "GC" + qual_m;
    }

    return qual_m.c_str();
  }

  // Resets the data, then performs 5 iterations of two stencil sweeps
  // each (b -> a, then a -> b) over the interior domain, and stores the
  // centre value of b_m as the check value.

  void run() 
  {
    Interval<2> IJ(this->I,this->J);

    // Run setup.
    
    runSetup();

    // Run kernel.
    
    for (int k = 0; k < 5; ++k)
    {
      this->a_m(IJ) = stencil_m(this->b_m,IJ);

      // Note we use this form of the stencil since adding guard cells can
      // add external guard cells so the domain of a_m might be bigger than
      // we expect, in which case stencil_m(a_m) would be bigger than IJ.

      this->b_m(IJ) = stencil_m(this->a_m,IJ);
    }

    // Block before the element read below.

    Pooma::blockAndEvaluate();

    // Save result for checking.
    
    this->check_m = this->b_m(this->n_m / 2, this->n_m / 2);
  }

  // Zeroes both containers, blocks, then places a single spike of 1000.0
  // at the centre of b_m.

  void runSetup()
  {
    this->a_m = 0.0;
    this->b_m = 0.0;
    Pooma::blockAndEvaluate();
    this->b_m(this->n_m/2,this->n_m/2) = 1000.0;
  }

private:

  // True when the constructor was given ng > 0.

  bool guarded_m;

  // Stencil object wrapping the DoofNinePt functor.

  Stencil<DoofNinePt> stencil_m;

  // Backing storage for the pointer returned by qualification(); mutable
  // because that function is const.

  mutable std::string qual_m;
};


#endif // POOMA_BENCHMARKS_DOOF2D_H

// ACL:rcsinfo
// ----------------------------------------------------------------------
// $RCSfile: Doof2d.h,v $   $Author: richard $
// $Revision: 1.7 $   $Date: 2004/11/01 18:15:11 $
// ----------------------------------------------------------------------
// ACL:rcsinfo
