/*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Copyright 2010.  Los Alamos National Security, LLC. This material was    !
! produced under U.S. Government contract DE-AC52-06NA25396 for Los Alamos !
! National Laboratory (LANL), which is operated by Los Alamos National     !
! Security, LLC for the U.S. Department of Energy. The U.S. Government has !
! rights to use, reproduce, and distribute this software.  NEITHER THE     !
! GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY,     !
! EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY FOR THE USE OF THIS         !
! SOFTWARE.  If software is modified to produce derivative works, such     !
! modified software should be clearly marked, so as not to confuse it      !
! with the version available from LANL.                                    !
!                                                                          !
! Additionally, this program is free software; you can redistribute it     !
! and/or modify it under the terms of the GNU General Public License as    !
! published by the Free Software Foundation; version 2.0 of the License.   !
! Accordingly, this program is distributed in the hope that it will be     !
! useful, but WITHOUT ANY WARRANTY; without even the implied warranty of   !
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General !
! Public License for more details.                                         !
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "Matrix.h"

// Niklasson's SP2 density matrix purification (no spin polarization).
//
// Fills bo_pointer with the starting matrix X = (maxeval*I - H)/maxminusmin
// and then iterates, replacing X by either X*X or 2*X - X*X, whichever has
// its trace closer to the target occupation occ = bndfil*hdim. After the
// loop the matrix is scaled by 2.0 and pulled back into bo_pointer.
//
// Parameters:
//   bndfil      - filling fraction; the target trace is bndfil*hdim
//   hdim        - dimension of the square hdim x hdim matrices
//   bo_pointer  - output buffer of hdim*hdim values, overwritten
//   maxeval     - shift applied to the diagonal of H (presumably an upper
//                 bound on the spectrum of H - confirm with the caller)
//   h_pointer   - input matrix H of hdim*hdim values; only the diagonal and
//                 upper triangle are read, the lower triangle is mirrored
//   maxminusmin - divisor of the remapping (presumably the spectral width);
//                 must be non-zero
//   minsp2iter  - convergence is only tested once iter > minsp2iter
//   breaktol    - tolerance on |trace(X) - occ|
//
// The process is terminated with exit(1) when the iteration counter
// reaches 50.
void sp2pure_nospin(REAL bndfil, int  hdim, REAL *bo_pointer, REAL maxeval, 
	     REAL *h_pointer, REAL maxminusmin, int minsp2iter, REAL breaktol) {
  int iter, breakloop;
  REAL trx, occ, trx2, tr2xx2, limit1, limit2;
  
  Matrix x2, bo, h;
  M_Init(x2, hdim, hdim);
  M_InitWithLocal(bo, bo_pointer, hdim, hdim);
  // NOTE(review): h is initialized and released here but never used by any
  // other M_* call in this routine; H is read through h_pointer only.
  M_InitWithLocal(h, h_pointer, hdim, hdim);
  
  // Absolute trace change of the three most recent iterations
  // (0 = newest, 2 = oldest).
  REAL idemperr0=0.0, idemperr1=0.0, idemperr2=0.0, trxold;

  occ=bndfil*hdim;

  // Build the starting matrix from the upper triangle of H and mirror it
  // into the lower triangle.
  for (int i=0; i<hdim; i++) {
    for (int j=i; j<hdim; j++) {
      if (i==j) {
        bo_pointer[i*hdim+j]=(maxeval-h_pointer[i*hdim+i])/maxminusmin;
      }
      else {
        bo_pointer[i*hdim+j]=-h_pointer[i*hdim+j]/maxminusmin;
        bo_pointer[j*hdim+i]=bo_pointer[i*hdim+j];
      }
    }
  }
  M_Push(bo);
  trx=M_Trace(bo);

  iter=0;
  breakloop=0;

  while(breakloop==0) {
    iter++;
    if (iter==50) {
      printf("SP2 purification is not converging: STOP!\n");
      exit(1);
    }
    M_Multiply(bo, bo, x2);
    trx2=M_Trace(x2);
    // trx still holds trace(bo) from the previous update and trx2 was just
    // computed, so trace(2*X - X*X) needs no further trace evaluations.
    tr2xx2=2.0*trx-trx2;

    limit1=fabs(trx2-occ);
    limit2=fabs(tr2xx2-occ);

    // Keep whichever candidate has its trace closer to the target.
    if (limit1<limit2) {
      M_Copy(x2, bo);
    }
    else {
      M_Multiply(2.0, bo, bo);
      M_Subtract(bo, x2, bo);
    }

    trxold=trx;
    trx=M_Trace(bo);

    idemperr2=idemperr1;
    idemperr1=idemperr0;
    idemperr0=fabs(trx-trxold);

    if (iter>minsp2iter) {
      // We're also using Niklasson's scheme to determine convergence:
      // stop once the trace is within tolerance and the latest trace change
      // is no smaller than the one from two iterations earlier.
      if (fabs(trx-occ)<breaktol && idemperr0>=idemperr2) {
        breakloop=1;
      }
    }
  }
  // Scale the converged matrix by two before copying it back.
  M_Multiply(2.0, bo, bo);
  M_Pull(bo);

  M_DeallocateLocal(x2);
  M_DeallocateDevice(x2);
  M_DeallocateDevice(bo);
  M_DeallocateDevice(h);
}

void sp2pure_spin(REAL bndfil, int  hdim, REAL *rhoup_pointer, REAL *rhodown_pointer, 
             REAL maxeval, REAL *hup_pointer, REAL *hdown_pointer, REAL maxminusmin, int minsp2iter, REAL breaktol) {
  //
  // This subroutine implements Niklasson's SP2 density matrix purification
  // algorithm for separate spin-up and spin-down matrices.
  //
  // Both channels are remapped as X = (maxeval*I - H)/maxminusmin and then
  // updated together: in every iteration both are replaced by X*X or both
  // by 2*X - X*X, depending on which choice brings the summed trace closer
  // to totne = 2*bndfil*hdim. The results are pulled back into
  // rhoup_pointer and rhodown_pointer without any final rescaling.
  //
  // Parameters:
  //   bndfil          - filling fraction; the target summed trace is
  //                     2*bndfil*hdim
  //   hdim            - dimension of the square hdim x hdim matrices
  //   rhoup_pointer   - output buffer for the spin-up matrix, overwritten
  //   rhodown_pointer - output buffer for the spin-down matrix, overwritten
  //   maxeval         - shift applied to the diagonals of Hup and Hdown
  //   hup_pointer     - spin-up H; only its diagonal and upper triangle
  //                     are read
  //   hdown_pointer   - spin-down H; only its diagonal and upper triangle
  //                     are read
  //   maxminusmin     - divisor of the remapping; must be non-zero
  //   minsp2iter      - convergence is only tested once iter > minsp2iter
  //   breaktol        - tolerance on |summed trace - totne|
  //
  // The process is terminated with exit(1) when the iteration counter
  // reaches 100.
  //

  int iter, breakloop;
  REAL trx, totne, occ, trx2, tr2xx2, limit1, limit2;
  
  Matrix x2up, x2down, rhoup, rhodown, hup, hdown;
  M_Init(x2up, hdim, hdim);
  M_Init(x2down, hdim, hdim);
  M_InitWithLocal(rhoup, rhoup_pointer, hdim, hdim);
  M_InitWithLocal(rhodown, rhodown_pointer, hdim, hdim);
  // NOTE(review): hup and hdown are initialized and released here but never
  // used by any other M_* call; the H data is read through the raw pointers.
  M_InitWithLocal(hup, hup_pointer, hdim, hdim);
  M_InitWithLocal(hdown, hdown_pointer, hdim, hdim);
  
  //
  // We're also using Niklasson's scheme to determine convergence.
  // idemperr0..2 hold the absolute change of the summed trace over the
  // three most recent iterations (0 = newest, 2 = oldest).
  //

  REAL idemperr0=0.0, idemperr1=0.0, idemperr2=0.0, trxold;

  occ=bndfil*hdim;
  totne=2.0*occ;

  //
  // Start by remapping the two H matrices such that 
  // all eigenvalues are [0:1]. We have the Gersgorin bounds
  // for both Hup and Hdown
  //

  for (int i=0; i<hdim; i++) {
    for (int j=i; j<hdim; j++) {
      if (i==j) {
        rhoup_pointer[i*hdim+j]=(maxeval-hup_pointer[i*hdim+i])/maxminusmin;
        rhodown_pointer[i*hdim+j]=(maxeval-hdown_pointer[i*hdim+i])/maxminusmin;
      }
      else {
        // Mirror the upper triangle into the lower triangle.
        rhoup_pointer[i*hdim+j]=-hup_pointer[i*hdim+j]/maxminusmin;
        rhoup_pointer[j*hdim+i]=rhoup_pointer[i*hdim+j];
        rhodown_pointer[i*hdim+j]=-hdown_pointer[i*hdim+j]/maxminusmin;
        rhodown_pointer[j*hdim+i]=rhodown_pointer[i*hdim+j];
      }
    }
  }
  M_Push(rhoup);
  M_Push(rhodown);
  trx=M_Trace(rhoup)+M_Trace(rhodown);
  
  iter=0;
  breakloop=0;

  while(!breakloop) {
    iter++;
    if (iter==100) {
      printf("SP2 purification not converging: STOP!\n");
      exit(1);
    }
    
    //
    // Now we're calculating X*X for the spin-up and spin-down
    // density matrices
    //

    M_Multiply(rhoup, rhoup, x2up);
    M_Multiply(rhodown, rhodown, x2down);

    trx2=M_Trace(x2up)+M_Trace(x2down);
    // trx still holds the summed trace of rhoup and rhodown from the
    // previous update, and trx2 was just computed, so the summed trace of
    // 2*X - X*X follows without evaluating any trace again.
    tr2xx2=2.0*trx-trx2;
      
    limit1=fabs(trx2-totne);
    limit2=fabs(tr2xx2-totne);

    // Apply the same branch to both spin channels.
    if (limit1<limit2) {
      M_Copy(x2up, rhoup);
      M_Copy(x2down, rhodown);
    }
    else {
      M_Multiply(2.0, rhoup, rhoup);
      M_Subtract(rhoup, x2up, rhoup);
      M_Multiply(2.0, rhodown, rhodown);
      M_Subtract(rhodown, x2down, rhodown);
    }

    trxold=trx;
    
    trx=M_Trace(rhoup)+M_Trace(rhodown);
    
    idemperr2 = idemperr1;
    idemperr1 = idemperr0;
    idemperr0 = fabs(trx - trxold);

    if (iter>minsp2iter) {
      // Stop once the summed trace is within tolerance and the latest trace
      // change is no smaller than the one from two iterations earlier.
      if (fabs(trx - totne) < breaktol &&
          idemperr0 >= idemperr2) {
        breakloop = 1;
      }
    }
  }
  M_Pull(rhoup);
  M_Pull(rhodown);

  M_DeallocateLocal(x2up);
  M_DeallocateDevice(x2up);
  M_DeallocateLocal(x2down);
  M_DeallocateDevice(x2down);
  M_DeallocateDevice(rhoup);
  M_DeallocateDevice(rhodown);
  M_DeallocateDevice(hup);
  M_DeallocateDevice(hdown);
}

