!{\src2tex{textfont=tt}}
!!****f* ABINIT/exc_haydock_driver
!! NAME
!! exc_haydock_driver
!!
!! FUNCTION
!!  Calculate the imaginary part of the macroscopic dielectric function via the Haydock recursive method.
!!
!! COPYRIGHT
!! Copyright (C) 2009-2014 ABINIT group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida, M.Giantomassi, Y. Gillet)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!! BSp<type(excparam)=The parameter for the Bethe-Salpeter run.
!!  inclvkb=If different from zero, the commutator [Vnl,r] is included in the calculation of 
!!    the matrix element of the velocity operator. Meaningless for PAW.
!! Kmesh<type(kmesh_t)>=The list of k-points in the BZ, IBZ and symmetry tables.
!! Cryst<type(crystal_t)>=Info on the crystalline structure.
!! KS_BSt=The KS energies.
!! QP_BSt=The QP energies.
!! Psps <type(pseudopotential_type)>=variables related to pseudopotentials.
!! Pawtab(Cryst%ntypat*usepaw)<pawtab_type>=PAW tabulated starting data.
!! Hur(Cryst%natom*usepaw)<type(HUr_commutator)>=Only for PAW and LDA+U, quantities used to evaluate the commutator [H_u,r].
!! Wfd<wfd_t>=Handler for the wavefunctions.
!!   %nsppol=Number of independent spin polarizations.
!!   %nspinor=Number of spinorial components.
!!   %usepaw=1 for PAW, 0 otherwise.
!!   %comm=MPI communicator.
!!
!! OUTPUT
!!  The imaginary part of the macroscopic dielectric function is written on the external file _EXC_MDF
!!
!! PARENTS
!!      bethe_salpeter
!!
!! CHILDREN
!!      matrginv,zgesv
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

subroutine exc_haydock_driver(BSp,BS_files,Cryst,Kmesh,Hdr_bse,KS_BSt,QP_Bst,Wfd,Psps,Pawtab,Hur,&
& Kmesh_dense, KS_BSt_dense, QP_BSt_dense, Wfd_dense, Vcp_dense, grid)

 use defs_basis
 use defs_datatypes
 use m_bs_defs
 use m_xmpi
 use m_errors
 use m_ncfile
 use m_profiling
#ifdef HAVE_TRIO_ETSF_IO
 use etsf_io
#endif
#ifdef HAVE_TRIO_NETCDF
 use netcdf
#endif

 use defs_abitypes,       only : Hdr_type
 use m_blas,              only : xdotc
 use m_numeric_tools,     only : print_arr, symmetrize, hermitianize
 use m_crystal,           only : crystal_t 
 use m_crystal_io,        only : crystal_ncwrite
 use m_bz_mesh,           only : kmesh_t
 use m_double_grid,       only : double_grid_t
 use m_commutator_vkbr,   only : kb_potential
 use m_paw_commutator,    only : HUr_commutator
 use m_wfs,               only : wfd_t
 use m_bse_io,            only : exc_read_rcblock, exc_write_optme
 use m_pawtab,            only : pawtab_type
 use m_vcoul,             only : vcoul_t, vcoul_init

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'exc_haydock_driver'
 use interfaces_14_hidewrite
 use interfaces_18_timing
 use interfaces_28_numeric_noabirule
 use interfaces_71_bse, except_this_one => exc_haydock_driver
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 type(excparam),intent(in) :: BSp
 type(excfiles),intent(in) :: BS_files
 type(kmesh_t),intent(in) :: Kmesh
 type(crystal_t),intent(in) :: Cryst
 type(Hdr_type),intent(in) :: Hdr_bse
 type(wfd_t),intent(inout) :: Wfd
 type(pseudopotential_type),intent(in) :: Psps
 type(ebands_t),intent(in) :: KS_BSt,QP_Bst
!Interp@BSE
 type(double_grid_t),intent(in),optional :: grid
 type(kmesh_t),intent(in),optional :: Kmesh_dense
 type(wfd_t),intent(inout),optional :: Wfd_dense
 type(ebands_t),intent(in),optional :: KS_BSt_dense, QP_Bst_dense
 type(vcoul_t),intent(in),optional :: Vcp_dense
!arrays
 type(pawtab_type),intent(in) :: Pawtab(Cryst%ntypat*Wfd%usepaw)
 type(HUr_commutator),intent(in) :: Hur(Cryst%natom*Wfd%usepaw)

!Local variables ------------------------------
!scalars
 integer :: io,my_rank,master,iq,it,ierr
 integer :: hsize,comm,my_t1,my_t2,nsppol,nkets,nproc
 integer :: spin,spad,ik_bz,iv,ic,trans_idx,lomo_min,max_band
 integer :: max_r,max_c
 !Interp@BSE
 integer :: hsize_dense
 real(dp) :: omegaev,rand_phi !,norm
 complex(dpc) :: ks_avg,gw_avg,exc_avg
 logical :: is_resonant,use_mpio,diago_is_real,prtdos
 character(len=500) :: msg
 character(len=fnlen) :: hreso_fname,hcoup_fname !,ome_fname
 type(ncfile_t) :: ncf
!arrays
 real(dp) :: tsec(2)
 real(dp),allocatable :: dos(:),dos_gw(:),dos_ks(:)
 complex(dpc),allocatable :: green(:,:),hreso(:,:),hcoup(:,:),test(:,:)
 complex(dpc),allocatable :: opt_cvk(:,:,:,:,:),kets(:,:)
 complex(dpc),allocatable :: eps_rpanlf(:,:),eps_gwnlf(:,:)
 complex(dpc),allocatable :: tensor_cart(:,:),tensor_cart_rpanlf(:,:),tensor_cart_gwnlf(:,:)
 complex(dpc),allocatable :: tensor_red(:,:),tensor_red_rpanlf(:,:),tensor_red_gwnlf(:,:)
 !Interp@BSE
 complex(dpc),allocatable :: diag_dense(:), diag_coarse(:,:)
 complex(dpc),allocatable :: acoeffs(:,:),bcoeffs(:,:),ccoeffs(:,:)
 character(len=fnlen) :: tmpfname
 integer :: ii

!************************************************************************

 call timab(690,1,tsec) ! exc_haydock_driver
 call timab(691,1,tsec) ! exc_haydock_driver(read)

 if (BSp%have_complex_ene) then
   MSG_ERROR("Complex energies are not supported yet")
 end if

 my_rank = Wfd%my_rank
 master  = Wfd%master
 comm    = Wfd%comm
 nsppol  = Wfd%nsppol
 nproc   = Wfd%nproc

 use_mpio=.FALSE.
#ifdef HAVE_MPI_IO
 use_mpio = (nproc > 1)
 !use_mpio = .TRUE. 
#endif
 use_mpio=.FALSE.
 !use_mpio = .TRUE. 

 ! Hsize refers to the size of the individual blocks (resonant and coupling). 
 ! Thanks to the symmetry property of the starting vector, the Haydock method 
 ! can be reformulated in terms of matrix-vector multiplication involving the 
 ! blocks thus avoiding to allocation of the full matrix ( R   C )
 !                                                        -C* -R*)
 hsize=SUM(BSp%nreh)

 !Interp@BSE
 if(BSp%use_interp) then
   hsize_dense = SUM(BSp%nreh_interp)
 end if

 !
 ! Divide the columns of the Hamiltonian among the nodes.
 call xmpi_split_work(hsize,comm,my_t1,my_t2,msg,ierr)
 if (ierr/=0) then
   MSG_WARNING(msg)
 end if

 ABI_CHECK(my_t2-my_t1+1>0,"found processor with 0 rows")
                                                             
 ABI_MALLOC(hreso,(hsize,my_t1:my_t2))
 ABI_CHECK_ALLOC("out of memory in hreso")

 !Interp@BSE
 if(Bsp%use_interp) then
  ! No parallelization at present ...
  ABI_MALLOC(diag_dense,(hsize_dense))
  ABI_CHECK_ALLOC("out of memory in diag_dense")
  diag_dense = czero

  ABI_MALLOC(diag_coarse,(hsize,hsize))
  ABI_CHECK_ALLOC("out of memory in diag_coarse")
  diag_coarse = czero

  if(BSp%interp_mode == 2 .or. BSp%interp_mode == 3) then
    ABI_MALLOC(acoeffs,(hsize,my_t1:my_t2))
    ABI_CHECK_ALLOC("out of memory in acoeffs")

    ABI_MALLOC(bcoeffs,(hsize,my_t1:my_t2))
    ABI_CHECK_ALLOC("out of memory in bcoeffs")

    ABI_MALLOC(ccoeffs,(hsize,my_t1:my_t2))
    ABI_CHECK_ALLOC("out of memory in ccoeffs")
  end if
 end if

 !
 ! Read the resonant block from file.
 if (BS_files%in_hreso /= BSE_NOFILE) then
   hreso_fname = BS_files%in_hreso
 else 
   hreso_fname = BS_files%out_hreso
 end if

 is_resonant=.TRUE.; diago_is_real=(.not.BSp%have_complex_ene)
 call exc_read_rcblock(hreso_fname,Bsp,is_resonant,diago_is_real,nsppol,BSp%nreh,hsize,my_t1,my_t2,hreso,use_mpio,comm)

 if(BSp%use_interp) then
   if(nsppol > 1) then
     MSG_ERROR("Interpolation not yet coded with spin")
   end if
   ! Compute diagonal part of the dense Ham
   call wrtout(std_out," Computing diagonal term on the dense mesh","COLL")
   do it=1,BSp%nreh_interp(1)
     ! 1 is for spin 1
     diag_dense(it) = Bsp%Trans_interp(it,1)%en
   end do

   call wrtout(std_out," Computing diagonal term on the coarse mesh","COLL")
   do it=1,BSp%nreh(1)
     ! 1 is for spin 1
     diag_coarse(it,it) = Bsp%Trans(it,1)%en
   end do

   if(BSp%interp_mode == 2 .or. BSp%interp_mode == 3) then
     ! Read interpolation coefficients
     tmpfname = hreso_fname
     ii = LEN_TRIM(hreso_fname)
     tmpfname(ii-2:ii+1) = 'ABSR'
     call exc_read_rcblock(tmpfname,Bsp,is_resonant,diago_is_real,nsppol,BSp%nreh,hsize,my_t1,my_t2,acoeffs,use_mpio,comm)
     tmpfname(ii-2:ii+1) = 'BBSR'
     call exc_read_rcblock(tmpfname,Bsp,is_resonant,diago_is_real,nsppol,BSp%nreh,hsize,my_t1,my_t2,bcoeffs,use_mpio,comm)
     tmpfname(ii-2:ii+1) = 'CBSR'
     call exc_read_rcblock(tmpfname,Bsp,is_resonant,diago_is_real,nsppol,BSp%nreh,hsize,my_t1,my_t2,ccoeffs,use_mpio,comm)
   end if

 end if

 !call hermitianize(hreso,"All")

!BEGIN DEBUG
 if (use_mpio) then
   MSG_WARNING("Testing MPI-IO routines")
   ABI_MALLOC(test,(hsize,my_t1:my_t2))
   ABI_CHECK_ALLOC("out of memory in hreso")
   diago_is_real=(.not.BSp%have_complex_ene)
   call exc_read_rcblock(hreso_fname,Bsp,is_resonant,diago_is_real,nsppol,Bsp%nreh,hsize,my_t1,my_t2,test,.FALSE.,comm)
   test = test-hreso
   write(std_out,*)"DEBUG: Diff MPI-IO - Fortran ",MAXVAL(ABS(test))
   max_r=20; max_c=10
   write(std_out,*)" **** Testing resonant block **** "
   call print_arr(test,max_r=max_r,max_c=max_c,unit=std_out)
   if (nsppol==2) then
     write(std_out,*)" **** D down down ****"
     call print_arr(test(hsize/2+1:,hsize/2+1:),max_r=max_r,max_c=max_c,unit=std_out)
     write(std_out,*)" **** V up down ****"
     call print_arr(test(1:hsize/2,hsize/2+1:),max_r=max_r,max_c=max_c,unit=std_out)
     write(std_out,*)" **** V down up ****"
     call print_arr(test(hsize/2+1:,1:hsize/2),max_r=max_r,max_c=max_c,unit=std_out)
   end if
   ABI_FREE(test)
 end if
!END DEBUG
 !
 ! Read coupling block.
 if (BSp%use_coupling>0) then 
   ABI_CHECK(.not. Bsp%use_interp,"interpolation with coupling not coded!")
   if (BS_files%in_hcoup /= BSE_NOFILE) then
     hcoup_fname = BS_files%in_hcoup
   else 
     hcoup_fname = BS_files%out_hcoup
   end if

   ABI_MALLOC(hcoup,(hsize,my_t1:my_t2))
   ABI_CHECK_ALLOC("out of memory in hcoup")
   is_resonant=.FALSE.; diago_is_real=.FALSE.
   call exc_read_rcblock(hcoup_fname,Bsp,is_resonant,diago_is_real,nsppol,BSp%nreh,hsize,my_t1,my_t2,hcoup,use_mpio,comm)
   !call symmetrize(hcoup,"ALL")

   if (use_mpio) then
     MSG_WARNING("Testing MPI-IO routines")
     ABI_MALLOC(test,(hsize,my_t1:my_t2))
     ABI_CHECK_ALLOC("out of memory in text")
     diago_is_real=.FALSE.
     call exc_read_rcblock(hcoup_fname,Bsp,is_resonant,diago_is_real,nsppol,Bsp%nreh,hsize,my_t1,my_t2,test,.FALSE.,comm)
     test = test-hcoup
     write(std_out,*)"DEBUG: Diff MPI-IO - Fortran ",MAXVAL(ABS(test))
     max_r=20; max_c=10
     write(std_out,*)" **** Testing coupling block **** "
     call print_arr(test,max_r=max_r,max_c=max_c,unit=std_out)
     if (nsppol==2) then
       write(std_out,*)" **** D down down ****"
       call print_arr(test(hsize/2+1:,hsize/2+1:),max_r=max_r,max_c=max_c,unit=std_out)
       write(std_out,*)" **** V up down ****"
       call print_arr(test(1:hsize/2,hsize/2+1:),max_r=max_r,max_c=max_c,unit=std_out)
       write(std_out,*)" **** V down up ****"
       call print_arr(test(hsize/2+1:,1:hsize/2),max_r=max_r,max_c=max_c,unit=std_out)
     end if
     ABI_FREE(test)
   end if
 end if

 call timab(691,2,tsec) ! exc_haydock_driver(read)
 call timab(692,1,tsec) ! exc_haydock_driver(prep)
 !
 ! Prepare the starting vectors for the Lanczos chain.
 nkets=Bsp%nq

 prtdos=.FALSE. !prtdos=.TRUE.
 if (prtdos) then
   nkets=nkets+1
   if (Bsp%use_coupling>0) then 
     MSG_ERROR("DOS with coupling not coded")
     nkets=nkets+1
   end if
 end if

 !Interp@BSE
 if(BSp%use_interp) then
   ABI_MALLOC(kets,(hsize_dense,nkets))
 else
   ABI_MALLOC(kets,(hsize,nkets))
 end if

 ABI_CHECK_ALLOC("out of memory in kets")
 kets=czero
 !
 ! Prepare the kets for the macroscopic dielectric function.
 lomo_min=Bsp%lomo_min; max_band=Bsp%nbnds

 !Interp@BSE
 if(BSp%use_interp) then
   ABI_MALLOC(opt_cvk,(lomo_min:max_band,lomo_min:max_band,BSp%nkbz_interp,Wfd%nsppol,BSp%nq))
 else
   ABI_MALLOC(opt_cvk,(lomo_min:max_band,lomo_min:max_band,BSp%nkbz,Wfd%nsppol,BSp%nq))
 end if
 ABI_CHECK_ALLOC("out of memory in opt_cvk")

 do iq=1,Bsp%nq

!Interp@BSE
   if(BSp%use_interp) then
     !
     ! KS_BSt is used here to calculate the commutator.
     call calc_optical_mels(Wfd_dense,Kmesh_dense,KS_BSt_dense,Cryst,Psps,Pawtab,Hur, &
&       BSp%inclvkb,BSp%lomo_spin,lomo_min,max_band,BSp%nkbz_interp,BSp%q(:,iq),opt_cvk(:,:,:,:,iq))

     !
     ! Fill ket0 using the same ordering for the indeces as the one used for the excitonic Hamiltonian.
     ! Note that only the resonant part is used here.
     do spin=1,nsppol
       spad=(spin-1)*BSp%nreh_interp(1)
       do ik_bz=1,BSp%nkbz_interp
         do iv=BSp%lomo_spin(spin),BSp%homo_spin(spin)
           do ic=BSp%lumo_spin(spin),BSp%nbnds
             trans_idx = BSp%vcks2t_interp(iv,ic,ik_bz,spin)
             if (trans_idx>0) kets(trans_idx+spad,iq)=opt_cvk(ic,iv,ik_bz,spin,iq)
           end do
         end do
       end do
     end do
  else
   !
   ! KS_BSt is used here to calculate the commutator.
   call calc_optical_mels(Wfd,Kmesh,KS_BSt,Cryst,Psps,Pawtab,Hur,BSp%inclvkb,Bsp%lomo_spin,lomo_min,max_band,&
&                         BSp%nkbz,BSp%q(:,iq),opt_cvk(:,:,:,:,iq))
   !
   ! Fill ket0 using the same ordering for the indeces as the one used for the excitonic Hamiltonian.
   ! Note that only the resonant part is used here.
   do spin=1,nsppol
     spad=(spin-1)*BSp%nreh(1)
     do ik_bz=1,BSp%nkbz
       do iv=BSp%lomo_spin(spin),BSp%homo_spin(spin)
         do ic=BSp%lumo_spin(spin),BSp%nbnds
           trans_idx = BSp%vcks2t(iv,ic,ik_bz,spin)
           if (trans_idx>0) kets(trans_idx+spad,iq)=opt_cvk(ic,iv,ik_bz,spin,iq)
         end do
       end do
     end do
   end do

  end if
 end do

 call timab(692,2,tsec) ! exc_haydock_driver(prep)
 call timab(693,1,tsec) ! exc_haydock_driver(wo lf    - that is, without local field
 !
 ! ==========================================================
 ! === Writing the Optical Matrix Elements to NetCDF file ===
 ! ==========================================================

 !if (.false.) then
 !  ome_fname='test_OME.nc'
 !  call exc_write_optme(ome_fname,minb,maxb,BSp%nkbz,Wfd%nsppol,BSp%nq,opt_cvk,ierr)
 !end if

 !
 ! =======================================================
 ! === Make EPS RPA and GW without local-field effects ===
 ! =======================================================

 ABI_MALLOC(eps_rpanlf,(BSp%nomega,BSp%nq))
 ABI_MALLOC(dos_ks,(BSp%nomega))
 ABI_MALLOC(eps_gwnlf ,(BSp%nomega,BSp%nq))
 ABI_MALLOC(dos_gw,(BSp%nomega))
 
 if(BSp%use_interp) then
   call wrtout(std_out," Calculating Interpolated RPA NLF and QP NLF epsilon","COLL")

   call exc_eps_rpa(BSp%nbnds,BSp%lomo_spin,BSp%lomo_min,BSp%homo_spin,Kmesh_dense,KS_BSt_dense,BSp%nq,nsppol,&
&    opt_cvk,Cryst%ucvol,BSp%broad,BSp%nomega,BSp%omega,eps_rpanlf,dos_ks)

   call exc_eps_rpa(BSp%nbnds,BSp%lomo_spin,BSp%lomo_min,BSp%homo_spin,Kmesh_dense,QP_BSt_dense,BSp%nq,nsppol,&
&    opt_cvk,Cryst%ucvol,Bsp%broad,BSp%nomega,BSp%omega,eps_gwnlf,dos_gw)

 else
   call wrtout(std_out," Calculating RPA NLF and QP NLF epsilon","COLL")

   call exc_eps_rpa(BSp%nbnds,BSp%lomo_spin,BSp%lomo_min,BSp%homo_spin,Kmesh,KS_BSt,BSp%nq,nsppol,opt_cvk,&
&    Cryst%ucvol,BSp%broad,BSp%nomega,BSp%omega,eps_rpanlf,dos_ks)

   call exc_eps_rpa(BSp%nbnds,BSp%lomo_spin,BSp%lomo_min,BSp%homo_spin,Kmesh,QP_BSt,BSp%nq,nsppol,opt_cvk,&
&    Cryst%ucvol,Bsp%broad,BSp%nomega,BSp%omega,eps_gwnlf,dos_gw)
 end if

 if (my_rank==master) then ! Only master works.
   !
   ! Master node writes final results on file.
   call exc_write_data(BSp,BS_files,"RPA_NLF_MDF",eps_rpanlf,dos=dos_ks)

   call exc_write_data(BSp,BS_files,"GW_NLF_MDF",eps_gwnlf,dos=dos_gw)

   ! Computing and writing tensor in files

   ! RPA_NLF
   ABI_MALLOC(tensor_cart_rpanlf,(BSp%nomega,6))
   ABI_MALLOC(tensor_red_rpanlf,(BSp%nomega,6))

   call wrtout(std_out," Calculating RPA NLF dielectric tensor","COLL")
 
   call haydock_mdf_to_tensor(BSp,Cryst,eps_rpanlf,tensor_cart_rpanlf, tensor_red_rpanlf, ierr)

   if(ierr == 0) then
      ! Writing tensor
      call exc_write_tensor(BSp,BS_files,"RPA_NLF_TSR_CART",tensor_cart_rpanlf)
      call exc_write_tensor(BSp,BS_files,"RPA_NLF_TSR_RED",tensor_red_rpanlf)
   else 
      write(msg,'(3a)')&
&       'The RPA_NLF dielectric complex tensor cannot be computed',ch10,&
&       'There must be 6 different q-points in long wavelength limit (see gw_nqlwl)'
      MSG_COMMENT(msg)
   end if

   ABI_FREE(tensor_cart_rpanlf)
   ABI_FREE(tensor_red_rpanlf)

   ! GW_NLF
   ABI_MALLOC(tensor_cart_gwnlf,(BSp%nomega,6))
   ABI_MALLOC(tensor_red_gwnlf,(BSp%nomega,6))

   call wrtout(std_out," Calculating GW NLF dielectric tensor","COLL")

   call haydock_mdf_to_tensor(BSp,Cryst,eps_gwnlf,tensor_cart_gwnlf, tensor_red_gwnlf, ierr)

   if(ierr == 0) then
      ! Writing tensor
      call exc_write_tensor(BSp,BS_files,"GW_NLF_TSR_CART",tensor_cart_gwnlf)
      call exc_write_tensor(BSp,BS_files,"GW_NLF_TSR_RED",tensor_red_gwnlf)
   else
      write(msg,'(3a)')&
&       'The GW_NLF dielectric complex tensor cannot be computed',ch10,&
&       ' There must be 6 different q-points in long wavelength limit (see gw_nqlwl)'
      MSG_COMMENT(msg)
   end if

   ABI_FREE(tensor_cart_gwnlf)
   ABI_FREE(tensor_red_gwnlf)
 
   !call wrtout(std_out," Checking Kramers Kronig on Excitonic Macroscopic Epsilon","COLL")
   !call check_kramerskronig(BSp%nomega,REAL(BSp%omega),eps_exc(:,1))

   !call wrtout(std_out," Checking Kramers Kronig on RPA NLF Macroscopic Epsilon","COLL")
   !call check_kramerskronig(BSp%nomega,REAL(BSp%omega),eps_rpanlf(:,1))

   !call wrtout(std_out," Checking Kramers Kronig on GW NLF Macroscopic Epsilon","COLL")
   !call check_kramerskronig(BSp%nomega,REAL(BSp%omega),eps_gwnlf(:,1))

   !call wrtout(std_out," Checking f-sum rule on Excitonic Macroscopic Epsilon","COLL")

   !if (BSp%exchange_term>0) then 
   !  MSG_COMMENT(' f-sum rule should be checked without LF')
   !end if
   !call check_fsumrule(BSp%nomega,REAL(BSp%omega),AIMAG(eps_exc(:,1)),drude_plsmf)

   !call wrtout(std_out," Checking f-sum rule on RPA NLF Macroscopic Epsilon","COLL")
   !call check_fsumrule(BSp%nomega,REAL(BSp%omega),AIMAG(eps_rpanlf(:,1)),drude_plsmf)

   !call wrtout(std_out," Checking f-sum rule on GW NLF Macroscopic Epsilon","COLL")
   !call check_fsumrule(BSp%nomega,REAL(BSp%omega),AIMAG(eps_gwnlf(:,1)),drude_plsmf)
 end if ! my_rank==master

 ABI_FREE(opt_cvk)
 !call xmpi_barrier(comm)
 !
 ! The ket for the approximated DOS.
 if (prtdos) then 
   MSG_WARNING("Calculating DOS with Haydock method")
   ABI_CHECK(BSp%use_coupling==0,"DOS with coupling not coded")
   iq = BSp%nq + 1
   if (my_rank==master) then
     !call random_seed()
     do it=1,SUM(Bsp%nreh)
       call RANDOM_NUMBER(rand_phi)
       rand_phi = two_pi*rand_phi
       kets(it,iq) = CMPLX( COS(rand_phi), SIN(rand_phi) )
     end do
     ! Normalize the vector.
     !norm = SQRT( DOT_PRODUCT(kets(:,iq), kets(:,iq)) ) 
     !kets(:,iq) = kets(:,iq)/norm
   end if
   call xmpi_bcast(kets(:,iq),master,comm,ierr)
 end if

 call timab(693,2,tsec) ! exc_haydock_driver(wo lf    - that is, without local field
 call timab(694,1,tsec) ! exc_haydock_driver(apply

 ABI_MALLOC(green,(BSp%nomega,nkets))

 if (BSp%use_coupling==0) then 
   !Interp@BSE
   if(BSp%use_interp) then
      call haydock_herm_interp(BSp,BS_files,Cryst,Hdr_bse,hsize,hsize_dense,my_t1,my_t2,hreso,nkets,kets,diag_coarse,diag_dense,&
&       grid,Wfd,Wfd_dense,Kmesh,Kmesh_dense,acoeffs,bcoeffs,ccoeffs,Vcp_dense,green,comm)
   else
      call haydock_herm(BSp,BS_files,Cryst,Hdr_bse,hsize,my_t1,my_t2,hreso,nkets,kets,green,comm)
   end if
 else
   call haydock_psherm(BSp,BS_files,Cryst,Hdr_bse,hsize,my_t1,my_t2,hreso,hcoup,nkets,kets,green,comm)
 end if
 !
 ! Add 1 to have the real part right.
 green = one + green

 ABI_FREE(kets)

 call timab(694,2,tsec) ! exc_haydock_driver(apply
 call timab(695,1,tsec) ! exc_haydock_driver(end)

 if (my_rank==master) then ! Master writes the final results.
   !
   if (prtdos) then
     ABI_MALLOC(dos,(BSp%nomega))
     dos = -AIMAG(green(:,BSp%nq+1))
     call exc_write_data(BSp,BS_files,"EXC_MDF",green,dos=dos)
     ABI_FREE(dos)
   else 
     call exc_write_data(BSp,BS_files,"EXC_MDF",green)
   end if
   !
   ! =========================
   ! === Write out Epsilon ===
   ! =========================

   ABI_MALLOC(tensor_cart,(BSp%nomega,6))
   ABI_MALLOC(tensor_red,(BSp%nomega,6))
   ! Computing tensor
   call wrtout(std_out," Calculating EXC dielectric tensor","COLL")

   call haydock_mdf_to_tensor(BSp,Cryst,green,tensor_cart,tensor_red,ierr)

   if (ierr == 0) then
       ! Writing tensor
       call exc_write_tensor(BSp,BS_files,"EXC_TSR_CART",tensor_cart)
       call exc_write_tensor(BSp,BS_files,"EXC_TSR_RED",tensor_red)
   else
       write(msg,'(3a)')&
&        'The EXC dielectric complex tensor cannot be computed',ch10,&
&        ' There must be 6 different q-points in long wavelength limit (see gw_nqlwl)'
       MSG_COMMENT(msg)
   end if

   ABI_FREE(tensor_cart)
   ABI_FREE(tensor_red)
   !
   ! This part will be removed when fldiff will be able to compare two mdf files.
   write(ab_out,*)" "
   write(ab_out,*)"Macroscopic dielectric function:"
   write(ab_out,*)"omega [eV] <KS_RPA_nlf>  <GW_RPA_nlf>  <BSE> "
   do io=1,MIN(BSp%nomega,10)
     omegaev = REAL(BSp%omega(io))*Ha_eV
     ks_avg  = SUM( eps_rpanlf(io,:)) / Bsp%nq
     gw_avg  = SUM( eps_gwnlf (io,:)) / Bsp%nq
     exc_avg = SUM( green     (io,:)) / BSp%nq
     write(ab_out,'(7f9.4)')omegaev,ks_avg,gw_avg,exc_avg
   end do
   write(ab_out,*)" "

   ! Write MDF file with the final results.
   ! FIXME: It won't work if prtdos == True
#ifdef HAVE_TRIO_ETSF_IO
     NCF_CHECK(ncfile_create(ncf,TRIM(BS_files%out_basename)//"_MDF.nc", NF90_CLOBBER), "Creating MDF file")
     call crystal_ncwrite(Cryst,ncf%ncid)
     call mdfs_ncwrite(ncf%ncid, Bsp, green, eps_rpanlf, eps_gwnlf)
     NCF_CHECK(ncfile_close(ncf),"Closing MDF file")
#else
     ABI_UNUSED(ncf%ncid)
#endif

 end if 

 ABI_FREE(green)
 ABI_FREE(eps_rpanlf)
 ABI_FREE(eps_gwnlf)
 ABI_FREE(dos_ks)
 ABI_FREE(dos_gw)

 ABI_FREE(hreso)
 if (allocated(hcoup))  then
   ABI_FREE(hcoup)
 end if

 if(BSp%use_interp) then
   ABI_FREE(diag_dense)
   ABI_FREE(diag_coarse)
   if(BSp%interp_mode == 2 .or. BSp%interp_mode == 3) then
     ABI_FREE(acoeffs)
     ABI_FREE(bcoeffs)
     ABI_FREE(ccoeffs)
   end if
 end if

 call timab(695,2,tsec) ! exc_haydock_driver(end)
 call timab(690,2,tsec) ! exc_haydock_driver

end subroutine exc_haydock_driver
!!***

!Interp@BSE
!----------------------------------------------------------------------

!!****f* ABINIT/haydock_herm_interp
!! NAME
!! haydock_herm_interp
!!
!! FUNCTION
!!  Reads the excitonic Hamiltonian from file and construct the Lanczos set of vectors 
!!  by iterative matrix-vector multiplications.
!!
!! COPYRIGHT
!! Copyright (C) 2009-2012 ABINIT group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida, M.Giantomassi, Y. Gillet)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!! BSp<excparam>=Parameters for the Bethe-Salpeter calculation.
!! BS_files<excparam>=Files associated to the bethe_salpeter code.
!! Cryst<crystal_t>=Info on the crystalline structure.
!! hize=Size of the excitonic matrix.
!! my_t1,my_t2=First and last columns treated by this node.
!! hmat(hsize,my_t1:my_t2)=Excitonic matrix.
!! nkets=Number of starting vectors for Haydock method.
!! kets(hsize,nkets)=The kets in the eh representation.
!! comm=MPI communicator.
!!
!! OUTPUT
!!  green(BSp%nomega,nkets)=
!!
!! PARENTS
!!      haydock
!!
!! CHILDREN
!!      matrginv,zgesv
!!
!! SOURCE

subroutine haydock_herm_interp(BSp,BS_files,Cryst,Hdr_bse,hsize,hsize_dense,my_t1,my_t2,hmat,nkets,kets,diag_coarse,diag_dense,&
& grid,Wfd,Wfd_dense,Kmesh_coarse,Kmesh_dense,acoeffs,bcoeffs,ccoeffs,Vcp_dense,green,comm)

 use defs_basis
 use m_profiling
 use m_bs_defs
 use m_xmpi
 use m_errors
 use m_linalg_interfaces

 use m_io_tools,       only : get_unit, file_exists, delete_file, flush_unit
 use m_numeric_tools,  only : continued_fract
 use defs_abitypes,    only : Hdr_type
 use m_bz_mesh,        only : kmesh_t
 use m_double_grid,    only : double_grid_t
 use m_blas,           only : xdotc, xgemv
 use m_wfs,            only : wfd_t
 use m_crystal,        only : crystal_t
 use m_vcoul,          only : vcoul_t

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'haydock_herm_interp'
 use interfaces_14_hidewrite
 use interfaces_28_numeric_noabirule
 use interfaces_71_bse, except_this_one => haydock_herm_interp
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: hsize,my_t1,my_t2,nkets,comm
 integer,intent(in) :: hsize_dense
 type(crystal_t),intent(in) :: Cryst
 type(kmesh_t),intent(in) :: Kmesh_coarse,Kmesh_dense
 type(excparam),intent(in) :: BSp
 type(excfiles),intent(in) :: BS_files
 type(Hdr_type),intent(in) :: Hdr_bse
 type(double_grid_t),intent(in) :: grid
 type(wfd_t),intent(inout) :: Wfd,Wfd_dense
!arrays
 complex(dp),intent(out) :: green(BSp%nomega,nkets)
 complex(dpc),intent(in) :: hmat(hsize,my_t1:my_t2),kets(hsize_dense,nkets)
 complex(dpc),intent(in) :: diag_dense(hsize_dense), diag_coarse(hsize,hsize)
!DBYG
 complex(dpc),intent(in) :: acoeffs(hsize,my_t1:my_t2),bcoeffs(hsize,my_t1:my_t2),ccoeffs(hsize,my_t1:my_t2)
 type(vcoul_t),intent(in) :: Vcp_dense

!Local variables ------------------------------
!scalars
 integer :: inn,it,out_unt,ios,nproc,my_rank,master,ierr
 integer :: iovlp,ix,iy,iz
 integer :: niter_file,niter_max,niter_done,nsppol,iq,my_nt,term_type
 integer :: n_all_omegas
 real(dp) :: norm,nfact
 logical :: can_restart,is_converged
 complex(dpc) :: factor
 character(len=500) :: msg
 character(len=fnlen),parameter :: tag_file="_HAYDR_SAVE"
 character(len=fnlen) :: restart_file,out_file
!arrays
 real(dp),pointer :: bb_file(:)
 real(dp),allocatable :: bb(:)
 complex(dpc),allocatable :: aa(:),phi_nm1(:),phi_n(:),hphi_n(:)
 complex(dpc),pointer :: aa_file(:),phi_n_file(:),phi_nm1_file(:)
 complex(dpc),allocatable :: ket0(:)
 complex(dpc),allocatable :: all_omegas(:)
 complex(dpc),allocatable :: green_temp(:,:)
 logical :: check(2)
 complex(gwpc),allocatable :: overlaps(:,:,:) 
 complex(dpc),allocatable :: all_hmat(:,:,:)
 complex(dpc),allocatable :: all_acoeffs(:,:,:),all_bcoeffs(:,:,:),all_ccoeffs(:,:,:)
 integer,allocatable :: corresp(:,:)
 integer,allocatable :: indices(:,:)
 real(dp),allocatable :: interp_factors(:,:,:)
 !DBYG
 complex(dpc),allocatable :: hinterp(:,:)
 !real(dp) :: rmet(3,3), gmet(3,3), gprimd(3,3), ucvol

!************************************************************************

 nproc  = xcomm_size(comm)
 my_rank= xcomm_rank(comm)
 master = 0
 nsppol = Hdr_bse%nsppol

 if(BSp%use_interp) then
   MSG_COMMENT("No parallelization in Interpolation")
   my_nt = hsize_dense
 else
   my_nt = my_t2-my_t1+1
 end if

 ABI_CHECK(my_nt>0,"One of the processors has zero columns")

 write(msg,'(a,i0)')' Haydock algorithm with MAX number of iterations: ',BSp%niter
 call wrtout(std_out,msg,"COLL")
 !
 ! Select the terminator for the continued fraction.
 term_type=0; if (Bsp%hayd_term>0) term_type=1
 write(msg,'(a,i0)')" Using terminator type: ",term_type
 call wrtout(std_out,msg,"COLL")
 !
 ! Check for presence of the restart file.
 can_restart=.FALSE.
 if ( BS_files%in_haydock_basename /= BSE_NOFILE) then
   restart_file = TRIM(BS_files%in_haydock_basename)//TRIM(tag_file)
   if (file_exists(restart_file) ) then
     can_restart=.TRUE.
     msg = " Restarting Haydock calculation from file: "//TRIM(restart_file)
     call wrtout(std_out,msg,"COLL")
     call wrtout(ab_out,msg,"COLL")
   else
     can_restart=.FALSE.
     call wrtout(ab_out," WARNING: cannot find restart file: "//TRIM(restart_file),"COLL")
   end if
 end if

 if(BSp%nsppol > 1) then
   MSG_BUG("Nsppol > 1 not implemented yet")
 end if

 !
 ! Compute overlaps & compute all hmat
 ABI_MALLOC(overlaps,(BSp%nreh_interp(1),BSp%maxnbndv*BSp%maxnbndc,8))
 ABI_CHECK_ALLOC("out of memory in overlaps")

 ABI_MALLOC(all_hmat,(hsize,hsize,8))
 ABI_CHECK_ALLOC("out of memory in all_hmat")

 if(BSp%interp_mode == 2 .or. BSp%interp_mode == 3) then
   ABI_MALLOC(all_acoeffs,(hsize,hsize,8))
   ABI_CHECK_ALLOC("out of memory in all_acoeffs")

   ABI_MALLOC(all_bcoeffs,(hsize,hsize,8))
   ABI_CHECK_ALLOC("out of memory in all_bcoeffs")

   ABI_MALLOC(all_ccoeffs,(hsize,hsize,8))
   ABI_CHECK_ALLOC("out of memory in all_ccoeffs")
 end if 

 ABI_MALLOC(corresp,(hsize,8))
 ABI_CHECK_ALLOC("out of memory in corresp")

 ABI_MALLOC(indices,(Bsp%nreh(1),grid%ndiv))
 ABI_CHECK_ALLOC("out of memory in indices")

 ABI_MALLOC(interp_factors,(BSp%nreh(1),8,grid%ndiv))
 ABI_CHECK_ALLOC("out of memory in interp_factors")

 ! Compute gprimd
 !call metric(gmet,gprimd,-1,rmet,Cryst%rprimd,ucvol)
 
 iovlp = 1
 do iovlp = 1,8
   ix = (iovlp-1)/4
   iy = (iovlp-ix*4-1)/2
   iz = (iovlp-ix*4-iy*2-1)
   write(msg,'(a,i1,a)') "Computing overlap ",iovlp,"/8"
   call wrtout(std_out,msg,"COLL")

   call compute_overlaps(BSp,grid,Wfd,Wfd_dense,Cryst,Kmesh_coarse,Kmesh_dense,BSp%nreh_interp(1),&
&      BSp%maxnbndv*BSp%maxnbndc,(/ix,iy,iz/),hsize,corresp(:,iovlp),overlaps(:,:,iovlp))

   all_hmat(:,:,iovlp) = hmat(:,corresp(:,iovlp)) - diag_coarse(:,corresp(:,iovlp))

   if(BSp%interp_mode == 2 .or. BSP%interp_mode == 3) then
     all_acoeffs(:,:,iovlp) = acoeffs(:,corresp(:,iovlp))
     all_bcoeffs(:,:,iovlp) = bcoeffs(:,corresp(:,iovlp))
     all_ccoeffs(:,:,iovlp) = ccoeffs(:,corresp(:,iovlp))
   end if
 end do

 call preprocess_tables(BSp,grid,interp_factors,indices)

 if(BSp%interp_mode == 2 .or. BSp%interp_mode == 3) then ! If mode = divergence abc, not yet interpolated product ...

   write(std_out,*) "Memory needed for hinterp = ",1._dp*hsize_dense*hsize_dense*2*dpc*b2Mb," Mb"

   ABI_MALLOC(hinterp,(hsize_dense,hsize_dense))
   ABI_CHECK_ALLOC('Out of memory in hinterp')

   call compute_hinterp(BSp,hsize,hsize_dense,all_hmat,grid,&
&    BSp%nreh_interp(1),BSp%maxnbndv*BSp%maxnbndc,corresp,overlaps,interp_factors,indices,&
&    all_acoeffs,all_bcoeffs,all_ccoeffs,Kmesh_dense,Vcp_dense,cryst%gmet,hinterp)
 end if
 
 !
 ! Open the file and write basic dimensions and info.
 if (my_rank==master) then
   out_unt = get_unit()
   out_file = TRIM(BS_files%out_basename)//TRIM(tag_file)
   open(unit=out_unt,file=out_file,form="unformatted",iostat=ios)
   ABI_CHECK(ios==0," Opening file: "//TRIM(out_file))
   ! write header TODO: standardize this part.
   write(out_unt)hsize,Bsp%use_coupling,BSE_HAYD_IMEPS,nkets,Bsp%broad
 end if
 !
 ! Calculate green(w) for the different starting points.
 green=czero
 do iq=1,nkets
   ABI_ALLOCATE(ket0,(hsize_dense))
   ket0=kets(:,iq)
   !
   !
   niter_file=0
   nullify(aa_file)
   nullify(bb_file)
   nullify(phi_nm1_file)
   nullify(phi_n_file)

!   if (can_restart) then
!     call haydock_restart(BSp,restart_file,BSE_HAYD_IMEPS,iq,hsize,&
!&      niter_file,aa_file,bb_file,phi_nm1_file,phi_n_file,comm)
!   end if 
   !
   ! For n>1, we have:
   !  1) a_n = <n|H|n>
   !  2) b_n = || H|n> - a_n|n> -b_{n-1}|n-1> ||
   !  3) |n+1> = [H|n> -a_n|n> -b_{n-1}|n-1>]/b_n
   !
   ! The sequences starts with |1> normalized to 1 and b_0 =0, therefore:
   !  a_1 = <1|H|1>
   !  b_1 = || H|1> - a_1|1> ||
   !  |2> = [H|1> - a_1|1>]/b_1
   !
   ABI_ALLOCATE(hphi_n,(hsize_dense))
   ABI_ALLOCATE(phi_nm1,(hsize_dense))
   ABI_ALLOCATE(phi_n,(hsize_dense))

   niter_max = niter_file + Bsp%niter
   ABI_ALLOCATE(aa,(niter_max))
   ABI_ALLOCATE(bb,(niter_max))
   aa=czero; bb=zero

   if (niter_file==0) then       ! Calculation from scratch.
     phi_nm1=ket0(:)   ! Select the slice treated by this node.
     norm = DZNRM2(hsize_dense,ket0,1) ! Normalization  
     phi_nm1=phi_nm1/norm

     ! hphi_n = MATMUL(hmat,phi_nm1)
     call haydock_interp_matmul(BSp,hsize,hsize_dense,all_hmat,diag_dense,phi_nm1,hphi_n,grid,&
&      BSp%nreh_interp(1),BSp%maxnbndv*BSp%maxnbndc,corresp,overlaps, interp_factors, indices, hinterp)
     !temp_phi = hphi_n

     !call xgemv('N',hsize_dense,my_nt,cone,diag_dense,hsize_dense,phi_nm1,1,czero,hphi_n,1)
     !call xmpi_sum(hphi_n,comm,ierr)

     aa(1)=xdotc(my_nt,phi_nm1,1,hphi_n(:),1)
     call xmpi_sum(aa(1:1),comm,ierr)

     phi_n = hphi_n(:) - aa(1)*phi_nm1

     bb(1) = xdotc(my_nt,phi_n,1,phi_n,1)
     call xmpi_sum(bb(1:1),comm,ierr)
     bb(1) = SQRT(bb(1))

     phi_n = phi_n/bb(1)
     niter_done=1

   else ! Use the previous a and b.
     niter_done=niter_file
     aa(1:niter_done) = aa_file
     bb(1:niter_done) = bb_file
     phi_nm1=phi_nm1_file(:)   ! Select the slice treated by this node.
     phi_n  =phi_n_file  (:)
   end if

   if (associated(aa_file     ))  then
     ABI_DEALLOCATE(aa_file)
   end if
   if (associated(bb_file     ))  then
     ABI_DEALLOCATE(bb_file)
   end if
   if (associated(phi_nm1_file))  then
     ABI_DEALLOCATE(phi_nm1_file)
   end if
   if (associated(phi_n_file  ))  then
     ABI_DEALLOCATE(phi_n_file)
   end if

   if(BSp%use_interp) then
     ! Multiplicative factor (k-point sampling and unit cell volume)  
     ! TODO be careful with the spin here
     ! TODO four_pi comes from the coulomb term 1/|q| is already included in the 
     ! oscillators hence the present approach wont work if a cutoff interaction is used.
     nfact = -four_pi/(Cryst%ucvol*BSp%nkbz_interp)
     if (nsppol==1) nfact=two*nfact

     factor = nfact*(DZNRM2(hsize_dense,ket0,1)**2)
   else
     ! Multiplicative factor (k-point sampling and unit cell volume)  
     ! TODO be careful with the spin here
     ! TODO four_pi comes from the coulomb term 1/|q| is already included in the 
     ! oscillators hence the present approach wont work if a cutoff interaction is used.
     nfact = -four_pi/(Cryst%ucvol*BSp%nkbz)
     if (nsppol==1) nfact=two*nfact

     factor = nfact*(DZNRM2(hsize,ket0,1)**2)
   end if

   ! Which quantity should be checked for convergence?
   check = (/.TRUE.,.TRUE./)
   if (ABS(Bsp%haydock_tol(2)-one)<tol6) check = (/.TRUE. ,.FALSE./)
   if (ABS(Bsp%haydock_tol(2)-two)<tol6) check = (/.FALSE.,.TRUE./)

   ! Create new frequencies "mirror" in negative range to add 
   ! their contributions. Can be improved by computing only once
   ! zero frequency, but loosing clearness
   n_all_omegas = 2*BSp%nomega

   ABI_ALLOCATE(all_omegas,(n_all_omegas))
   ! Put all omegas with frequency > 0 in table
   all_omegas(BSp%nomega+1:n_all_omegas) = BSp%omega
   ! Put all omegas with frequency < 0
   ! Warning, the broadening must be kept positive
   all_omegas(1:BSp%nomega) = -DBLE(BSp%omega(BSp%nomega:1:-1)) &
& + j_dpc*AIMAG(BSp%omega(BSp%nomega:1:-1))

   ABI_ALLOCATE(green_temp,(n_all_omegas,nkets))

   ! Calling haydock_herm_algo with green_temp with full range of frequencies
   call haydock_herm_algo_interp(BSp,niter_done,niter_max,n_all_omegas,all_omegas,BSp%haydock_tol(1),check,hsize,hsize_dense,&
&    my_t1,my_t2,all_hmat,diag_dense,grid,factor,term_type,aa,bb,phi_nm1,phi_n,&
&    green_temp(:,iq),inn,is_converged,BSp%nreh_interp(1),BSp%maxnbndv*BSp%maxnbndc,corresp,overlaps,interp_factors,&
&    indices,comm,hinterp)

   ! Computing result from two ranges of frequencies
   ! The real part is added, the imaginary part is substracted
   green(:,iq) = green_temp(BSp%nomega+1:n_all_omegas,iq)+CONJG(green_temp(BSp%nomega:1:-1,iq))

   ABI_DEALLOCATE(all_omegas)
   ABI_DEALLOCATE(green_temp)
   !
   ! Save the a"s and the b"s for possible restarting.
   ! 1) Info on the Q.
   ! 2) Number of iterations performed.
   ! 3) do iter=1,niter_performed 
   !      aa(iter),bb(iter)
   !    end do
   ! 4) |n-1>
   !    |n>
   !
   if (my_rank==master) then ! Open the file and writes basic dimensions and info.
     write(out_unt)Bsp%q(:,iq)
     write(out_unt)MIN(inn,niter_max)  ! NB if the previous loop completed inn=niter_max+1
     do it=1,MIN(inn,niter_max)        ! if we exited then inn is not incremented by one.
       write(out_unt)it,aa(it),bb(it)
     end do
   end if

   !
   ! hphi_n is used as workspace to gather |n> and |n+1>.
   hphi_n = czero
   hphi_n(my_t1:my_t2) = phi_nm1
   call xmpi_sum_master(hphi_n,master,comm,ierr)
   if (my_rank==master) write(out_unt)hphi_n ! |n-1>

   hphi_n = czero
   hphi_n(my_t1:my_t2) = phi_n
   call xmpi_sum_master(hphi_n,master,comm,ierr)
   if (my_rank==master) write(out_unt)hphi_n ! |n>

   ABI_DEALLOCATE(hphi_n)
   ABI_DEALLOCATE(phi_nm1)
   ABI_DEALLOCATE(phi_n)
   ABI_DEALLOCATE(aa)
   ABI_DEALLOCATE(bb)
   ABI_DEALLOCATE(ket0)
 end do ! iq

 ABI_FREE(corresp)
 ABI_FREE(all_hmat)
 if(BSp%interp_mode == 2 .or. BSp%interp_mode == 3) then
   ABI_FREE(hinterp)
   ABI_FREE(all_acoeffs)
   ABI_FREE(all_bcoeffs)
   ABI_FREE(all_ccoeffs)
 end if
 ABI_FREE(overlaps)
 ABI_FREE(interp_factors)
 ABI_FREE(indices)

 if (my_rank==master) close(out_unt)

 call xmpi_barrier(comm)

end subroutine haydock_herm_interp
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/haydock_herm_algo_interp
!! NAME
!! haydock_herm_algo_interp
!!
!! FUNCTION
!!
!! COPYRIGHT
!! Copyright (C) 2009-2012 ABINIT group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida, M.Giantomassi)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  niter_done=Number of iterations already performed (0 if the run starts from scratch).
!!  niter_max=Max number of iterations. Always > niter_done
!!  nomega=Number of Frequency points for the evaluation of the matrix element.
!!  omega(nomega)=Frequency set (imaginary part is already included).
!!  tol_iter=Tolerance used to stop the algorithm.
!!  check(2)=Logical flags to specify where both the real and the imaginary part of the 
!!    matrix elements of the Green functions have to be checked for convergence. 
!!  hsize=Size of the blocks.
!!  my_t1,my_t2=Indices of the first and last column stored treated by this done.
!!  term_type=0 if no terminator is used, 1 otherwise.
!!  hmat(hsize,my_t1:my_t2)=The columns of the block.
!!  factor
!!  ntrans = Number of transitions
!!  nbnd_coarse = Product of number of conduction and number of valences
!!  corresp = mapping between coarse points and neighbours
!!  overlaps = overlaps of wavefunctions between dense k-point
!!       coarse neighbours and bands
!!  comm=MPI communicator.
!!
!! OUTPUT
!!  green(nomega)=Output matrix elements.
!!  inn=Last iteration performed.
!!  is_converged=.TRUE. of the algorithm converged.
!!
!! SIDE EFFECTS
!!  phi_nm1(my_t2-my_t1+1), phi_n(my_t2-my_t1+1)
!!    input: vectors used to initialize the iteration
!!    output: the vectors obtained in the last iteration 
!!  aa(niter_max) and bb(niter_max)
!!    if niter_done>0: aa(1:niter_done), bb(1:niter_done) store the coefficients of the previous run.
!!    when the routine returns aa(1:inn) and bb(1:inn) contain the matrix elements of the tridiagonal form.
!!
!! PARENTS
!!      haydock
!!
!! CHILDREN
!!      matrginv,zgesv
!!
!! SOURCE

subroutine haydock_herm_algo_interp(BSp,niter_done,niter_max,nomega,omega,tol_iter,check,hsize,hsize_dense,&
& my_t1,my_t2,hmat,diag_dense,grid,factor,term_type,aa,bb,phi_nm1,phi_n,&
& green,inn,is_converged,ntrans,nbnd_coarse,corresp,overlaps,interp_factors,indices,comm,hinterp)

 use defs_basis
 use m_bs_defs
 use m_xmpi
 use m_errors
 use m_profiling

 use m_numeric_tools,  only : continued_fract
 use m_blas,           only : xdotc, xgemv
 use m_bz_mesh,        only : kmesh_t
 use m_double_grid,    only : double_grid_t
 use m_wfs,            only : wfd_t
 use m_crystal,        only : crystal_t

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'haydock_herm_algo_interp'
 use interfaces_14_hidewrite
 use interfaces_71_bse, except_this_one => haydock_herm_algo_interp
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: niter_max,niter_done,nomega
 integer,intent(in) :: hsize,my_t1,my_t2,term_type
 integer,intent(in) :: hsize_dense
 integer,intent(in) :: ntrans,nbnd_coarse
 integer,intent(in) :: comm
 integer,intent(out) :: inn
 logical,intent(out) :: is_converged
 real(dp),intent(in) :: tol_iter
 complex(dpc),intent(in) :: factor
 type(excparam),intent(in) :: BSp
 type(double_grid_t),intent(in) :: grid
!arrays
 real(dp),intent(inout) :: bb(niter_max)
 complex(dpc),intent(out) :: green(nomega)
 complex(dpc),intent(in) :: omega(nomega)
 complex(dpc),intent(inout) :: aa(niter_max)
 complex(dpc),intent(in) :: hmat(hsize,hsize,8)
 complex(dpc),intent(in) :: diag_dense(hsize_dense)
 complex(dpc),intent(inout) :: phi_nm1(hsize_dense)
 complex(dpc),intent(inout) :: phi_n  (hsize_dense)
 complex(gwpc),intent(in) :: overlaps(ntrans,nbnd_coarse,8)
 integer,intent(in) :: corresp(hsize,8)
 logical,intent(in) :: check(2)
 real(dp),intent(in) :: interp_factors(BSp%nreh(1),8,grid%ndiv)
 integer,intent(in) :: indices(BSp%nreh(1),grid%ndiv)
 complex(dpc),intent(in) :: hinterp(hsize_dense,hsize_dense)

!Local variables ------------------------------
!scalars
 integer :: ierr,my_nt,niter_min,nconv
 character(len=500) :: msg
 logical,parameter :: force_real=.TRUE.
!arrays
 real(dp) :: abs_err(nomega,2) !,rel_err(nomega,2)
 complex(dpc),allocatable :: oldg(:),newg(:)
 complex(dpc),allocatable :: phi_np1(:),hphi_n(:),cfact(:)
 logical :: test(2)

!************************************************************************

 ! The sequences starts with |1> normalized to 1 and b_0 =0, therefore:
 !  a_1 = <1|H|1>
 !  b_1 = || H|1> - a_1|1> ||
 !  |2> = [H|1> - a_1|1>]/b_1
 !
 ! For n>1 we have
 !  1) a_n = <n|H|n>
 !  2) b_n = || H|n> - a_n|n> -b_{n-1}|n-1> ||
 !  3) |n+1> = [H|n> -a_n|n> -b_{n-1}|n-1>]/b_n
 !
 my_nt = my_t2-my_t1+1
 my_nt = hsize_dense

 ABI_ALLOCATE(hphi_n,(hsize_dense))
 ABI_CHECK_ALLOC("out-of-memory hphi_n")

 ABI_ALLOCATE(phi_np1,(my_nt))

 ABI_ALLOCATE(oldg,(nomega))
 oldg=czero
 ABI_ALLOCATE(newg,(nomega))
 newg=czero
 ABI_ALLOCATE(cfact,(nomega))
 cfact=czero

 nconv=0
 do inn=niter_done+1,niter_max
   !
   ! hphi_n = MATMUL(hmat,phi_n)
   call haydock_interp_matmul(BSp,hsize,hsize_dense,hmat,diag_dense,phi_n,hphi_n,grid,&
&    ntrans,nbnd_coarse,corresp,overlaps,interp_factors,indices,hinterp)
   !call xgemv('N',hsize_dense,my_nt,cone,diag_dense,hsize_dense,phi_n,1,czero,hphi_n,1)
   !call xmpi_sum(hphi_n,comm,ierr)

   aa(inn) = xdotc(my_nt,phi_n,1,hphi_n(:),1)
   call xmpi_sum(aa(inn:inn),comm,ierr)
   if (force_real) aa(inn) = DBLE(aa(inn)) ! Matrix is Hermitian.

   ! |n+1> = H|n> - A(n)|n> - B(n-1)|n-1>
   phi_np1 = hphi_n(:) - aa(inn)*phi_n - bb(inn-1)*phi_nm1

   bb(inn) = xdotc(my_nt,phi_np1,1,phi_np1,1)
   call xmpi_sum(bb(inn),comm,ierr)
   bb(inn) = SQRT(bb(inn))

   phi_np1 = phi_np1/bb(inn)

   phi_nm1 = phi_n
   phi_n   = phi_np1

   write(msg,'(a,i0,a,3es12.4)')' Iteration number ',inn,', b_i RE(a_i) IM(a_i) ',bb(inn),REAL(aa(inn)),AIMAG(aa(inn))
   call wrtout(std_out,msg,"COLL")
   !if (MOD(inn,2)==0) then
   !  write(100,*)inn,bb(inn),REAL(aa(inn)),AIMAG(aa(inn)) 
   !else 
   !  write(101,*)inn,bb(inn),REAL(aa(inn)),AIMAG(aa(inn)) 
   !end if
   call continued_fract(inn,term_type,aa,bb,nomega,omega,cfact)

   newg= factor*cfact
   !
   ! Avoid spurious convergence.
   niter_min=4; if (niter_done>1) niter_min=niter_done+1
   if (inn>niter_min) then
     test=.TRUE.
     abs_err(:,1) = ABS(DBLE (newg-oldg))
     abs_err(:,2) = ABS(AIMAG(newg-oldg))
     !
     if (tol_iter>zero) then
       ! Test on the L1 norm.
       if (check(1)) test(1) = SUM(abs_err(:,1)) < tol_iter*SUM(ABS(DBLE (newg)))
       if (check(2)) test(2) = SUM(abs_err(:,2)) < tol_iter*SUM(ABS(AIMAG(newg)))

     else
       ! Stringent test for each point.
       if (check(1)) test(1) = ALL( abs_err(:,1) < -tol_iter*ABS(DBLE (newg)))
       if (check(2)) test(2) = ALL( abs_err(:,2) < -tol_iter*ABS(AIMAG(newg)))
     end if
     !
     if (ALL(test)) then
       nconv = nconv+1
     else
       nconv = 0
     end if
     if (nconv==2) then
       write(msg,'(a,es10.2,a,i0,a)')&
&        " >>> Haydock algorithm converged twice within haydock_tol= ",tol_iter," after ",inn," iterations."
       call wrtout(std_out,msg,'COLL')
       call wrtout(ab_out,msg,'COLL')
       EXIT
     end if
   end if

   oldg = newg
 end do ! inn

 green = newg
 if (nconv/=2) then
   write(msg,'(a,es10.2,a,i0,a)')&
&    " WARNING: Haydock algorithm did not converge within ",tol_iter," after ",niter_max," iterations."
   call wrtout(std_out,msg,'COLL')
   call wrtout(ab_out,msg,'COLL')
 end if

 is_converged = (nconv==2)

 ABI_DEALLOCATE(oldg)
 ABI_DEALLOCATE(newg)
 ABI_DEALLOCATE(cfact)
 ABI_DEALLOCATE(hphi_n)
 ABI_DEALLOCATE(phi_np1)

end subroutine haydock_herm_algo_interp
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/compute_overlaps
!! NAME
!! compute_overlaps
!!
!! FUNCTION
!! Compute overlaps between coarse and dense mesh
!!
!! COPYRIGHT
!! Copyright (C) 2012 ABINIT group (YG)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!
!! OUTPUT
!! matrix(i,j) where i is a transition index and j is a couple valence-conduction
!!
!! PARENTS
!!      haydock
!!
!! CHILDREN
!!      matrginv,zgesv
!!
!! SOURCE

subroutine compute_overlaps(BSp,grid,Wfd,Wfd_dense,Cryst,Kmesh_coarse,Kmesh_dense,ntrans,nbnd_coarse,neighbour,hsize,&
& corresp,overlaps)

 use defs_basis
 use m_bs_defs
 use m_xmpi
 use m_errors
 use m_profiling

 use m_io_tools,  only : get_unit
 use m_double_grid, only : double_grid_t, get_kpt_from_indices_coarse
 use m_bz_mesh,   only : kmesh_t
 use m_wfs,       only : wfd_t,wfd_sym_ur,wfd_get_ur
 use m_blas,      only : xdotc
 use m_crystal,   only : crystal_t

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'compute_overlaps'
 use interfaces_28_numeric_noabirule
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 type(excparam),intent(in) :: BSp
 type(double_grid_t),intent(in) :: grid
 integer,intent(in) :: ntrans,nbnd_coarse
 type(wfd_t),intent(inout) :: Wfd,Wfd_dense
 type(crystal_t),intent(in) :: Cryst
 type(kmesh_t),intent(in) :: Kmesh_coarse,Kmesh_dense
 integer,intent(in) :: neighbour(3)
 integer,intent(in) :: hsize
!arrays
 complex(gwpc),intent(out) :: overlaps(ntrans,nbnd_coarse)
 integer,intent(out) :: corresp(hsize)

!Local variables ------------------------------
!scalars
 integer :: it,ik_dense,ik_coarse,iorder,it_coarse
 integer :: ic,iv,iv1,ic1, ibnd_coarse
 integer :: fft_idx, ix, iy, iz, nx, ny, nz
 real(dp) :: gdotr
 integer :: ik_coarse0,it_coarse0
!arrays
 integer :: curindices_dense(6)
 integer :: curindices_coarse(3)
 integer :: g0(3),g01(3),diffg0(3)
 complex(gwpc) :: dvv(BSp%maxnbndv), dcc(BSp%maxnbndc)
 complex(gwpc),allocatable :: urv_coarse(:),urc_coarse(:)
 complex(gwpc),allocatable :: urv_dense(:),urc_dense(:)
 complex(gwpc),allocatable :: ceigr(:)
!DBYG
! integer :: dump_unt
! character(len=500) :: msg
! integer :: ib, is1
 integer :: is
! complex(gwpc) :: temp_ovlp
 real(dp) :: threshold
 real(dp) :: sum_ovlp

!************************************************************************

 ! MG TODO: call this routine to make sure that Wfd and Wfd_coarse use the same FFT mesh.
 !call wfd_change_ngfft(Wfd,Cryst,Psps,new_ngfft)

 ABI_MALLOC(urv_coarse,(Wfd%nfft*Wfd%nspinor))
 ABI_MALLOC(urc_coarse,(Wfd%nfft*Wfd%nspinor))
 ABI_MALLOC(urv_dense,(Wfd_dense%nfft*Wfd_dense%nspinor))
 ABI_MALLOC(urc_dense,(Wfd_dense%nfft*Wfd_dense%nspinor))
 ABI_MALLOC(ceigr,(Wfd_dense%nfft*Wfd_dense%nspinor))

 overlaps = 0

 threshold = 0.1

 !DBYG
 !dump_unt = 111+neighbour(1)*100+neighbour(2)*10+neighbour(3)
 !write(msg, *) "Neighbour = ",neighbour
 !call wrtout(dump_unt, 'Overlaps between u-waves', "PERS")
 !call wrtout(dump_unt, msg, "PERS")
 !call wrtout(dump_unt, ' k_coarse ib_coarse k_dense ib_dense re_ovlp im_ovlp ',"PERS")
 
 !do ik_coarse = 1,Kmesh_coarse%nbz
 ! write(msg, *) "ik_coarse = ",ik_coarse,&
 !&   "kpt_coarse = ",Kmesh_coarse%bz(:,ik_coarse), &
 !&   "ind = ",grid%indices_coarse(:,grid%iktoint_coarse(ik_coarse))
 ! call wrtout(dump_unt, msg, "PERS")
 !end do

 !do ik_dense = 1,Kmesh_dense%nbz
 ! write(msg,*) "ik_dense = ",ik_dense,&
 !&   "kpt_dense = ",Kmesh_dense%bz(:,ik_dense), &
 !&   "ind = ",grid%indices_dense(:,grid%iktoint_dense(ik_dense))
 ! call wrtout(dump_unt, msg, "PERS")
 !end do
 !END DBYG

 if(BSp%nsppol > 1) then
   MSG_BUG("Nsppol > 1 not yet implemented")
 end if

 do is = 1,BSp%nsppol
   do it = 1,BSp%nreh_interp(is)
    ! From it -> ik_ibz,ic,iv
    ik_dense = BSp%Trans_interp(it,is)%k
    ic = BSp%Trans_interp(it,is)%c
    iv = BSp%Trans_interp(it,is)%v

    ! From ik_ibz in the dense mesh -> indices_dense
    iorder = grid%iktoint_dense(ik_dense)
    g01 = grid%g0_dense(:,iorder)

    ik_coarse0 = grid%dense_to_coarse(ik_dense)

   ! From indices_dense -> indices_coarse
    curindices_dense = grid%indices_dense(:,iorder)

    curindices_coarse = curindices_dense(1:3)+neighbour(:)
    ! From indices_coarse -> ik_ibz in the coarse mesh
    call get_kpt_from_indices_coarse(curindices_coarse,grid%maxcomp_coarse,&
&     grid%inttoik_coarse,grid%g0_coarse,grid%nbz_closedcoarse,ik_coarse,g0)

    diffg0 = g0 - g01

    ! From ik_ibz,ic,iv to it_coarse
    it_coarse = BSp%vcks2t(iv,ic,ik_coarse,1)
    it_coarse0 = BSp%vcks2t(iv,ic,ik_coarse0,1)
    corresp(it_coarse0) = it_coarse

    call wfd_sym_ur(Wfd_dense,Cryst,Kmesh_dense,iv,ik_dense,is,urv_dense)
    call wfd_sym_ur(Wfd_dense,Cryst,Kmesh_dense,ic,ik_dense,is,urc_dense)

   ! Take into account a possible umklapp.
   ! WARNING works only with nspinor = 1 !!!
   if (ANY(diffg0/=0)) then
     !ur_kbz = ur_kbz*eig0r
     nx = Wfd%ngfft(1)
     ny = Wfd%ngfft(2)
     nz = Wfd%ngfft(3)

     fft_idx=0
     do iz=0,nz-1
       do iy=0,ny-1
         do ix=0,nx-1
           gdotr= two_pi*( diffg0(1)*(ix/DBLE(nx)) &
&                         +diffg0(2)*(iy/DBLE(ny)) &
&                         +diffg0(3)*(iz/DBLE(nz)) )
           fft_idx = fft_idx+1
           ceigr(fft_idx)=DCMPLX(DCOS(gdotr),DSIN(gdotr))
         end do
       end do
     end do

     urv_dense(:) = urv_dense(:)*ceigr(:)
     urc_dense(:) = urc_dense(:)*ceigr(:)
   end if

   !DBYG
   !do ib = BSp%lomo_spin(is),BSp%humo_spin(is)
   !  call wfd_sym_ur(Wfd,Cryst,Kmesh_coarse,ib,ik_coarse,is,urv_coarse)
   !  temp_ovlp = xdotc(Wfd%nfft,urv_coarse,1,urv_dense,1)/Wfd%nfft
   !  write(msg,*) ik_coarse, ib, ik_dense, iv, REAL(temp_ovlp), AIMAG(temp_ovlp)
   !  call wrtout(dump_unt, msg, "PERS")
   !  temp_ovlp = xdotc(Wfd%nfft,urv_coarse,1,urc_dense,1)/Wfd%nfft
   !  write(msg,*) ik_coarse, ib, ik_dense, ic, REAL(temp_ovlp), AIMAG(temp_ovlp)
   !  call wrtout(dump_unt, msg, "PERS")
   !end do
   !ENDDBYG

   do iv1 = BSp%lomo_spin(is),Bsp%homo_spin(is)
     call wfd_sym_ur(Wfd,Cryst,Kmesh_coarse,iv1,ik_coarse,is,urv_coarse)
     dvv(iv1-Bsp%lomo_spin(is)+1) = xdotc(Wfd%nfft,urv_coarse,1,urv_dense,1)/Wfd%nfft
     if(ABS(dvv(iv1-Bsp%lomo_spin(is)+1)) < threshold) then
        dvv(iv1-Bsp%lomo_spin(is)+1) = 0.0
     end if
   end do
   
!  DBYG
!   write(std_out,*) "(i1,i2,i3,j1,j2,j3) = ",curindices_dense
!   write(std_out,*) "ic = ",ic,"; iv = ",iv
!   write(std_out,*) "Sum of dvv = ",REAL(SUM(DCONJG(dvv(:))*dvv(:)))
!   call flush(std_out)
!  ENDDBYG

   do ic1 = BSp%lumo_spin(is),BSp%humo_spin(is)
     call wfd_sym_ur(Wfd,Cryst,Kmesh_coarse,ic1,ik_coarse,is,urc_coarse)
     dcc(ic1-BSp%lumo_spin(is)+1) = xdotc(Wfd%nfft,urc_coarse,1,urc_dense,1)/Wfd%nfft
     if(ABS(dcc(ic1-Bsp%lumo_spin(is)+1)) < threshold) then
        dcc(ic1-Bsp%lumo_spin(is)+1) = 0.0
     end if
   end do

!  DBYG
!   write(std_out,*) "Sum of dcc = ",REAL(SUM(DCONJG(dcc(:))*dcc(:)))
!   call flush(std_out)
!  ENDDBYG

   do iv1 = BSp%lomo_spin(is),BSp%homo_spin(is)
     do ic1 = BSp%lumo_spin(is),Bsp%humo_spin(is)
       ibnd_coarse = (iv1-BSp%lomo_spin(is))*BSp%maxnbndc+(ic1-BSp%lumo_spin(is)+1)
       overlaps(it, ibnd_coarse) = GWPC_CONJG(dvv(iv1-BSp%lomo_spin(is)+1))*dcc(ic1-BSp%lumo_spin(is)+1)
     end do
   end do


!  DBYG
!   if((sumdvv < 0.95 .or. sumdcc < 0.95)) then
!   write(std_out,*) "(i1,i2,i3,j1,j2,j3) = ",curindices_dense
!   write(std_out,*) "(i1n,i2n,i3n) = ",curindices_coarse
!   write(std_out,*) "ic = ",ic,"; iv = ",iv
!   write(std_out,*) "k_dense = ",Kmesh_dense%bz(:,ik_dense)
!   write(std_out,*) "k_coarse = ",Kmesh_coarse%bz(:,ik_coarse)
!   write(std_out,*) "Sum of dvv = ",REAL(SUM(DCONJG(dvv(:))*dvv(:)))
!   write(std_out,*) "Sum of dcc = ",REAL(SUM(DCONJG(dcc(:))*dcc(:)))
!   write(std_out,*) "Sum of ovlps = ",REAL(SUM(DCONJG(overlaps(it,:))*overlaps(it,:)))
!   call flush(std_out)
!   end if
!  ENDDBYG

   if(BSp%sum_overlaps) then
    sum_ovlp = SQRT(REAL(SUM(GWPC_CONJG(overlaps(it,:))*overlaps(it,:))))
!   I want the overlaps to sum to 1 !!
    if(ABS(sum_ovlp) > tol6) then
      overlaps(it,:) = overlaps(it,:)/sum_ovlp
    else
      overlaps(it,:) = 0
    end if
   end if

   end do
 end do

 ABI_FREE(urv_coarse)
 ABI_FREE(urc_coarse)
 ABI_FREE(urv_dense)
 ABI_FREE(urc_dense)
 ABI_FREE(ceigr)

end subroutine compute_overlaps
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/preprocess_tables
!! NAME
!! preprocess_tables
!!
!! FUNCTION
!! Pre-process tables to improve interpolation technique
!!
!! COPYRIGHT
!! Copyright (C) 2012 ABINIT group (YG)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!
!! OUTPUT
!!
!! PARENTS
!!      haydock
!!
!! CHILDREN
!!      matrginv,zgesv
!!
!! SOURCE

subroutine preprocess_tables(BSp,grid,interp_factors,indices)

 use defs_basis
 use m_profiling
 use m_bs_defs
 use m_xmpi
 use m_errors

 use m_io_tools,  only : get_unit
 use m_double_grid, only : double_grid_t, get_kpt_from_indices_coarse
 use m_bz_mesh,   only : kmesh_t
 use m_wfs,       only : wfd_t,wfd_sym_ur
 use m_blas,      only : xdotc
 use m_crystal,   only : crystal_t

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'preprocess_tables'
 use interfaces_28_numeric_noabirule
!End of the abilint section

 implicit none
!Argument ------------------------------------
!scalars
 type(excparam), intent(in) :: BSp
 type(double_grid_t),intent(in) :: grid
!arrays
 integer,intent(out) :: indices(BSp%nreh(1),grid%ndiv)
 real(dp),intent(out) :: interp_factors(BSP%nreh(1),8,grid%ndiv)

!Local variables -----------------------------
!scalars
 integer :: it, it_coarse, iorder
 integer :: ik_dense, ic, iv, ik_coarse
 integer :: ix, iy, iz, ineighbour
 integer :: curdim, curj
 real(dp) :: interp_factor
!arrays
 integer :: allxyz(3)
 integer :: curindex(BSp%nreh(1))
 integer :: curindices_dense(6)

!*********************************************

 curindex = 1
 do it = 1,BSp%nreh_interp(1)
  ! From it -> ik_ibz,ic,iv
  ik_dense = BSP%Trans_interp(it,1)%k
  ic = BSp%Trans_interp(it,1)%c
  iv = BSp%Trans_interp(it,1)%v

!  ! From ik_ibz in the dense mesh -> indices_dense
  iorder = grid%iktoint_dense(ik_dense)
!  g01 = grid%g0_dense(:,iorder)
!  ! From indices_dense -> indices_coarse
  curindices_dense = grid%indices_dense(:,iorder)
!  curindices_coarse = curindices_dense(1:3)
!  ! From indices_coarse -> ik_ibz in the coarse mesh
!  call get_kpt_from_indices_coarse(curindices_coarse,grid%maxcomp_coarse,&
!&   grid%inttoik_coarse,grid%g0_coarse,grid%nbz_closedcoarse,ik_coarse,g0)

  ik_coarse = grid%dense_to_coarse(ik_dense)
  it_coarse = BSp%vcks2t(iv,ic,ik_coarse,1)

  indices(it_coarse,curindex(it_coarse)) = ik_dense
  ineighbour = 1 
  do ix = 0,1
    do iy = 0,1
      do iz = 0,1 
        allxyz = (/ix,iy,iz/) 
        interp_factor = 1.0
        do curdim = 1,3
          curj = curindices_dense(3+curdim)
          interp_factor = interp_factor*((allxyz(curdim)*(curj*1.0/grid%kmult(curdim)))&
&                                  +((1-allxyz(curdim))*(1-(curj*1.0/grid%kmult(curdim)))))
        end do
        !DBYG
        !Rohlfing and louie
        if(BSp%rl_nb /= 0) then
          if(ineighbour == BSp%rl_nb) then
            interp_factor = 1.0
          else
            interp_factor = 0.0
          end if
        end if
        interp_factors(it_coarse,ineighbour,curindex(it_coarse)) = interp_factor
        !ENDDBYG
        ineighbour = ineighbour+1
      end do
    end do
  end do
  curindex(it_coarse) = curindex(it_coarse) + 1
 end do

end subroutine preprocess_tables

!!***

!----------------------------------------------------------------------

!!****f* ABINIT/compute_hinterp
!! NAME
!! compute_hinterp
!!
!! FUNCTION
!! Compute interpolated matrix elements for methods 2 and 3
!!
!! COPYRIGHT
!! Copyright (C) 2014 ABINIT group (YG)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!
!! OUTPUT
!!
!! PARENTS
!!      haydock
!!
!! CHILDREN
!!      matrginv,zgesv
!!
!! SOURCE

subroutine compute_hinterp(BSp,hsize,hsize_dense,hmat,grid,ntrans,nbnd_coarse,corresp,overlaps,interp_factors,indices,&
&  acoeffs,bcoeffs,ccoeffs,Kmesh_dense,Vcp_dense,gmet,hinterp)

 use defs_basis
 use m_bs_defs
 use m_xmpi
 use m_errors
 use m_profiling

 use m_io_tools,  only : get_unit
 use m_double_grid, only : double_grid_t, get_kpt_from_indices_coarse
 use m_bz_mesh,   only : kmesh_t,findqg0,get_bz_item
 use m_wfs,       only : wfd_t,wfd_sym_ur
 use m_blas,      only : xdotc
 use m_crystal,   only : crystal_t
 use m_vcoul,     only : vcoul_t
 use m_numeric_tools,  only : wrap2_pmhalf

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'compute_hinterp'
 use interfaces_14_hidewrite
 use interfaces_18_timing
 use interfaces_28_numeric_noabirule
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: hsize,hsize_dense
 type(excparam),intent(in) :: BSp
 type(double_grid_t),intent(in) :: grid
 integer,intent(in) :: nbnd_coarse
 integer,intent(in) :: ntrans
!arrays
 complex(dpc),intent(in) :: hmat(hsize,hsize,8)
!DBYG
 complex(dpc),intent(in) :: acoeffs(hsize,hsize,8),bcoeffs(hsize,hsize,8),ccoeffs(hsize,hsize,8)
 integer,intent(in) :: corresp(hsize,8)
 complex(gwpc),intent(in) :: overlaps(ntrans,nbnd_coarse,8)
 real(dp),intent(in) :: interp_factors(BSp%nreh(1),8,grid%ndiv)
 integer,intent(in) :: indices(BSp%nreh(1),grid%ndiv)
 complex(dpc),intent(out) :: hinterp(hsize_dense,hsize_dense)
 type(vcoul_t),intent(in) :: Vcp_dense
 type(kmesh_t),intent(in) :: Kmesh_dense
 real(dp),intent(in) :: gmet(3,3)

                                                                     
!Local variables ------------------------------
!scalars
 integer :: ic,iv,iv1,ic1,ik_dense,ik_coarse,it_coarse,it_dense,idiv,ibnd_coarse,ibnd_coarse1,ineighbour
 integer :: icp,ivp,ikp_dense,ikp_coarse,itp_coarse,itp_dense,idivp,ibndp_coarse,ibndp_coarse1,ineighbourp,itp_coarse1
 integer :: ix, dump_unt
 complex(dpc) :: http
 character(len=500) :: msg
 real(dp) :: kmkp(3),q2(3)
!arrays
 real(dp) :: factor, tmp
 complex(dpc) :: term
 real(dp) :: vc_sqrt_qbz
 real(dp) :: tsec(2)
!Temp matrices for optimized version
 complex(dpc),allocatable :: btemp(:),ctemp(:)
 complex(dpc),allocatable :: Cmat(:,:,:)
 real(dp) :: qinred(3), qnorm
 integer :: it_dense1,indwithnb
 integer :: is, is1, isp

!************************************************************************

 call timab(696,1,tsec)
 factor = 1.0/grid%ndiv

 hinterp = 0
 term = czero

 ABI_MALLOC(btemp,(8*nbnd_coarse))
 ABI_CHECK_ALLOC("out of memory in btemp")

 ABI_MALLOC(ctemp,(8*nbnd_coarse))
 ABI_CHECK_ALLOC("out of memory in ctemp")

 btemp = czero
 ctemp = czero

 ABI_MALLOC(Cmat,(nbnd_coarse,nbnd_coarse,8))
 ABI_CHECK_ALLOC("out of memory in Cmat")

 Cmat = czero

 if(BSp%interp_mode == 3 .or. BSp%interp_mode == 2) then
   if(TRIM(Vcp_dense%mode) /= 'CRYSTAL' .and. TRIM(Vcp_dense%mode) /= 'AUXILIARY_FUNCTION') then
     MSG_BUG('Vcp_dense%mode not implemented yet !')
   end if
 end if

 if(BSp%nsppol > 1) then
   MSG_BUG("nsppol > 1 not yet implemented")
 end if

 do ik_dense = 1,grid%nbz_dense
   ik_coarse = grid%dense_to_coarse(ik_dense)
   do ikp_dense = 1,grid%nbz_dense
     ikp_coarse = grid%dense_to_coarse(ikp_dense)

     if(BSp%interp_mode == 3 .or. BSp%interp_mode == 2) then
       ! Check if we are along the diagonal

       kmkp = Kmesh_dense%bz(:,ik_dense) - Kmesh_dense%bz(:,ikp_dense)

       call wrap2_pmhalf(kmkp(1),q2(1),tmp)
       call wrap2_pmhalf(kmkp(2),q2(2),tmp)
       call wrap2_pmhalf(kmkp(3),q2(3),tmp)

       qinred = MATMUL(grid%kptrlatt_coarse,q2)

       if(BSp%interp_mode == 3) then
         if(ANY((ABS(qinred)-tol7) > one)) then
           ! We are outside the diagonal
           cycle
         end if 
       end if

       qnorm = two_pi*SQRT(DOT_PRODUCT(q2,MATMUL(gmet,q2)))

       if(ALL(ABS(q2(:)) < 1.e-3)) then
          vc_sqrt_qbz = SQRT(Vcp_dense%i_sz)
       else
          vc_sqrt_qbz = SQRT(four_pi/qnorm**2)
       end if

       !!DEBUG CHK !

       !!COMPUTE Qpoint
       !call findqg0(iq_bz,g0,kmkp,Qmesh_dense%nbz,Qmesh_dense%bz,BSp%mG0)

       !! * Get iq_ibz, and symmetries from iq_bz
       !call get_BZ_item(Qmesh_dense,iq_bz,qbz,iq_ibz,isym_q,itim_q)

       !if(iq_ibz > 1 .and. ABS(vc_sqrt_qbz - Vcp_dense%vc_sqrt(1,iq_ibz)) > 1.e-3) then
       !   write(*,*) "vc_sqrt_qbz = ",vc_sqrt_qbz
       !   write(*,*) "Vcp_dense%vc_sqrt(1,iq_ibz) = ",Vcp_dense%vc_sqrt(1,iq_ibz)
       !   MSG_ERROR("vcp are not the same !")
       !else if(iq_ibz == 1 .and. ABS(vc_sqrt_qbz - SQRT(Vcp_dense%i_sz)) > 1.e-3) then
       !   write(*,*) "vc_sqrt_qbz = ",vc_sqrt_qbz
       !   write(*,*) "SQRT(Vcp_dense%i_sz) = ",SQRT(Vcp_dense%i_sz)
       !   MSG_ERROR("vcp are not the same !")
       !end if

       !!END DEBUG CHK !

     end if

     ! TODO : I need to rewrite this part to compute v(q) directly,
     !  since findqg0 scales as nqpt !
     ! Treatment of the q-dependence
     !if(BSp%interp_mode == 2 .or. BSp%interp_mode == 3) then
     !  !COMPUTE Qpoint
     !  kmkp = Kmesh_dense%bz(:,ik_dense) - Kmesh_dense%bz(:,ikp_dense)

     !  call findqg0(iq_bz,g0,kmkp,Qmesh_dense%nbz,Qmesh_dense%bz,BSp%mG0)

     !  ! * Get iq_ibz, and symmetries from iq_bz
     !  call get_BZ_item(Qmesh_dense,iq_bz,qbz,iq_ibz,isym_q,itim_q)

     !  ! Compute coulomb interaction at G = 0
     !  if(iq_ibz == 1) then
     !     vc_sqrt_qbz = SQRT(Vcp_dense%i_sz)
     !  else
     !     vc_sqrt_qbz = Vcp_dense%vc_sqrt(1,iq_ibz)
     !  end if
     !end if

     do is = 1, BSp%nsppol
       do ic = BSp%lumo_spin(is),BSp%humo_spin(is)
         do iv = BSp%lomo_spin(is),BSp%homo_spin(is)
           it_dense = BSp%vcks2t_interp(iv,ic,ik_dense,is)
           it_coarse = BSp%vcks2t(iv,ic,ik_coarse,is)
           ibnd_coarse = (iv-BSp%lomo_spin(is))*BSp%maxnbndc+(ic-BSp%lumo_spin(is)+1)

           ! This part should be optimized
           do ix = 1,grid%ndiv
             if(indices(it_coarse,ix) == ik_dense) then
               idiv = ix
               exit
             end if
           end do

           do isp = 1, BSp%nsppol
             do icp = BSp%lumo_spin(isp),BSp%humo_spin(isp)
               do ivp = BSp%lomo_spin(isp),BSp%homo_spin(isp)
                 itp_dense = BSp%vcks2t_interp(ivp,icp,ikp_dense,isp)
                 ! There was a bug here, before it was iv, ic !!!
                 itp_coarse = BSp%vcks2t(ivp,icp,ikp_coarse,isp)
                 ibndp_coarse = (ivp-Bsp%lomo_spin(isp))*BSp%maxnbndc+(icp-BSp%lumo_spin(isp)+1)

                 do ix = 1,grid%ndiv
                   if(indices(itp_coarse,ix) == ikp_dense) then
                     idivp = ix
                     exit
                   end if
                 end do

                 btemp = czero
                 ctemp = czero

                 do ineighbour = 1,8
                   do ineighbourp = 1,8 

                     do is1 = 1, BSp%nsppol 
                       do ic1 = BSp%lumo_spin(is1),BSp%humo_spin(is1)
                         do iv1 = BSp%lomo_spin(is1),BSp%homo_spin(is1)
                           ibndp_coarse1 = (iv1-BSp%lomo_spin(is1))*BSp%maxnbndc+(ic1-BSp%lumo_spin(is1)+1)
                           indwithnb = (ineighbourp-1)*nbnd_coarse+ibndp_coarse1
                           itp_coarse1 = BSp%vcks2t(iv1,ic1,ikp_coarse,is1)
                           if(BSp%interp_mode == 1) then
                             btemp(indwithnb) = hmat(corresp(it_coarse,ineighbour),itp_coarse1,ineighbourp)
                           else if(BSp%interp_mode == 2) then
                             btemp(indwithnb) = acoeffs(corresp(it_coarse,ineighbour),itp_coarse1,ineighbourp)*(vc_sqrt_qbz**2) &
&                                                + bcoeffs(corresp(it_coarse,ineighbour),itp_coarse1,ineighbourp)*(vc_sqrt_qbz) &
&                                                + ccoeffs(corresp(it_coarse,ineighbour),itp_coarse1,ineighbourp)
                           else if(BSp%interp_mode == 3) then ! Diff between divergence and hmat
                             btemp(indwithnb) = acoeffs(corresp(it_coarse,ineighbour),itp_coarse1,ineighbourp)*(vc_sqrt_qbz**2) &
&                                                + bcoeffs(corresp(it_coarse,ineighbour),itp_coarse1,ineighbourp)*(vc_sqrt_qbz) &
&                                                + ccoeffs(corresp(it_coarse,ineighbour),itp_coarse1,ineighbourp) &
&                                                - hmat(corresp(it_coarse,ineighbour),itp_coarse1,ineighbourp)
                           end if
                           ctemp(indwithnb) = CONJG(overlaps(itp_dense,&
& ibndp_coarse1,ineighbourp))*interp_factors(itp_coarse,ineighbourp,idivp)
                         end do ! iv1
                       end do !ic1
                     end do !ic1
                   end do !ineighbourp

                   Cmat(ibnd_coarse,ibndp_coarse,ineighbour) = DOT_PRODUCT(ctemp,btemp)
                 end do !ineighbour

               end do !ivp
             end do !icp
           end do !isp

         end do !iv
       end do !ic
     end do !is

     do is = 1, BSp%nsppol
       do ic = BSp%lumo_spin(is),BSp%humo_spin(is)
         do iv = BSp%lomo_spin(is),BSp%homo_spin(is)
           it_dense = BSp%vcks2t_interp(iv,ic,ik_dense,is)
           it_coarse = BSp%vcks2t(iv,ic,ik_coarse,is)
           ibnd_coarse = (iv-BSp%lomo_spin(is))*BSp%maxnbndc+(ic-BSp%lumo_spin(is)+1)

           do ix = 1,grid%ndiv
             if(indices(it_coarse,ix) == ik_dense) then
               idiv = ix
               exit
             end if
           end do

           do isp = 1, BSp%nsppol
             do icp = BSp%lumo_spin(isp),BSp%humo_spin(isp)
               do ivp = BSp%lomo_spin(isp),BSp%homo_spin(isp)
                 itp_dense = BSp%vcks2t_interp(ivp,icp,ikp_dense,isp)
                 ! Bug here 
                 ! itp_coarse = BSp%vcks2t(iv,ic,ikp_coarse,isp)
                 itp_coarse = BSp%vcks2t(ivp,icp,ikp_coarse,isp)
                 ibndp_coarse = (ivp-Bsp%lomo_spin(isp))*BSp%maxnbndc+(icp-BSp%lumo_spin(isp)+1)

                 do ix = 1,grid%ndiv
                   if(indices(itp_coarse,ix) == ikp_dense) then
                     idivp = ix
                     exit
                   end if
                 end do

                 btemp = czero
                 ctemp = czero

                 do ineighbour = 1,8
                   do is1 = 1, BSp%nsppol
                     do ic1 = BSp%lumo_spin(is1),BSp%humo_spin(is1)
                       do iv1 = BSp%lomo_spin(is1),BSp%homo_spin(is1)
                         ibnd_coarse1 = (iv1-BSp%lomo_spin(is1))*BSp%maxnbndc+(ic1-BSp%lumo_spin(is1)+1)
                         it_dense1 = BSp%vcks2t_interp(iv1,ic1,ik_dense,is1)
                         indwithnb = (ineighbour-1)*nbnd_coarse+ibnd_coarse1
                         btemp(indwithnb) = Cmat(ibnd_coarse1,ibndp_coarse,ineighbour)
                         ctemp(indwithnb) = overlaps(it_dense,ibnd_coarse1,ineighbour)*interp_factors(it_coarse,ineighbour,idiv)
                       end do !iv1
                     end do !ic1
                   end do !is1
                 end do !ineighbour

                 hinterp(it_dense,itp_dense) = DOT_PRODUCT(ctemp,btemp)

               end do !ivp
             end do !icp
           end do !isp

         end do !iv
       end do !ic
     end do !is

   end do !ikp
 end do !ik

 ABI_FREE(btemp)
 ABI_FREE(ctemp)
 ABI_FREE(Cmat)

 hinterp = hinterp*factor
 
 call timab(696,2,tsec)

 dump_unt = 997
 msg='Interpolated Reasonant Hamiltonian matrix elements: '
 call wrtout(dump_unt,msg,"PERS")
 call wrtout(dump_unt,'    k v  c  s     k" v" c" s"       H',"PERS")
 do itp_dense=1,BSp%nreh_interp(1)      
   ikp_dense = Bsp%Trans_interp(itp_dense,1)%k
   ivp    = Bsp%Trans_interp(itp_dense,1)%v
   icp    = Bsp%Trans_interp(itp_dense,1)%c
   do it_dense=1,BSp%nreh_interp(1)
     ik_dense = Bsp%Trans_interp(it_dense,1)%k
     iv    = Bsp%Trans_interp(it_dense,1)%v
     ic    = Bsp%Trans_interp(it_dense,1)%c
     http = hinterp(it_dense,itp_dense)
     !if (ABS(http) > tol3) then
     write(msg,'(2(i0,1x),2(i5,3i3,3x),2f24.20)')it_dense,itp_dense,ik_dense,iv,ic,1,ikp_dense,ivp,icp,1, http
     call wrtout(dump_unt,msg,"PERS")
     !end if
   end do
 end do

end subroutine compute_hinterp
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/haydock_interp_matmul
!! NAME
!! haydock_interp_matmul
!!
!! FUNCTION
!! Compute matrix-vector product Hmat * phi by interpolating coarse Hmat
!!
!! COPYRIGHT
!! Copyright (C) 2012 ABINIT group (YG)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  BSp<type(excparam)>=Parameters defining the BS calculation
!!  hsize = Size of the coarse hamiltonian
!!  hsize_dense = Size of the dense hamiltonian
!!  hmat = coarse hamiltonian
!!  diag_dense = Diagonal with the interpolated transition energies
!!  phi = ket on which apply the matrix
!!  comm = MPI comm
!!
!! OUTPUT
!!  hphi = Interp(hmat)*phi
!!
!! PARENTS
!!      haydock
!!
!! CHILDREN
!!      matrginv,zgesv
!!
!! SOURCE

subroutine haydock_interp_matmul(BSp,hsize,hsize_dense,hmat,diag_dense,phi,hphi,grid,&
&   ntrans,nbnd_coarse,corresp,overlaps, interp_factors, indices, hinterp)

 use defs_basis
 use m_bs_defs
 use m_xmpi
 use m_errors
 use m_profiling

 use m_io_tools,  only : get_unit
 use m_double_grid, only : double_grid_t, get_kpt_from_indices_coarse
 use m_bz_mesh,   only : kmesh_t
 use m_wfs,       only : wfd_t,wfd_sym_ur
 use m_blas,      only : xdotc, xgemv
 use m_crystal,   only : crystal_t

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'haydock_interp_matmul'
 use interfaces_18_timing
 use interfaces_28_numeric_noabirule
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: hsize,hsize_dense
 type(excparam),intent(in) :: BSp
 type(double_grid_t),intent(in) :: grid
 integer,intent(in) :: nbnd_coarse
 integer,intent(in) :: ntrans
!arrays
 complex(dpc),intent(in) :: phi(hsize_dense)
 complex(dpc),intent(out) :: hphi(hsize_dense)
 complex(dpc),intent(in) :: hmat(hsize,hsize,8)
 integer,intent(in) :: corresp(hsize,8)
 complex(dpc),intent(in) :: diag_dense(hsize_dense)
 complex(gwpc),intent(in) :: overlaps(ntrans,nbnd_coarse,8)
 real(dp),intent(in) :: interp_factors(BSp%nreh(1),8,grid%ndiv)
 integer,intent(in) :: indices(BSp%nreh(1),grid%ndiv)
 complex(dpc),intent(in) :: hinterp(hsize_dense,hsize_dense)

!Local variables ------------------------------
!scalars
 integer :: it,ik_dense,ik_coarse,it_coarse
 integer :: ic,iv,iv1,ic1, ibnd_coarse, ix
 integer :: ineighbour,idense
 integer :: my_k1, my_k2, ind_with_nb
!arrays
 real(dp) :: factor
 complex(dpc) :: allp(hsize,8), test(hsize)
 complex(dpc) :: ophi(grid%nbz_dense,8,nbnd_coarse)
 integer :: allindices(nbnd_coarse)
 complex(dpc),allocatable :: b(:), c(:)
 complex(dpc),allocatable :: A(:,:)
 complex(dpc) :: tmp
 complex(dpc),allocatable :: tmp_array(:), tmp_array2(:,:)
 integer :: ikpt
 logical,parameter :: use_blas=.True.
 real(dp) :: tsec(2)
 integer :: is, is1, nrehs

!************************************************************************

 call timab(697,1,tsec)
 allp(:,:) = czero
 test = czero

 factor = 1.0/grid%ndiv

 hphi = diag_dense*phi

 if(BSp%interp_mode == 2) then
   if(use_blas) then
     call xgemv('N',hsize_dense,hsize_dense,cone,hinterp,hsize_dense,phi,1,cone,hphi,1)
   else
     hphi = hphi + MATMUL(hinterp,phi)
   end if
   call timab(697,2,tsec)
   return ! Return, we are done
 else if(BSp%interp_mode == 3) then
   if(use_blas) then
     call xgemv('N',hsize_dense,hsize_dense,cone,hinterp,hsize_dense,phi,1,cone,hphi,1)
   else
     hphi = hphi + MATMUL(hinterp,phi)
   end if
   ! Go on with the rest
 end if


 ! Outer index : k point in the dense zone
 ! Sum over vc
 ! Index of result : k point in the dense zone, v2,c2,neighbour

 ! Parallelization on nbz in the coarse mesh !
 my_k1 = 1
 my_k2 = grid%nbz_coarse

 ABI_MALLOC(A,(8*nbnd_coarse,nbnd_coarse))
 ABI_MALLOC(b,(nbnd_coarse))
 ABI_MALLOC(c,(8*nbnd_coarse))

 c = czero

 !$OMP PARALLEL DO DEFAULT(none) PRIVATE(A,b,c,is1,iv1,ic1,ibnd_coarse,it,allindices,is,iv,ic,idense,ineighbour,ind_with_nb) SHARED(BSp,overlaps,phi,nbnd_coarse,ophi,grid)
 do ik_dense = 1,grid%nbz_dense
 ! if( ik_dense is not in my set of k-points)
 !   ! continue
 !
   do is1 = 1, BSp%nsppol
     do iv1 = BSp%lomo_spin(is1),Bsp%homo_spin(is1)
       do ic1 = BSp%lumo_spin(is1),Bsp%humo_spin(is1)
         ibnd_coarse = (iv1-BSp%lomo_spin(is1))*BSp%maxnbndc+(ic1-BSp%lumo_spin(is1)+1)
         it = BSp%vcks2t_interp(iv1,ic1,ik_dense,is1)
         allindices(ibnd_coarse) = it
       end do !ic1
     end do !iv1
   end do !is1

   b(:) = phi(allindices(:))
  
   do is = 1, BSp%nsppol
     do iv = BSp%lomo_spin(is),Bsp%homo_spin(is)
       do ic = BSp%lumo_spin(is),Bsp%humo_spin(is)
         ibnd_coarse = (iv-BSp%lomo_spin(is))*BSp%maxnbndc+(ic-BSp%lumo_spin(is)+1)
         idense = Bsp%vcks2t_interp(iv,ic,ik_dense,is)

         do ineighbour = 1,8
           ind_with_nb = (ineighbour-1)*(nbnd_coarse)+ibnd_coarse

           A(ind_with_nb,:) = overlaps(allindices(:),ibnd_coarse,ineighbour)
         end do !ineighbour
       end do !ic
     end do !iv
   end do !is

   if(use_blas) then
     call xgemv('N',8*nbnd_coarse,nbnd_coarse,cone,A,8*nbnd_coarse,b,1,czero,c,1)
   else
     c = MATMUL(A,b)
   end if

   do is = 1, BSp%nsppol
     do iv = BSp%lomo_spin(is),BSp%homo_spin(is)
       do ic = BSp%lumo_spin(is),BSp%humo_spin(is)
         ibnd_coarse = (iv-BSp%lomo_spin(is))*BSp%maxnbndc+(ic-BSp%lumo_spin(is)+1)
         do ineighbour = 1,8
           ind_with_nb = (ineighbour-1)*(nbnd_coarse)+ibnd_coarse
           ophi(ik_dense,ineighbour,ibnd_coarse) = c(ind_with_nb)
         end do !ineighbour
       end do !ic
     end do !iv
   end do !is
 end do !ik_dense
 !$OMP END PARALLEL DO

 ABI_FREE(A)
 ABI_FREE(b)
 ABI_FREE(c)

 ! Outer index : k,v,c in the coarse zone, ineighbour
 ! Sum over all k-dense relative to one coarse point
 ! Index of result : k,v,c in the coarse zone, ineighbour

 ABI_MALLOC(b,(grid%ndiv))
 ABI_MALLOC(c,(grid%ndiv))

 !$OMP PARALLEL DO COLLAPSE(2) DEFAULT(none) PRIVATE(is, it_coarse, ibnd_coarse, ineighbour, b, c, tmp) SHARED(allp,BSp,interp_factors,ophi,indices)
 do is = 1, BSp%nsppol
   do ineighbour = 1,8

     do it_coarse = 1, BSp%nreh(is)
       ibnd_coarse = (Bsp%trans(it_coarse,is)%v-BSp%lomo_spin(is))*BSp%maxnbndc+&
&            (BSp%Trans(it_coarse,is)%c-BSp%lumo_spin(is)+1)
       b(:) = interp_factors(it_coarse,ineighbour,:) 
       c(:) = ophi(indices(it_coarse,:),ineighbour,ibnd_coarse)
       tmp = DOT_PRODUCT(b,c)
       allp(it_coarse,ineighbour) = tmp     
     end do

   end do
 end do
 !$OMP END PARALLEL DO

 ABI_FREE(b)
 ABI_FREE(c)

 ABI_MALLOC(tmp_array,(hsize))
 ABI_MALLOC(tmp_array2,(hsize,hsize))
 tmp_array(:) = czero
 tmp_array2(:,:) = czero

 ! Second step : Multiplication by hmat
 ! Note: OMP is deactivated since this would require large copies in stack of
 ! each thread !
 !!!!!!!!!$OMP PARALLEL DO DEFAULT(none) PRIVATE(ineighbour,tmp_array,tmp_array2) SHARED(factor,hmat,allp,hsize,std_out) reduction(+:test)
 do ineighbour = 1,8
   if(use_blas) then
     !call xgemv('N',hsize,hsize,cone,factor*(hmat(:,:,ineighbour)),hsize,allp(:,ineighbour),1,czero,tmp_array,1)
     tmp_array2 = hmat(:,:,ineighbour)
     tmp_array2 = factor*tmp_array2
     call xgemv('N',hsize,hsize,cone,tmp_array2,hsize,allp(:,ineighbour),1,czero,tmp_array,1)
     test = test + tmp_array 
   else 
     test = test+MATMUL(factor*(hmat(:,:,ineighbour)),allp(:,ineighbour))
   end if
 end do
 !!!!!!!$OMP END PARALLEL DO
 
 ABI_FREE(tmp_array)
 ABI_FREE(tmp_array2)

 ! Outer index : ineighbour
 ! Sum over all v c
 ! Index of result : ineighbour, k_dense, v,c
 !ABI_MALLOC(A,(Bsp%nreh_interp(1),nbnd_coarse))
 ABI_MALLOC(A,(nbnd_coarse,nbnd_coarse))
 ABI_MALLOC(b,(nbnd_coarse))
 !ABI_MALLOC(c,(BSp%nreh_interp(1)))
 ABI_MALLOC(c,(nbnd_coarse))

 c = czero

 !$OMP PARALLEL DO COLLAPSE(2) DEFAULT(none) PRIVATE(ineighbour,ik_dense,is1,iv1,ic1,is,iv,ic,A,b,c,ibnd_coarse,ik_coarse,it,idense) SHARED(test,overlaps,ophi,BSp,grid,corresp,nbnd_coarse)
 do ineighbour = 1,8
   do ik_dense = 1,grid%nbz_dense
     do is1 = 1, Bsp%nsppol
       do iv1 = Bsp%lomo_spin(is1),Bsp%homo_spin(is1)
         do ic1 = BSp%lumo_spin(is1), Bsp%humo_spin(is1)
           ibnd_coarse = (iv1-BSp%lomo_spin(is1))*BSp%maxnbndc+(ic1-BSp%lumo_spin(is1)+1)

           ik_coarse = grid%dense_to_coarse(ik_dense)
           it = BSp%vcks2t(iv1,ic1,ik_coarse,is1)
           b(ibnd_coarse) = test(corresp(it,ineighbour))
         end do ! ic1
       end do ! iv1
    end do ! is1

    do is = 1, BSp%nsppol
      do iv = BSp%lomo_spin(is),Bsp%homo_spin(is)
        do ic = BSp%lumo_spin(is),BSp%humo_spin(is)
          ibnd_coarse = (iv-BSp%lomo_spin(is))*Bsp%maxnbndc+(ic-BSp%lumo_spin(is)+1)
          idense = BSp%vcks2t_interp(iv,ic,ik_dense,is)

          !A(idense,:) = CONJG(overlaps(idense,:,ineighbour))
          A(ibnd_coarse,:) = CONJG(overlaps(idense,:,ineighbour))
        end do ! ic
      end do !iv
    end do !is

    if(use_blas) then
      !call xgemv('N',BSp%nreh_interp(1),nbnd_coarse,cone,CONJG(A),BSp%nreh_interp(1),b,1,czero,c,1)
      call xgemv('N',nbnd_coarse,nbnd_coarse,cone,A,nbnd_coarse,b,1,czero,c,1)
    else
      c = MATMUL(A,b)
    end if

    do is = 1, BSp%nsppol
      do iv = BSp%lomo_spin(is),Bsp%homo_spin(is)
        do ic = BSp%lumo_spin(is),BSp%humo_spin(is)
          ibnd_coarse = (iv-BSp%lomo_spin(is))*BSp%maxnbndc+(ic-BSp%lumo_spin(is)+1)
          idense = Bsp%vcks2t_interp(iv,ic,ik_dense,is)
          !ophi(ik_dense,ineighbour,ibnd_coarse) = c(idense)
          ophi(ik_dense,ineighbour,ibnd_coarse) = c(ibnd_coarse)
        end do
      end do
    end do
  end do
 end do
 !$OMP END PARALLEL DO
 
 ABI_FREE(A)
 ABI_FREE(b)
 ABI_FREE(c)

 ! Outer indices : it_dense 
 ! Sum over neighbours
 ! Index of result : it_dense (ik,ic,iv dense)

 ABI_MALLOC(b,(8))
 ABI_MALLOC(c,(8))

 do is = 1, BSp%nsppol
   nrehs = BSp%nreh_interp(is)
   !Disable OpenMP since it leads to segmentation faults !
   !!!!!!$OMP PARALLEL DO DEFAULT(none) PRIVATE(is,it,ik_dense,ic,iv,ik_coarse,it_coarse,ibnd_coarse,ix,ikpt,b,c,tmp) SHARED(BSp,grid,indices,interp_factors,ophi,hphi,nrehs,std_out) 
   do it = 1,nrehs
    ! From it -> ik_ibz,ic,iv
    ik_dense = BSp%Trans_interp(it,is)%k
    ic = BSp%Trans_interp(it,is)%c
    iv = BSp%Trans_interp(it,is)%v

    ! From ik_ibz in the dense mesh -> indices_dense
    ik_coarse = grid%dense_to_coarse(ik_dense)
    it_coarse = BSp%vcks2t(iv,ic,ik_coarse,is)

    ibnd_coarse = (iv-BSp%lomo_spin(is))*BSp%maxnbndc+(ic-BSp%lumo_spin(is)+1)

    do ix = 1,grid%ndiv
      if(indices(it_coarse,ix) == ik_dense) then
        ikpt = ix
        exit 
      end if
    end do
    b = interp_factors(it_coarse,:,ikpt)
    c =  ophi(ik_dense,:,ibnd_coarse)
    if(use_blas) then
      tmp = xdotc(8, b, 1, c, 1)
    else
      tmp = DOT_PRODUCT(b,c)
    end if
    hphi(it) = hphi(it)+tmp
   end do 
   !!!!!!$OMP END PARALLEL DO

 end do

 ABI_FREE(b)
 ABI_FREE(c)

 call timab(697,2,tsec)

end subroutine haydock_interp_matmul
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/haydock_herm
!! NAME
!! haydock_herm
!!
!! FUNCTION
!!  Reads the excitonic Hamiltonian from file and construct the Lanczos set of vectors 
!!  by iterative matrix-vector multiplications.
!!
!! COPYRIGHT
!! Copyright (C) 2009-2014 ABINIT group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida, M.Giantomassi, Y. Gillet)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!! BSp<excparam>=Parameters for the Bethe-Salpeter calculation.
!! BS_files<excparam>=Files associated to the bethe_salpeter code.
!! Cryst<crystal_t>=Info on the crystalline structure.
!! hize=Size of the excitonic matrix.
!! my_t1,my_t2=First and last columns treated by this node.
!! hmat(hsize,my_t1:my_t2)=Excitonic matrix.
!! nkets=Number of starting vectors for Haydock method.
!! kets(hsize,nkets)=The kets in the eh representation.
!! comm=MPI communicator.
!!
!! OUTPUT
!!  green(BSp%nomega,nkets)=
!!
!! PARENTS
!!      haydock
!!
!! CHILDREN
!!      matrginv,zgesv
!!
!! SOURCE

subroutine haydock_herm(BSp,BS_files,Cryst,Hdr_bse,hsize,my_t1,my_t2,hmat,nkets,kets,green,comm)

 use defs_basis
 use defs_datatypes
 use m_bs_defs
 use m_profiling
 use m_xmpi
 use m_errors

 use m_io_tools,       only : get_unit, file_exists, delete_file, flush_unit
 use m_numeric_tools,  only : continued_fract
 use m_blas,           only : xdotc, xgemv
 use defs_abitypes,    only : Hdr_type
 use m_crystal,        only : crystal_t
 use m_linalg_interfaces
 use m_haydock_io

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'haydock_herm'
 use interfaces_14_hidewrite
 use interfaces_71_bse, except_this_one => haydock_herm
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: hsize,my_t1,my_t2,nkets,comm
 type(crystal_t),intent(in) :: Cryst
 type(excparam),intent(in) :: BSp
 type(excfiles),intent(in) :: BS_files
 type(Hdr_type),intent(in) :: Hdr_bse
!arrays
 complex(dp),intent(out) :: green(BSp%nomega,nkets)
 complex(dpc),intent(in) :: hmat(hsize,my_t1:my_t2),kets(hsize,nkets)

!Local variables ------------------------------
!scalars
 integer :: inn,it,out_unt,ios,nproc,my_rank,master,ierr
 integer :: niter_file,niter_max,niter_done,nsppol,iq,my_nt,term_type
 integer :: n_all_omegas
 real(dp) :: norm,nfact
 logical :: can_restart,is_converged
 complex(dpc) :: factor
 character(len=500) :: msg
 character(len=fnlen),parameter :: tag_file="_HAYDR_SAVE"
 character(len=fnlen) :: restart_file,out_file 
!DBYG
 logical, parameter :: use_yg = .TRUE. ! Put to false to keep old version of haydock files
 type(haydock_type) :: haydock_file
!ENDDBYG
!arrays
 real(dp),pointer :: bb_file(:)
 real(dp),allocatable :: bb(:)
 complex(dpc),allocatable :: aa(:),phi_nm1(:),phi_n(:),hphi_n(:),hphi_nm1(:)
 complex(dpc),pointer :: aa_file(:),phi_n_file(:),phi_nm1_file(:)
 complex(dpc),allocatable :: ket0(:)
 complex(dpc),allocatable :: all_omegas(:)
 complex(dpc),allocatable :: green_temp(:,:)
 logical :: check(2)
 
!************************************************************************

 nproc  = xcomm_size(comm)
 my_rank= xcomm_rank(comm)
 master = 0 
 nsppol = Hdr_bse%nsppol

 my_nt = my_t2-my_t1+1
 ABI_CHECK(my_nt>0,"One of the processors has zero columns")

 write(msg,'(a,i0)')' Haydock algorithm with MAX number of iterations: ',BSp%niter
 call wrtout(std_out,msg,"COLL")
 !
 ! Select the terminator for the continued fraction.
 term_type=0; if (Bsp%hayd_term>0) term_type=1 
 write(msg,'(a,i0)')" Using terminator type: ",term_type
 call wrtout(std_out,msg,"COLL")
 !
 ! Check for presence of the restart file.
 can_restart=.FALSE.
 if ( BS_files%in_haydock_basename /= BSE_NOFILE) then
   restart_file = TRIM(BS_files%in_haydock_basename)
   if (file_exists(restart_file) ) then
     can_restart=.TRUE.
     msg = " Restarting Haydock calculation from file: "//TRIM(restart_file)
     call wrtout(std_out,msg,"COLL")
     call wrtout(ab_out,msg,"COLL")
   else 
     can_restart=.FALSE.
     MSG_WARNING("Cannot find restart file: "//TRIM(restart_file))
   end if
 end if
 !
 ! Open the file and write basic dimensions and info.
 if (my_rank==master) then
   out_file = TRIM(BS_files%out_basename)//TRIM(tag_file)
   if(use_yg) then
      call open_haydock(out_file,haydock_file) 
      haydock_file%hsize = hsize
      haydock_file%use_coupling = Bsp%use_coupling
      haydock_file%op = BSE_HAYD_IMEPS
      haydock_file%nq = nkets
      haydock_file%broad = Bsp%broad
      call write_dim_haydock(haydock_file)
      out_unt = haydock_file%unt
   else
      out_unt = get_unit()
      open(unit=out_unt,file=trim(out_file),form="unformatted",iostat=ios)
      ABI_CHECK(ios==0," Opening file: "//TRIM(out_file))
      ! write header TODO: standardize this part.
      write(out_unt)hsize,Bsp%use_coupling,BSE_HAYD_IMEPS,nkets,Bsp%broad
   end if
 end if
 !
 ! Calculate green(w) for the different starting points.
 green=czero
 do iq=1,nkets
   ABI_MALLOC(ket0,(hsize))
   ket0=kets(:,iq)
   !
   niter_file=0
   nullify(aa_file)
   nullify(bb_file)
   nullify(phi_nm1_file)
   nullify(phi_n_file)

   if (can_restart) then
     call haydock_restart(BSp,restart_file,BSE_HAYD_IMEPS,iq,hsize,&
&      niter_file,aa_file,bb_file,phi_nm1_file,phi_n_file,comm)
   end if 
   !
   ! For n>1, we have:
   !  1) a_n = <n|H|n>
   !  2) b_n = || H|n> - a_n|n> -b_{n-1}|n-1> ||
   !  3) |n+1> = [H|n> -a_n|n> -b_{n-1}|n-1>]/b_n
   !
   ! The sequences starts with |1> normalized to 1 and b_0 =0, therefore:
   !  a_1 = <1|H|1>
   !  b_1 = || H|1> - a_1|1> ||
   !  |2> = [H|1> - a_1|1>]/b_1
   !
   ABI_MALLOC(hphi_n,(hsize))
   ABI_MALLOC(hphi_nm1,(hsize))
   ABI_MALLOC(phi_nm1,(my_nt))
   ABI_MALLOC(phi_n,(my_nt))

   niter_max = niter_file + Bsp%niter
   ABI_MALLOC(aa,(niter_max))
   ABI_MALLOC(bb,(niter_max))
   aa=czero; bb=zero

   if (niter_file==0) then       ! Calculation from scratch.
     phi_nm1=ket0(my_t1:my_t2)   ! Select the slice treated by this node.
     norm = DZNRM2(hsize,ket0,1) ! Normalization  
     phi_nm1=phi_nm1/norm      
                                                                                
     ! hphi_n = MATMUL(hmat,phi_nm1)
     call xgemv('N',hsize,my_nt,cone,hmat,hsize,phi_nm1,1,czero,hphi_n,1)
     call xmpi_sum(hphi_n,comm,ierr)

     aa(1)=xdotc(my_nt,phi_nm1,1,hphi_n(my_t1:),1)
     call xmpi_sum(aa(1:1),comm,ierr)

     phi_n = hphi_n(my_t1:my_t2) - aa(1)*phi_nm1

     bb(1) = xdotc(my_nt,phi_n,1,phi_n,1)
     call xmpi_sum(bb(1:1),comm,ierr)
     bb(1) = SQRT(bb(1))

     phi_n = phi_n/bb(1)
     niter_done=1

   else ! Use the previous a and b.
     niter_done=niter_file
     aa(1:niter_done) = aa_file
     bb(1:niter_done) = bb_file
     phi_nm1=phi_nm1_file(my_t1:my_t2)   ! Select the slice treated by this node.
     phi_n  =phi_n_file  (my_t1:my_t2)   
   end if

   if (associated(aa_file     ))  then
     ABI_FREE(aa_file)
   end if
   if (associated(bb_file     ))  then
     ABI_FREE(bb_file)
   end if
   if (associated(phi_nm1_file))  then
     ABI_FREE(phi_nm1_file)
   end if
   if (associated(phi_n_file  ))  then
     ABI_FREE(phi_n_file)
   end if

   ! Multiplicative factor (k-point sampling and unit cell volume)  
   ! TODO be careful with the spin here
   ! TODO four_pi comes from the coulomb term 1/|q| is already included in the 
   ! oscillators hence the present approach wont work if a cutoff interaction is used.
   nfact = -four_pi/(Cryst%ucvol*BSp%nkbz)
   if (nsppol==1) nfact=two*nfact 

   factor = nfact*(DZNRM2(hsize,ket0,1)**2)

   ! Which quantity should be checked for convergence?
   check = (/.TRUE.,.TRUE./) 
   if (ABS(Bsp%haydock_tol(2)-one)<tol6) check = (/.TRUE. ,.FALSE./) 
   if (ABS(Bsp%haydock_tol(2)-two)<tol6) check = (/.FALSE.,.TRUE./) 

   ! Create new frequencies "mirror" in negative range to add 
   ! their contributions. Can be improved by computing only once
   ! zero frequency, but loosing clearness
   n_all_omegas = 2*BSp%nomega

   ABI_MALLOC(all_omegas,(n_all_omegas))
   ! Put all omegas with frequency > 0 in table
   all_omegas(BSp%nomega+1:n_all_omegas) = BSp%omega
   ! Put all omegas with frequency < 0
   ! Warning, the broadening must be kept positive
   all_omegas(1:BSp%nomega) = -DBLE(BSp%omega(BSp%nomega:1:-1)) &
& + j_dpc*AIMAG(BSp%omega(BSp%nomega:1:-1))   

   ABI_MALLOC(green_temp,(n_all_omegas,nkets))

   ! Calling haydock_herm_algo with green_temp with full range of frequencies
   call haydock_herm_algo(niter_done,niter_max,n_all_omegas,all_omegas,BSp%haydock_tol(1),check,hsize,&
&    my_t1,my_t2,hmat,factor,term_type,aa,bb,phi_nm1,phi_n,green_temp(:,iq),inn,is_converged,comm)

   ! Computing result from two ranges of frequencies
   ! The real part is added, the imaginary part is substracted
   green(:,iq) = green_temp(BSp%nomega+1:n_all_omegas,iq)+CONJG(green_temp(BSp%nomega:1:-1,iq))

   ABI_FREE(all_omegas)
   ABI_FREE(green_temp)
   !
   ! Save the a"s and the b"s for possible restarting.
   ! 1) Info on the Q.
   ! 2) Number of iterations performed.
   ! 3) do iter=1,niter_performed 
   !      aa(iter),bb(iter)
   !    end do
   ! 4) |n-1>
   !    |n>
   !
   hphi_nm1 = czero
   hphi_nm1(my_t1:my_t2) = phi_nm1
   call xmpi_sum_master(hphi_nm1,master,comm,ierr)

   hphi_n = czero
   hphi_n(my_t1:my_t2) = phi_n
   call xmpi_sum_master(hphi_n,master,comm,ierr)

   if (my_rank==master) then ! Open the file and writes basic dimensions and info.
     if(use_yg) then
        call write_haydock(haydock_file, hsize, Bsp%q(:,iq), aa, bb, hphi_n, hphi_nm1, MIN(inn,niter_max), factor)
     else

        write(out_unt)Bsp%q(:,iq)
        write(out_unt)MIN(inn,niter_max)  ! NB if the previous loop completed inn=niter_max+1
        do it=1,MIN(inn,niter_max)        ! if we exited then inn is not incremented by one.
          write(out_unt)it,aa(it),bb(it)
        end do
        write(out_unt)hphi_nm1 
        write(out_unt)hphi_n 
     end if
   end if
  
   ABI_FREE(hphi_n)
   ABI_FREE(hphi_nm1)
   ABI_FREE(phi_nm1)
   ABI_FREE(phi_n)
   ABI_FREE(aa)
   ABI_FREE(bb)
   ABI_FREE(ket0)
 end do ! iq

 if (my_rank==master) then
   if(use_yg) then
     call close_haydock(haydock_file)
   else 
     close(out_unt)
   end if
 end if

 call xmpi_barrier(comm)

end subroutine haydock_herm
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/haydock_herm_algo
!! NAME
!! haydock_herm_algo
!!
!! FUNCTION
!!  Haydock algorithm for Hermitian matrix
!!
!! COPYRIGHT
!! Copyright (C) 2009-2014 ABINIT group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida, M.Giantomassi)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  niter_done=Number of iterations already performed (0 if the run starts from scratch).
!!  niter_max=Max number of iterations. Always > niter_done
!!  nomega=Number of Frequency points for the evaluation of the matrix element.
!!  omega(nomega)=Frequency set (imaginary part is already included).
!!  tol_iter=Tolerance used to stop the algorithm.
!!  check(2)=Logical flags to specify where both the real and the imaginary part of the 
!!    matrix elements of the Green functions have to be checked for convergence. 
!!  hsize=Size of the blocks.
!!  my_t1,my_t2=Indices of the first and last column stored treated by this done.
!!  term_type=0 if no terminator is used, 1 otherwise.
!!  hmat(hsize,my_t1:my_t2)=The columns of the block.
!!  factor
!!  comm=MPI communicator.
!!
!! OUTPUT
!!  green(nomega)=Output matrix elements.
!!  inn=Last iteration performed.
!!  is_converged=.TRUE. of the algorithm converged.
!!
!! SIDE EFFECTS
!!  phi_nm1(my_t2-my_t1+1), phi_n(my_t2-my_t1+1)
!!    input: vectors used to initialize the iteration
!!    output: the vectors obtained in the last iteration 
!!  aa(niter_max) and bb(niter_max)
!!    if niter_done>0: aa(1:niter_done), bb(1:niter_done) store the coefficients of the previous run.
!!    when the routine returns aa(1:inn) and bb(1:inn) contain the matrix elements of the tridiagonal form.
!!
!! PARENTS
!!      haydock
!!
!! CHILDREN
!!      matrginv,zgesv
!!
!! SOURCE

subroutine haydock_herm_algo(niter_done,niter_max,nomega,omega,tol_iter,check,hsize,my_t1,my_t2,hmat,&
&  factor,term_type,aa,bb,phi_nm1,phi_n,green,inn,is_converged,comm)

 use defs_basis
 use m_profiling
 use m_xmpi
 use m_errors

 use m_numeric_tools,  only : continued_fract
 use m_blas,           only : xdotc, xgemv
 use m_fstrings,       only : indent

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'haydock_herm_algo'
 use interfaces_14_hidewrite
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: niter_max,niter_done,nomega
 integer,intent(in) :: comm,hsize,my_t1,my_t2,term_type
 integer,intent(out) :: inn
 logical,intent(out) :: is_converged
 real(dp),intent(in) :: tol_iter
 complex(dpc),intent(in) :: factor
!arrays
 real(dp),intent(inout) :: bb(niter_max)
 complex(dpc),intent(out) :: green(nomega)
 complex(dpc),intent(in) :: omega(nomega) 
 complex(dpc),intent(inout) :: aa(niter_max)
 complex(dpc),intent(in) :: hmat(hsize,my_t1:my_t2)
 complex(dpc),intent(inout) :: phi_nm1(my_t2-my_t1+1)
 complex(dpc),intent(inout) :: phi_n  (my_t2-my_t1+1)
 logical,intent(in) :: check(2)

!Local variables ------------------------------
!scalars
 integer :: ierr,my_nt,niter_min,nconv
 character(len=500) :: msg
 logical,parameter :: force_real=.TRUE.
!arrays
 real(dp) :: abs_err(nomega,2) !,rel_err(nomega,2)
 complex(dpc),allocatable :: oldg(:),newg(:)
 complex(dpc),allocatable :: phi_np1(:),hphi_n(:),cfact(:)
 logical :: test(2)

!************************************************************************

 ! The sequences starts with |1> normalized to 1 and b_0 =0, therefore:
 !  a_1 = <1|H|1>
 !  b_1 = || H|1> - a_1|1> ||
 !  |2> = [H|1> - a_1|1>]/b_1
 !
 ! For n>1 we have
 !  1) a_n = <n|H|n>
 !  2) b_n = || H|n> - a_n|n> -b_{n-1}|n-1> ||
 !  3) |n+1> = [H|n> -a_n|n> -b_{n-1}|n-1>]/b_n
 !
 my_nt = my_t2-my_t1+1

 ABI_MALLOC(hphi_n,(hsize))
 ABI_CHECK_ALLOC("out-of-memory hphi_n")
 
 ABI_MALLOC(phi_np1,(my_nt))

 ABI_MALLOC(oldg,(nomega))
 ABI_MALLOC(newg,(nomega))
 ABI_MALLOC(cfact,(nomega))
 oldg=czero; newg=czero; cfact=czero 

 nconv=0
 do inn=niter_done+1,niter_max
   !
   ! hphi_n = MATMUL(hmat,phi_n)
   call xgemv('N',hsize,my_nt,cone,hmat,hsize,phi_n,1,czero,hphi_n,1)
   call xmpi_sum(hphi_n,comm,ierr)

   aa(inn) = xdotc(my_nt,phi_n,1,hphi_n(my_t1:),1)
   call xmpi_sum(aa(inn:inn),comm,ierr)
   if (force_real) aa(inn) = DBLE(aa(inn)) ! Matrix is Hermitian.

   ! |n+1> = H|n> - A(n)|n> - B(n-1)|n-1>
   phi_np1 = hphi_n(my_t1:my_t2) - aa(inn)*phi_n - bb(inn-1)*phi_nm1

   bb(inn) = xdotc(my_nt,phi_np1,1,phi_np1,1)
   call xmpi_sum(bb(inn),comm,ierr)
   bb(inn) = SQRT(bb(inn))

   phi_np1 = phi_np1/bb(inn)
   
   phi_nm1 = phi_n
   phi_n   = phi_np1

   write(msg,'(a,i0,a,3es12.4)')' Iteration number ',inn,', b_i RE(a_i) IM(a_i) ',bb(inn),REAL(aa(inn)),AIMAG(aa(inn)) 
   call wrtout(std_out,msg,"COLL")
   !if (MOD(inn,2)==0) then
   !  write(100,*)inn,bb(inn),REAL(aa(inn)),AIMAG(aa(inn)) 
   !else 
   !  write(101,*)inn,bb(inn),REAL(aa(inn)),AIMAG(aa(inn)) 
   !end if
   call continued_fract(inn,term_type,aa,bb,nomega,omega,cfact)

   newg= factor*cfact
   !
   ! Avoid spurious convergence.
   niter_min=4; if (niter_done>1) niter_min=niter_done+1
   if (inn>niter_min) then
     test=.TRUE.
     abs_err(:,1) = ABS(DBLE (newg-oldg))
     abs_err(:,2) = ABS(AIMAG(newg-oldg))
     !
     if (tol_iter>zero) then 
       ! Test on the L1 norm.
       if (check(1)) test(1) = SUM(abs_err(:,1)) < tol_iter*SUM(ABS(DBLE (newg)))
       if (check(2)) test(2) = SUM(abs_err(:,2)) < tol_iter*SUM(ABS(AIMAG(newg)))

     else 
       ! Stringent test for each point.
       if (check(1)) test(1) = ALL( abs_err(:,1) < -tol_iter*ABS(DBLE (newg))) 
       if (check(2)) test(2) = ALL( abs_err(:,2) < -tol_iter*ABS(AIMAG(newg)))
     end if
     !
     if (ALL(test)) then 
       nconv = nconv+1
     else 
       nconv = 0
     end if
     if (nconv==2) then 
       write(msg,'(a,es10.2,a,i0,a)')&
&        " >>> Haydock algorithm converged twice within haydock_tol= ",tol_iter," after ",inn," iterations." 
       call wrtout(std_out,msg,'COLL')
       call wrtout(ab_out,msg,'COLL')
       EXIT
     end if
   end if

   oldg = newg
 end do ! inn

 green = newg
 if (nconv/=2) then
   write(msg,'(a,es10.2,a,i0,a)')&
&    " WARNING: Haydock algorithm did not converge within ",tol_iter," after ",niter_max," iterations."
   call wrtout(std_out,msg,'COLL')
   call wrtout(ab_out,msg,'COLL')

   write(yaml_out,"(8a)")ch10,&
&    "--- !HaydockConvergenceWarning",ch10,&
&    "message: | ",ch10,TRIM(indent(msg)),ch10,&
&    "..."

 end if

 is_converged = (nconv==2)

 ABI_FREE(oldg)
 ABI_FREE(newg)
 ABI_FREE(cfact)
 ABI_FREE(hphi_n)
 ABI_FREE(phi_np1)

end subroutine haydock_herm_algo
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/haydock_restart
!! NAME
!! haydock_restart
!!
!! FUNCTION
!! Restart the Haydock method from file reading the data produced in a previous run.
!!
!! COPYRIGHT
!! Copyright (C) 2009-2014 ABINIT group (MG)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  BSp<type(excparam)>=Parameters defining the Bethe-Salpeter calculation.
!!    omega(BSp%nomega)=Frequency mesh for the macroscopic dielectric function (broadening is already included).
!!  iq_search=The index of the q-point to be searched.
!!  hsize
!!  comm=MPI communicator.
!!  nsppol
!!  restart_file
!!
!! OUTPUT
!!  niter_file=Number of iterations already performed. 0 to signal that an error occurred during the reading
!!
!! SIDE EFFECTS
!!  bb_file(:)
!!  aa_file(:)
!!  phi_n_file(:)
!!  phi_nm1_file(:)
!!
!! PARENTS
!!      haydock,haydock_psherm
!!
!! CHILDREN
!!      matrginv,zgesv
!!
!! SOURCE

subroutine haydock_restart(BSp,restart_file,ftype,iq_search,hsize,niter_file,aa_file,bb_file,phi_nm1_file,phi_n_file,comm)

 use defs_basis
 use m_profiling
 use m_bs_defs
 use m_xmpi
 use m_errors

 use m_io_tools,  only : get_unit
 
!DBYG
 use m_haydock_io
!ENDDBYG

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'haydock_restart'
 use interfaces_14_hidewrite
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: comm,hsize,iq_search,ftype
 integer,intent(out) :: niter_file
 character(len=*),intent(in) :: restart_file
 type(excparam),intent(in) :: BSp
!arrays
 real(dp),pointer :: bb_file(:)
 complex(dpc),pointer :: aa_file(:),phi_n_file(:),phi_nm1_file(:)

!Local variables ------------------------------
!scalars
 integer :: inn,it,restart_unt,ios,nproc,my_rank,master,ierr,op_file
 integer :: hsize_file,nq_file,iq_file,use_coupling_file
 real(dp) :: broad_file
 complex(dpc) :: factor_file
 logical :: found_q
 character(len=500) :: msg
!DBYG
 logical,parameter :: use_yg = .TRUE. ! put to false to keep old version of Haydock files
 type(haydock_type) :: haydock_file
!ENDDBYG
!arrays
 real(dp) :: qfile(3)

!************************************************************************

 nproc  = xcomm_size(comm)
 my_rank= xcomm_rank(comm)
 master = 0
 !
 if (my_rank==master) then
   if(use_yg) then
      call open_haydock(restart_file, haydock_file)
   else
      restart_unt = get_unit()
      open(unit=restart_unt,file=trim(restart_file),form="unformatted",status="old",iostat=ios)
      ABI_CHECK(ios==0," Opening file: "//TRIM(restart_file))
   end if

   if(use_yg) then
      call read_dim_haydock(haydock_file)

      if (haydock_file%op/=ftype) then
        write(msg,"(2(a,i0))")" Expecting restart file with filetype: ",ftype," but found ",op_file
        MSG_ERROR(msg)
      end if

      if (haydock_file%hsize/=hsize) then
        write(msg,"(2(a,i0))")&
&         " Rank of H_exc read from file: ",hsize_file," differs from the one used in this run: ",hsize
        MSG_ERROR(msg)
      end if

      if (haydock_file%use_coupling /= BSp%use_coupling) then
        write(msg,'(2(a,i0))')&
&         " use_coupling_file: ",use_coupling_file," differs from input file value: ",BSp%use_coupling
        MSG_ERROR(msg)
      end if


      call read_haydock(haydock_file, Bsp%q(:,iq_search), aa_file, bb_file, &
&                      phi_n_file, phi_nm1_file, niter_file, factor_file)

   if (niter_file == 0) then
     write(msg,"(a,3f8.4,3a)")&
&      " Could not find q-point: ",BSp%q(:,iq_search)," in file ",TRIM(restart_file),&
&      " Cannot restart Haydock iterations for this q-point"
     MSG_COMMENT(msg)
   else
     write(msg,'(a,i0)')" Number of iterations already performed: ",niter_file
     call wrtout(std_out,msg,"COLL")
     call wrtout(ab_out,msg,"COLL")

     if ( ABS(haydock_file%broad - BSp%broad) > tol6) then
       write(msg,'(2a,2(a,f8.4),a)')&
&        " Restart file has been produced with a different Lorentzian broadening: ",ch10,&
&        " broad_file: ",haydock_file%broad," input broadening: ",BSp%broad," Continuing anyway. "
       MSG_WARNING(msg)
     end if

      call close_haydock(haydock_file)

    end if

   else
      read(restart_unt)hsize_file,use_coupling_file,op_file,nq_file,broad_file 
      !write(std_out,*)"hsize_file",hsize_file,nq_file,broad_file

      if (op_file/=ftype) then
        write(msg,"(2(a,i0))")" Expecting restart file with filetype: ",ftype," but found ",op_file
        MSG_ERROR(msg)
      end if

      if (hsize_file/=hsize) then
        write(msg,"(2(a,i0))")&
&         " Rank of H_exc read from file: ",hsize_file," differs from the one used in this run: ",hsize
        MSG_ERROR(msg)
      end if

      if (use_coupling_file /= BSp%use_coupling) then
        write(msg,'(2(a,i0))')&
&         " use_coupling_file: ",use_coupling_file," differs from input file value: ",BSp%use_coupling
        MSG_ERROR(msg)
      end if

   found_q=.FALSE.
   do iq_file=1,nq_file
     read(restart_unt) qfile(:)
     read(restart_unt)niter_file
     if ( ALL(ABS(qfile-BSp%q(:,iq_search)) < tol6) ) then
       found_q=.TRUE.; EXIT
     else 
       ! Skip data for this q.
       do it=1,niter_file
         read(restart_unt) ! it,aa(it),bb(it)
       end do
       read(restart_unt)
       read(restart_unt)
     end if
   end do

   if (.not.found_q) then
     niter_file=0
     write(msg,"(a,3f8.4,3a)")&
&      " Could not find q-point: ",BSp%q(:,iq_search)," in file ",TRIM(restart_file),&
&      " Cannot restart Haydock iterations for this q-point"
     MSG_COMMENT(msg)
   else
     write(msg,'(a,i0)')" Number of iterations already performed: ",niter_file
     call wrtout(std_out,msg,"COLL")
     call wrtout(ab_out,msg,"COLL")

     if ( ABS(broad_file - BSp%broad) > tol6) then
       write(msg,'(2a,2(a,f8.4),a)')&
&        " Restart file has been produced with a different Lorentzian broadening: ",ch10,&
&        " broad_file: ",broad_file," input broadening: ",BSp%broad," Continuing anyway. "
       MSG_WARNING(msg)
     end if

     ABI_MALLOC(aa_file,(niter_file))
     ABI_MALLOC(bb_file,(niter_file))
     do inn=1,niter_file
       read(restart_unt)it,aa_file(inn),bb_file(inn)
       if (inn/=it) then 
         write(msg,'(2(a,i0))')" Found it_file: ",it," while it should be: ",inn
         MSG_ERROR(msg)
       end if
     end do
     ABI_MALLOC(phi_nm1_file,(hsize))
     ABI_MALLOC(phi_n_file,(hsize))
     read(restart_unt)phi_nm1_file
     read(restart_unt)phi_n_file
   end if
   close(restart_unt)

   end if


 end if
 !
 ! Master broadcasts the data.
 call xmpi_bcast(niter_file,master,comm,ierr)

 if (my_rank/=master) then 
   ABI_MALLOC(aa_file,(niter_file))
   ABI_MALLOC(bb_file,(niter_file))
   ABI_MALLOC(phi_nm1_file,(hsize))
   ABI_MALLOC(phi_n_file,(hsize))
 end if

 call xmpi_bcast(aa_file,master,comm,ierr)
 call xmpi_bcast(bb_file,master,comm,ierr)
 call xmpi_bcast(phi_nm1_file,master,comm,ierr)
 call xmpi_bcast(phi_n_file,master,comm,ierr)

end subroutine haydock_restart
!!***

!----------------------------------------------------------------------

!!****f* ABINIT/haydock_mdf_to_tensor
!! NAME
!! haydock_mdf_to_tensor
!!
!! FUNCTION
!! Transform macroscopic dielectric function from green function to each components of the tensor in red and cart coord.
!!
!! COPYRIGHT
!! Copyright (C) 2011-2014 ABINIT group (YG)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  BSp<type(excparam)>=Parameters defining the Bethe-Salpeter calculation.
!!    omega(BSp%nomega)=Frequency mesh for the macroscopic dielectric function (broadening is already included).
!!  Cryst=Parameters of the crystal
!!  eps(BSp%nomega,BSp%nq) = Macroscopic dielectric function to be written.
!!
!! OUTPUT
!!  tensor_cart(BSp%nomega,6) = dielectric tensor for each frequency, order (11,22,33,12,13,23) in cart. coord.
!!  tensor_red(BSp%nomega, 6) = idem in reduced coordinated
!!  ierr = 0 if the tensors have been successfully computed
!!      \= 0 if the system is ill-posed in terms of q-points (not enough or not independent q-points)
!!
!! PARENTS
!!      haydock
!!
!! CHILDREN
!!      matrginv,zgesv
!!
!! SOURCE

subroutine haydock_mdf_to_tensor(BSp,Cryst,eps,tensor_cart,tensor_red,ierr)

 use defs_basis
 use m_profiling
 use m_bs_defs
 use m_errors
 use m_linalg_interfaces

 use m_crystal,       only : crystal_t
 use m_geometry,      only : normv

!This section has been created automatically by the script Abilint (TD).
!Do not modify the following lines by hand.
#undef ABI_FUNC
#define ABI_FUNC 'haydock_mdf_to_tensor'
 use interfaces_32_util
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 integer,intent(out) :: ierr
 type(excparam),intent(in) :: BSp
 type(crystal_t),intent(in) :: Cryst
!arrays
 complex(dpc),intent(in) :: eps(BSp%nomega,BSp%nq)
 complex(dpc),intent(out) :: tensor_cart(BSp%nomega,6), tensor_red(BSp%nomega,6)

!Local variables ------------------------------
!scalars
 integer :: iq,info
 real(dp) :: normqcart, normqred
!arrays
 integer,allocatable :: ipiv(:)
 real(dp) :: qcart(3), qtmet(3)
 real(dp) :: qred2cart(3,3),qcart2red(3,3)
 complex(dpc) :: qqcart(BSp%nq,6), qqred(BSp%nq,6)
 complex(dpc) :: b(6,BSP%nomega)

!************************************************************************

 ! Error flag
 ierr = 0

 if(BSp%nq /= 6) then
    ierr = -1
    return
 end if

 ! Transformation matrices from reduced coordinates to cartesian coordinates
 qred2cart = two_pi*Cryst%gprimd
 qcart2red = qred2cart
 call matrginv(qcart2red,3,3)
 do iq = 1, 6

   ! Computing cartesian q-vector
   qcart = MATMUL(qred2cart, BSp%q(:,iq))

   ! Computing product 'metric - qred' to form quadratic form
   qtmet = (two_pi**2)*MATMUL(Cryst%gmet, BSp%q(:,iq))
 
   ! squared norms
   normqcart = qcart(1)**2+qcart(2)**2+qcart(3)**2
   normqred = (normv(BSp%q(:,iq),Cryst%gmet,"G"))**2

   ! Compute line 'iq' for matrix in cartesian coord
   qqcart(iq,1) = (qcart(1))**2
   qqcart(iq,2) = (qcart(2))**2
   qqcart(iq,3) = (qcart(3))**2
   qqcart(iq,4) = 2*(qcart(1)*qcart(2))
   qqcart(iq,5) = 2*(qcart(1)*qcart(3))
   qqcart(iq,6) = 2*(qcart(2)*qcart(3))

   ! Compute line 'iq' for matrix in reduced coord
   qqred(iq,1) = (qtmet(1))**2
   qqred(iq,2) = (qtmet(2))**2
   qqred(iq,3) = (qtmet(3))**2
   qqred(iq,4) = 2*(qtmet(1)*qtmet(2))
   qqred(iq,5) = 2*(qtmet(1)*qtmet(3))
   qqred(iq,6) = 2*(qtmet(2)*qtmet(3))

   ! Renormalize line
   qqcart(iq,:) = qqcart(iq,:)/normqcart
   qqred(iq,:) = qqred(iq,:)/normqred

 end do

 ABI_MALLOC(ipiv,(6))

 ! Solving linear system
 b = TRANSPOSE(eps)
 call ZGESV(6,BSp%nomega,qqcart,6,ipiv,b,6,info)
 tensor_cart = TRANSPOSE(b)

 if(info /= 0) then
   ! Skipping the rest of the routine
   ierr = info
   ABI_FREE(ipiv)
   return
 end if

 b = TRANSPOSE(eps)
 call ZGESV(6,BSp%nomega,qqred,6,ipiv,b,6,info)
 tensor_red = TRANSPOSE(b)

 if(info /= 0) then
   ierr = info
 end if

 ABI_FREE(ipiv)

end subroutine haydock_mdf_to_tensor
!!***
