Skip to content

Commit

Permalink
Merge branch 'dev-stable' of github.com:QuantumPackage/qp2 into dev-s…
Browse files Browse the repository at this point in the history
…table
  • Loading branch information
scemama committed Jan 30, 2024
2 parents c0a4b78 + 74dac46 commit 6269cb6
Show file tree
Hide file tree
Showing 8 changed files with 657 additions and 103 deletions.
62 changes: 62 additions & 0 deletions config/gfortran_mkl.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Common flags
##############
#
# -ffree-line-length-none : Needed for IRPF90 which produces long lines
# -lmkl_gf_lp64 -lmkl_core -lmkl_gnu_thread : Link with the Intel MKL libraries (see LAPACK_LIB) instead of the system libblas and liblapack
# -I . : Include the current directory (Mandatory)
#
# --ninja : Allow the use of ninja. (Mandatory)
# --align=32 : Align all provided arrays on a 32-byte boundary
#
#
[COMMON]
FC : gfortran -ffree-line-length-none -I . -mavx -g -fPIC -std=legacy
LAPACK_LIB : -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_core -lpthread -lm -ldl -lmkl_gnu_thread -lgomp -fopenmp
IRPF90 : irpf90
IRPF90_FLAGS : --ninja --align=32 -DSET_NESTED

# Global options
################
#
# 1 : Activate
# 0 : Deactivate
#
[OPTION]
MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
CACHE : 0 ; Enable cache_compile.py
OPENMP : 1 ; Append OpenMP flags

# Optimization flags
####################
#
# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations.
# It also enables optimizations that are not valid
# for all standard-compliant programs. It turns on
# -ffast-math and the Fortran-specific
# -fno-protect-parens and -fstack-arrays.
[OPT]
FCFLAGS : -Ofast -mavx

# Profiling flags
#################
#
[PROFILE]
FC : -p -g
FCFLAGS : -Ofast

# Debugging flags
#################
#
# -fcheck=all : Enables all run-time checks (array bounds, pointers, memory allocation, recursion, etc...)
# -g : Extra debugging information
#
[DEBUG]
FCFLAGS : -fcheck=all -g

# OpenMP flags
#################
#
[OPENMP]
FC : -fopenmp
IRPF90_FLAGS : --openmp

2 changes: 1 addition & 1 deletion plugins/local/jastrow/EZFIO.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ size: (ao_basis.ao_num)
type: double precision
doc: coefficients of the 1-electron Jastrow in AOsxAOs
interface: ezfio
size: (ao_basis.ao_num*ao_basis.ao_num)
size: (ao_basis.ao_num,ao_basis.ao_num)

[j1e_coef_ao3]
type: double precision
Expand Down
16 changes: 7 additions & 9 deletions plugins/local/non_h_ints_mu/jast_1e.irp.f
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
double precision :: cx, cy, cz
double precision :: time0, time1
double precision, allocatable :: Pa(:,:), Pb(:,:), Pt(:,:)
double precision, allocatable :: coef_fit(:), coef_fit2(:), coef_fit3(:,:)
double precision, allocatable :: coef_fit(:), coef_fit2(:,:), coef_fit3(:,:)

PROVIDE j1e_type

Expand Down Expand Up @@ -243,7 +243,7 @@

PROVIDE aos_grad_in_r_array

allocate(coef_fit2(ao_num*ao_num))
allocate(coef_fit2(ao_num,ao_num))

if(mpi_master) then
call ezfio_has_jastrow_j1e_coef_ao2(exists)
Expand All @@ -254,7 +254,7 @@
IRP_ENDIF
IRP_IF MPI
include 'mpif.h'
call MPI_BCAST(coef_fit2, ao_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
call MPI_BCAST(coef_fit2, (ao_num*ao_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
stop 'Unable to read j1e_coef_ao2 with MPI'
endif
Expand All @@ -264,22 +264,22 @@
write(6,'(A)') '.. >>>>> [ IO READ: j1e_coef_ao2 ] <<<<< ..'
call ezfio_get_jastrow_j1e_coef_ao2(coef_fit2)
IRP_IF MPI
call MPI_BCAST(coef_fit2, ao_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
call MPI_BCAST(coef_fit2, (ao_num*ao_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
stop 'Unable to read j1e_coef_ao2 with MPI'
endif
IRP_ENDIF
endif
else

call get_j1e_coef_fit_ao2(ao_num*ao_num, coef_fit2)
call get_j1e_coef_fit_ao2(ao_num, coef_fit2)
call ezfio_set_jastrow_j1e_coef_ao2(coef_fit2)

endif

!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, j, ij, ipoint, c) &
!$OMP PRIVATE (i, j, ipoint, c) &
!$OMP SHARED (n_points_final_grid, ao_num, &
!$OMP aos_grad_in_r_array, coef_fit2, &
!$OMP aos_in_r_array, j1e_gradx, j1e_grady, j1e_gradz)
Expand All @@ -292,9 +292,7 @@

do i = 1, ao_num
do j = 1, ao_num
ij = (i-1)*ao_num + j

c = coef_fit2(ij)
c = coef_fit2(j,i)

j1e_gradx(ipoint) += c * (aos_in_r_array(i,ipoint) * aos_grad_in_r_array(j,ipoint,1) + aos_grad_in_r_array(i,ipoint,1) * aos_in_r_array(j,ipoint))
j1e_grady(ipoint) += c * (aos_in_r_array(i,ipoint) * aos_grad_in_r_array(j,ipoint,2) + aos_grad_in_r_array(i,ipoint,2) * aos_in_r_array(j,ipoint))
Expand Down
167 changes: 99 additions & 68 deletions plugins/local/non_h_ints_mu/jast_1e_utils.irp.f
Original file line number Diff line number Diff line change
Expand Up @@ -120,22 +120,28 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit)

implicit none
integer , intent(in) :: dim_fit
double precision, intent(out) :: coef_fit(dim_fit)
double precision, intent(out) :: coef_fit(dim_fit,dim_fit)

integer :: i, j, k, l, ipoint
integer :: ij, kl
integer :: ij, kl, mn
integer :: info, n_svd, LWORK
double precision :: g
double precision :: t0, t1
double precision, allocatable :: A(:,:), b(:), A_inv(:,:)
double precision :: cutoff_svd, D1_inv
double precision, allocatable :: A(:,:,:,:), b(:)
double precision, allocatable :: Pa(:,:), Pb(:,:), Pt(:,:)
double precision, allocatable :: u1e_tmp(:)
double precision, allocatable :: u1e_tmp(:), tmp(:,:,:)
double precision, allocatable :: U(:,:), D(:), Vt(:,:), work(:)


PROVIDE j1e_type
PROVIDE int2_u2e_ao
PROVIDE elec_alpha_num elec_beta_num elec_num
PROVIDE mo_coef


cutoff_svd = 1d-10

call wall_time(t0)
print*, ' PROVIDING the representation of 1e-Jastrow in AOs x AOs ... '

Expand Down Expand Up @@ -169,98 +175,123 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit)
! --- --- ---
! get A

allocate(A(ao_num*ao_num,ao_num*ao_num))
allocate(tmp(n_points_final_grid,ao_num,ao_num))
allocate(A(ao_num,ao_num,ao_num,ao_num))

!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, j, k, l, ij, kl, ipoint) &
!$OMP SHARED (n_points_final_grid, ao_num, &
!$OMP final_weight_at_r_vector, aos_in_r_array_transp, A)
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, j, ipoint) &
!$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp)
!$OMP DO COLLAPSE(2)
do k = 1, ao_num
do l = 1, ao_num
kl = (k-1)*ao_num + l

do i = 1, ao_num
do j = 1, ao_num
ij = (i-1)*ao_num + j

A(ij,kl) = 0.d0
do ipoint = 1, n_points_final_grid
A(ij,kl) += final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) &
* aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,l)
enddo
enddo
do j = 1, ao_num
do i = 1, ao_num
do ipoint = 1, n_points_final_grid
tmp(ipoint,i,j) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL

! print *, ' A'
! do ij = 1, ao_num*ao_num
! write(*, '(100000(f15.7))') (A(ij,kl), kl = 1, ao_num*ao_num)
! enddo
call dgemm( "T", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
, tmp(1,1,1), n_points_final_grid, tmp(1,1,1), n_points_final_grid &
, 0.d0, A(1,1,1,1), ao_num*ao_num)

! --- --- ---
! get b

allocate(b(ao_num*ao_num))

!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, j, ij, ipoint) &
!$OMP SHARED (n_points_final_grid, ao_num, &
!$OMP final_weight_at_r_vector, aos_in_r_array_transp, u1e_tmp, b)
!$OMP DO COLLAPSE(2)
do i = 1, ao_num
do j = 1, ao_num
ij = (i-1)*ao_num + j

b(ij) = 0.d0
do ipoint = 1, n_points_final_grid
b(ij) = b(ij) + final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) * u1e_tmp(ipoint)
enddo
enddo
do ipoint = 1, n_points_final_grid
u1e_tmp(ipoint) = dsqrt(final_weight_at_r_vector(ipoint)) * u1e_tmp(ipoint)
enddo
!$OMP END DO
!$OMP END PARALLEL

call dgemv("T", n_points_final_grid, ao_num*ao_num, 1.d0, tmp(1,1,1), n_points_final_grid, u1e_tmp(1), 1, 0.d0, b(1), 1)
!call dgemm( "T", "N", ao_num*ao_num, 1, n_points_final_grid, 1.d0 &
! , tmp(1,1,1), n_points_final_grid, u1e_tmp(1), n_points_final_grid &
! , 0.d0, b(1), ao_num*ao_num)

deallocate(u1e_tmp)
deallocate(tmp)

! --- --- ---
! solve Ax = b

allocate(A_inv(ao_num*ao_num,ao_num*ao_num))
!call get_inverse(A, ao_num*ao_num, ao_num*ao_num, A_inv, ao_num*ao_num)
call get_pseudo_inverse(A, ao_num*ao_num, ao_num*ao_num, ao_num*ao_num, A_inv, ao_num*ao_num, 5d-8)
! double precision, allocatable :: A_inv(:,:,:,:)
! allocate(A_inv(ao_num,ao_num,ao_num,ao_num))
! call get_pseudo_inverse(A(1,1,1,1), ao_num*ao_num, ao_num*ao_num, ao_num*ao_num, A_inv(1,1,1,1), ao_num*ao_num, cutoff_svd)
! A = A_inv

! coef_fit = A_inv x b
call dgemv("N", ao_num*ao_num, ao_num*ao_num, 1.d0, A_inv, ao_num*ao_num, b, 1, 0.d0, coef_fit, 1)
allocate(D(ao_num*ao_num), U(ao_num*ao_num,ao_num*ao_num), Vt(ao_num*ao_num,ao_num*ao_num))

integer :: mn
double precision :: tmp, acc, nrm
allocate(work(1))
lwork = -1
call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A(1,1,1,1), ao_num*ao_num &
, D(1), U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num, work, lwork, info)
if(info /= 0) then
print *, info, ': SVD failed'
stop
endif

acc = 0.d0
nrm = 0.d0
do ij = 1, ao_num*ao_num
tmp = 0.d0
do kl = 1, ao_num*ao_num
tmp += A(ij,kl) * coef_fit(kl)
enddo
tmp = tmp - b(ij)
if(dabs(tmp) .gt. 1d-7) then
print*, ' problem found in fitting 1e-Jastrow'
print*, ij, tmp
endif
LWORK = max(5*ao_num*ao_num, int(WORK(1)))
deallocate(work)
allocate(work(lwork))
call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A(1,1,1,1), ao_num*ao_num &
, D(1), U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num, work, lwork, info)
if(info /= 0) then
print *, info, ':: SVD failed'
stop 1
endif

acc += dabs(tmp)
nrm += dabs(b(ij))
deallocate(work)

if(D(1) .lt. 1d-14) then
print*, ' largest singular value is very small:', D(1)
n_svd = 1
else
n_svd = 0
D1_inv = 1.d0 / D(1)
do ij = 1, ao_num*ao_num
if(D(ij)*D1_inv > cutoff_svd) then
D(ij) = 1.d0 / D(ij)
n_svd = n_svd + 1
else
D(ij) = 0.d0
endif
enddo
endif
print*, ' n_svd = ', n_svd

!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (ij, kl) &
!$OMP SHARED (ao_num, n_svd, D, Vt)
!$OMP DO
do kl = 1, ao_num*ao_num
do ij = 1, n_svd
Vt(ij,kl) = Vt(ij,kl) * D(ij)
enddo
enddo
print *, ' Relative Error (%) =', 100.d0*acc/nrm
!$OMP END DO
!$OMP END PARALLEL

! A = A_inv
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_svd, 1.d0 &
, U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num &
, 0.d0, A(1,1,1,1), ao_num*ao_num)

deallocate(A, A_inv, b)
deallocate(D, U, Vt)


! ---

! coef_fit = A_inv x b
call dgemv("N", ao_num*ao_num, ao_num*ao_num, 1.d0, A(1,1,1,1), ao_num*ao_num, b(1), 1, 0.d0, coef_fit(1,1), 1)
!call dgemm( "N", "N", ao_num*ao_num, 1, ao_num*ao_num, 1.d0 &
! , A(1,1,1,1), ao_num*ao_num, b(1), ao_num*ao_num &
! , 0.d0, coef_fit(1,1), ao_num*ao_num)

deallocate(A, b)

call wall_time(t1)
print*, ' END after (min) ', (t1-t0)/60.d0
Expand Down
Loading

0 comments on commit 6269cb6

Please sign in to comment.