Merge branch 'dev-stable' of github.com:QuantumPackage/qp2 into dev-stable
QuantumPackage · Jan 30, 2024 · 6269cb6 · 6269cb6
2 parents c0a4b78 + 74dac46
commit 6269cb6
Show file tree

Hide file tree

Showing 8 changed files with 657 additions and 103 deletions.
diff --git a/config/gfortran_mkl.cfg b/config/gfortran_mkl.cfg
@@ -0,0 +1,62 @@
+# Common flags
+##############
+#
+# -ffree-line-length-none : Needed for IRPF90 which produces long lines
+# -lmkl_*                 : Link with the Intel MKL BLAS/LAPACK libraries found under ${MKLROOT} (see LAPACK_LIB)
+# -I .                    : Include the current directory (Mandatory)
+#
+# --ninja                 : Enable the use of ninja. (Mandatory)
+# --align=32              : Align all provided arrays on a 32-byte boundary
+#
+#
+[COMMON]
+FC           : gfortran -ffree-line-length-none -I . -mavx -g -fPIC -std=legacy
+LAPACK_LIB   : -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_core -lpthread -lm -ldl -lmkl_gnu_thread -lgomp -fopenmp
+IRPF90       : irpf90
+IRPF90_FLAGS : --ninja --align=32 -DSET_NESTED
+
+# Global options
+################
+#
+# 1 : Activate
+# 0 : Deactivate
+#
+[OPTION]
+MODE    : OPT        ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+CACHE   : 0          ; Enable cache_compile.py
+OPENMP  : 1          ; Append OpenMP flags
+
+# Optimization flags
+####################
+#
+# -Ofast                  : Disregard strict standards compliance. Enables all -O3 optimizations.
+#                           It also enables optimizations that are not valid
+#                           for all standard-compliant programs.  It turns on
+#                           -ffast-math and the Fortran-specific
+#                           -fno-protect-parens and -fstack-arrays.
+[OPT]
+FCFLAGS : -Ofast -mavx
+
+# Profiling flags
+#################
+#
+[PROFILE]
+FC      : -p -g
+FCFLAGS : -Ofast
+
+# Debugging flags
+#################
+#
+# -fcheck=all  : Enables all run-time checks (array bounds, pointers, memory allocation, etc.)
+# -g           : Extra debugging information
+#
+[DEBUG]
+FCFLAGS : -fcheck=all -g
+
+# OpenMP flags
+#################
+#
+[OPENMP]
+FC           : -fopenmp
+IRPF90_FLAGS : --openmp
+
diff --git a/plugins/local/jastrow/EZFIO.cfg b/plugins/local/jastrow/EZFIO.cfg
@@ -99,7 +99,7 @@ size: (ao_basis.ao_num)
 type: double precision
 doc: coefficients of the 1-electron Jastrow in AOsxAOs
 interface: ezfio
-size: (ao_basis.ao_num*ao_basis.ao_num)
+size: (ao_basis.ao_num,ao_basis.ao_num)
 
 [j1e_coef_ao3]
 type: double precision

diff --git a/plugins/local/non_h_ints_mu/jast_1e.irp.f b/plugins/local/non_h_ints_mu/jast_1e.irp.f
@@ -78,7 +78,7 @@
   double precision              :: cx, cy, cz
   double precision              :: time0, time1
   double precision, allocatable :: Pa(:,:), Pb(:,:), Pt(:,:)
-  double precision, allocatable :: coef_fit(:), coef_fit2(:), coef_fit3(:,:)
+  double precision, allocatable :: coef_fit(:), coef_fit2(:,:), coef_fit3(:,:)
 
   PROVIDE j1e_type
 
@@ -243,7 +243,7 @@
 
     PROVIDE aos_grad_in_r_array
 
-    allocate(coef_fit2(ao_num*ao_num))
+    allocate(coef_fit2(ao_num,ao_num))
 
     if(mpi_master) then
       call ezfio_has_jastrow_j1e_coef_ao2(exists)
@@ -254,7 +254,7 @@
     IRP_ENDIF
     IRP_IF MPI
       include 'mpif.h'
-      call MPI_BCAST(coef_fit2, ao_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+      call MPI_BCAST(coef_fit2, (ao_num*ao_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
       if (ierr /= MPI_SUCCESS) then
         stop 'Unable to read j1e_coef_ao2 with MPI'
       endif
@@ -264,22 +264,22 @@
         write(6,'(A)') '.. >>>>> [ IO READ: j1e_coef_ao2 ] <<<<< ..'
         call ezfio_get_jastrow_j1e_coef_ao2(coef_fit2)
         IRP_IF MPI
-          call MPI_BCAST(coef_fit2, ao_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+          call MPI_BCAST(coef_fit2, (ao_num*ao_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
           if (ierr /= MPI_SUCCESS) then
             stop 'Unable to read j1e_coef_ao2 with MPI'
           endif
         IRP_ENDIF
       endif
     else
 
-      call get_j1e_coef_fit_ao2(ao_num*ao_num, coef_fit2)
+      call get_j1e_coef_fit_ao2(ao_num, coef_fit2)
       call ezfio_set_jastrow_j1e_coef_ao2(coef_fit2)
 
     endif
 
     !$OMP PARALLEL                                &
     !$OMP DEFAULT (NONE)                          &
-    !$OMP PRIVATE (i, j, ij, ipoint, c)           &
+    !$OMP PRIVATE (i, j, ipoint, c)               &
     !$OMP SHARED (n_points_final_grid, ao_num,    &
     !$OMP         aos_grad_in_r_array, coef_fit2, &
     !$OMP         aos_in_r_array, j1e_gradx, j1e_grady, j1e_gradz)
@@ -292,9 +292,7 @@
 
       do i = 1, ao_num
         do j = 1, ao_num
-          ij = (i-1)*ao_num + j
-
-          c = coef_fit2(ij)
+          c = coef_fit2(j,i)
 
           j1e_gradx(ipoint) += c * (aos_in_r_array(i,ipoint) * aos_grad_in_r_array(j,ipoint,1) + aos_grad_in_r_array(i,ipoint,1) * aos_in_r_array(j,ipoint))
           j1e_grady(ipoint) += c * (aos_in_r_array(i,ipoint) * aos_grad_in_r_array(j,ipoint,2) + aos_grad_in_r_array(i,ipoint,2) * aos_in_r_array(j,ipoint))

diff --git a/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f b/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f
@@ -120,22 +120,28 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit)
 
   implicit none
   integer         , intent(in)  :: dim_fit
-  double precision, intent(out) :: coef_fit(dim_fit)
+  double precision, intent(out) :: coef_fit(dim_fit,dim_fit)
 
   integer                       :: i, j, k, l, ipoint
-  integer                       :: ij, kl
+  integer                       :: ij, kl, mn
+  integer                       :: info, n_svd, LWORK
   double precision              :: g
   double precision              :: t0, t1
-  double precision, allocatable :: A(:,:), b(:), A_inv(:,:)
+  double precision              :: cutoff_svd, D1_inv
+  double precision, allocatable :: A(:,:,:,:), b(:)
   double precision, allocatable :: Pa(:,:), Pb(:,:), Pt(:,:)
-  double precision, allocatable :: u1e_tmp(:)
+  double precision, allocatable :: u1e_tmp(:), tmp(:,:,:)
+  double precision, allocatable :: U(:,:), D(:), Vt(:,:), work(:)
 
 
   PROVIDE j1e_type
   PROVIDE int2_u2e_ao
   PROVIDE elec_alpha_num elec_beta_num elec_num
   PROVIDE mo_coef
 
+
+  cutoff_svd = 1d-10
+
   call wall_time(t0)
   print*, ' PROVIDING the representation of 1e-Jastrow in AOs x AOs ... '
 
@@ -169,98 +175,123 @@ subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit)
   ! --- --- ---
   ! get A
 
-  allocate(A(ao_num*ao_num,ao_num*ao_num))
+  allocate(tmp(n_points_final_grid,ao_num,ao_num))
+  allocate(A(ao_num,ao_num,ao_num,ao_num))
 
-  !$OMP PARALLEL                             &
-  !$OMP DEFAULT (NONE)                       &
-  !$OMP PRIVATE (i, j, k, l, ij, kl, ipoint) &
-  !$OMP SHARED (n_points_final_grid, ao_num, &
-  !$OMP         final_weight_at_r_vector, aos_in_r_array_transp, A)
+  !$OMP PARALLEL               &
+  !$OMP DEFAULT (NONE)         &
+  !$OMP PRIVATE (i, j, ipoint) &
+  !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp)
   !$OMP DO COLLAPSE(2)
-  do k = 1, ao_num
-    do l = 1, ao_num
-      kl = (k-1)*ao_num + l
-
-      do i = 1, ao_num
-        do j = 1, ao_num
-          ij = (i-1)*ao_num + j
-
-          A(ij,kl) = 0.d0
-          do ipoint = 1, n_points_final_grid
-            A(ij,kl) += final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) &
-                                                         * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,l)
-          enddo
-        enddo
+  do j = 1, ao_num
+    do i = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        tmp(ipoint,i,j) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j)
       enddo
     enddo
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
 
-!  print *, ' A'
-!  do ij = 1, ao_num*ao_num
-!    write(*, '(100000(f15.7))') (A(ij,kl), kl = 1, ao_num*ao_num)
-!  enddo
+  call dgemm( "T", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
+            , tmp(1,1,1), n_points_final_grid, tmp(1,1,1), n_points_final_grid  &
+            , 0.d0, A(1,1,1,1), ao_num*ao_num)
 
   ! --- --- ---
   ! get b
 
   allocate(b(ao_num*ao_num))
 
-  !$OMP PARALLEL                             &
-  !$OMP DEFAULT (NONE)                       &
-  !$OMP PRIVATE (i, j, ij, ipoint)           &
-  !$OMP SHARED (n_points_final_grid, ao_num, &
-  !$OMP         final_weight_at_r_vector, aos_in_r_array_transp, u1e_tmp, b)
-  !$OMP DO COLLAPSE(2)
-  do i = 1, ao_num
-    do j = 1, ao_num
-      ij = (i-1)*ao_num + j
-
-      b(ij) = 0.d0
-      do ipoint = 1, n_points_final_grid
-        b(ij) = b(ij) + final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) * u1e_tmp(ipoint)
-      enddo
-    enddo
+  do ipoint = 1, n_points_final_grid
+    u1e_tmp(ipoint) = dsqrt(final_weight_at_r_vector(ipoint)) * u1e_tmp(ipoint)
   enddo
-  !$OMP END DO
-  !$OMP END PARALLEL
+
+  call dgemv("T", n_points_final_grid, ao_num*ao_num, 1.d0, tmp(1,1,1), n_points_final_grid, u1e_tmp(1), 1, 0.d0, b(1), 1)
+  !call dgemm( "T", "N", ao_num*ao_num, 1, n_points_final_grid, 1.d0            &
+  !          , tmp(1,1,1), n_points_final_grid, u1e_tmp(1), n_points_final_grid &
+  !          , 0.d0, b(1), ao_num*ao_num)
 
   deallocate(u1e_tmp)
+  deallocate(tmp)
 
   ! --- --- ---
   ! solve Ax = b
 
-  allocate(A_inv(ao_num*ao_num,ao_num*ao_num))
-  !call get_inverse(A, ao_num*ao_num, ao_num*ao_num, A_inv, ao_num*ao_num)
-  call get_pseudo_inverse(A, ao_num*ao_num, ao_num*ao_num, ao_num*ao_num, A_inv, ao_num*ao_num, 5d-8)
+!  double precision, allocatable :: A_inv(:,:,:,:)
+!  allocate(A_inv(ao_num,ao_num,ao_num,ao_num))
+!  call get_pseudo_inverse(A(1,1,1,1), ao_num*ao_num, ao_num*ao_num, ao_num*ao_num, A_inv(1,1,1,1), ao_num*ao_num, cutoff_svd)
+!  A = A_inv
 
-  ! coef_fit = A_inv x b
-  call dgemv("N", ao_num*ao_num, ao_num*ao_num, 1.d0, A_inv, ao_num*ao_num, b, 1, 0.d0, coef_fit, 1)
+  allocate(D(ao_num*ao_num), U(ao_num*ao_num,ao_num*ao_num), Vt(ao_num*ao_num,ao_num*ao_num))
 
-  integer          :: mn
-  double precision :: tmp, acc, nrm
+  allocate(work(1))
+  lwork = -1
+  call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A(1,1,1,1), ao_num*ao_num &
+             , D(1), U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num, work, lwork, info)
+  if(info /= 0) then
+    print *,  info, ': SVD failed'
+    stop
+  endif
 
-  acc = 0.d0
-  nrm = 0.d0
-  do ij = 1, ao_num*ao_num
-    tmp = 0.d0
-    do kl = 1, ao_num*ao_num
-      tmp += A(ij,kl) * coef_fit(kl)
-    enddo
-    tmp = tmp - b(ij)
-    if(dabs(tmp) .gt. 1d-7) then
-      print*, ' problem found in fitting 1e-Jastrow'
-      print*, ij, tmp
-    endif
+  LWORK = max(5*ao_num*ao_num, int(WORK(1)))
+  deallocate(work)
+  allocate(work(lwork))
+  call dgesvd( 'S', 'A', ao_num*ao_num, ao_num*ao_num, A(1,1,1,1), ao_num*ao_num &
+             , D(1), U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num, work, lwork, info)
+  if(info /= 0) then
+    print *,  info, ':: SVD failed'
+    stop 1
+  endif
 
-    acc += dabs(tmp)
-    nrm += dabs(b(ij))
+  deallocate(work)
+
+  if(D(1) .lt. 1d-14) then
+    print*, ' largest singular value is very small:', D(1)
+    n_svd = 1
+  else
+    n_svd  = 0
+    D1_inv = 1.d0 / D(1)
+    do ij = 1, ao_num*ao_num
+      if(D(ij)*D1_inv > cutoff_svd) then
+        D(ij) = 1.d0 / D(ij)
+        n_svd = n_svd + 1
+      else
+        D(ij) = 0.d0
+      endif
+    enddo
+  endif
+  print*, ' n_svd = ', n_svd
+
+  !$OMP PARALLEL         &
+  !$OMP DEFAULT (NONE)   &
+  !$OMP PRIVATE (ij, kl) &
+  !$OMP SHARED (ao_num, n_svd, D, Vt)
+  !$OMP DO
+  do kl = 1, ao_num*ao_num
+    do ij = 1, n_svd
+      Vt(ij,kl) = Vt(ij,kl) * D(ij)
+    enddo
   enddo
-  print *, ' Relative Error (%) =', 100.d0*acc/nrm
+  !$OMP END DO
+  !$OMP END PARALLEL
 
+  ! A = A_inv
+  call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_svd, 1.d0 &
+            , U(1,1), ao_num*ao_num, Vt(1,1), ao_num*ao_num       &
+            , 0.d0, A(1,1,1,1), ao_num*ao_num)
 
-  deallocate(A, A_inv, b)
+  deallocate(D, U, Vt)
+
+
+  ! ---
+
+  ! coef_fit = A_inv x b
+  call dgemv("N", ao_num*ao_num, ao_num*ao_num, 1.d0, A(1,1,1,1), ao_num*ao_num, b(1), 1, 0.d0, coef_fit(1,1), 1)
+  !call dgemm( "N", "N", ao_num*ao_num, 1, ao_num*ao_num, 1.d0 &
+  !          , A(1,1,1,1), ao_num*ao_num, b(1), ao_num*ao_num  &
+  !          , 0.d0, coef_fit(1,1), ao_num*ao_num)
+
+  deallocate(A, b)
 
   call wall_time(t1)
   print*, ' END after (min) ', (t1-t0)/60.d0