diff --git a/bin/zcat b/bin/zcat deleted file mode 100755 index 7ccecf07a..000000000 --- a/bin/zcat +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# On Darwin: try gzcat if available, otherwise use Python - -if [[ $(uname -s) = Darwin ]] ; then - which gzcat &> /dev/null - if [[ $? -eq 0 ]] ; then - exec gzcat $@ - else - - exec python3 << EOF -import sys -import gzip -with gzip.open("$1", "rt") as f: - print(f.read()) -EOF - fi -else - SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" - command=$(which -a zcat | grep -v "$SCRIPTPATH/" | head -1) - exec $command $@ -fi - diff --git a/config/gfortran_debug_mkl.cfg b/config/gfortran_debug_mkl.cfg new file mode 100644 index 000000000..1dc3f2b25 --- /dev/null +++ b/config/gfortran_debug_mkl.cfg @@ -0,0 +1,63 @@ +# Common flags +############## +# +# -ffree-line-length-none : Needed for IRPF90 which produces long lines +# -lblas -llapack : Link with libblas and liblapack libraries provided by the system +# -I . : Include the curent directory (Mandatory) +# +# --ninja : Allow the utilisation of ninja. (Mandatory) +# --align=32 : Align all provided arrays on a 32-byte boundary +# +# +[COMMON] +FC : gfortran -g -ffree-line-length-none -I . -fPIC -std=legacy +LAPACK_LIB : -I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_core -lpthread -lm -ldl -lmkl_gnu_thread -lgomp -fopenmp +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations. +# It also enables optimizations that are not valid +# for all standard-compliant programs. It turns on +# -ffast-math and the Fortran-specific +# -fno-protect-parens and -fstack-arrays. +[OPT] +FCFLAGS : -Ofast + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -Ofast + +# Debugging flags +################# +# +# -fcheck=all : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# +[DEBUG] +#FCFLAGS : -g -msse4.2 -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized -fbacktrace -ffpe-trap=zero,overflow,underflow -finit-real=nan +FCFLAGS : -g -mavx -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized -fbacktrace -ffpe-trap=zero,overflow -finit-real=nan + +# OpenMP flags +################# +# +[OPENMP] +FC : -fopenmp +IRPF90_FLAGS : --openmp + diff --git a/configure b/configure index 41c0123df..43ca9f6d7 100755 --- a/configure +++ b/configure @@ -40,14 +40,16 @@ Usage: $(basename $0) -c $(basename $0) -h $(basename $0) -i + $(basename $0) -g [nvidia|intel|none] Options: - -c Define a COMPILATION configuration file, - in "${QP_ROOT}/config/". - -h Print the HELP message - -i INSTALL . Use at your OWN RISK: - no support will be provided for the installation of - dependencies. + -c Define a COMPILATION configuration file, + in "${QP_ROOT}/config/". + -h Print the HELP message + -i INSTALL . Use at your OWN RISK: + no support will be provided for the installation of + dependencies. + -g [nvidia|intel|none] Choose GPU acceleration Example: ./$(basename $0) -c config/gfortran.cfg @@ -83,7 +85,7 @@ function execute () { PACKAGES="" -while getopts "d:c:i:h" c ; do +while getopts "d:c:i:g:h" c ; do case "$c" in c) case "$OPTARG" in @@ -100,6 +102,9 @@ while getopts "d:c:i:h" c ; do "") help ; break;; *) PACKAGES="${PACKAGE} $OPTARG" esac;; + g) + GPU=$OPTARG; + break;; h) help exit 0;; @@ -109,6 +114,27 @@ while getopts "d:c:i:h" c ; do esac done +# Handle GPU acceleration +rm -f ${QP_ROOT}/src/gpu_arch +case "$GPU" in + amd) # AMD + echo "Activating AMD GPU acceleration" + ln -s ${QP_ROOT}/plugins/local/gpu_amd ${QP_ROOT}/src/gpu_arch + ;; + intel) # Intel + echo "Activating Intel GPU acceleration (EXPERIMENTAL)" + ln -s ${QP_ROOT}/plugins/local/gpu_intel ${QP_ROOT}/src/gpu_arch + ;; + nvidia) # Nvidia + echo "Activating Nvidia GPU acceleration" + ln -s ${QP_ROOT}/plugins/local/gpu_nvidia ${QP_ROOT}/src/gpu_arch + ;; + *) # No Acceleration + echo "Disabling GPU acceleration" + ln -s ${QP_ROOT}/plugins/local/gpu_x86 ${QP_ROOT}/src/gpu_arch + ;; +esac + # Trim leading and trailing spaces PACKAGES=$(echo $PACKAGES | xargs) diff --git a/etc/paths.rc b/etc/paths.rc index dc1741e8c..843ec383b 100644 --- a/etc/paths.rc +++ b/etc/paths.rc @@ -28,6 +28,15 @@ function qp_prepend_export () { fi } +function qp_append_export () { + eval "value_1="\${$1}"" + if [[ -z $value_1 ]] ; then + echo "${2}:" + else + echo "${value_1}:${2}" + fi +} + export PYTHONPATH=$(qp_prepend_export "PYTHONPATH" "${QP_EZFIO}/Python":"${QP_PYTHON}") export PATH=$(qp_prepend_export "PATH" "${QP_PYTHON}":"${QP_ROOT}"/bin:"${QP_ROOT}"/ocaml) diff --git a/external/irpf90 b/external/irpf90 index beac61534..4ab1b175f 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit beac615343f421bd6c0571a408ba389a6d5a32ac +Subproject commit 4ab1b175fc7ed0d96c1912f13dc53579b24157a6 diff --git a/plugins/local/basis_correction/51.basis_c.bats b/plugins/local/basis_correction/51.basis_c.bats index 914b482b5..1e20bae38 100644 --- a/plugins/local/basis_correction/51.basis_c.bats +++ b/plugins/local/basis_correction/51.basis_c.bats @@ -37,14 +37,6 @@ function run_sd() { eq $energy1 $1 $thresh } -@test "O2 CAS" { - qp set_file o2_cas.gms.ezfio - qp set_mo_class -c "[1-2]" -a "[3-10]" -d "[11-46]" - run -149.72435425 3.e-4 10000 - qp set_mo_class -c "[1-2]" -a "[3-10]" -v "[11-46]" - run_md -0.1160222327 1.e-6 -} - @test "LiF RHF" { qp set_file lif.ezfio diff --git a/plugins/local/basis_correction/basis_correction.irp.f b/plugins/local/basis_correction/basis_correction.irp.f index a7ea7244d..f17b5d5bb 100644 --- a/plugins/local/basis_correction/basis_correction.irp.f +++ b/plugins/local/basis_correction/basis_correction.irp.f @@ -7,10 +7,6 @@ program basis_correction touch read_wf no_core_density = .True. touch no_core_density - if(io_mo_two_e_integrals .ne. "Read")then - provide ao_two_e_integrals_in_map - endif - provide mo_two_e_integrals_in_map call print_basis_correction end diff --git a/plugins/local/basis_correction/print_routine.irp.f b/plugins/local/basis_correction/print_routine.irp.f index 96faba301..b3b38673c 100644 --- a/plugins/local/basis_correction/print_routine.irp.f +++ b/plugins/local/basis_correction/print_routine.irp.f @@ -22,7 +22,7 @@ subroutine print_basis_correction print*, '****************************************' print*, '****************************************' print*, 'mu_of_r_potential = ',mu_of_r_potential - if(mu_of_r_potential.EQ."hf")then + if(mu_of_r_potential.EQ."hf".or.mu_of_r_potential.EQ."hf_old".or.mu_of_r_potential.EQ."hf_sparse")then print*, '' print*,'Using a HF-like two-body density to define mu(r)' print*,'This assumes that HF is a qualitative representation of the wave function ' diff --git a/plugins/local/basis_correction/test_chol_bas.irp.f b/plugins/local/basis_correction/test_chol_bas.irp.f new file mode 100644 index 000000000..076d888ca --- /dev/null +++ b/plugins/local/basis_correction/test_chol_bas.irp.f @@ -0,0 +1,18 @@ +program pouet + implicit none + call test +end +subroutine test + implicit none +! provide mos_times_cholesky_r1 +! provide mos_times_cholesky_r2 + integer :: ipoint + double precision :: accu,weight + accu = 0.d0 + do ipoint = 1, n_points_final_grid + weight = final_weight_at_r_vector(ipoint) +! accu += dabs(mu_of_r_hf(ipoint) - mu_of_r_hf_old(ipoint)) * weight + accu += dabs(f_hf_cholesky_sparse(ipoint) - f_hf_cholesky(ipoint)) * weight + enddo + print*,'accu = ',accu +end diff --git a/plugins/local/bi_ort_ints/bi_ort_ints.irp.f b/plugins/local/bi_ort_ints/bi_ort_ints.irp.f index 0349c731c..0398a18fe 100644 --- a/plugins/local/bi_ort_ints/bi_ort_ints.irp.f +++ b/plugins/local/bi_ort_ints/bi_ort_ints.irp.f @@ -17,12 +17,15 @@ program bi_ort_ints ! call test_3e ! call test_5idx ! call test_5idx2 - call test_4idx() +! call test_4idx() !call test_4idx_n4() !call test_4idx2() !call test_5idx2 !call test_5idx + call test_mos_in_r() + call test_int2_grad1_u12_bimo_t() + end subroutine test_5idx2 @@ -472,4 +475,94 @@ subroutine test_4idx() return end +! --- + +subroutine test_mos_in_r() + + implicit none + + integer :: i, j + double precision :: err_tot, nrm_tot, err_loc, acc_thr + + PROVIDE mos_l_in_r_array_transp_old mos_r_in_r_array_transp_old + PROVIDE mos_l_in_r_array_transp mos_r_in_r_array_transp + + acc_thr = 1d-13 + + err_tot = 0.d0 + nrm_tot = 0.d0 + do i = 1, mo_num + do j = 1, n_points_final_grid + err_loc = dabs(mos_l_in_r_array_transp_old(j,i) - mos_l_in_r_array_transp(j,i)) + if(err_loc > acc_thr) then + print*, " error on", j, i + print*, " old res", mos_l_in_r_array_transp_old(j,i) + print*, " new res", mos_l_in_r_array_transp (j,i) + stop + endif + err_tot = err_tot + err_loc + nrm_tot = nrm_tot + dabs(mos_l_in_r_array_transp_old(j,i)) + enddo + enddo + print *, ' absolute accuracy on mos_l_in_r_array_transp (%) =', 100.d0 * err_tot / nrm_tot + + err_tot = 0.d0 + nrm_tot = 0.d0 + do i = 1, mo_num + do j = 1, n_points_final_grid + err_loc = dabs(mos_r_in_r_array_transp_old(j,i) - mos_r_in_r_array_transp(j,i)) + if(err_loc > acc_thr) then + print*, " error on", j, i + print*, " old res", mos_r_in_r_array_transp_old(j,i) + print*, " new res", mos_r_in_r_array_transp (j,i) + stop + endif + err_tot = err_tot + err_loc + nrm_tot = nrm_tot + dabs(mos_r_in_r_array_transp_old(j,i)) + enddo + enddo + print *, ' absolute accuracy on mos_r_in_r_array_transp (%) =', 100.d0 * err_tot / nrm_tot + + return +end + +! --- + +subroutine test_int2_grad1_u12_bimo_t() + + implicit none + integer :: i, j, ipoint, m + double precision :: err_tot, nrm_tot, err_loc, acc_thr + + PROVIDE int2_grad1_u12_bimo_t_old + PROVIDE int2_grad1_u12_bimo_t + + acc_thr = 1d-13 + + err_tot = 0.d0 + nrm_tot = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do m = 1, 3 + do ipoint = 1, n_points_final_grid + err_loc = dabs(int2_grad1_u12_bimo_t_old(ipoint,m,j,i) - int2_grad1_u12_bimo_t(ipoint,m,j,i)) + if(err_loc > acc_thr) then + print*, " error on", ipoint, m, j, i + print*, " old res", int2_grad1_u12_bimo_t_old(ipoint,m,j,i) + print*, " new res", int2_grad1_u12_bimo_t (ipoint,m,j,i) + stop + endif + err_tot = err_tot + err_loc + nrm_tot = nrm_tot + dabs(int2_grad1_u12_bimo_t_old(ipoint,m,j,i)) + enddo + enddo + enddo + enddo + print *, ' absolute accuracy on int2_grad1_u12_bimo_t (%) =', 100.d0 * err_tot / nrm_tot + + return +end + +! --- + diff --git a/plugins/local/bi_ort_ints/no_dressing.irp.f b/plugins/local/bi_ort_ints/no_dressing.irp.f index bd225274e..3bbf31f97 100644 --- a/plugins/local/bi_ort_ints/no_dressing.irp.f +++ b/plugins/local/bi_ort_ints/no_dressing.irp.f @@ -322,6 +322,12 @@ BEGIN_PROVIDER [double precision, noL_0e] + BEGIN_DOC + ! + ! < Phi_left | L | Phi_right > + ! + END_DOC + implicit none integer :: i, j, k, ipoint double precision :: t0, t1 @@ -330,10 +336,6 @@ double precision, allocatable :: tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:) double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:) - - call wall_time(t0) - print*, " Providing noL_0e ..." - if(elec_alpha_num .eq. elec_beta_num) then allocate(tmp(elec_beta_num)) @@ -708,10 +710,7 @@ endif - call wall_time(t1) - print*, " Wall time for noL_0e (min) = ", (t1 - t0)/60.d0 - - print*, " noL_0e = ", noL_0e + print*, " noL_0e =", noL_0e END_PROVIDER diff --git a/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f b/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f index 51f0cba49..dc7642b01 100644 --- a/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/plugins/local/bi_ort_ints/semi_num_ints_mo.irp.f @@ -1,350 +1,54 @@ ! --- -! TODO :: optimization : transform into a DGEMM - -BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu, (mo_num, mo_num, n_points_final_grid)] - - BEGIN_DOC - ! - ! mo_v_ki_bi_ortho_erf_rk_cst_mu(k,i,ip) = int dr chi_k(r) phi_i(r) (erf(mu |r - R_ip|) - 1 )/(2|r - R_ip|) on the BI-ORTHO MO basis - ! - ! where phi_k(r) is a LEFT MOs and phi_i(r) is a RIGHT MO - ! - ! R_ip = the "ip"-th point of the DFT Grid - ! - END_DOC - - implicit none - integer :: ipoint - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid,v_ij_erf_rk_cst_mu,mo_v_ki_bi_ortho_erf_rk_cst_mu) - !$OMP DO SCHEDULE (dynamic) - do ipoint = 1, n_points_final_grid - call ao_to_mo_bi_ortho( v_ij_erf_rk_cst_mu (1,1,ipoint), size(v_ij_erf_rk_cst_mu, 1) & - , mo_v_ki_bi_ortho_erf_rk_cst_mu(1,1,ipoint), size(mo_v_ki_bi_ortho_erf_rk_cst_mu, 1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - mo_v_ki_bi_ortho_erf_rk_cst_mu = mo_v_ki_bi_ortho_erf_rk_cst_mu * 0.5d0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_points_final_grid, mo_num, mo_num)] - - BEGIN_DOC - ! - ! int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/(2|r - R|) on the BI-ORTHO MO basis - ! - END_DOC - - implicit none - integer :: ipoint, i, j - - do i = 1, mo_num - do j = 1, mo_num - do ipoint = 1, n_points_final_grid - mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,j,i) = mo_v_ki_bi_ortho_erf_rk_cst_mu(j,i,ipoint) - enddo - enddo - enddo - - !FREE mo_v_ki_bi_ortho_erf_rk_cst_mu - -END_PROVIDER - -! --- - -! TODO :: optimization : transform into a DGEMM - -BEGIN_PROVIDER [ double precision, mo_x_v_ki_bi_ortho_erf_rk_cst_mu, (mo_num, mo_num, 3, n_points_final_grid)] - - BEGIN_DOC - ! - ! mo_x_v_ki_bi_ortho_erf_rk_cst_mu(k,i,m,ip) = int dr x(m) * chi_k(r) phi_i(r) (erf(mu |r - R_ip|) - 1)/2|r - R_ip| on the BI-ORTHO MO basis - ! - ! where chi_k(r)/phi_i(r) are left/right MOs, m=1 => x(m) = x, m=2 => x(m) = y, m=3 => x(m) = z, - ! - ! R_ip = the "ip"-th point of the DFT Grid - ! - END_DOC - - implicit none - integer :: ipoint - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid,x_v_ij_erf_rk_cst_mu_transp,mo_x_v_ki_bi_ortho_erf_rk_cst_mu) - !$OMP DO SCHEDULE (dynamic) - do ipoint = 1, n_points_final_grid - - call ao_to_mo_bi_ortho( x_v_ij_erf_rk_cst_mu_transp (1,1,1,ipoint), size(x_v_ij_erf_rk_cst_mu_transp, 1) & - , mo_x_v_ki_bi_ortho_erf_rk_cst_mu(1,1,1,ipoint), size(mo_x_v_ki_bi_ortho_erf_rk_cst_mu, 1) ) - call ao_to_mo_bi_ortho( x_v_ij_erf_rk_cst_mu_transp (1,1,2,ipoint), size(x_v_ij_erf_rk_cst_mu_transp, 1) & - , mo_x_v_ki_bi_ortho_erf_rk_cst_mu(1,1,2,ipoint), size(mo_x_v_ki_bi_ortho_erf_rk_cst_mu, 1) ) - call ao_to_mo_bi_ortho( x_v_ij_erf_rk_cst_mu_transp (1,1,3,ipoint), size(x_v_ij_erf_rk_cst_mu_transp, 1) & - , mo_x_v_ki_bi_ortho_erf_rk_cst_mu(1,1,3,ipoint), size(mo_x_v_ki_bi_ortho_erf_rk_cst_mu, 1) ) - - enddo - !$OMP END DO - !$OMP END PARALLEL - - mo_x_v_ki_bi_ortho_erf_rk_cst_mu = 0.5d0 * mo_x_v_ki_bi_ortho_erf_rk_cst_mu - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, n_points_final_grid)] +BEGIN_PROVIDER [double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] implicit none - integer :: i, j, ipoint - double precision :: wall0, wall1 - - print *, ' providing int2_grad1_u12_ao_transp ...' - call wall_time(wall0) - - if(test_cycle_tc) then - - PROVIDE int2_grad1_u12_ao_test - - do ipoint = 1, n_points_final_grid - do i = 1, ao_num - do j = 1, ao_num - int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,1) - int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,2) - int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,3) - enddo - enddo - enddo - - FREE int2_grad1_u12_ao_test - - else - - PROVIDE int2_grad1_u12_ao - - do ipoint = 1, n_points_final_grid - do i = 1, ao_num - do j = 1, ao_num - int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(j,i,ipoint,1) - int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(j,i,ipoint,2) - int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(j,i,ipoint,3) - enddo - enddo - enddo - - endif - - call wall_time(wall1) - print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] - - implicit none - integer :: ipoint - double precision :: wall0, wall1 + integer :: i, j, ipoint + double precision :: tt1, tt2 + double precision, allocatable :: tmp(:,:,:,:) PROVIDE mo_l_coef mo_r_coef - PROVIDE int2_grad1_u12_ao_transp + PROVIDE int2_grad1_u12_ao - !print *, ' providing int2_grad1_u12_bimo_transp' - !call wall_time(wall0) + call wall_time(tt1) + + allocate(tmp(mo_num,mo_num,n_points_final_grid,3)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp) + !$OMP SHARED (ao_num, mo_num, n_points_final_grid, int2_grad1_u12_ao, tmp) !$OMP DO SCHEDULE (dynamic) - do ipoint = 1, n_points_final_grid - call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,1,ipoint), size(int2_grad1_u12_ao_transp , 1) & - , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) - call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,2,ipoint), size(int2_grad1_u12_ao_transp , 1) & - , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) - call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,3,ipoint), size(int2_grad1_u12_ao_transp , 1) & - , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) - enddo + do ipoint = 1, n_points_final_grid + call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,1), ao_num, tmp(1,1,ipoint,1), mo_num) + call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,2), ao_num, tmp(1,1,ipoint,2), mo_num) + call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,3), ao_num, tmp(1,1,ipoint,3), mo_num) + enddo !$OMP END DO !$OMP END PARALLEL - !call wall_time(wall1) - !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 - !call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] - - implicit none - integer :: i, j, ipoint - double precision :: wall0, wall1 - - !call wall_time(wall0) - !print *, ' Providing int2_grad1_u12_bimo_t ...' - - PROVIDE mo_l_coef mo_r_coef - PROVIDE int2_grad1_u12_bimo_transp - + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, tmp, int2_grad1_u12_bimo_t) + !$OMP DO COLLAPSE(2) SCHEDULE (dynamic) do ipoint = 1, n_points_final_grid do i = 1, mo_num do j = 1, mo_num - int2_grad1_u12_bimo_t(ipoint,1,j,i) = int2_grad1_u12_bimo_transp(j,i,1,ipoint) - int2_grad1_u12_bimo_t(ipoint,2,j,i) = int2_grad1_u12_bimo_transp(j,i,2,ipoint) - int2_grad1_u12_bimo_t(ipoint,3,j,i) = int2_grad1_u12_bimo_transp(j,i,3,ipoint) + int2_grad1_u12_bimo_t(ipoint,1,j,i) = tmp(j,i,ipoint,1) + int2_grad1_u12_bimo_t(ipoint,2,j,i) = tmp(j,i,ipoint,2) + int2_grad1_u12_bimo_t(ipoint,3,j,i) = tmp(j,i,ipoint,3) enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL - FREE int2_grad1_u12_bimo_transp - - !call wall_time(wall1) - !print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 - !call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)] - - implicit none - integer :: i, j, ipoint - - PROVIDE int2_grad1_u12_ao - - do ipoint = 1, n_points_final_grid - do i = 1, ao_num - do j = 1, ao_num - int2_grad1_u12_ao_t(ipoint,1,j,i) = int2_grad1_u12_ao(j,i,ipoint,1) - int2_grad1_u12_ao_t(ipoint,2,j,i) = int2_grad1_u12_ao(j,i,ipoint,2) - int2_grad1_u12_ao_t(ipoint,3,j,i) = int2_grad1_u12_ao(j,i,ipoint,3) - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_points_final_grid, 3, mo_num, mo_num)] - - implicit none - integer :: i, j, ipoint - - do i = 1, mo_num - do j = 1, mo_num - do ipoint = 1, n_points_final_grid - mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,1,j,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu(j,i,1,ipoint) - mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,2,j,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu(j,i,2,ipoint) - mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,3,j,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu(j,i,3,ipoint) - enddo - enddo - enddo -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk, (n_points_final_grid, 3, mo_num, mo_num)] - - BEGIN_DOC - ! - ! x_W_ki_bi_ortho_erf_rk(ip,m,k,i) = \int dr chi_k(r) \frac{(1 - erf(mu |r-R_ip|))}{2|r-R_ip|} (x(m)-R_ip(m)) phi_i(r) ON THE BI-ORTHO MO BASIS - ! - ! where chi_k(r)/phi_i(r) are left/right MOs, m=1 => X(m) = x, m=2 => X(m) = y, m=3 => X(m) = z, - ! - ! R_ip = the "ip"-th point of the DFT Grid - END_DOC - - implicit none - include 'constants.include.F' - - integer :: ipoint, m, i, k - double precision :: xyz - double precision :: wall0, wall1 - - print*, ' providing x_W_ki_bi_ortho_erf_rk ...' - call wall_time(wall0) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint,m,i,k,xyz) & - !$OMP SHARED (x_W_ki_bi_ortho_erf_rk,n_points_final_grid,mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_num,final_grid_points) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do k = 1, mo_num - do m = 1, 3 - do ipoint = 1, n_points_final_grid - xyz = final_grid_points(m,ipoint) - x_W_ki_bi_ortho_erf_rk(ipoint,m,k,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,m,k,i) - xyz * mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,k,i) - enddo - enddo - enddo - enddo - - !$OMP END DO - !$OMP END PARALLEL - - ! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu_transp - ! FREE mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp - - call wall_time(wall1) - print *, ' time to provide x_W_ki_bi_ortho_erf_rk = ', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk_diag, (n_points_final_grid, 3, mo_num)] - BEGIN_DOC - ! x_W_ki_bi_ortho_erf_rk_diag(ip,m,i) = \int dr chi_i(r) (1 - erf(mu |r-R_ip|)) (x(m)-X(m)_ip) phi_i(r) ON THE BI-ORTHO MO BASIS -! -! where chi_k(r)/phi_i(r) are left/right MOs, m=1 => X(m) = x, m=2 => X(m) = y, m=3 => X(m) = z, -! -! R_ip = the "ip"-th point of the DFT Grid - END_DOC - - implicit none - include 'constants.include.F' - - integer :: ipoint, m, i - double precision :: xyz - double precision :: wall0, wall1 - - print*,'providing x_W_ki_bi_ortho_erf_rk_diag ...' - call wall_time(wall0) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint,m,i,xyz) & - !$OMP SHARED (x_W_ki_bi_ortho_erf_rk_diag,n_points_final_grid,mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_num,final_grid_points) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do m = 1, 3 - do ipoint = 1, n_points_final_grid - xyz = final_grid_points(m,ipoint) - x_W_ki_bi_ortho_erf_rk_diag(ipoint,m,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,m,i,i) - xyz * mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,i,i) - enddo - enddo - enddo - - !$OMP END DO - !$OMP END PARALLEL + deallocate(tmp) - call wall_time(wall1) - print*,'time to provide x_W_ki_bi_ortho_erf_rk_diag = ',wall1 - wall0 + call wall_time(tt2) + write(*,"(A,2X,F15.7)") ' wall time for int2_grad1_u12_bimo_t (sec) = ', (tt2 - tt1) END_PROVIDER diff --git a/plugins/local/bi_ort_ints/semi_num_ints_mo_old.irp.f b/plugins/local/bi_ort_ints/semi_num_ints_mo_old.irp.f new file mode 100644 index 000000000..c2b9ad6d9 --- /dev/null +++ b/plugins/local/bi_ort_ints/semi_num_ints_mo_old.irp.f @@ -0,0 +1,362 @@ + +! --- + +! TODO :: optimization : transform into a DGEMM + +BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu, (mo_num, mo_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! mo_v_ki_bi_ortho_erf_rk_cst_mu(k,i,ip) = int dr chi_k(r) phi_i(r) (erf(mu |r - R_ip|) - 1 )/(2|r - R_ip|) on the BI-ORTHO MO basis + ! + ! where phi_k(r) is a LEFT MOs and phi_i(r) is a RIGHT MO + ! + ! R_ip = the "ip"-th point of the DFT Grid + ! + END_DOC + + implicit none + integer :: ipoint + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid,v_ij_erf_rk_cst_mu,mo_v_ki_bi_ortho_erf_rk_cst_mu) + !$OMP DO SCHEDULE (dynamic) + do ipoint = 1, n_points_final_grid + call ao_to_mo_bi_ortho( v_ij_erf_rk_cst_mu (1,1,ipoint), size(v_ij_erf_rk_cst_mu, 1) & + , mo_v_ki_bi_ortho_erf_rk_cst_mu(1,1,ipoint), size(mo_v_ki_bi_ortho_erf_rk_cst_mu, 1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + mo_v_ki_bi_ortho_erf_rk_cst_mu = mo_v_ki_bi_ortho_erf_rk_cst_mu * 0.5d0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_points_final_grid, mo_num, mo_num)] + + BEGIN_DOC + ! + ! int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/(2|r - R|) on the BI-ORTHO MO basis + ! + END_DOC + + implicit none + integer :: ipoint, i, j + + do i = 1, mo_num + do j = 1, mo_num + do ipoint = 1, n_points_final_grid + mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,j,i) = mo_v_ki_bi_ortho_erf_rk_cst_mu(j,i,ipoint) + enddo + enddo + enddo + + !FREE mo_v_ki_bi_ortho_erf_rk_cst_mu + +END_PROVIDER + +! --- + +! TODO :: optimization : transform into a DGEMM + +BEGIN_PROVIDER [ double precision, mo_x_v_ki_bi_ortho_erf_rk_cst_mu, (mo_num, mo_num, 3, n_points_final_grid)] + + BEGIN_DOC + ! + ! mo_x_v_ki_bi_ortho_erf_rk_cst_mu(k,i,m,ip) = int dr x(m) * chi_k(r) phi_i(r) (erf(mu |r - R_ip|) - 1)/2|r - R_ip| on the BI-ORTHO MO basis + ! + ! where chi_k(r)/phi_i(r) are left/right MOs, m=1 => x(m) = x, m=2 => x(m) = y, m=3 => x(m) = z, + ! + ! R_ip = the "ip"-th point of the DFT Grid + ! + END_DOC + + implicit none + integer :: ipoint + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid,x_v_ij_erf_rk_cst_mu_transp,mo_x_v_ki_bi_ortho_erf_rk_cst_mu) + !$OMP DO SCHEDULE (dynamic) + do ipoint = 1, n_points_final_grid + + call ao_to_mo_bi_ortho( x_v_ij_erf_rk_cst_mu_transp (1,1,1,ipoint), size(x_v_ij_erf_rk_cst_mu_transp, 1) & + , mo_x_v_ki_bi_ortho_erf_rk_cst_mu(1,1,1,ipoint), size(mo_x_v_ki_bi_ortho_erf_rk_cst_mu, 1) ) + call ao_to_mo_bi_ortho( x_v_ij_erf_rk_cst_mu_transp (1,1,2,ipoint), size(x_v_ij_erf_rk_cst_mu_transp, 1) & + , mo_x_v_ki_bi_ortho_erf_rk_cst_mu(1,1,2,ipoint), size(mo_x_v_ki_bi_ortho_erf_rk_cst_mu, 1) ) + call ao_to_mo_bi_ortho( x_v_ij_erf_rk_cst_mu_transp (1,1,3,ipoint), size(x_v_ij_erf_rk_cst_mu_transp, 1) & + , mo_x_v_ki_bi_ortho_erf_rk_cst_mu(1,1,3,ipoint), size(mo_x_v_ki_bi_ortho_erf_rk_cst_mu, 1) ) + + enddo + !$OMP END DO + !$OMP END PARALLEL + + mo_x_v_ki_bi_ortho_erf_rk_cst_mu = 0.5d0 * mo_x_v_ki_bi_ortho_erf_rk_cst_mu + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, n_points_final_grid)] + + implicit none + integer :: i, j, ipoint + double precision :: wall0, wall1 + + !print *, ' providing int2_grad1_u12_ao_transp ...' + !call wall_time(wall0) + + if(test_cycle_tc) then + + PROVIDE int2_grad1_u12_ao_test + + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,1) + int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,2) + int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,3) + enddo + enddo + enddo + + FREE int2_grad1_u12_ao_test + + else + + PROVIDE int2_grad1_u12_ao + + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(j,i,ipoint,1) + int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(j,i,ipoint,2) + int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(j,i,ipoint,3) + enddo + enddo + enddo + + endif + + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_ao_transp (min) = ', (wall1 - wall0) / 60.d0 + !call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] + + implicit none + integer :: ipoint + double precision :: wall0, wall1 + + PROVIDE mo_l_coef mo_r_coef + PROVIDE int2_grad1_u12_ao_transp + + !print *, ' providing int2_grad1_u12_bimo_transp ...' + !call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp) + !$OMP DO SCHEDULE (dynamic) + do ipoint = 1, n_points_final_grid + call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,1,ipoint), size(int2_grad1_u12_ao_transp , 1) & + , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) + call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,2,ipoint), size(int2_grad1_u12_ao_transp , 1) & + , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) + call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,3,ipoint), size(int2_grad1_u12_ao_transp , 1) & + , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !FREE int2_grad1_u12_ao_transp + + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_bimo_transp (min) =', (wall1 - wall0) / 60.d0 + !call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, int2_grad1_u12_bimo_t_old, (n_points_final_grid, 3, mo_num, mo_num)] + + implicit none + integer :: i, j, ipoint + double precision :: wall0, wall1 + + !call wall_time(wall0) + !print *, ' providing int2_grad1_u12_bimo_t_old ...' + + PROVIDE mo_l_coef mo_r_coef + PROVIDE int2_grad1_u12_bimo_transp + + do ipoint = 1, n_points_final_grid + do i = 1, mo_num + do j = 1, mo_num + int2_grad1_u12_bimo_t_old(ipoint,1,j,i) = int2_grad1_u12_bimo_transp(j,i,1,ipoint) + int2_grad1_u12_bimo_t_old(ipoint,2,j,i) = int2_grad1_u12_bimo_transp(j,i,2,ipoint) + int2_grad1_u12_bimo_t_old(ipoint,3,j,i) = int2_grad1_u12_bimo_transp(j,i,3,ipoint) + enddo + enddo + enddo + + FREE int2_grad1_u12_bimo_transp + + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_bimo_t_old (min) =', (wall1 - wall0) / 60.d0 + !call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)] + + implicit none + integer :: i, j, ipoint + double precision :: wall0, wall1 + + !call wall_time(wall0) + !print *, ' providing int2_grad1_u12_ao_t ...' + + PROVIDE int2_grad1_u12_ao + + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + int2_grad1_u12_ao_t(ipoint,1,j,i) = int2_grad1_u12_ao(j,i,ipoint,1) + int2_grad1_u12_ao_t(ipoint,2,j,i) = int2_grad1_u12_ao(j,i,ipoint,2) + int2_grad1_u12_ao_t(ipoint,3,j,i) = int2_grad1_u12_ao(j,i,ipoint,3) + enddo + enddo + enddo + + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_ao_t (min) =', (wall1 - wall0) / 60.d0 + !call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_points_final_grid, 3, mo_num, mo_num)] + + implicit none + integer :: i, j, ipoint + + do i = 1, mo_num + do j = 1, mo_num + do ipoint = 1, n_points_final_grid + mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,1,j,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu(j,i,1,ipoint) + mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,2,j,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu(j,i,2,ipoint) + mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,3,j,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu(j,i,3,ipoint) + enddo + enddo + enddo +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk, (n_points_final_grid, 3, mo_num, mo_num)] + + BEGIN_DOC + ! + ! x_W_ki_bi_ortho_erf_rk(ip,m,k,i) = \int dr chi_k(r) \frac{(1 - erf(mu |r-R_ip|))}{2|r-R_ip|} (x(m)-R_ip(m)) phi_i(r) ON THE BI-ORTHO MO BASIS + ! + ! where chi_k(r)/phi_i(r) are left/right MOs, m=1 => X(m) = x, m=2 => X(m) = y, m=3 => X(m) = z, + ! + ! R_ip = the "ip"-th point of the DFT Grid + END_DOC + + implicit none + include 'constants.include.F' + + integer :: ipoint, m, i, k + double precision :: xyz + double precision :: wall0, wall1 + + !print*, ' providing x_W_ki_bi_ortho_erf_rk ...' + !call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,m,i,k,xyz) & + !$OMP SHARED (x_W_ki_bi_ortho_erf_rk,n_points_final_grid,mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_num,final_grid_points) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do k = 1, mo_num + do m = 1, 3 + do ipoint = 1, n_points_final_grid + xyz = final_grid_points(m,ipoint) + x_W_ki_bi_ortho_erf_rk(ipoint,m,k,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,m,k,i) - xyz * mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,k,i) + enddo + enddo + enddo + enddo + + !$OMP END DO + !$OMP END PARALLEL + + ! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu_transp + ! FREE mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp + + !call wall_time(wall1) + !print *, ' time to provide x_W_ki_bi_ortho_erf_rk = ', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk_diag, (n_points_final_grid, 3, mo_num)] + BEGIN_DOC + ! x_W_ki_bi_ortho_erf_rk_diag(ip,m,i) = \int dr chi_i(r) (1 - erf(mu |r-R_ip|)) (x(m)-X(m)_ip) phi_i(r) ON THE BI-ORTHO MO BASIS +! +! where chi_k(r)/phi_i(r) are left/right MOs, m=1 => X(m) = x, m=2 => X(m) = y, m=3 => X(m) = z, +! +! R_ip = the "ip"-th point of the DFT Grid + END_DOC + + implicit none + include 'constants.include.F' + + integer :: ipoint, m, i + double precision :: xyz + double precision :: wall0, wall1 + + !print*,'providing x_W_ki_bi_ortho_erf_rk_diag ...' + !call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,m,i,xyz) & + !$OMP SHARED (x_W_ki_bi_ortho_erf_rk_diag,n_points_final_grid,mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_num,final_grid_points) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do m = 1, 3 + do ipoint = 1, n_points_final_grid + xyz = final_grid_points(m,ipoint) + x_W_ki_bi_ortho_erf_rk_diag(ipoint,m,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,m,i,i) - xyz * mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,i,i) + enddo + enddo + enddo + + !$OMP END DO + !$OMP END PARALLEL + + !call wall_time(wall1) + !print*,'time to provide x_W_ki_bi_ortho_erf_rk_diag = ',wall1 - wall0 + +END_PROVIDER + +! --- + diff --git a/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f b/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f index 726e48ba3..73e5a611e 100644 --- a/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/plugins/local/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -123,7 +123,7 @@ subroutine give_integrals_3_body_bi_ort_spin( n, sigma_n, l, sigma_l, k, sigma_k endif return -end subroutine give_integrals_3_body_bi_ort_spin +end ! --- @@ -168,7 +168,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) integral = integral + tmp * final_weight_at_r_vector(ipoint) enddo -end subroutine give_integrals_3_body_bi_ort +end ! --- diff --git a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f index 5e6a24e96..e27fdb7fc 100644 --- a/plugins/local/bi_ort_ints/total_twoe_pot.irp.f +++ b/plugins/local/bi_ort_ints/total_twoe_pot.irp.f @@ -16,10 +16,10 @@ double precision function bi_ortho_mo_ints(l, k, j, i) integer :: m, n, p, q bi_ortho_mo_ints = 0.d0 - do m = 1, ao_num - do p = 1, ao_num - do n = 1, ao_num - do q = 1, ao_num + do p = 1, ao_num + do m = 1, ao_num + do q = 1, ao_num + do n = 1, ao_num ! p1h1p2h2 l1 l2 r1 r2 bi_ortho_mo_ints += ao_two_e_tc_tot(n,q,m,p) * mo_l_coef(m,l) * mo_l_coef(n,k) * mo_r_coef(p,j) * mo_r_coef(q,i) enddo @@ -27,7 +27,7 @@ double precision function bi_ortho_mo_ints(l, k, j, i) enddo enddo -end function bi_ortho_mo_ints +end ! --- @@ -40,38 +40,106 @@ end function bi_ortho_mo_ints END_DOC implicit none - integer :: i, j, k, l, m, n, p, q + integer :: i, j, k, l, m, n, p, q, s, r + double precision :: t1, t2, tt1, tt2 double precision, allocatable :: a1(:,:,:,:), a2(:,:,:,:) + double precision, allocatable :: a_jkp(:,:,:), a_kpq(:,:,:), ao_two_e_tc_tot_tmp(:,:,:) - PROVIDE mo_r_coef mo_l_coef - - allocate(a2(ao_num,ao_num,ao_num,mo_num)) - - call dgemm( 'T', 'N', ao_num*ao_num*ao_num, mo_num, ao_num, 1.d0 & - , ao_two_e_tc_tot(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & - , 0.d0 , a2(1,1,1,1), ao_num*ao_num*ao_num) - - allocate(a1(ao_num,ao_num,mo_num,mo_num)) - - call dgemm( 'T', 'N', ao_num*ao_num*mo_num, mo_num, ao_num, 1.d0 & - , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & - , 0.d0, a1(1,1,1,1), ao_num*ao_num*mo_num) - - deallocate(a2) - allocate(a2(ao_num,mo_num,mo_num,mo_num)) - - call dgemm( 'T', 'N', ao_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & - , a1(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & - , 0.d0, a2(1,1,1,1), ao_num*mo_num*mo_num) - - deallocate(a1) - - call dgemm( 'T', 'N', mo_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & - , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & - , 0.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,1), mo_num*mo_num*mo_num) + print *, ' PROVIDING mo_bi_ortho_tc_two_e_chemist ...' + call wall_time(t1) + call print_memory_usage() - deallocate(a2) + PROVIDE mo_r_coef mo_l_coef + if(ao_to_mo_tc_n3) then + + print*, ' memory scale of TC ao -> mo: O(N3) ' + + if(.not.read_tc_integ) then + stop 'read_tc_integ needs to be set to true' + endif + + allocate(a_jkp(ao_num,ao_num,mo_num)) + allocate(a_kpq(ao_num,mo_num,mo_num)) + allocate(ao_two_e_tc_tot_tmp(ao_num,ao_num,ao_num)) + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="read") + + call wall_time(tt1) + + mo_bi_ortho_tc_two_e_chemist(:,:,:,:) = 0.d0 + do l = 1, ao_num + read(11) ao_two_e_tc_tot_tmp(:,:,:) + + do s = 1, mo_num + + call dgemm( 'T', 'N', ao_num*ao_num, mo_num, ao_num, 1.d0 & + , ao_two_e_tc_tot_tmp(1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a_jkp(1,1,1), ao_num*ao_num) + + call dgemm( 'T', 'N', ao_num*mo_num, mo_num, ao_num, 1.d0 & + , a_jkp(1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, a_kpq(1,1,1), ao_num*mo_num) + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, ao_num, mo_r_coef(l,s) & + , a_kpq(1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 1.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,s), mo_num*mo_num) + + enddo ! s + + if(l == 2) then + call wall_time(tt2) + print*, ' 1 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-1)*(tt2-tt1)/60.d0 + elseif(l == 11) then + call wall_time(tt2) + print*, ' 10 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-10)*(tt2-tt1)/(60.d0*10.d0) + elseif(l == 101) then + call wall_time(tt2) + print*, ' 100 / mo_num done in (min)', (tt2-tt1)/60.d0 + print*, ' estimated time required (min)', dble(mo_num-100)*(tt2-tt1)/(60.d0*100.d0) + endif + enddo ! l + + close(11) + + deallocate(a_jkp, a_kpq, ao_two_e_tc_tot_tmp) + + else + + print*, ' memory scale of TC ao -> mo: O(N4) ' + + allocate(a2(ao_num,ao_num,ao_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*ao_num*ao_num, mo_num, ao_num, 1.d0 & + , ao_two_e_tc_tot(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a2(1,1,1,1), ao_num*ao_num*ao_num) + + FREE ao_two_e_tc_tot + + allocate(a1(ao_num,ao_num,mo_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*ao_num*mo_num, mo_num, ao_num, 1.d0 & + , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, a1(1,1,1,1), ao_num*ao_num*mo_num) + + deallocate(a2) + allocate(a2(ao_num,mo_num,mo_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & + , a1(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a2(1,1,1,1), ao_num*mo_num*mo_num) + + deallocate(a1) + + call dgemm( 'T', 'N', mo_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & + , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,1), mo_num*mo_num*mo_num) + + deallocate(a2) + + endif !allocate(a1(mo_num,ao_num,ao_num,ao_num)) !a1 = 0.d0 @@ -135,6 +203,10 @@ end function bi_ortho_mo_ints !enddo !deallocate(a1) + call wall_time(t2) + print *, ' WALL TIME for PROVIDING mo_bi_ortho_tc_two_e_chemist (min)', (t2-t1)/60.d0 + call print_memory_usage() + END_PROVIDER ! --- @@ -176,6 +248,34 @@ end function bi_ortho_mo_ints END_PROVIDER +BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_transp, (mo_num, mo_num, mo_num, mo_num)] + implicit none + BEGIN_DOC + ! + ! mo_bi_ortho_tc_two_e_transp(i,j,k,l) = = transpose of mo_bi_ortho_tc_two_e + ! + ! the potential V(r_12) contains ALL TWO-E CONTRIBUTION OF THE TC-HAMILTONIAN + ! + END_DOC + + integer :: i,j,k,l + print*,'Providing mo_bi_ortho_tc_two_e_transp' + double precision :: t0,t1 + call wall_time(t0) + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + mo_bi_ortho_tc_two_e_transp(i,j,k,l) = mo_bi_ortho_tc_two_e(k,l,i,j) + enddo + enddo + enddo + enddo + call wall_time(t1) + + print *, ' WALL TIME for PROVIDING mo_bi_ortho_tc_two_e_transp (min)', (t1-t0)/60.d0 + +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj, (mo_num,mo_num)] @@ -232,3 +332,23 @@ end function bi_ortho_mo_ints ! --- + BEGIN_PROVIDER [double precision, tc_2e_3idx_coulomb_integrals_transp , (mo_num,mo_num,mo_num)] +&BEGIN_PROVIDER [double precision, tc_2e_3idx_exchange_integrals_transp, (mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! tc_2e_3idx_coulomb_integrals_transp (j,k,i) = + ! tc_2e_3idx_exchange_integrals_transp(j,k,i) = + END_DOC + implicit none + integer :: i, j, k + + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + tc_2e_3idx_coulomb_integrals_transp(j, k,i) = mo_bi_ortho_tc_two_e_transp(j ,k ,j ,i ) + tc_2e_3idx_exchange_integrals_transp(j,k,i) = mo_bi_ortho_tc_two_e_transp(k ,j ,j ,i ) + enddo + enddo + enddo + +END_PROVIDER diff --git a/plugins/local/bi_ortho_mos/bi_ort_mos_in_r.irp.f b/plugins/local/bi_ortho_mos/bi_ort_mos_in_r.irp.f index 255728547..b1c2dc877 100644 --- a/plugins/local/bi_ortho_mos/bi_ort_mos_in_r.irp.f +++ b/plugins/local/bi_ortho_mos/bi_ort_mos_in_r.irp.f @@ -1,135 +1,70 @@ -! TODO: left & right MO without duplicate AO calculation - -! --- - -BEGIN_PROVIDER[double precision, mos_r_in_r_array, (mo_num, n_points_final_grid)] - - BEGIN_DOC - ! mos_in_r_array(i,j) = value of the ith RIGHT mo on the jth grid point - END_DOC - - implicit none - integer :: i, j - double precision :: mos_array(mo_num), r(3) - - !$OMP PARALLEL DO & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, r, mos_array) & - !$OMP SHARED (mos_r_in_r_array, n_points_final_grid, mo_num, final_grid_points) - do i = 1, n_points_final_grid - r(1) = final_grid_points(1,i) - r(2) = final_grid_points(2,i) - r(3) = final_grid_points(3,i) - call give_all_mos_r_at_r(r, mos_array) - do j = 1, mo_num - mos_r_in_r_array(j,i) = mos_array(j) - enddo - enddo - !$OMP END PARALLEL DO - -END_PROVIDER - -! --- - -BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, mo_num)] + BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid, mo_num)] +&BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, mo_num)] BEGIN_DOC - ! mos_r_in_r_array_transp(i,j) = value of the jth mo on the ith grid point + ! + ! mos_l_in_r_array_transp(i,j) = value of the jth left-mo on the ith grid point + ! mos_r_in_r_array_transp(i,j) = value of the jth right-mo on the ith grid point + ! END_DOC implicit none - integer :: i,j - do i = 1, n_points_final_grid - do j = 1, mo_num - mos_r_in_r_array_transp(i,j) = mos_r_in_r_array(j,i) - enddo - enddo + integer :: i + double precision :: tt0, tt1, tt2, tt3 + double precision :: r(3) + double precision, allocatable :: aos_r(:,:) -END_PROVIDER - -! --- + call wall_time(tt0) -subroutine give_all_mos_r_at_r(r, mos_r_array) + allocate(aos_r(ao_num,n_points_final_grid)) - BEGIN_DOC - ! mos_r_array(i) = ith RIGHT MO function evaluated at "r" - END_DOC + ! provide everything required before OpenMP + r(1) = final_grid_points(1,1) + r(2) = final_grid_points(2,1) + r(3) = final_grid_points(3,1) + call give_all_aos_at_r(r, aos_r(1,1)) - implicit none - double precision, intent(in) :: r(3) - double precision, intent(out) :: mos_r_array(mo_num) - double precision :: aos_array(ao_num) - call give_all_aos_at_r(r, aos_array) - call dgemv('N', mo_num, ao_num, 1.d0, mo_r_coef_transp, mo_num, aos_array, 1, 0.d0, mos_r_array, 1) + call wall_time(tt2) -end subroutine give_all_mos_r_at_r - -! --- - -BEGIN_PROVIDER[double precision, mos_l_in_r_array, (mo_num, n_points_final_grid)] - - BEGIN_DOC - ! mos_in_r_array(i,j) = value of the ith LEFT mo on the jth grid point - END_DOC - - implicit none - integer :: i, j - double precision :: mos_array(mo_num), r(3) - - !$OMP PARALLEL DO & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,r,mos_array,j) & - !$OMP SHARED(mos_l_in_r_array,n_points_final_grid,mo_num,final_grid_points) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, r) & + !$OMP SHARED(n_points_final_grid, final_grid_points, aos_r) + !$OMP DO do i = 1, n_points_final_grid r(1) = final_grid_points(1,i) r(2) = final_grid_points(2,i) r(3) = final_grid_points(3,i) - call give_all_mos_l_at_r(r, mos_array) - do j = 1, mo_num - mos_l_in_r_array(j,i) = mos_array(j) - enddo + call give_all_aos_at_r(r, aos_r(1,i)) enddo - !$OMP END PARALLEL DO - -END_PROVIDER + !$OMP END DO + !$OMP END PARALLEL -! --- + call wall_time(tt3) + write(*,"(A,2X,F15.7)") ' wall time for AOs on r (sec) = ', (tt3 - tt2) -subroutine give_all_mos_l_at_r(r, mos_l_array) - BEGIN_DOC - ! mos_l_array(i) = ith LEFT MO function evaluated at "r" - END_DOC + call dgemm("T", "N", n_points_final_grid, mo_num, ao_num, & + 1.d0, & + aos_r(1,1), ao_num, & + mo_l_coef(1,1), ao_num, & + 0.d0, & + mos_l_in_r_array_transp(1,1), n_points_final_grid) - implicit none - double precision, intent(in) :: r(3) - double precision, intent(out) :: mos_l_array(mo_num) - double precision :: aos_array(ao_num) - - call give_all_aos_at_r(r, aos_array) - call dgemv('N', mo_num, ao_num, 1.d0, mo_l_coef_transp, mo_num, aos_array, 1, 0.d0, mos_l_array, 1) - -end subroutine give_all_mos_l_at_r - -! --- - -BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid,mo_num)] + call dgemm("T", "N", n_points_final_grid, mo_num, ao_num, & + 1.d0, & + aos_r(1,1), ao_num, & + mo_r_coef(1,1), ao_num, & + 0.d0, & + mos_r_in_r_array_transp(1,1), n_points_final_grid) - BEGIN_DOC - ! mos_l_in_r_array_transp(i,j) = value of the jth mo on the ith grid point - END_DOC - - implicit none - integer :: i, j + deallocate(aos_r) - do i = 1, n_points_final_grid - do j = 1, mo_num - mos_l_in_r_array_transp(i,j) = mos_l_in_r_array(j,i) - enddo - enddo + call wall_time(tt1) + write(*,"(A,2X,F15.7)") ' wall time for mos_l_in_r_array_transp & mos_r_in_r_array_transp (sec) = ', (tt1 - tt0) END_PROVIDER diff --git a/plugins/local/bi_ortho_mos/bi_ort_mos_in_r_old.irp.f b/plugins/local/bi_ortho_mos/bi_ort_mos_in_r_old.irp.f new file mode 100644 index 000000000..9fd671f8e --- /dev/null +++ b/plugins/local/bi_ortho_mos/bi_ort_mos_in_r_old.irp.f @@ -0,0 +1,137 @@ + +! TODO: left & right MO without duplicate AO calculation + +! --- + +BEGIN_PROVIDER[double precision, mos_r_in_r_array, (mo_num, n_points_final_grid)] + + BEGIN_DOC + ! mos_in_r_array(i,j) = value of the ith RIGHT mo on the jth grid point + END_DOC + + implicit none + integer :: i, j + double precision :: mos_array(mo_num), r(3) + + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, r, mos_array) & + !$OMP SHARED (mos_r_in_r_array, n_points_final_grid, mo_num, final_grid_points) + do i = 1, n_points_final_grid + r(1) = final_grid_points(1,i) + r(2) = final_grid_points(2,i) + r(3) = final_grid_points(3,i) + call give_all_mos_r_at_r(r, mos_array) + do j = 1, mo_num + mos_r_in_r_array(j,i) = mos_array(j) + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + +BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp_old, (n_points_final_grid, mo_num)] + + BEGIN_DOC + ! mos_r_in_r_array_transp_old(i,j) = value of the jth mo on the ith grid point + END_DOC + + implicit none + integer :: i,j + + do i = 1, n_points_final_grid + do j = 1, mo_num + mos_r_in_r_array_transp_old(i,j) = mos_r_in_r_array(j,i) + enddo + enddo + +END_PROVIDER + +! --- + +subroutine give_all_mos_r_at_r(r, mos_r_array) + + BEGIN_DOC + ! mos_r_array(i) = ith RIGHT MO function evaluated at "r" + END_DOC + + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out) :: mos_r_array(mo_num) + double precision :: aos_array(ao_num) + + call give_all_aos_at_r(r, aos_array) + call dgemv('N', mo_num, ao_num, 1.d0, mo_r_coef_transp, mo_num, aos_array, 1, 0.d0, mos_r_array, 1) + +end subroutine give_all_mos_r_at_r + +! --- + +BEGIN_PROVIDER[double precision, mos_l_in_r_array, (mo_num, n_points_final_grid)] + + BEGIN_DOC + ! mos_in_r_array(i,j) = value of the ith LEFT mo on the jth grid point + END_DOC + + implicit none + integer :: i, j + double precision :: mos_array(mo_num), r(3) + + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,r,mos_array,j) & + !$OMP SHARED(mos_l_in_r_array,n_points_final_grid,mo_num,final_grid_points) + do i = 1, n_points_final_grid + r(1) = final_grid_points(1,i) + r(2) = final_grid_points(2,i) + r(3) = final_grid_points(3,i) + call give_all_mos_l_at_r(r, mos_array) + do j = 1, mo_num + mos_l_in_r_array(j,i) = mos_array(j) + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + +subroutine give_all_mos_l_at_r(r, mos_l_array) + + BEGIN_DOC + ! mos_l_array(i) = ith LEFT MO function evaluated at "r" + END_DOC + + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out) :: mos_l_array(mo_num) + double precision :: aos_array(ao_num) + + call give_all_aos_at_r(r, aos_array) + call dgemv('N', mo_num, ao_num, 1.d0, mo_l_coef_transp, mo_num, aos_array, 1, 0.d0, mos_l_array, 1) + +end subroutine give_all_mos_l_at_r + +! --- + +BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp_old, (n_points_final_grid,mo_num)] + + BEGIN_DOC + ! mos_l_in_r_array_transp_old(i,j) = value of the jth mo on the ith grid point + END_DOC + + implicit none + integer :: i, j + + do i = 1, n_points_final_grid + do j = 1, mo_num + mos_l_in_r_array_transp_old(i,j) = mos_l_in_r_array(j,i) + enddo + enddo + +END_PROVIDER + +! --- + diff --git a/plugins/local/bi_ortho_mos/overlap.irp.f b/plugins/local/bi_ortho_mos/overlap.irp.f index ff5d5c84e..7f07929f6 100644 --- a/plugins/local/bi_ortho_mos/overlap.irp.f +++ b/plugins/local/bi_ortho_mos/overlap.irp.f @@ -56,10 +56,10 @@ print*,'Average trace of overlap_bi_ortho is different from 1 by ', dabs(accu_d-1.d0) print*,'And bi orthogonality is off by an average of ',accu_nd print*,'****************' - print*,'Overlap matrix betwee mo_l_coef and mo_r_coef ' - do i = 1, mo_num - write(*,'(100(F16.10,X))')overlap_bi_ortho(i,:) - enddo + !print*,'Overlap matrix betwee mo_l_coef and mo_r_coef ' + !do i = 1, mo_num + ! write(*,'(100(F16.10,X))')overlap_bi_ortho(i,:) + !enddo endif print*,'Average trace of overlap_bi_ortho (should be 1.)' print*,'accu_d = ',accu_d diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f new file mode 100644 index 000000000..f149e7c6d --- /dev/null +++ b/plugins/local/cipsi_tc_bi_ortho/get_d0_transp.irp.f @@ -0,0 +1,108 @@ +subroutine get_d0_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs) + !todo: indices/conjg should be okay for complex + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states,2) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer :: i, j, k, s, h1, h2, p1, p2, puti, putj, mm + double precision :: phase + double precision :: hij,hji + double precision, external :: get_phase_bi + logical :: ok + + integer, parameter :: bant=1 + double precision, allocatable :: hij_cache1(:), hij_cache2(:) + allocate (hij_cache1(mo_num),hij_cache2(mo_num)) + double precision, allocatable :: hji_cache1(:), hji_cache2(:) + allocate (hji_cache1(mo_num),hji_cache2(mo_num)) +! print*,'in get_d0_new' +! call debug_det(gen,N_int) +! print*,'coefs',coefs(1,:) + + if(sp == 3) then ! AB + h1 = p(1,1) + h2 = p(1,2) + do p1=1, mo_num + if(bannedOrb(p1, 1)) cycle +! call get_mo_two_e_integrals_complex(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map) + do mm = 1, mo_num + hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,p1,h2,h1) + hji_cache1(mm) = mo_bi_ortho_tc_two_e_transp(mm,p1,h2,h1) + enddo + !!!!!!!!!! + do p2=1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + ! call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this +! call i_h_j_complex(det, gen, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e_both(det,gen,N_int, hij,hji) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = hij_cache1(p2) * phase + hji = hji_cache1(p2) * phase + end if + if (hij == 0.d0.or.hji == 0.d0) cycle + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij ! HOTSPOT + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji ! HOTSPOT + enddo + end do + end do + + else ! AA BB + p1 = p(1,sp) + p2 = p(2,sp) + do puti=1, mo_num + if(bannedOrb(puti, sp)) cycle +! call get_mo_two_e_integrals_complex(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,puti,p2,p1) + hij_cache2(mm) = mo_bi_ortho_tc_two_e(mm,puti,p1,p2) + hji_cache1(mm) = mo_bi_ortho_tc_two_e_transp(mm,puti,p2,p1) + hji_cache2(mm) = mo_bi_ortho_tc_two_e_transp(mm,puti,p1,p2) + enddo + !!!!!!!!!! + do putj=puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + !call i_h_j_complex(gen, det, N_int, hij) ! need to take conjugate of this +! call i_h_j_complex(det, gen, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e_both(det,gen,N_int, hij,hji) + if (hij == 0.d0.or.hji == 0.d0) cycle + else +! hij = (mo_two_e_integral_complex(p1, p2, puti, putj) - mo_two_e_integral_complex(p2, p1, puti, putj)) +! hij = (mo_bi_ortho_tc_two_e(p1, p2, puti, putj) - mo_bi_ortho_tc_two_e(p2, p1, puti, putj)) + hij = (mo_bi_ortho_tc_two_e(puti, putj, p1, p2) - mo_bi_ortho_tc_two_e(puti, putj, p2, p1)) + hji = (mo_bi_ortho_tc_two_e_transp(puti, putj, p1, p2) - mo_bi_ortho_tc_two_e_transp(puti, putj, p2, p1)) + if (hij == 0.d0.or.hji == 0.d0) cycle + phase = get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + hij = (hij) * phase + hji = (hji) * phase + end if + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + end do + + end do + end if + + deallocate(hij_cache1,hij_cache2) +end + diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f new file mode 100644 index 000000000..a3d7b0767 --- /dev/null +++ b/plugins/local/cipsi_tc_bi_ortho/get_d1_transp.irp.f @@ -0,0 +1,358 @@ +subroutine get_d1_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs) + !todo: indices should be okay for complex? + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states,2) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision, external :: get_phase_bi + double precision, external :: mo_two_e_integral_complex + logical :: ok + + logical, allocatable :: lbanned(:,:) + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j, istate + integer :: hfix, pfix, h1, h2, p1, p2, ib, k, l, mm + + integer, parameter :: turn2(2) = (/2,1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + double precision, allocatable :: hij_cache(:,:) + double precision :: hij, tmp_rowij(N_states, mo_num), tmp_rowij2(N_states, mo_num),phase + double precision, allocatable :: hji_cache(:,:) + double precision :: hji, tmp_rowji(N_states, mo_num), tmp_rowji2(N_states, mo_num) +! PROVIDE mo_integrals_map N_int +! print*,'in get_d1_new' +! call debug_det(gen,N_int) +! print*,'coefs',coefs(1,:) + + allocate (lbanned(mo_num, 2)) + allocate (hij_cache(mo_num,2)) + allocate (hji_cache(mo_num,2)) + lbanned = bannedOrb + + do i=1, p(0,1) + lbanned(p(i,1), 1) = .true. + end do + do i=1, p(0,2) + lbanned(p(i,2), 2) = .true. + end do + + ma = 1 + if(p(0,2) >= 2) ma = 2 + mi = turn2(ma) + + bant = 1 + + if(sp == 3) then + !move MA + if(ma == 2) bant = 2 + puti = p(1,mi) + hfix = h(1,ma) + p1 = p(1,ma) + p2 = p(2,ma) + if(.not. bannedOrb(puti, mi)) then +! call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,p2) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,p1) + do istate = 1,N_states + tmp_rowij(istate,mm) = 0.d0 + tmp_rowji(istate,mm) = 0.d0 + enddo + enddo + !! + do putj=1, hfix-1 + if(lbanned(putj, ma)) cycle + if(banned(putj, puti,bant)) cycle + hij = hij_cache(putj,1) - hij_cache(putj,2) + hji = hji_cache(putj,1) - hji_cache(putj,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) + enddo + endif + end do + do putj=hfix+1, mo_num + if(lbanned(putj, ma)) cycle + if(banned(putj, puti,bant)) cycle + hij = hij_cache(putj,2) - hij_cache(putj,1) + hji = hji_cache(putj,2) - hji_cache(putj,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) + enddo + endif + end do + + if(ma == 1) then + mat_r(1:N_states,1:mo_num,puti) = mat_r(1:N_states,1:mo_num,puti) + tmp_rowij(1:N_states,1:mo_num) + mat_l(1:N_states,1:mo_num,puti) = mat_l(1:N_states,1:mo_num,puti) + tmp_rowji(1:N_states,1:mo_num) + else + do l=1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,puti,l) = mat_r(k,puti,l) + tmp_rowij(k,l) + mat_l(k,puti,l) = mat_l(k,puti,l) + tmp_rowji(k,l) + enddo + enddo + end if + + end if + + !MOVE MI + pfix = p(1,mi) +! call get_mo_two_e_integrals_complex(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + do istate = 1,N_states + tmp_rowij(istate,mm) = 0.d0 + tmp_rowij2(istate,mm) = 0.d0 + tmp_rowji(istate,mm) = 0.d0 + tmp_rowji2(istate,mm) = 0.d0 + enddo + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p1) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p2) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,pfix,p1) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,pfix,p2) + enddo + putj = p1 + !! + do puti=1,mo_num !HOT + if(lbanned(puti,mi)) cycle + !p1 fixed + putj = p1 + if(.not. banned(putj,puti,bant)) then + hij = hij_cache(puti,2) + hji = hji_cache(puti,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) + enddo + endif + end if +! + putj = p2 + if(.not. banned(putj,puti,bant)) then + hij = hij_cache(puti,1) + hji = hji_cache(puti,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) + hij = hij * phase + hji = hji * phase + do k=1,N_states + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) + enddo + endif + end if + end do + + if(mi == 1) then + mat_r(:,:,p1) = mat_r(:,:,p1) + tmp_rowij(:,:) + mat_r(:,:,p2) = mat_r(:,:,p2) + tmp_rowij2(:,:) + mat_l(:,:,p1) = mat_l(:,:,p1) + tmp_rowji(:,:) + mat_l(:,:,p2) = mat_l(:,:,p2) + tmp_rowji2(:,:) + else + do l=1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,p1,l) = mat_r(k,p1,l) + tmp_rowij(k,l) + mat_r(k,p2,l) = mat_r(k,p2,l) + tmp_rowij2(k,l) + mat_l(k,p1,l) = mat_l(k,p1,l) + tmp_rowji(k,l) + mat_l(k,p2,l) = mat_l(k,p2,l) + tmp_rowji2(k,l) + enddo + enddo + end if + + else ! sp /= 3 + + if(p(0,ma) == 3) then + do i=1,3 + hfix = h(1,ma) + puti = p(i, ma) + p1 = p(turn3(1,i), ma) + p2 = p(turn3(2,i), ma) +! call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,p2) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,p1) + do istate = 1, N_states + tmp_rowij(istate,mm) = 0.d0 + tmp_rowji(istate,mm) = 0.d0 + enddo + enddo + !! + do putj=1,hfix-1 + if(banned(putj,puti,1)) cycle + if(lbanned(putj,ma)) cycle + hij = hij_cache(putj,1) - hij_cache(putj,2) + hji = hji_cache(putj,1) - hji_cache(putj,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + hij = hij * phase + hji = hji * phase + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) + endif + end do + do putj=hfix+1,mo_num + if(banned(putj,puti,1)) cycle + if(lbanned(putj,ma)) cycle + hij = hij_cache(putj,2) - hij_cache(putj,1) + hji = hji_cache(putj,2) - hji_cache(putj,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + hij = hij * phase + hji = hji * phase + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) + endif + end do + + mat_r(:, :puti-1, puti) = mat_r(:, :puti-1, puti) + tmp_rowij(:,:puti-1) + mat_l(:, :puti-1, puti) = mat_l(:, :puti-1, puti) + tmp_rowji(:,:puti-1) + do l=puti,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, l) = mat_r(k, puti,l) + tmp_rowij(k,l) + mat_l(k, puti, l) = mat_l(k, puti,l) + tmp_rowji(k,l) + enddo + enddo + end do + else + hfix = h(1,mi) + pfix = p(1,mi) + p1 = p(1,ma) + p2 = p(2,ma) +! call get_mo_two_e_integrals_complex(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2) +! call get_mo_two_e_integrals_complex(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2) + do mm = 1, mo_num + hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,pfix) + hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,pfix) + hji_cache(mm,1) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p1,pfix) + hji_cache(mm,2) = mo_bi_ortho_tc_two_e_transp(mm,hfix,p2,pfix) + do istate = 1,N_states + tmp_rowij (istate,mm) = 0.d0 + tmp_rowij2(istate,mm) = 0.d0 + tmp_rowji (istate,mm) = 0.d0 + tmp_rowji2(istate,mm) = 0.d0 + enddo + enddo + putj = p2 + !! + do puti=1,mo_num + if(lbanned(puti,ma)) cycle + putj = p2 + if(.not. banned(puti,putj,1)) then + hij = hij_cache(puti,1) + hji = hji_cache(puti,1) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) + enddo + endif + end if + + putj = p1 + if(.not. banned(puti,putj,1)) then + hij = hij_cache(puti,2) + hji = hji_cache(puti,2) + if (hij /= 0.d0.and.hji/=0.d0) then + phase = get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) + hij = hij * phase + hji = hji * phase + do k=1,N_states + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) + enddo + endif + end if + end do + mat_r(:,:p2-1,p2) = mat_r(:,:p2-1,p2) + tmp_rowij(:,:p2-1) + mat_l(:,:p2-1,p2) = mat_l(:,:p2-1,p2) + tmp_rowji(:,:p2-1) + do l=p2,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,p2,l) = mat_r(k,p2,l) + tmp_rowij(k,l) + mat_l(k,p2,l) = mat_l(k,p2,l) + tmp_rowji(k,l) + enddo + enddo + mat_r(:,:p1-1,p1) = mat_r(:,:p1-1,p1) + tmp_rowij2(:,:p1-1) + mat_l(:,:p1-1,p1) = mat_l(:,:p1-1,p1) + tmp_rowji2(:,:p1-1) + do l=p1,mo_num + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k,p1,l) = mat_r(k,p1,l) + tmp_rowij2(k,l) + mat_l(k,p1,l) = mat_l(k,p1,l) + tmp_rowji2(k,l) + enddo + enddo + end if + end if + deallocate(lbanned,hij_cache, hji_cache) + + !! MONO + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + do i1=1,p(0,s1) + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=ib,p(0,s2) + p1 = p(i1,s1) + p2 = p(i2,s2) + if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + ! gen is a selector; mask is ionized generator; det is alpha + ! hij is contribution to +! call i_h_j_complex(gen, det, N_int, hij) + call htilde_mu_mat_opt_bi_ortho_no_3e_both(det, gen, N_int, hij,hji) +! call htilde_mu_mat_opt_bi_ortho_no_3e(gen, det, N_int, hji) + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + ! take conjugate to get contribution to instead of +! mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * dconjg(hij) + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji + enddo + end do + end do +end + diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f index d01ed433d..86922ae99 100644 --- a/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/get_d2_good.irp.f @@ -25,9 +25,6 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, integer :: bant bant = 1 -! print*, 'in get_d2_new' -! call debug_det(gen,N_int) -! print*,'coefs',coefs(1,:) tip = p(0,1) * p(0,2) ! number of alpha particles times number of beta particles diff --git a/plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f b/plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f new file mode 100644 index 000000000..b2a7ea314 --- /dev/null +++ b/plugins/local/cipsi_tc_bi_ortho/get_d2_transp.irp.f @@ -0,0 +1,235 @@ + +subroutine get_d2_new_transp(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs) + !todo: indices/conjg should be correct for complex + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + double precision, intent(in) :: coefs(N_states,2) + double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num) + double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + double precision, external :: get_phase_bi + + integer :: i, j, k, tip, ma, mi, puti, putj + integer :: h1, h2, p1, p2, i1, i2 + double precision :: phase + double precision :: hij,hji + + integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) + integer, parameter :: turn2(2) = (/2, 1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + bant = 1 + + tip = p(0,1) * p(0,2) ! number of alpha particles times number of beta particles + + ma = sp !1:(alpha,alpha); 2:(b,b); 3:(a,b) + if(p(0,1) > p(0,2)) ma = 1 ! more alpha particles than beta particles + if(p(0,1) < p(0,2)) ma = 2 ! fewer alpha particles than beta particles + mi = mod(ma, 2) + 1 + + if(sp == 3) then ! if one alpha and one beta xhole + !(where xholes refer to the ionizations from the generator, not the holes occupied in the ionized generator) + if(ma == 2) bant = 2 ! if more beta particles than alpha particles + + if(tip == 3) then ! if 3 of one particle spin and 1 of the other particle spin + puti = p(1, mi) + if(bannedOrb(puti, mi)) return + h1 = h(1, ma) + h2 = h(2, ma) + + !! + do i = 1, 3 ! loop over all 3 combinations of 2 particles with spin ma + putj = p(i, ma) + if(banned(putj,puti,bant)) cycle + i1 = turn3(1,i) + i2 = turn3(2,i) + p1 = p(i1, ma) + p2 = p(i2, ma) + + ! |G> = |psi_{gen,i}> + ! |G'> = a_{x1} a_{x2} |G> + ! |alpha> = a_{puti}^{\dagger} a_{putj}^{\dagger} |G'> + ! |alpha> = t_{x1,x2}^{puti,putj} |G> + ! hij = + ! |alpha> = t_{p1,p2}^{h1,h2}|psi_{selectors,i}> + !todo: = ( - ) * phase + ! += dconjg(c_i) * + ! = ( - ) * phase + ! += * c_i + +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + +! hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e( h1, h2, p2, p1) +! hji = mo_bi_ortho_tc_two_e_transp(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e_transp( h1, h2, p2, p1) + hij = mo_bi_ortho_tc_two_e_transp(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e_transp( p1, p2, h2, h1) + hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e( p1, p2, h2, h1) + if (hij == 0.d0.or.hji==0.d0) cycle + + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + + if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji + enddo + else ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + end if + end do + else ! if 2 alpha and 2 beta particles + h1 = h(1,1) + h2 = h(1,2) + !! + do j = 1,2 ! loop over all 4 combinations of one alpha and one beta particle + putj = p(j, 2) + if(bannedOrb(putj, 2)) cycle + p2 = p(turn2(j), 2) + do i = 1,2 + puti = p(i, 1) + if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle + p1 = p(turn2(i), 1) + ! hij = +! hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate +! hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2 ) +! hji = mo_bi_ortho_tc_two_e_transp(h1, h2, p1, p2 ) + hij = mo_bi_ortho_tc_two_e_transp(p1, p2 ,h1, h2 ) + hji = mo_bi_ortho_tc_two_e( p1, p2, h1, h2) + if (hij /= 0.d0.or.hji==0.d0) then + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + endif + end do + end do + end if + + else ! if holes are (a,a) or (b,b) + if(tip == 0) then ! if particles are (a,a,a,a) or (b,b,b,b) + h1 = h(1, ma) + h2 = h(2, ma) + !! + do i=1,3 + puti = p(i, ma) + if(bannedOrb(puti,ma)) cycle + do j=i+1,4 + putj = p(j, ma) + if(bannedOrb(putj,ma)) cycle + if(banned(puti,putj,1)) cycle + + i1 = turn2d(1, i, j) + i2 = turn2d(2, i, j) + p1 = p(i1, ma) + p2 = p(i2, ma) +! hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + hij = mo_bi_ortho_tc_two_e_transp(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e_transp(p1, p2, h2,h1 ) + hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p1, p2, h2,h1 ) + if (hij == 0.d0.or.hji == 0.d0) cycle + + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,1) * hji + enddo + end do + end do + else if(tip == 3) then ! if particles are (a,a,a,b) (ma=1,mi=2) or (a,b,b,b) (ma=2,mi=1) + h1 = h(1, mi) + h2 = h(1, ma) + p1 = p(1, mi) + !! + do i=1,3 + puti = p(turn3(1,i), ma) + if(bannedOrb(puti,ma)) cycle + putj = p(turn3(2,i), ma) + if(bannedOrb(putj,ma)) cycle + if(banned(puti,putj,1)) cycle + p2 = p(i, ma) + +! hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + hij = mo_bi_ortho_tc_two_e_transp(p1, p2 ,h1, h2) + hji = mo_bi_ortho_tc_two_e(p1, p2,h1, h2 ) + if (hij == 0.d0) cycle + + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji * phase + if (puti < putj) then + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + else + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji + enddo + endif + end do + else ! tip == 4 (a,a,b,b) + puti = p(1, sp) + putj = p(2, sp) + if(.not. banned(puti,putj,1)) then + p1 = p(1, mi) + p2 = p(2, mi) + h1 = h(1, mi) + h2 = h(2, mi) + !! +! hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)) +!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! + ! take the transpose of what's written above because later use the complex conjugate + hij = (mo_bi_ortho_tc_two_e_transp(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e_transp(p2,p1,h1, h2)) + hji = (mo_bi_ortho_tc_two_e(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e(p2,p1,h1, h2)) + if (hij /= 0.d0.or.hji==0.d0) then + ! take conjugate to get contribution to instead of +! hij = dconjg(hij) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) + phase = get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) + hij = hij * phase + hji = hji* phase + !DIR$ LOOP COUNT AVG(4) + do k=1,N_states + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji + enddo + end if + end if + end if + end if +end diff --git a/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f b/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f index 833cc0ea5..22381991f 100644 --- a/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/pt2.irp.f @@ -65,8 +65,12 @@ subroutine tc_pt2 call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) + if(transpose_two_e_int)then + provide mo_bi_ortho_tc_two_e_transp tc_2e_3idx_coulomb_integrals_transp + endif call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) + call print_summary_tc(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, N_det, N_configuration, N_states, psi_s2) end diff --git a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f index 12163e06e..72ccf9c40 100644 --- a/plugins/local/cipsi_tc_bi_ortho/selection.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/selection.irp.f @@ -636,10 +636,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere negMask(i,2) = not(mask(i,2)) end do -! print*,'in selection ' do i = 1, N_sel -! call debug_det(det(1,1,i),N_int) -! print*,i,dabs(psi_selectors_coef_transp_tc(1,2,i) * psi_selectors_coef_transp_tc(1,1,i)) if(interesting(i) < 0) then stop 'prefetch interesting(i) and det(i)' endif @@ -691,11 +688,23 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere call get_mask_phase(psi_det_sorted_tc(1,1,interesting(i)), phasemask,N_int) if(nt == 4) then - call get_d2_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + if(transpose_two_e_int)then + call get_d2_new_transp(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else + call get_d2_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif elseif(nt == 3) then - call get_d1_new(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + if(transpose_two_e_int)then + call get_d1_transp(det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else + call get_d1_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif else - call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + if(transpose_two_e_int)then + call get_d0_transp (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + else + call get_d0_new (det(1,1,i), phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i))) + endif endif elseif(nt == 4) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) @@ -887,79 +896,11 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d call diag_htilde_mu_mat_fock_bi_ortho(N_int, det, hmono, htwoe, hthree, hii) do istate = 1,N_states delta_E = E0(istate) - Hii + E_shift - double precision :: alpha_h_psi_tmp, psi_h_alpha_tmp, error - if(debug_tc_pt2 == 1)then !! Using the old version - psi_h_alpha = 0.d0 - alpha_h_psi = 0.d0 - do iii = 1, N_det_selectors - call htilde_mu_mat_bi_ortho_tot_slow(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_bi_ortho_tot_slow(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int) - if(degree == 0)then - print*,'problem !!!' - print*,'a determinant is already in the wave function !!' - print*,'it corresponds to the selector number ',iii - call debug_det(det,N_int) - stop - endif -! call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) -! call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - psi_h_alpha += i_h_alpha * psi_selectors_coef_tc(iii,2,1) ! left function - alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function - enddo - else if(debug_tc_pt2 == 2)then !! debugging the new version -! psi_h_alpha_tmp = 0.d0 -! alpha_h_psi_tmp = 0.d0 -! do iii = 1, N_det_selectors ! old version -! call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) -! call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) -! psi_h_alpha_tmp += i_h_alpha * psi_selectors_coef_tc(iii,1,1) ! left function -! alpha_h_psi_tmp += alpha_h_i * psi_selectors_coef_tc(iii,2,1) ! right function -! enddo - psi_h_alpha_tmp = mat_l(istate, p1, p2) ! new version - alpha_h_psi_tmp = mat_r(istate, p1, p2) ! new version - psi_h_alpha = 0.d0 - alpha_h_psi = 0.d0 - do iii = 1, N_det ! old version - call htilde_mu_mat_opt_bi_ortho_no_3e(psi_det(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_det(1,1,iii), N_int, alpha_h_i) - psi_h_alpha += i_h_alpha * psi_l_coef_bi_ortho(iii,1) ! left function - alpha_h_psi += alpha_h_i * psi_r_coef_bi_ortho(iii,1) ! right function - enddo - if(dabs(psi_h_alpha*alpha_h_psi/delta_E).gt.1.d-10)then - error = dabs(psi_h_alpha * alpha_h_psi - psi_h_alpha_tmp * alpha_h_psi_tmp)/dabs(psi_h_alpha * alpha_h_psi) - if(error.gt.1.d-2)then - call debug_det(det, N_int) - print*,'error =',error,psi_h_alpha * alpha_h_psi/delta_E,psi_h_alpha_tmp * alpha_h_psi_tmp/delta_E - print*,psi_h_alpha , alpha_h_psi - print*,psi_h_alpha_tmp , alpha_h_psi_tmp - print*,'selectors ' - do iii = 1, N_det_selectors ! old version - print*,'iii',iii,psi_selectors_coef_tc(iii,1,1),psi_selectors_coef_tc(iii,2,1) - call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - print*,i_h_alpha,alpha_h_i - call debug_det(psi_selectors(1,1,iii),N_int) - enddo -! print*,'psi_det ' -! do iii = 1, N_det! old version -! print*,'iii',iii,psi_l_coef_bi_ortho(iii,1),psi_r_coef_bi_ortho(iii,1) -! call debug_det(psi_det(1,1,iii),N_int) -! enddo - stop - endif - endif - else - psi_h_alpha = mat_l(istate, p1, p2) - alpha_h_psi = mat_r(istate, p1, p2) - endif + psi_h_alpha = mat_l(istate, p1, p2) + alpha_h_psi = mat_r(istate, p1, p2) val = 4.d0 * psi_h_alpha * alpha_h_psi tmp = dsqrt(delta_E * delta_E + val) -! if (delta_E < 0.d0) then -! tmp = -tmp -! endif e_pert(istate) = 0.25 * val / delta_E -! e_pert(istate) = 0.5d0 * (tmp - delta_E) if(dsqrt(tmp).gt.1.d-4.and.dabs(psi_h_alpha).gt.1.d-4)then coef(istate) = e_pert(istate) / psi_h_alpha else @@ -976,15 +917,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(e_pert(istate).gt.0.d0)e_pert(istate)=0.d0 endif -! if(selection_tc == 1 )then -! if(e_pert(istate).lt.0.d0)then -! e_pert(istate) = 0.d0 -! endif -! else if(selection_tc == -1)then -! if(e_pert(istate).gt.0.d0)then -! e_pert(istate) = 0.d0 -! endif -! endif enddo diff --git a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index 99a8de7ef..0c61e38fd 100644 --- a/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/plugins/local/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -1,4 +1,36 @@ + +! --- + +subroutine run_pouet + + BEGIN_DOC + ! Selected Full Configuration Interaction with Stochastic selection and PT2. + END_DOC + + use selection_types + implicit none + integer :: i, j, k, ndet + integer :: to_select + logical :: has + type(pt2_type) :: pt2_data, pt2_data_err + double precision :: rss + double precision :: correlation_energy_ratio + double precision :: hf_energy_ref + double precision :: relative_error + double precision, allocatable :: zeros(:),E_tc(:), norm(:) + + logical, external :: qp_stop + double precision, external :: memory_of_double + + PROVIDE mo_l_coef mo_r_coef + PROVIDE H_apply_buffer_allocated distributed_davidson + + print*, ' Diagonal elements of the Fock matrix ' + do i = 1, mo_num + write(*,*) i, Fock_matrix_tc_mo_tot(i,i) + enddo +end ! --- subroutine run_stochastic_cipsi @@ -88,6 +120,9 @@ subroutine run_stochastic_cipsi call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) + if(transpose_two_e_int)then + provide mo_bi_ortho_tc_two_e_transp tc_2e_3idx_coulomb_integrals_transp + endif call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection ! stop @@ -120,6 +155,7 @@ subroutine run_stochastic_cipsi call pt2_alloc(pt2_data_err, N_states) call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm) + call print_summary_tc(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, N_det, N_configuration, N_states, psi_s2) call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) diff --git a/plugins/local/fci_tc_bi/fci_tc_bi_ortho.irp.f b/plugins/local/fci_tc_bi/fci_tc_bi_ortho.irp.f index 1c1c0411a..f1de0fe3f 100644 --- a/plugins/local/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/plugins/local/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -65,7 +65,15 @@ subroutine run_cipsi_tc() if (.not. is_zmq_slave) then - PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e + if(.True.)then! DO NOT REMOVE THE IF(.TRUE.) !! + ! this has to be provided before mo_bi_ortho_tc_two_e to avoid twice the computation of ao_two_e_tc_tot + PROVIDE Fock_matrix_tc_mo_tot + ! because Fock_matrix_tc_mo_tot depends on ao_two_e_tc_tot + ! and that mo_bi_ortho_tc_two_e erase ao_two_e_tc_tot after being provided + endif + if(.True.)then ! DO NOT REMOVE THE IF(.TRUE.) !! + PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e + endif if((elec_alpha_num+elec_beta_num) .ge. 3) then if(three_body_h_tc) then @@ -90,8 +98,16 @@ subroutine run_cipsi_tc() call json_close else + if(.True.)then! DO NOT REMOVE THE IF(.TRUE.) !! + ! this has to be provided before mo_bi_ortho_tc_two_e to avoid twice the computation of ao_two_e_tc_tot + PROVIDE Fock_matrix_tc_mo_tot + ! because Fock_matrix_tc_mo_tot depends on ao_two_e_tc_tot + ! and that mo_bi_ortho_tc_two_e erase ao_two_e_tc_tot after being provided + endif - PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks + if(.True.)then! DO NOT REMOVE THE IF(.TRUE.) !! + PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks + endif if((elec_alpha_num+elec_beta_num) .ge. 3) then if(three_body_h_tc) then diff --git a/plugins/local/fci_tc_bi/pt2_tc.irp.f b/plugins/local/fci_tc_bi/pt2_tc.irp.f index 390042bfa..3c07e367a 100644 --- a/plugins/local/fci_tc_bi/pt2_tc.irp.f +++ b/plugins/local/fci_tc_bi/pt2_tc.irp.f @@ -13,6 +13,8 @@ program tc_pt2_prog pruning = -1.d0 touch pruning + read_wf = .True. + touch read_wf ! pt2_relative_error = 0.01d0 ! touch pt2_relative_error diff --git a/plugins/local/gpu_intel/LIB b/plugins/local/gpu_intel/LIB new file mode 100644 index 000000000..199b0f1c9 --- /dev/null +++ b/plugins/local/gpu_intel/LIB @@ -0,0 +1,2 @@ +-ltbb -lsycl -lmkl_sycl -lgpu -limf -lintlc -lstdc++ + diff --git a/plugins/local/gpu_intel/NEED b/plugins/local/gpu_intel/NEED new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/plugins/local/gpu_intel/NEED @@ -0,0 +1 @@ + diff --git a/plugins/local/gpu_intel/README.rst b/plugins/local/gpu_intel/README.rst new file mode 100644 index 000000000..d42e2557b --- /dev/null +++ b/plugins/local/gpu_intel/README.rst @@ -0,0 +1,8 @@ +========= +gpu_intel +========= + +Intel implementation of GPU routines. Uses MKL and SYCL. +```bash +icpx -fsycl gpu.cxx -c -qmkl=sequential +``` diff --git a/plugins/local/gpu_intel/gpu.sycl b/plugins/local/gpu_intel/gpu.sycl new file mode 100644 index 000000000..1f9f89ce7 --- /dev/null +++ b/plugins/local/gpu_intel/gpu.sycl @@ -0,0 +1,177 @@ +#include +#include +#include +#include + +extern "C" { + +/* Generic functions */ + +int gpu_ndevices() { + return 1; +} + +void gpu_set_device(int32_t igpu) { +} + + +/* Allocation functions */ + +void gpu_allocate(void** ptr, int64_t size) { + auto queue = sycl::queue(sycl::default_selector_v); + + try { + *ptr = sycl::malloc_shared(size, queue); + assert(*ptr != nullptr); + } catch (const sycl::exception& e) { + std::cerr << "SYCL exception caught: " << e.what() << std::endl; + *ptr = nullptr; // If allocation fails, set pointer to nullptr + } +} + +void gpu_deallocate(void** ptr) { + assert(*ptr != nullptr); + sycl::free(*ptr, sycl::queue(sycl::default_selector_v)); + *ptr = nullptr; +} + +/* Upload data from host to device */ +void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) { + sycl::queue queue(sycl::default_selector_v); + queue.memcpy(gpu_ptr, cpu_ptr, n).wait(); +} + +/* Download data from device to host */ +void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) { + sycl::queue queue(sycl::default_selector_v); + queue.memcpy(cpu_ptr, gpu_ptr, n).wait(); +} + +/* Copy data from one GPU memory location to another */ +void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) { + sycl::queue queue(sycl::default_selector_v); + queue.memcpy(gpu_ptr_dest, gpu_ptr_src, n).wait(); +} + +/* Queues */ + +/* SYCL queue as a replacement for CUDA stream */ +void gpu_stream_create(sycl::queue** ptr) { + *ptr = new sycl::queue(sycl::default_selector_v); +} + +void gpu_stream_destroy(sycl::queue** ptr) { + assert(*ptr != nullptr); + delete *ptr; + *ptr = nullptr; +} + +void gpu_synchronize() { + sycl::queue queue(sycl::default_selector_v); + queue.wait_and_throw(); +} + +/* BLAS functions */ + +typedef struct { + sycl::queue* queue; +} blasHandle_t; + +void gpu_set_stream(blasHandle_t* handle, sycl::queue* ptr) { + handle->queue = ptr; +} + +void gpu_blas_create(blasHandle_t** ptr) { + *ptr = (blasHandle_t*) malloc(sizeof(blasHandle_t)); + assert(*ptr != nullptr); + (*ptr)->queue = new sycl::queue(sycl::default_selector_v); + assert((*ptr)->queue != nullptr); +} + +void gpu_blas_destroy(blasHandle_t** ptr) { + assert(*ptr != nullptr); + delete (*ptr)->queue; + free(*ptr); + *ptr = nullptr; +} + + +void gpu_ddot(blasHandle_t* handle, const int64_t n, const double* x, const int64_t incx, + const double* y, const int64_t incy, double* result) { + // Ensure input parameters are valid + assert(handle != nullptr); + assert(handle->queue != nullptr); + assert(n > 0); + assert(incx > 0); + assert(incy > 0); + assert(x != nullptr); + assert(y != nullptr); + assert(result != nullptr); + + oneapi::mkl::blas::dot(*handle->queue, n, x, incx, y, incy, result); + +} + +void gpu_dgemv(blasHandle_t* handle, const char* transa, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) { + + assert(handle != nullptr); + assert(handle->queue != nullptr); + + // Validate matrix dimensions and increments to be positive + assert(m > 0 && n > 0 && lda > 0 && incx > 0 && incy > 0); + assert(a != nullptr && x != nullptr && y != nullptr && alpha != nullptr && beta != nullptr); + + // Determine the operation type + oneapi::mkl::transpose transa_ = oneapi::mkl::transpose::nontrans; + if (*transa == 'T' || *transa == 't') { + transa_ = oneapi::mkl::transpose::trans; + } + + // Perform DGEMV operation using oneMKL + oneapi::mkl::blas::column_major::gemv(*handle->queue, transa_, m, n, *alpha, a, lda, x, incx, *beta, y, incy); + +} + +void gpu_dgemm(blasHandle_t* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha, + const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) { + + assert(handle != nullptr && handle->queue != nullptr); + assert(m > 0 && n > 0 && k > 0 && lda > 0 && ldb > 0 && ldc > 0); + assert(a != nullptr && b != nullptr && c != nullptr && alpha != nullptr && beta != nullptr); + + // Transpose operations + auto transa_ = (*transa == 'T' || *transa == 't') ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; + auto transb_ = (*transb == 'T' || *transb == 't') ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; + + oneapi::mkl::blas::column_major::gemm(*handle->queue, transa_, transb_, m, n, k, + *alpha, a, lda, b, ldb, *beta, c, ldc); + +} + + +void gpu_dgeam(blasHandle_t* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { + assert(handle != nullptr && handle->queue != nullptr); + assert(m > 0 && n > 0 && lda > 0 && ldb > 0 && ldc > 0); + assert(a != nullptr && b != nullptr && c != nullptr && alpha != nullptr && beta != nullptr); + + // Determine transpose operations + bool transA = (*transa == 'T' || *transa == 't'); + bool transB = (*transb == 'T' || *transb == 't'); + + handle->queue->submit([&](sycl::handler& cgh) { + cgh.parallel_for(sycl::range<2>(m, n), [=](sycl::id<2> idx) { + const int i = idx[0]; + const int j = idx[1]; + const int ai = transA ? j * lda + i : i * lda + j; + const int bi = transB ? j * ldb + i : i * ldb + j; + const int ci = i * ldc + j; + + c[ci] = (*alpha) * a[ai] + (*beta) * b[bi]; + }); + }); + +} + +} // extern C diff --git a/plugins/local/gpu_nvidia/LIB b/plugins/local/gpu_nvidia/LIB new file mode 100644 index 000000000..91f54e91f --- /dev/null +++ b/plugins/local/gpu_nvidia/LIB @@ -0,0 +1 @@ +-lcudart -lcublas -lcublasLt diff --git a/plugins/local/gpu_nvidia/NEED b/plugins/local/gpu_nvidia/NEED new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/plugins/local/gpu_nvidia/NEED @@ -0,0 +1 @@ + diff --git a/plugins/local/gpu_nvidia/README.rst b/plugins/local/gpu_nvidia/README.rst new file mode 100644 index 000000000..5dcfca92c --- /dev/null +++ b/plugins/local/gpu_nvidia/README.rst @@ -0,0 +1,5 @@ +========== +gpu_nvidia +========== + +Nvidia implementation of GPU routines. Uses CUDA and CUBLAS libraries. diff --git a/plugins/local/gpu_nvidia/gpu.c b/plugins/local/gpu_nvidia/gpu.c new file mode 100644 index 000000000..a775ab950 --- /dev/null +++ b/plugins/local/gpu_nvidia/gpu.c @@ -0,0 +1,326 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + + +/* Generic functions */ + +int gpu_ndevices() { + int ngpus; + cudaGetDeviceCount(&ngpus); + return ngpus; +} + +void gpu_set_device(int32_t igpu) { + cudaSetDevice((int) igpu); +} + + +/* Allocation functions */ + +void gpu_allocate(void** ptr, const int64_t size) { + size_t free, total; + cudaError_t rc = cudaMemGetInfo( &free, &total ); + if (rc != cudaSuccess) { + free = INT64_MAX; + } + + rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal); +// /* Use managed memory if it does not fit on the GPU */ +// if (size < free && size < total/2) { +// rc= cudaMalloc(ptr, size); +// } else { +// rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal); +// } + assert (rc == cudaSuccess); +} + +void gpu_deallocate(void** ptr) { + assert (*ptr != NULL); + cudaFree(*ptr); + *ptr = NULL; +} + + +/* Memory transfer functions */ + +void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) { + cudaMemcpy (gpu_ptr, cpu_ptr, n, cudaMemcpyHostToDevice); +} + +void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) { + cudaMemcpy (cpu_ptr, gpu_ptr, n, cudaMemcpyDeviceToHost); +} + +void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) { + cudaMemcpy (gpu_ptr_dest, gpu_ptr_src, n, cudaMemcpyDeviceToDevice); +} + + +/* Streams */ + +void gpu_stream_create(cudaStream_t* ptr) { + cudaError_t rc = cudaStreamCreate(ptr); + assert (rc == cudaSuccess); +} + +void gpu_stream_destroy(cudaStream_t* ptr) { + assert (ptr != NULL); + cudaError_t rc = cudaStreamDestroy(*ptr); + assert (rc == cudaSuccess); + *ptr = NULL; +} + +void gpu_set_stream(cublasHandle_t handle, cudaStream_t stream) { + cublasSetStream(handle, stream); +} + +void gpu_synchronize() { + cudaDeviceSynchronize(); +} + + +/* BLAS functions */ + +void gpu_blas_create(cublasHandle_t* ptr) { + cublasStatus_t rc = cublasCreate(ptr); + assert (rc == CUBLAS_STATUS_SUCCESS); +} + + +void gpu_blas_destroy(cublasHandle_t* ptr) { + assert (ptr != NULL); + cublasStatus_t rc = cublasDestroy(*ptr); + assert (rc == CUBLAS_STATUS_SUCCESS); + ptr = NULL; +} + + +void gpu_ddot(cublasHandle_t handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) { + assert (handle != NULL); + /* Convert to int */ + int n_, incx_, incy_; + + n_ = (int) n; + incx_ = (int) incx; + incy_ = (int) incy; + + assert ( (int64_t) n_ == n ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + cublasStatus_t rc = cublasDdot(handle, n_, x, incx_, y, incy_, result); + assert (rc == CUBLAS_STATUS_SUCCESS); +} + + + +void gpu_sdot(cublasHandle_t handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) { + assert (handle != NULL); + + /* Convert to int */ + int n_, incx_, incy_; + + n_ = (int) n; + incx_ = (int) incx; + incy_ = (int) incy; + + /* Check for integer overflows */ + assert ( (int64_t) n_ == n ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + float result_ = 0.; + cublasStatus_t rc = cublasSdot(handle, n_, x, incx_, y, incy_, &result_); + assert (rc == CUBLAS_STATUS_SUCCESS); + *result = result_; +} + + + +void gpu_dgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) { + + assert (handle != NULL); + + /* Convert to int */ + int m_, n_, lda_, incx_, incy_; + + m_ = (int) m; + n_ = (int) n; + lda_ = (int) lda; + incx_ = (int) incx; + incy_ = (int) incy; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) lda_ == lda ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + cublasOperation_t transa_ = CUBLAS_OP_N; + if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; + + cublasDgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_); +} + + + +void gpu_sgemv(cublasHandle_t handle, const char* transa, const int64_t m, const int64_t n, const float* alpha, + const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy) { + + assert (handle != NULL); + + /* Convert to int */ + int m_, n_, lda_, incx_, incy_; + + m_ = (int) m; + n_ = (int) n; + lda_ = (int) lda; + incx_ = (int) incx; + incy_ = (int) incy; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) lda_ == lda ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + cublasOperation_t transa_ = CUBLAS_OP_N; + if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; + + cublasSgemv(handle, transa_, m_, n_, alpha, a, lda_, x, incx_, beta, y, incy_); +} + + +void gpu_dgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha, + const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) { + + assert (handle != NULL); + + /* Convert to int */ + int m_, n_, k_, lda_, ldb_, ldc_; + + m_ = (int) m; + n_ = (int) n; + k_ = (int) k; + lda_ = (int) lda; + ldb_ = (int) ldb; + ldc_ = (int) ldc; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) k_ == k ); + assert ( (int64_t) lda_ == lda); + assert ( (int64_t) ldb_ == ldb); + assert ( (int64_t) ldc_ == ldc); + + cublasOperation_t transa_ = CUBLAS_OP_N; + cublasOperation_t transb_ = CUBLAS_OP_N; + if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; + if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T; + + cublasDgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_); +} + + + +void gpu_sgemm(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha, + const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc) { + + assert (handle != NULL); + + /* Convert to int */ + int m_, n_, k_, lda_, ldb_, ldc_; + + m_ = (int) m; + n_ = (int) n; + k_ = (int) k; + lda_ = (int) lda; + ldb_ = (int) ldb; + ldc_ = (int) ldc; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) k_ == k ); + assert ( (int64_t) lda_ == lda); + assert ( (int64_t) ldb_ == ldb); + assert ( (int64_t) ldc_ == ldc); + + cublasOperation_t transa_ = CUBLAS_OP_N; + cublasOperation_t transb_ = CUBLAS_OP_N; + if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; + if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T; + + cublasSgemm(handle, transa_, transb_, m_, n_, k_, alpha, a, lda_, b, ldb_, beta, c, ldc_); +} + + +void gpu_dgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { + assert (handle != NULL); + + /* Convert to int */ + int m_, n_, lda_, ldb_, ldc_; + + m_ = (int) m; + n_ = (int) n; + lda_ = (int) lda; + ldb_ = (int) ldb; + ldc_ = (int) ldc; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) lda_ == lda); + assert ( (int64_t) ldb_ == ldb); + assert ( (int64_t) ldc_ == ldc); + + cublasOperation_t transa_ = CUBLAS_OP_N; + cublasOperation_t transb_ = CUBLAS_OP_N; + if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; + if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T; + + cublasDgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_); + +} + + +void gpu_sgeam(cublasHandle_t handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float* alpha, + const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) { + assert (handle != NULL); + + /* Convert to int */ + int m_, n_, lda_, ldb_, ldc_; + + m_ = (int) m; + n_ = (int) n; + lda_ = (int) lda; + ldb_ = (int) ldb; + ldc_ = (int) ldc; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) lda_ == lda); + assert ( (int64_t) ldb_ == ldb); + assert ( (int64_t) ldc_ == ldc); + + cublasOperation_t transa_ = CUBLAS_OP_N; + cublasOperation_t transb_ = CUBLAS_OP_N; + if (*transa == 'T' || *transa == 't') transa_ = CUBLAS_OP_T; + if (*transb == 'T' || *transb == 't') transb_ = CUBLAS_OP_T; + + cublasSgeam(handle, transa_, transb_, m_, n_, alpha, a, lda_, beta, b, ldb_, c, ldc_); + +} diff --git a/plugins/local/gpu_x86/NEED b/plugins/local/gpu_x86/NEED new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/plugins/local/gpu_x86/NEED @@ -0,0 +1 @@ + diff --git a/plugins/local/gpu_x86/README.rst b/plugins/local/gpu_x86/README.rst new file mode 100644 index 000000000..f530bf291 --- /dev/null +++ b/plugins/local/gpu_x86/README.rst @@ -0,0 +1,5 @@ +======= +gpu_x86 +======= + +x86 implementation of GPU routines. For use when GPUs are not available. diff --git a/plugins/local/gpu_x86/gpu.c b/plugins/local/gpu_x86/gpu.c new file mode 100644 index 000000000..49aec9d34 --- /dev/null +++ b/plugins/local/gpu_x86/gpu.c @@ -0,0 +1,502 @@ +#include +#include +#include +#include +#include +#include + +/* Generic functions */ + +int gpu_ndevices() { + return 0; +} + +void gpu_set_device(int32_t i) { + return; +} + + +/* Allocation functions */ + +void gpu_allocate(void** ptr, const int64_t n) { + *ptr = malloc((size_t) n); + if (*ptr == NULL) { + perror("Allocation failed"); + } +} + +void gpu_deallocate(void** ptr) { + free(*ptr); + *ptr = NULL; +} + + +/* Memory transfer functions */ + +void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) { + memcpy(gpu_ptr, cpu_ptr, n); +} + +void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) { + memcpy(cpu_ptr, gpu_ptr, n); +} + +void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) { + memcpy(gpu_ptr_dest, gpu_ptr_src, n); +} + + +/* Streams */ + +void gpu_stream_create(void** ptr) { + *ptr = (void*) malloc(sizeof(char)); +} + +void gpu_stream_destroy(void** ptr) { + free(*ptr); + *ptr = NULL; +} + +void gpu_set_stream(void* handle, void* stream) { + return; +} + +void gpu_synchronize() { + return; +} + + +/* BLAS functions */ + +void gpu_blas_create(void** handle) { + *handle = (void*) malloc(sizeof(char)); +} + + +void gpu_blas_destroy(void** handle) { + free(*handle); + *handle = NULL; +} + + +double ddot_(const int32_t* n, const double* x, const int32_t* incx, const double* y, const int32_t* incy); + +void gpu_ddot(void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) { + assert (handle != NULL); + + /* Convert to int32_t */ + int32_t n_, incx_, incy_; + + n_ = (int32_t) n; + incx_ = (int32_t) incx; + incy_ = (int32_t) incy; + + /* Check for integer overflows */ + assert ( (int64_t) n_ == n ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + *result = ddot_(&n_, x, &incx_, y, &incy_); +} + + +float sdot_(const int32_t* n, const float* x, const int32_t* incx, const float* y, const int32_t* incy); + +void gpu_sdot(void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) { + assert (handle != NULL); + + /* Convert to int32_t */ + int32_t n_, incx_, incy_; + + n_ = (int32_t) n; + incx_ = (int32_t) incx; + incy_ = (int32_t) incy; + + /* Check for integer overflows */ + assert ( (int64_t) n_ == n ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + *result = sdot_(&n_, x, &incx_, y, &incy_); +} + + +void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha, + const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy); + +void gpu_dgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) { + + assert (handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, lda_, incx_, incy_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + lda_ = (int32_t) lda; + incx_ = (int32_t) incx; + incy_ = (int32_t) incy; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) lda_ == lda ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + dgemv_(transa, &m_, &n_, alpha, a, &lda_, x, &incx_, beta, y, &incy_); +} + + +void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha, + const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy); + +void gpu_sgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const float* alpha, + const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy) { + + assert (handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, lda_, incx_, incy_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + lda_ = (int32_t) lda; + incx_ = (int32_t) incx; + incy_ = (int32_t) incy; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) lda_ == lda ); + assert ( (int64_t) incx_ == incx); + assert ( (int64_t) incy_ == incy); + + sgemv_(transa, &m_, &n_, alpha, a, &lda_, x, &incx_, beta, y, &incy_); +} + + +void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha, + const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc); + +void gpu_dgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha, + const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) { + + assert (handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, k_, lda_, ldb_, ldc_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + k_ = (int32_t) k; + lda_ = (int32_t) lda; + ldb_ = (int32_t) ldb; + ldc_ = (int32_t) ldc; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) k_ == k ); + assert ( (int64_t) lda_ == lda); + assert ( (int64_t) ldb_ == ldb); + assert ( (int64_t) ldc_ == ldc); + + dgemm_(transa, transb, &m_, &n_, &k_, alpha, a, &lda_, b, &ldb_, beta, c, &ldc_); +} + + + +void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha, + const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc); + +void gpu_sgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha, + const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc) { + + assert (handle != NULL); + + /* Convert to int32_t */ + int32_t m_, n_, k_, lda_, ldb_, ldc_; + + m_ = (int32_t) m; + n_ = (int32_t) n; + k_ = (int32_t) k; + lda_ = (int32_t) lda; + ldb_ = (int32_t) ldb; + ldc_ = (int32_t) ldc; + + /* Check for integer overflows */ + assert ( (int64_t) m_ == m ); + assert ( (int64_t) n_ == n ); + assert ( (int64_t) k_ == k ); + assert ( (int64_t) lda_ == lda); + assert ( (int64_t) ldb_ == ldb); + assert ( (int64_t) ldc_ == ldc); + + sgemm_(transa, transb, &m_, &n_, &k_, alpha, a, &lda_, b, &ldb_, beta, c, &ldc_); +} + + +void gpu_dgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) { + assert (handle != NULL); + + if ( (*transa == 'N' && *transb == 'N') || + (*transa == 'n' && *transb == 'N') || + (*transa == 'N' && *transb == 'n') || + (*transa == 'n' && *transb == 'n') ) { + + if (*alpha == 0.) { + + for (int64_t j=0 ; j 0 + jmu = - inv_sq_pi_2 * inv_mu ! initialization when r12 --> 0 + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) return + erfc_mur12 = (1.d0 - derf(mu_tmp)) + mu_tmp = mu * r12 + tmp = 0.5d0 * erfc_mur12 / r12 ! d/dx1 j(mu(r1),r12) + grad(1) = tmp * dx + grad(2) = tmp * dy + grad(3) = tmp * dz + + grad_jmu = grad + + jmu= 0.5d0 * r12 * erfc_mur12 - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) * inv_mu + + +end diff --git a/plugins/local/non_h_ints_mu/jast_deriv_utils.irp.f b/plugins/local/non_h_ints_mu/jast_deriv_utils.irp.f index 798225084..f13990de9 100644 --- a/plugins/local/non_h_ints_mu/jast_deriv_utils.irp.f +++ b/plugins/local/non_h_ints_mu/jast_deriv_utils.irp.f @@ -1,8 +1,73 @@ +subroutine get_deriv_r12_j12(x,mu,d_dx_j) + implicit none + include 'constants.include.F' + BEGIN_DOC + ! d/dr12 j(mu,r12) + END_DOC + double precision, intent(in) :: x,mu + double precision, intent(out) :: d_dx_j + + d_dx_j = 0.d0 + if(x .lt. 1d-10) return + if(j2e_type .eq. "Mu" .or. j2e_type .eq. "Mur") then + d_dx_j = 0.5d0 * (1.d0 - derf(mu * x)) + else if(j2e_type .eq. "Mugauss" .or. j2e_type .eq. "Murgauss" ) then + double precision :: x_tmp + x_tmp = mu * x + ! gradient of j(mu,x) + d_dx_j = 0.5d0 * (1.d0 - derf(x_tmp)) + + ! gradient of gaussian additional term + x_tmp *= alpha_mu_gauss + x_tmp *= x_tmp + d_dx_j += -0.5d0 * mu * c_mu_gauss * x * dexp(-x_tmp) + else + print *, ' Error in get_deriv_r12_j12: Unknown j2e_type = ', j2e_type + stop + endif +end + + +subroutine get_deriv_mu_j12(x,mu,d_d_mu) + implicit none + BEGIN_DOC + ! d/dmu j(mu,r12) + END_DOC + include 'constants.include.F' + double precision, intent(in) :: x,mu + double precision, intent(out) :: d_d_mu + double precision :: x_tmp,inv_mu_2,inv_alpha_2 + + d_d_mu = 0.d0 + if(x .lt. 1d-10) return + x_tmp = x*mu + if(mu.lt.1.d-10) return + inv_mu_2 = mu*mu + inv_mu_2 = 1.d0/inv_mu_2 + if(j2e_type .eq. "Mu" .or. j2e_type .eq. "Mur") then + ! e^{-(r12*mu)^2}/(2 sqrt(pi) * mu^2) + d_d_mu = dexp(-x_tmp*x_tmp) * inv_sq_pi_2 * inv_mu_2 + else if(j2e_type .eq. "Mugauss" .or. j2e_type .eq. "Murgauss" ) then + d_d_mu = dexp(-x_tmp*x_tmp) * inv_sq_pi_2 * inv_mu_2 + inv_alpha_2 = 1.d0/alpha_mu_gauss + inv_alpha_2 *= inv_alpha_2 + x_tmp *= alpha_mu_gauss + x_tmp *= x_tmp + d_d_mu += -0.25d0 * c_mu_gauss*inv_alpha_2*dexp(-x_tmp) * (1.d0 + 2.d0 * x_tmp) * inv_mu_2 + else + print *, ' Error in get_deriv_r12_j12: Unknown j2e_type = ', j2e_type + stop + endif +end + ! --- double precision function j12_mu(r1, r2) + BEGIN_DOC + ! j(mu,r12) = 1/2 r12 (1 - erf(mu r12)) - 1/2 (sqrt(pi) * mu) e^{-(mu*r12)^2} + END_DOC include 'constants.include.F' implicit none @@ -18,6 +83,18 @@ double precision function j12_mu(r1, r2) j12_mu = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf + else if(j2e_type .eq. "Mugauss") then + + r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) + double precision :: r12_tmp + r12_tmp = mu_erf * r12 + + j12_mu = 0.5d0 * r12 * (1.d0 - derf(r12_tmp)) - inv_sq_pi_2 * dexp(-r12_tmp*r12_tmp) / mu_erf + r12_tmp *= alpha_mu_gauss + j12_mu += 0.25d0 * c_mu_gauss / (alpha_mu_gauss*alpha_mu_gauss*mu_erf) * dexp(-r12_tmp*r12_tmp) + else print *, ' Error in j12_mu: Unknown j2e_type = ', j2e_type @@ -57,7 +134,7 @@ subroutine grad1_j12_mu(r1, r2, grad) grad = 0.d0 - if(j2e_type .eq. "Mu") then + if(j2e_type .eq. "Mu".or.j2e_type .eq. "Mugauss") then dx = r1(1) - r2(1) dy = r1(2) - r2(2) @@ -66,31 +143,42 @@ subroutine grad1_j12_mu(r1, r2, grad) r12 = dsqrt(dx * dx + dy * dy + dz * dz) if(r12 .lt. 1d-10) return - tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 + call get_deriv_r12_j12(r12,mu_erf,tmp) +! tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 grad(1) = tmp * dx grad(2) = tmp * dy grad(3) = tmp * dz + grad *= 1.d0/r12 + + elseif(j2e_type .eq. "Mur" .or. j2e_type .eq. "Murgauss") then + double precision :: jast + call grad_j_sum_mu_of_r(r1,r2,jast,grad) + + elseif(j2e_type .eq. "Bump") then + double precision ::grad_jast(3) + call get_grad_j_bump_mu_of_r(r1,r2,grad_jast) + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) then + grad(1) = 0.d0 + grad(2) = 0.d0 + grad(3) = 0.d0 + return + endif + + tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 + + grad(1) = 0.5d0 * tmp * dx + grad(2) = 0.5d0 * tmp * dy + grad(3) = 0.5d0 * tmp * dz + grad(1) += 0.5d0 * grad_jast(1) + grad(2) += 0.5d0 * grad_jast(2) + grad(3) += 0.5d0 * grad_jast(3) - elseif(j2e_type .eq. "Mur") then - - dx = r1(1) - r2(1) - dy = r1(2) - r2(2) - dz = r1(3) - r2(3) - r12 = dsqrt(dx * dx + dy * dy + dz * dz) - - call mu_r_val_and_grad(r1, r2, mu_val, mu_der) - mu_tmp = mu_val * r12 - tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) - grad(1) = tmp * mu_der(1) - grad(2) = tmp * mu_der(2) - grad(3) = tmp * mu_der(3) - - if(r12 .lt. 1d-10) return - tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 - grad(1) = grad(1) + tmp * dx - grad(2) = grad(2) + tmp * dy - grad(3) = grad(3) + tmp * dz else @@ -369,7 +457,18 @@ subroutine grad1_env_nucl(r, grad) ! --- subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) - + BEGIN_DOC +! various flavours of mu(r1,r2) +! depends on essentially the density and other related quantities +! +! change the variable "murho_type" to change type +! +! murho_type == -1 :: mu(r1,r2) = (rho(r1) mu_mf(r1) + rho(r2) mu_mf(r2))/[rho(r1)+rho(r2)] +! +! == 0 :: mu(r1,r2) = (sqrt(rho(r1)) mu_mf(r1) + sqrt(rho(r2)) mu_mf(r2))/[sqrt(rho(r1))+sqrt(rho(r2))] +! +! == -2 :: mu(r1,r2) = 0.5(mu_mf(r1) + mu_mf(r2)) + END_DOC implicit none double precision, intent(in) :: r1(3), r2(3) double precision, intent(out) :: mu_val, mu_der(3) @@ -379,11 +478,50 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) double precision :: rho1, grad_rho1(3),rho2,rho_tot,inv_rho_tot double precision :: f_rho1, f_rho2, d_drho_f_rho1 double precision :: d_dx1_f_rho1(3),d_dx_rho_f_rho(3),nume + double precision :: mu_mf_r1, dm_r1, grad_mu_mf_r1(3), grad_dm_r1(3) + double precision :: mu_mf_r2, dm_r2, grad_mu_mf_r2(3), grad_dm_r2(3) + + double precision :: num, denom, grad_denom(3), grad_num(3) + double precision :: dsqrt_dm_r1 PROVIDE murho_type PROVIDE mu_r_ct mu_erf - if(murho_type .eq. 1) then + if(murho_type .eq. 0) then + call grad_mu_of_r_mean_field(r1,mu_mf_r1, dm_r1, grad_mu_mf_r1, grad_dm_r1) + call grad_mu_of_r_mean_field(r2,mu_mf_r2, dm_r2, grad_mu_mf_r2, grad_dm_r2) + dsqrt_dm_r1 = dsqrt(dm_r1) + denom = (dsqrt_dm_r1 + dsqrt(dm_r2) ) + if(denom.lt.1.d-7)then + mu_val = 1.d+10 + mu_der = 0.d0 + return + endif + num = (dsqrt(dm_r1) * mu_mf_r1 + dsqrt(dm_r2) * mu_mf_r2) + mu_val = num / denom + grad_denom = grad_dm_r1/dsqrt_dm_r1 + grad_num = dsqrt(dm_r1) * grad_mu_mf_r1 + mu_mf_r1 * grad_dm_r1 + mu_der = (grad_num * denom - num * grad_denom)/(denom*denom) + else if(murho_type .eq. -1) then + call grad_mu_of_r_mean_field(r1,mu_mf_r1, dm_r1, grad_mu_mf_r1, grad_dm_r1) + call grad_mu_of_r_mean_field(r2,mu_mf_r2, dm_r2, grad_mu_mf_r2, grad_dm_r2) + denom = (dm_r1 + dm_r2 ) + if(denom.lt.1.d-7)then + mu_val = 1.d+10 + mu_der = 0.d0 + return + endif + num = (dm_r1 * mu_mf_r1 + dm_r2 * mu_mf_r2) + mu_val = num / denom + grad_denom = grad_dm_r1 + grad_num = dm_r1 * grad_mu_mf_r1 + mu_mf_r1 * grad_dm_r1 + mu_der = (grad_num * denom - num * grad_denom)/(denom*denom) + else if(murho_type .eq. -2) then + call grad_mu_of_r_mean_field(r1,mu_mf_r1, dm_r1, grad_mu_mf_r1, grad_dm_r1) + call grad_mu_of_r_mean_field(r2,mu_mf_r2, dm_r2, grad_mu_mf_r2, grad_dm_r2) + mu_val = 0.5d0 * (mu_mf_r1 + mu_mf_r2) + mu_der = 0.5d0 * grad_mu_mf_r1 + else if(murho_type .eq. 1) then ! ! r = 0.5 (r1 + r2) diff --git a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f index 335631028..d951db93e 100644 --- a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f +++ b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f @@ -4,7 +4,7 @@ subroutine get_grad1_u12_withsq_r1_seq(ipoint, n_grid2, resx, resy, resz, res) BEGIN_DOC - ! + ! ! grad_1 u(r1,r2) ! ! we use grid for r1 and extra_grid for r2 @@ -33,8 +33,12 @@ subroutine get_grad1_u12_withsq_r1_seq(ipoint, n_grid2, resx, resy, resz, res) r1(2) = final_grid_points(2,ipoint) r1(3) = final_grid_points(3,ipoint) - if( (j2e_type .eq. "Mu") .or. & - (j2e_type .eq. "Mur") .or. & + if( (j2e_type .eq. "Mu") .or. & + (j2e_type .eq. "Mur") .or. & + (j2e_type .eq. "Jpsi") .or. & + (j2e_type .eq. "Mugauss") .or. & + (j2e_type .eq. "Murgauss") .or. & + (j2e_type .eq. "Bump") .or. & (j2e_type .eq. "Boys") ) then if(env_type .eq. "None") then @@ -167,7 +171,7 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) integer :: jpoint integer :: i_nucl, p, mpA, npA, opA double precision :: r2(3) - double precision :: dx, dy, dz, r12, tmp, r12_inv + double precision :: dx, dy, dz, r12, tmp double precision :: mu_val, mu_tmp, mu_der(3) double precision :: rn(3), f1A, grad1_f1A(3), f2A, grad2_f2A(3), g12, grad1_g12(3) double precision :: tmp1, tmp2 @@ -181,7 +185,7 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) ! d/dy1 j(mu,r12) = 0.5 * [(1 - erf(mu * r12)) / r12] * (y1 - y2) ! d/dz1 j(mu,r12) = 0.5 * [(1 - erf(mu * r12)) / r12] * (z1 - z2) - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) @@ -191,66 +195,107 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dx * dx + dy * dy + dz * dz - - if(r12 .lt. 1d-20) then - gradx(jpoint) = 0.d0 - grady(jpoint) = 0.d0 - gradz(jpoint) = 0.d0 + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) then + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 cycle endif - r12_inv = 1.d0/dsqrt(r12) - r12 = r12*r12_inv - - tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) * r12_inv + tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 gradx(jpoint) = tmp * dx grady(jpoint) = tmp * dy gradz(jpoint) = tmp * dz enddo - elseif(j2e_type .eq. "Mur") then + else if(j2e_type .eq. "Mugauss") then - ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) - ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + ! d/dx1 j(mu,r12) = 0.5 * [(1 - erf(mu * r12)) / r12 - mu*c*r12*exp(-(mu*alpha*r12)^2] * (x1 - x2) - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) r2(3) = final_grid_points_extra(3,jpoint) - dx = r1(1) - r2(1) - dy = r1(2) - r2(2) - dz = r1(3) - r2(3) - - r12 = dx * dx + dy * dy + dz * dz + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) - if(r12 .lt. 1d-20) then - gradx(jpoint) = 0.d0 - grady(jpoint) = 0.d0 - gradz(jpoint) = 0.d0 + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) then + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 cycle endif - r12_inv = 1.d0/dsqrt(r12) - r12 = r12*r12_inv + double precision :: r12_tmp + r12_tmp = mu_erf * r12 + ! gradient of j(mu,r12) + tmp = 0.5d0 * (1.d0 - derf(r12_tmp)) / r12 + ! gradient of gaussian additional term + r12_tmp *= alpha_mu_gauss + r12_tmp *= r12_tmp + tmp += -0.5d0 * mu_erf * c_mu_gauss * r12 * dexp(-r12_tmp)/r12 + + gradx(jpoint) = tmp * dx + grady(jpoint) = tmp * dy + gradz(jpoint) = tmp * dz + enddo + + elseif(j2e_type .eq. "Mur".or.j2e_type .eq. "Murgauss") then + + ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) + ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) - call mu_r_val_and_grad(r1, r2, mu_val, mu_der) + do jpoint = 1, n_points_extra_final_grid ! r2 - mu_tmp = mu_val * r12 - tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + double precision :: jast, grad_jast(3) + call grad_j_sum_mu_of_r(r1,r2,jast,grad_jast) + gradx(jpoint) = grad_jast(1) + grady(jpoint) = grad_jast(2) + gradz(jpoint) = grad_jast(3) + enddo + elseif(j2e_type .eq. "Bump") then + + ! d/dx1 jbump(r1,r2) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + call get_grad_j_bump_mu_of_r(r1,r2,grad_jast) - gradx(jpoint) = tmp * mu_der(1) - grady(jpoint) = tmp * mu_der(2) - gradz(jpoint) = tmp * mu_der(3) + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) - tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) * r12_inv + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) then + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 + cycle + endif - gradx(jpoint) = gradx(jpoint) + tmp * dx - grady(jpoint) = grady(jpoint) + tmp * dy - gradz(jpoint) = gradz(jpoint) + tmp * dz + tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 + + gradx(jpoint) = 0.5d0 * tmp * dx + grady(jpoint) = 0.5d0 * tmp * dy + gradz(jpoint) = 0.5d0 * tmp * dz + gradx(jpoint) += 0.5d0 * grad_jast(1) + grady(jpoint) += 0.5d0 * grad_jast(2) + gradz(jpoint) += 0.5d0 * grad_jast(3) +! gradx(jpoint) = grad_jast(1) +! grady(jpoint) = grad_jast(2) +! gradz(jpoint) = grad_jast(3) enddo elseif(j2e_type .eq. "Boys") then @@ -264,7 +309,7 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) PROVIDE a_boys - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) @@ -273,17 +318,14 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dx * dx + dy * dy + dz * dz - + r12 = dsqrt(dx * dx + dy * dy + dz * dz) if(r12 .lt. 1d-10) then - gradx(jpoint) = 0.d0 - grady(jpoint) = 0.d0 - gradz(jpoint) = 0.d0 + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 cycle endif - r12 = dsqrt(r12) - tmp = 1.d0 + a_boys * r12 tmp = 0.5d0 / (r12 * tmp * tmp) @@ -294,13 +336,16 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) elseif(j2e_type .eq. "Boys_Handy") then - integer :: powmax - powmax = max(maxval(jBH_m),maxval(jBH_n)) - + integer :: powmax1, powmax, powmax2 double precision, allocatable :: f1A_power(:), f2A_power(:), double_p(:), g12_power(:) - allocate (f1A_power(-1:powmax), f2A_power(-1:powmax), g12_power(-1:powmax), double_p(0:powmax)) - do p=0,powmax + powmax1 = max(maxval(jBH_m), maxval(jBH_n)) + powmax2 = maxval(jBH_o) + powmax = max(powmax1, powmax2) + + allocate(f1A_power(-1:powmax), f2A_power(-1:powmax), g12_power(-1:powmax), double_p(0:powmax)) + + do p = 0, powmax double_p(p) = dble(p) enddo @@ -318,11 +363,10 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) r2(2) = final_grid_points_extra(2,jpoint) r2(3) = final_grid_points_extra(3,jpoint) - gradx(jpoint) = 0.d0 - grady(jpoint) = 0.d0 - gradz(jpoint) = 0.d0 - - do i_nucl = 1, nucl_num + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 + do i_nucl = 1, nucl_num rn(1) = nucl_coord(i_nucl,1) rn(2) = nucl_coord(i_nucl,2) @@ -332,61 +376,59 @@ subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) call jBH_elem_fct_grad(jBH_en(i_nucl), r2, rn, f2A, grad2_f2A) call jBH_elem_fct_grad(jBH_ee(i_nucl), r1, r2, g12, grad1_g12) - ! Compute powers of f1A and f2A - - do p = 1, maxval(jBH_m(:,i_nucl)) + do p = 1, powmax1 f1A_power(p) = f1A_power(p-1) * f1A - enddo - - do p = 1, maxval(jBH_n(:,i_nucl)) f2A_power(p) = f2A_power(p-1) * f2A enddo - - do p = 1, maxval(jBH_o(:,i_nucl)) + do p = 1, powmax2 g12_power(p) = g12_power(p-1) * g12 enddo - - do p = 1, jBH_size mpA = jBH_m(p,i_nucl) npA = jBH_n(p,i_nucl) opA = jBH_o(p,i_nucl) tmp = jBH_c(p,i_nucl) - if(mpA .eq. npA) then - tmp = tmp * 0.5d0 - endif - -!TODO : Powers to optimize here - -! tmp1 = 0.d0 -! if(mpA .gt. 0) then -! tmp1 = tmp1 + dble(mpA) * f1A**(mpA-1) * f2A**npA -! endif -! if(npA .gt. 0) then -! tmp1 = tmp1 + dble(npA) * f1A**(npA-1) * f2A**mpA -! endif -! tmp1 = tmp1 * g12**(opA) -! -! tmp2 = 0.d0 -! if(opA .gt. 0) then -! tmp2 = tmp2 + dble(opA) * g12**(opA-1) * (f1A**(mpA) * f2A**(npA) + f1A**(npA) * f2A**(mpA)) -! endif tmp1 = double_p(mpA) * f1A_power(mpA-1) * f2A_power(npA) + double_p(npA) * f1A_power(npA-1) * f2A_power(mpA) - tmp1 = tmp1 * g12_power(opA) - - tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) - - - gradx(jpoint) = gradx(jpoint) + tmp * (tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1)) - grady(jpoint) = grady(jpoint) + tmp * (tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2)) - gradz(jpoint) = gradz(jpoint) + tmp * (tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3)) + tmp1 = tmp1 * g12_power(opA) * tmp + tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) * tmp + + !tmp1 = 0.d0 + !if(mpA .gt. 0) then + ! tmp1 = tmp1 + dble(mpA) * f1A**dble(mpA-1) * f2A**dble(npA) + !endif + !if(npA .gt. 0) then + ! tmp1 = tmp1 + dble(npA) * f1A**dble(npA-1) * f2A**dble(mpA) + !endif + !tmp1 = tmp1 * g12**dble(opA) + !tmp2 = 0.d0 + !if(opA .gt. 0) then + ! tmp2 = tmp2 + dble(opA) * g12**dble(opA-1) * (f1A**dble(mpA) * f2A**dble(npA) + f1A**dble(npA) * f2A**dble(mpA)) + !endif + +! gradx(jpoint) = gradx(jpoint) + tmp * (tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1)) +! grady(jpoint) = grady(jpoint) + tmp * (tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2)) +! gradz(jpoint) = gradz(jpoint) + tmp * (tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3)) + gradx(jpoint) = gradx(jpoint) + tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1) + grady(jpoint) = grady(jpoint) + tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2) + gradz(jpoint) = gradz(jpoint) + tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3) enddo ! p enddo ! i_nucl enddo ! jpoint + elseif(j2e_type .eq. "Jpsi") then + double precision :: grad_j_psi_r1(3),jast_psi + do jpoint = 1, n_points_extra_final_grid ! r2 + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + call get_grad_r1_jastrow_psi(r1,r2,grad_j_psi_r1,jast_psi) + gradx(jpoint) = grad_j_psi_r1(1) + grady(jpoint) = grad_j_psi_r1(2) + gradz(jpoint) = grad_j_psi_r1(3) + enddo else print *, ' Error in grad1_j12_r1_seq: Unknown j2e_type = ', j2e_type @@ -418,10 +460,10 @@ subroutine grad1_jmu_r1_seq(mu, r1, n_grid2, gradx, grady, gradz) integer :: jpoint double precision :: r2(3) - double precision :: dx, dy, dz, r12, r12_inv, tmp + double precision :: dx, dy, dz, r12, tmp - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) @@ -431,19 +473,15 @@ subroutine grad1_jmu_r1_seq(mu, r1, n_grid2, gradx, grady, gradz) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dx * dx + dy * dy + dz * dz - - if(r12 .lt. 1d-20) then - gradx(jpoint) = 0.d0 - grady(jpoint) = 0.d0 - gradz(jpoint) = 0.d0 + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) then + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 cycle endif - r12_inv = 1.d0 / dsqrt(r12) - r12 = r12 * r12_inv - - tmp = 0.5d0 * (1.d0 - derf(mu * r12)) * r12_inv + tmp = 0.5d0 * (1.d0 - derf(mu * r12)) / r12 gradx(jpoint) = tmp * dx grady(jpoint) = tmp * dy @@ -467,7 +505,7 @@ subroutine j12_r1_seq(r1, n_grid2, res) integer :: jpoint double precision :: r2(3) double precision :: dx, dy, dz - double precision :: mu_tmp, r12, mu_erf_inv + double precision :: mu_tmp, r12 PROVIDE final_grid_points_extra @@ -475,21 +513,20 @@ subroutine j12_r1_seq(r1, n_grid2, res) PROVIDE mu_erf - mu_erf_inv = 1.d0 / mu_erf - do jpoint = 1, n_points_extra_final_grid ! r2 - + do jpoint = 1, n_points_extra_final_grid ! r2 + r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) r2(3) = final_grid_points_extra(3,jpoint) - + dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) r12 = dsqrt(dx * dx + dy * dy + dz * dz) mu_tmp = mu_erf * r12 - - res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) * mu_erf_inv + + res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf enddo elseif(j2e_type .eq. "Boys") then @@ -498,7 +535,7 @@ subroutine j12_r1_seq(r1, n_grid2, res) PROVIDE a_boys - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) @@ -540,19 +577,19 @@ subroutine jmu_r1_seq(mu, r1, n_grid2, res) tmp1 = inv_sq_pi_2 / mu - do jpoint = 1, n_points_extra_final_grid ! r2 - + do jpoint = 1, n_points_extra_final_grid ! r2 + r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) r2(3) = final_grid_points_extra(3,jpoint) - + dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) r12 = dsqrt(dx * dx + dy * dy + dz * dz) tmp2 = mu * r12 - + res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(tmp2)) - tmp1 * dexp(-tmp2*tmp2) enddo @@ -579,7 +616,7 @@ subroutine env_nucl_r1_seq(n_grid2, res) res = 1.d0 - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r(1) = final_grid_points_extra(1,jpoint) r(2) = final_grid_points_extra(2,jpoint) r(3) = final_grid_points_extra(3,jpoint) @@ -598,7 +635,7 @@ subroutine env_nucl_r1_seq(n_grid2, res) res = 1.d0 - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r(1) = final_grid_points_extra(1,jpoint) r(2) = final_grid_points_extra(2,jpoint) r(3) = final_grid_points_extra(3,jpoint) @@ -618,7 +655,7 @@ subroutine env_nucl_r1_seq(n_grid2, res) res = 1.d0 - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r(1) = final_grid_points_extra(1,jpoint) r(2) = final_grid_points_extra(2,jpoint) r(3) = final_grid_points_extra(3,jpoint) @@ -636,7 +673,7 @@ subroutine env_nucl_r1_seq(n_grid2, res) res = 1.d0 - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r(1) = final_grid_points_extra(1,jpoint) r(2) = final_grid_points_extra(2,jpoint) r(3) = final_grid_points_extra(3,jpoint) @@ -666,7 +703,7 @@ subroutine env_nucl_r1_seq(n_grid2, res) subroutine get_grad1_u12_2e_r1_seq(ipoint, n_grid2, resx, resy, resz) BEGIN_DOC - ! + ! ! grad_1 u_2e(r1,r2) ! ! we use grid for r1 and extra_grid for r2 @@ -695,8 +732,12 @@ subroutine get_grad1_u12_2e_r1_seq(ipoint, n_grid2, resx, resy, resz) r1(2) = final_grid_points(2,ipoint) r1(3) = final_grid_points(3,ipoint) - if( (j2e_type .eq. "Mu") .or. & - (j2e_type .eq. "Mur") .or. & + if( (j2e_type .eq. "Mu") .or. & + (j2e_type .eq. "Mugauss") .or. & + (j2e_type .eq. "Mur") .or. & + (j2e_type .eq. "Jpsi") .or. & + (j2e_type .eq. "Murgauss") .or. & + (j2e_type .eq. "Bump") .or. & (j2e_type .eq. "Boys") ) then if(env_type .eq. "None") then @@ -786,7 +827,7 @@ subroutine get_grad1_u12_2e_r1_seq(ipoint, n_grid2, resx, resy, resz) subroutine get_u12_2e_r1_seq(ipoint, n_grid2, res) BEGIN_DOC - ! + ! ! u_2e(r1,r2) ! ! we use grid for r1 and extra_grid for r2 @@ -813,8 +854,11 @@ subroutine get_u12_2e_r1_seq(ipoint, n_grid2, res) r1(2) = final_grid_points(2,ipoint) r1(3) = final_grid_points(3,ipoint) - if( (j2e_type .eq. "Mu") .or. & - (j2e_type .eq. "Mur") .or. & + if( (j2e_type .eq. "Mu") .or. & + (j2e_type .eq. "Mur") .or. & + (j2e_type .eq. "Mugauss") .or. & + (j2e_type .eq. "Murgauss") .or. & + (j2e_type .eq. "Mugauss") .or. & (j2e_type .eq. "Boys") ) then if(env_type .eq. "None") then @@ -893,23 +937,24 @@ subroutine jBH_elem_fct_grad(alpha, r1, r2, fct, grad1_fct) + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) - tmp1 = 1.d0 / (1.d0 + alpha * dist) - fct = alpha * dist * tmp1 - - if(dist .lt. 1d-10) then - grad1_fct(1) = 0.d0 - grad1_fct(2) = 0.d0 - grad1_fct(3) = 0.d0 - else + if(dist .ge. 1d-10) then + tmp1 = 1.d0 / (1.d0 + alpha * dist) + + fct = alpha * dist * tmp1 tmp2 = alpha * tmp1 * tmp1 / dist grad1_fct(1) = tmp2 * (r1(1) - r2(1)) grad1_fct(2) = tmp2 * (r1(2) - r2(2)) grad1_fct(3) = tmp2 * (r1(3) - r2(3)) + else + grad1_fct(1) = 0.d0 + grad1_fct(2) = 0.d0 + grad1_fct(3) = 0.d0 + fct = 0.d0 endif return -end +end ! --- diff --git a/plugins/local/non_h_ints_mu/jastrow_psi.irp.f b/plugins/local/non_h_ints_mu/jastrow_psi.irp.f new file mode 100644 index 000000000..4c88e7938 --- /dev/null +++ b/plugins/local/non_h_ints_mu/jastrow_psi.irp.f @@ -0,0 +1,124 @@ +BEGIN_PROVIDER [ double precision, c_ij_ab_jastrow, (mo_num, mo_num, elec_alpha_num, elec_beta_num)] + implicit none + integer :: iunit, getUnitAndOpen + c_ij_ab_jastrow = 0.d0 + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'c_ij_ab', 'R') + read(iunit) c_ij_ab_jastrow + close(iunit) + print*,'c_ij_ab_jastrow = ' + integer :: i,j,a,b + do i = 1, elec_beta_num ! r2 + do j = 1, elec_alpha_num ! r1 + do a = elec_beta_num+1, mo_num ! r2 + do b = elec_alpha_num+1, mo_num ! r1 +! print*,b,a,j,i + print*,c_ij_ab_jastrow(b,a,j,i),b,a,j,i + if(dabs(c_ij_ab_jastrow(b,a,j,i)).lt.1.d-12)then + c_ij_ab_jastrow(b,a,j,i) = 0.d0 + endif + enddo + enddo + enddo + enddo +END_PROVIDER + +double precision function jastrow_psi(r1,r2) + implicit none + double precision, intent(in) :: r1(3), r2(3) + integer :: i,j,a,b + double precision, allocatable :: mos_array_r1(:), mos_array_r2(:) + allocate(mos_array_r1(mo_num), mos_array_r2(mo_num)) + call give_all_mos_at_r(r1,mos_array_r1) + call give_all_mos_at_r(r2,mos_array_r2) + double precision :: eps,coef, numerator,denominator + double precision :: phi_i_phi_j + eps = a_boys + jastrow_psi= 0.d0 + do i = 1, elec_beta_num ! r1 + do j = 1, elec_alpha_num ! r2 + phi_i_phi_j = mos_array_r1(i) * mos_array_r2(j) + eps + denominator = 1.d0/phi_i_phi_j + do a = elec_beta_num+1, mo_num ! r1 + do b = elec_alpha_num+1, mo_num ! r2 + coef = c_ij_ab_jastrow(b,a,j,i) + numerator = mos_array_r2(b) * mos_array_r1(a) + jastrow_psi += coef * numerator*denominator + enddo + enddo + enddo + enddo +end + +subroutine get_grad_r1_jastrow_psi(r1,r2,grad_j_psi_r1,jast) + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out):: grad_j_psi_r1(3),jast + integer :: i,j,a,b + double precision, allocatable :: mos_array_r1(:), mos_array_r2(:) + double precision, allocatable :: mos_grad_array_r1(:,:),mos_grad_array_r2(:,:) + double precision :: num_j, denom_j, num_j_grad(3), denom_j_grad(3),delta,coef + double precision :: inv_denom_j + allocate(mos_array_r1(mo_num), mos_array_r2(mo_num)) + allocate(mos_grad_array_r1(3,mo_num), mos_grad_array_r2(3,mo_num)) + delta = a_boys + call give_all_mos_and_grad_at_r(r1,mos_array_r1,mos_grad_array_r1) + call give_all_mos_and_grad_at_r(r2,mos_array_r2,mos_grad_array_r2) + grad_j_psi_r1 = 0.d0 + jast = 0.d0 + do i = 1, elec_beta_num ! r1 + do j = 1, elec_alpha_num ! r2 + call denom_jpsi(i,j,delta,mos_array_r1,mos_grad_array_r1,mos_array_r2,denom_j, denom_j_grad) + inv_denom_j = 1.d0/denom_j + do a = elec_beta_num+1, mo_num ! r1 + do b = elec_alpha_num+1, mo_num ! r2 + call numerator_psi(a,b,mos_array_r1,mos_grad_array_r1,mos_array_r2,num_j, num_j_grad) + coef = c_ij_ab_jastrow(b,a,j,i) + jast += coef * num_j * inv_denom_j + grad_j_psi_r1 += coef * (num_j_grad * denom_j - num_j * denom_j_grad) * inv_denom_j * inv_denom_j + enddo + enddo + enddo + enddo + if(jast.lt.-1.d0.or.dabs(jast).gt.1.d0)then + print*,'pb ! ' + print*,jast + print*,dsqrt(r1(1)**2+r1(2)**2+r1(3)**2),dsqrt(r2(1)**2+r2(2)**2+r2(3)**2) + print*,r1 +! print*,mos_array_r1(1:2) + print*,r2 +! print*,mos_array_r2(1:2) + stop + endif + if(log_jpsi)then + grad_j_psi_r1 = grad_j_psi_r1/(1.d0 + jast) + endif + +end + + +subroutine denom_jpsi(i,j,delta,mos_array_r1,mos_grad_array_r1,mos_array_r2,denom, grad_denom) + implicit none + integer, intent(in) :: i,j + double precision, intent(in) :: mos_array_r1(mo_num),mos_grad_array_r1(3,mo_num),mos_array_r2(mo_num),delta + double precision, intent(out) :: denom, grad_denom(3) + double precision :: coef,phi_i_phi_j,inv_phi_i_phi_j,inv_phi_i_phi_j_2 + phi_i_phi_j = mos_array_r1(i) * mos_array_r2(j) + if(phi_i_phi_j /= 0.d0)then + inv_phi_i_phi_j = 1.d0/phi_i_phi_j + inv_phi_i_phi_j_2 = 1.d0/(phi_i_phi_j * phi_i_phi_j) + else + inv_phi_i_phi_j = huge(1.0) + inv_phi_i_phi_j_2 = huge(1.d0) + endif + denom = phi_i_phi_j + delta * inv_phi_i_phi_j + grad_denom(:) = (1.d0 - delta*inv_phi_i_phi_j_2) * mos_array_r2(j) * mos_grad_array_r1(:,i) +end + +subroutine numerator_psi(a,b,mos_array_r1,mos_grad_array_r1,mos_array_r2,num, grad_num) + implicit none + integer, intent(in) :: a,b + double precision, intent(in) :: mos_array_r1(mo_num),mos_grad_array_r1(3,mo_num),mos_array_r2(mo_num) + double precision, intent(out) :: num, grad_num(3) + num = mos_array_r1(a) * mos_array_r2(b) + grad_num(:) = mos_array_r2(b) * mos_grad_array_r1(:,a) +end diff --git a/plugins/local/non_h_ints_mu/mu_of_r.irp.f b/plugins/local/non_h_ints_mu/mu_of_r.irp.f new file mode 100644 index 000000000..e9cf6c4ad --- /dev/null +++ b/plugins/local/non_h_ints_mu/mu_of_r.irp.f @@ -0,0 +1,43 @@ + +subroutine grad_mu_of_r_mean_field(r,mu_mf, dm, grad_mu_mf, grad_dm) + implicit none + BEGIN_DOC + ! returns the value and gradients of the mu(r) mean field, together with the HF density and its gradients. + END_DOC + include 'constants.include.F' + double precision, intent(in) :: r(3) + double precision, intent(out):: grad_mu_mf(3), grad_dm(3) + double precision, intent(out):: mu_mf, dm + double precision :: grad_f_mf_ab(3), grad_two_bod_dens(3),grad_dm_a(3), grad_dm_b(3) + double precision :: f_mf_ab,two_bod_dens, dm_a, dm_b + + double precision :: dist + call get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_dens, dm_a, dm_b,grad_dm_a, grad_dm_b) + + dm = dm_a + dm_b + grad_dm(1:3) = grad_dm_a(1:3) + grad_dm_b(1:3) + + if(dabs(two_bod_dens).lt.1.d-10)then + mu_mf = 1.d+10 + grad_mu_mf = 0.d0 + else + if(mu_of_r_tc=="Erfmu")then + mu_mf = 0.3333333333d0 * sqpi * (f_mf_ab/two_bod_dens + 0.25d0) + grad_mu_mf(1:3) = 0.3333333333d0 * sqpi * (grad_f_mf_ab(1:3) * two_bod_dens - f_mf_ab * grad_two_bod_dens(1:3))& + /(two_bod_dens*two_bod_dens) + else if(mu_of_r_tc=="Standard")then + mu_mf = 0.5d0 * sqpi * f_mf_ab/two_bod_dens + grad_mu_mf(1:3) = 0.5d0 * sqpi * (grad_f_mf_ab(1:3) * two_bod_dens - f_mf_ab * grad_two_bod_dens(1:3))& + /(two_bod_dens*two_bod_dens) + else if(mu_of_r_tc=="Erfmugauss")then + mu_mf = (f_mf_ab/two_bod_dens + 0.25d0)/c_mu_gauss_tot + grad_mu_mf(1:3) = 1.d0/c_mu_gauss_tot* (grad_f_mf_ab(1:3) * two_bod_dens - f_mf_ab * grad_two_bod_dens(1:3))& + /(two_bod_dens*two_bod_dens) + else + print*,'Wrong value for mu_of_r_tc !' + stop + endif + endif + +end + diff --git a/plugins/local/non_h_ints_mu/mu_of_r_mean_field.irp.f b/plugins/local/non_h_ints_mu/mu_of_r_mean_field.irp.f new file mode 100644 index 000000000..295d58c2c --- /dev/null +++ b/plugins/local/non_h_ints_mu/mu_of_r_mean_field.irp.f @@ -0,0 +1,166 @@ +BEGIN_PROVIDER [ double precision, two_e_int_mf, (elec_beta_num,elec_alpha_num,elec_beta_num,elec_alpha_num)] + implicit none + integer :: i,j,k,l + double precision :: get_two_e_integral + do i = 1, elec_alpha_num + do j = 1, elec_beta_num + do k = 1, elec_alpha_num + do l = 1, elec_beta_num + two_e_int_mf(l,k,j,i) = get_two_e_integral(l,k,j,i,mo_integrals_map) + enddo + enddo + enddo + enddo +END_PROVIDER + +subroutine get_f_mf_ab(r,f_mf_ab,two_bod_dens, dm_a, dm_b) + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out):: f_mf_ab,two_bod_dens, dm_a, dm_b + double precision, allocatable :: mos_array_r(:),mos_array_a(:), mos_array_b(:) + integer :: i,j,k,l + allocate(mos_array_r(mo_num), mos_array_a(elec_alpha_num), mos_array_b(elec_alpha_num)) + call give_all_mos_at_r(r,mos_array_r) + do i = 1, elec_alpha_num + mos_array_a(i) = mos_array_r(i) + enddo + do i = 1, elec_beta_num + mos_array_b(i) = mos_array_r(i) + enddo + + dm_a = 0.d0 + do i = 1, elec_alpha_num + dm_a += mos_array_a(i) * mos_array_a(i) + enddo + + dm_b = 0.d0 + do i = 1, elec_beta_num + dm_b += mos_array_b(i) * mos_array_b(i) + enddo + two_bod_dens = dm_a * dm_b + + f_mf_ab = 0.d0 + do i = 1, elec_alpha_num + do j = 1, elec_beta_num + do k = 1, elec_alpha_num + do l = 1, elec_beta_num + f_mf_ab += two_e_int_mf(l,k,j,i) * mos_array_a(i) * mos_array_a(k) * mos_array_b(j) * mos_array_b(l) + enddo + enddo + enddo + enddo + ! multiply by two to adapt to the N(N-1) normalization condition of the active two-rdm + f_mf_ab *= 2.d0 + two_bod_dens *= 2.d0 + +end + +subroutine get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_dens, dm_a, dm_b,grad_dm_a, grad_dm_b) + implicit none + BEGIN_DOC + ! gradient of mu(r) mean field, together with the gradient of the one- and two-body HF density. + END_DOC + double precision, intent(in) :: r(3) + double precision, intent(out) :: f_mf_ab, two_bod_dens + double precision, intent(out) :: grad_two_bod_dens(3), grad_f_mf_ab(3) + double precision, intent(out) :: dm_a, dm_b, grad_dm_a(3), grad_dm_b(3) + + double precision, allocatable :: mos_array_r(:), mos_grad_array_r(:,:) + double precision, allocatable :: mos_array_a(:), mos_array_b(:) + double precision, allocatable :: mos_grad_array_a(:,:), mos_grad_array_b(:,:) + double precision :: mo_i, mo_j, mo_k, mo_l + double precision :: grad_mo_i(3), grad_mo_j(3), grad_mo_k(3), grad_mo_l(3) + + integer :: i,j,k,l + allocate(mos_array_r(mo_num),mos_grad_array_r(3,mo_num)) + allocate(mos_array_a(elec_alpha_num), mos_array_b(elec_beta_num)) + allocate(mos_grad_array_a(3,elec_alpha_num), mos_grad_array_b(3,elec_beta_num)) + call give_all_mos_and_grad_at_r(r,mos_array_r,mos_grad_array_r) + do i = 1, elec_alpha_num + mos_array_a(i) = mos_array_r(i) + mos_grad_array_a(1:3,i) = mos_grad_array_r(1:3,i) + enddo + do i = 1, elec_beta_num + mos_array_b(i) = mos_array_r(i) + mos_grad_array_b(1:3,i) = mos_grad_array_r(1:3,i) + enddo + + ! ALPHA DENSITY AND GRADIENT + dm_a = 0.d0 + grad_dm_a = 0.d0 + do i = 1, elec_alpha_num + dm_a += mos_array_a(i) * mos_array_a(i) + grad_dm_a(1:3) += 2.d0 * mos_array_a(i) * mos_grad_array_a(1:3,i) + enddo + + ! BETA DENSITY AND GRADIENT + dm_b = 0.d0 + grad_dm_b = 0.d0 + do i = 1, elec_beta_num + dm_b += mos_array_b(i) * mos_array_b(i) + grad_dm_b(1:3) += 2.d0 * mos_array_b(i) * mos_grad_array_b(1:3,i) + enddo + ! TWO-BODY DENSITY AND GRADIENT + two_bod_dens = dm_a * dm_b + grad_two_bod_dens(1:3) = dm_a * grad_dm_b(1:3) + dm_b * grad_dm_a(1:3) + + ! F_MF and GRADIENT + grad_f_mf_ab = 0.d0 + f_mf_ab = 0.d0 + do i = 1, elec_alpha_num + mo_i = mos_array_a(i) + grad_mo_i(1:3) = mos_grad_array_a(1:3,i) + do j = 1, elec_beta_num + mo_j = mos_array_b(j) + grad_mo_j(1:3) = mos_grad_array_b(1:3,j) + do k = 1, elec_alpha_num + mo_k = mos_array_a(k) + grad_mo_k(1:3) = mos_grad_array_a(1:3,k) + do l = 1, elec_beta_num + mo_l = mos_array_b(l) + grad_mo_l(1:3) = mos_grad_array_b(1:3,l) + f_mf_ab += two_e_int_mf(l,k,j,i) * mo_i * mo_j * mo_k * mo_l + grad_f_mf_ab(1:3) += two_e_int_mf(l,k,j,i) * & + (mo_i * mo_j * mo_k * grad_mo_l(1:3) + mo_i * mo_j * grad_mo_k(1:3) * mo_l & + +mo_i * grad_mo_j(1:3) * mo_k * mo_l + grad_mo_i(1:3) * mo_j * mo_k * mo_l) + enddo + enddo + enddo + enddo + + f_mf_ab *= 2.d0 + two_bod_dens *= 2.d0 + grad_f_mf_ab *= 2.D0 + grad_two_bod_dens *= 2.d0 +end + +subroutine mu_of_r_mean_field(r,mu_mf, dm) + implicit none + include 'constants.include.F' + double precision, intent(in) :: r(3) + double precision, intent(out):: mu_mf, dm + double precision :: f_mf_ab,two_bod_dens, dm_a, dm_b + call get_f_mf_ab(r,f_mf_ab,two_bod_dens, dm_a, dm_b) + dm = dm_a + dm_b + if(dabs(two_bod_dens).lt.1.d-10)then + mu_mf = 1.d+10 + else + mu_mf = 0.5d0 * sqpi * f_mf_ab/two_bod_dens + endif +end + +subroutine mu_of_r_mean_field_tc(r,mu_mf, dm) + implicit none + include 'constants.include.F' + double precision, intent(in) :: r(3) + double precision, intent(out):: mu_mf, dm + double precision :: f_mf_ab,two_bod_dens, dm_a, dm_b + call get_f_mf_ab(r,f_mf_ab,two_bod_dens, dm_a, dm_b) + dm = dm_a + dm_b + if(dabs(two_bod_dens).lt.1.d-10)then + mu_mf = 1.d+10 + else + mu_mf = 0.3333333333d0 * sqpi * (f_mf_ab/two_bod_dens + 0.25d0) + endif +end + diff --git a/plugins/local/non_h_ints_mu/numerical_integ.irp.f b/plugins/local/non_h_ints_mu/numerical_integ.irp.f index 5436b8575..2737774ad 100644 --- a/plugins/local/non_h_ints_mu/numerical_integ.irp.f +++ b/plugins/local/non_h_ints_mu/numerical_integ.irp.f @@ -179,7 +179,7 @@ double precision function num_v_ij_erf_rk_cst_mu_env(i, j, ipoint) dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dsqrt( dx * dx + dy * dy + dz * dz ) + r12 = dsqrt(dx*dx + dy*dy + dz*dz) if(r12 .lt. 1d-10) cycle tmp1 = (derf(mu_erf * r12) - 1.d0) / r12 @@ -228,7 +228,7 @@ subroutine num_x_v_ij_erf_rk_cst_mu_env(i, j, ipoint, integ) dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dsqrt( dx * dx + dy * dy + dz * dz ) + r12 = dsqrt(dx*dx + dy*dy + dz*dz) if(r12 .lt. 1d-10) cycle tmp1 = (derf(mu_erf * r12) - 1.d0) / r12 @@ -530,7 +530,7 @@ subroutine num_int2_u_grad1u_total_env2(i, j, ipoint, integ) dx = r1(1) - r2(1) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dsqrt( dx * dx + dy * dy + dz * dz ) + r12 = dsqrt(dx*dx + dy*dy + dz*dz) if(r12 .lt. 1d-10) cycle tmp0 = env_nucl(r2) diff --git a/plugins/local/non_h_ints_mu/plot_j_gauss.irp.f b/plugins/local/non_h_ints_mu/plot_j_gauss.irp.f new file mode 100644 index 000000000..a4030d8c7 --- /dev/null +++ b/plugins/local/non_h_ints_mu/plot_j_gauss.irp.f @@ -0,0 +1,59 @@ +program plot_j_gauss + implicit none + double precision :: xmin, xmax, x, dx + double precision :: mu_min, mu_max, mu, d_mu + double precision :: pot_j_gauss,j_mu_simple,j_gauss_simple,pot_j_mu + double precision, allocatable :: mu_tab(:),j_mu(:),j_mu_gauss(:) + double precision, allocatable :: w_mu(:), w_mu_gauss(:) + + character*(128) :: output + integer :: getUnitAndOpen + integer :: i_unit_output_wee_gauss,i_unit_output_wee_mu + integer :: i_unit_output_j_gauss,i_unit_output_j_mu + output=trim(ezfio_filename)//'.w_ee_mu_gauss' + i_unit_output_wee_gauss = getUnitAndOpen(output,'w') + output=trim(ezfio_filename)//'.w_ee_mu' + i_unit_output_wee_mu = getUnitAndOpen(output,'w') + output=trim(ezfio_filename)//'.j_mu_gauss' + i_unit_output_j_gauss = getUnitAndOpen(output,'w') + output=trim(ezfio_filename)//'.j_mu' + i_unit_output_j_mu = getUnitAndOpen(output,'w') + + integer :: npt, i, j, n_mu + n_mu = 3 + allocate(mu_tab(n_mu),j_mu(n_mu),j_mu_gauss(n_mu),w_mu(n_mu), w_mu_gauss(n_mu)) + mu_min = 0.5d0 + mu_max = 2.d0 + d_mu = (mu_max - mu_min)/dble(n_mu) + mu = mu_min + do i = 1, n_mu + mu_tab(i) = mu + print*,'mu = ',mu + mu += d_mu + enddo + mu_tab(1) = 0.9d0 + mu_tab(2) = 0.95d0 + mu_tab(3) = 1.d0 + + xmin = 0.01d0 + xmax = 10.d0 + npt = 1000 + dx = (xmax - xmin)/dble(npt) + x = xmin + do i = 1, npt + do j = 1, n_mu + mu = mu_tab(j) + w_mu_gauss(j) = pot_j_gauss(x,mu) + w_mu(j) = pot_j_mu(x,mu) + j_mu(j) = j_mu_simple(x,mu) + j_mu_gauss(j) = j_gauss_simple(x,mu) + j_mu(j) + enddo + write(i_unit_output_wee_gauss,'(100(F16.10,X))')x,w_mu_gauss(:) + write(i_unit_output_wee_mu,'(100(F16.10,X))')x,w_mu(:) + write(i_unit_output_j_gauss,'(100(F16.10,X))')x,j_mu_gauss(:) + write(i_unit_output_j_mu,'(100(F16.10,X))')x,j_mu(:) + x += dx + enddo + + +end diff --git a/plugins/local/non_h_ints_mu/plot_mo.irp.f b/plugins/local/non_h_ints_mu/plot_mo.irp.f new file mode 100644 index 000000000..e1ecc7838 --- /dev/null +++ b/plugins/local/non_h_ints_mu/plot_mo.irp.f @@ -0,0 +1,19 @@ +program plot_mo + implicit none + integer :: i,npt + double precision :: xmin,xmax,dx,r(3) + double precision,allocatable :: mos_array(:) + allocate(mos_array(mo_num)) + npt = 10000 + xmin =0.d0 + xmax =10.d0 + dx=(xmax-xmin)/dble(npt) + r=0.d0 + r(1) = xmin + do i = 1, npt + call give_all_mos_at_r(r,mos_array) + write(33,'(100(F16.10,X))')r(1),mos_array(1),mos_array(2),mos_array(3) + r(1) += dx + enddo + +end diff --git a/plugins/local/non_h_ints_mu/plot_mu_of_r.irp.f b/plugins/local/non_h_ints_mu/plot_mu_of_r.irp.f index 3a5984bd3..4a3ec0d50 100644 --- a/plugins/local/non_h_ints_mu/plot_mu_of_r.irp.f +++ b/plugins/local/non_h_ints_mu/plot_mu_of_r.irp.f @@ -16,15 +16,16 @@ subroutine routine_print integer :: ipoint,nx,i double precision :: xmax,xmin,r(3),dx,sigma double precision :: mu_val, mu_der(3),dm_a,dm_b,grad,grad_dm_a(3), grad_dm_b(3) - xmax = 5.D0 - xmin = -5.D0 + xmax = 3.9D0 + xmin = -3.9D0 nx = 10000 dx = (xmax - xmin)/dble(nx) r = 0.d0 r(1) = xmin do ipoint = 1, nx - call mu_r_val_and_grad(r, r, mu_val, mu_der) - call density_and_grad_alpha_beta(r,dm_a,dm_b, grad_dm_a, grad_dm_b) +! call mu_r_val_and_grad(r, r, mu_val, mu_der) + call grad_mu_of_r_mean_field(r,mu_val, dm_a, mu_der, grad_dm_a) +! call density_and_grad_alpha_beta(r,dm_a,dm_b, grad_dm_a, grad_dm_b) sigma = 0.d0 do i = 1,3 sigma += grad_dm_a(i)**2 @@ -32,7 +33,8 @@ subroutine routine_print sigma=dsqrt(sigma) grad = mu_der(1)**2 + mu_der(2)**2 + mu_der(3)**2 grad = dsqrt(grad) - write(i_unit_output,'(100(F16.7,X))')r(1),mu_val,dm_a+dm_b,grad,sigma/dm_a + print*,r(1),mu_val + write(i_unit_output,'(100(F16.7,X))')r(1),mu_val,dm_a,grad,sigma/dm_a r(1) += dx enddo end diff --git a/plugins/local/non_h_ints_mu/pot_j_gauss.irp.f b/plugins/local/non_h_ints_mu/pot_j_gauss.irp.f new file mode 100644 index 000000000..f9a0a7bcd --- /dev/null +++ b/plugins/local/non_h_ints_mu/pot_j_gauss.irp.f @@ -0,0 +1,146 @@ +double precision function j_simple(x,mu) + implicit none + double precision, intent(in) :: x,mu + double precision :: j_mu_simple,j_gauss_simple + if(j2e_type .eq. "Mu".or.j2e_type .eq. "Mur") then + j_simple = j_mu_simple(x,mu) + else if(j2e_type .eq. "Mugauss".or.j2e_type .eq. "Murgauss") then + j_simple = j_gauss_simple(x,mu) + j_mu_simple(x,mu) + endif +end + + +double precision function j_mu_simple(x,mu) + implicit none + double precision, intent(in):: x,mu + include 'constants.include.F' + BEGIN_DOC +! j_mu(mu,x) = 0.5 x (1 - erf(mu x)) - 1/[2 sqrt(pi)mu] exp(-(x*mu)^2) + END_DOC + j_mu_simple = 0.5d0 * x * (1.D0 - derf(mu*x)) - 0.5d0 * inv_sq_pi/mu * dexp(-x*mu*x*mu) + +end + +double precision function j_gauss_simple(x,mu) + implicit none + double precision, intent(in):: x,mu + include 'constants.include.F' + BEGIN_DOC +! j_mu(mu,x) = c/[4 alpha^2 mu] exp(-(alpha * mu * x)^2) +! with c = 27/(8 sqrt(pi)), alpha=3/2 + END_DOC + double precision :: x_tmp + x_tmp = alpha_mu_gauss * mu * x + j_gauss_simple = 0.25d0 * c_mu_gauss / (alpha_mu_gauss*alpha_mu_gauss*mu) * dexp(-x_tmp*x_tmp) + +end + +double precision function j_mu_deriv(x,mu) + implicit none + BEGIN_DOC +! d/dx j_mu(mu,x) = d/dx 0.5 x (1 - erf(mu x)) - 1/[2 sqrt(pi)mu] exp(-(x*mu)^2) +! = 0.5*(1 - erf(mu x)) + END_DOC + include 'constants.include.F' + double precision, intent(in) :: x,mu + j_mu_deriv = 0.5d0 * (1.d0 - derf(mu*x)) +end + +double precision function j_mu_deriv_2(x,mu) + implicit none + BEGIN_DOC +! d^2/dx^2 j_mu(mu,x) = d^2/dx^2 0.5 x (1 - erf(mu x)) - 1/[2 sqrt(pi)mu] exp(-(x*mu)^2) +! = -mu/sqrt(pi) * exp(-(mu x)^2) + END_DOC + include 'constants.include.F' + double precision, intent(in) :: x,mu + j_mu_deriv_2 = - mu * inv_sq_pi * dexp(-x*mu*x*mu) +end + +double precision function j_gauss_deriv(x,mu) + implicit none + include 'constants.include.F' + double precision, intent(in) :: x,mu + BEGIN_DOC +! d/dx j_gauss(mu,x) = d/dx c/[4 alpha^2 mu] exp(-(alpha * mu * x)^2) +! with c = 27/(8 sqrt(pi)), alpha=3/2 +! = -0.5 * mu * c * x * exp(-(alpha * mu * x)^2) + END_DOC + double precision :: x_tmp + x_tmp = alpha_mu_gauss * mu * x + j_gauss_deriv = -0.5d0 * mu * c_mu_gauss * x * exp(-x_tmp*x_tmp) +end + +double precision function j_gauss_deriv_2(x,mu) + implicit none + include 'constants.include.F' + double precision, intent(in) :: x,mu + BEGIN_DOC +! d/dx j_gauss(mu,x) = d/dx c/[4 alpha^2 mu] exp(-(alpha * mu * x)^2) +! with c = 27/(8 sqrt(pi)), alpha=3/2 +! = 0.5 * mu * c * exp(-(alpha * mu * x)^2) * (2 (alpha*mu*x)^2 - 1) + END_DOC + double precision :: x_tmp + x_tmp = alpha_mu_gauss * mu * x + x_tmp = x_tmp * x_tmp + j_gauss_deriv_2 = 0.5d0 * mu * c_mu_gauss * exp(-x_tmp) * (2.d0*x_tmp - 1.d0) +end + +double precision function j_erf_gauss_deriv(x,mu) + implicit none + double precision, intent(in) :: x,mu + BEGIN_DOC +! d/dx (j_gauss(mu,x)+j_mu(mu,x)) + END_DOC + double precision :: j_gauss_deriv,j_mu_deriv + j_erf_gauss_deriv = j_gauss_deriv(x,mu)+j_mu_deriv(x,mu) +end + +double precision function j_erf_gauss_deriv_2(x,mu) + implicit none + double precision, intent(in) :: x,mu + BEGIN_DOC +! d^2/dx^2 (j_gauss(mu,x)+j_mu(mu,x)) + END_DOC + double precision :: j_gauss_deriv_2,j_mu_deriv_2 + j_erf_gauss_deriv_2 = j_gauss_deriv_2(x,mu)+j_mu_deriv_2(x,mu) +end + + +double precision function pot_j_gauss(x,mu) + implicit none + double precision, intent(in) :: x,mu + BEGIN_DOC + ! effective scalar potential associated with the erf_gauss correlation factor + ! + ! 1/x( 1 - 2 * d/dx j_erf_gauss(x,mu)) - d^2/dx^2 j_erf_gauss(x,mu)) - d/dx d/dx (j_erf_gauss(x,mu))^2 + END_DOC + double precision :: j_erf_gauss_deriv_2,j_erf_gauss_deriv + double precision :: deriv_1, deriv_2 + pot_j_gauss = 0.d0 + if(x.ne.0.d0)then + deriv_1 = j_erf_gauss_deriv(x,mu) + deriv_2 = j_erf_gauss_deriv_2(x,mu) + pot_j_gauss = 1.d0/x * (1.d0 - 2.d0 * deriv_1) - deriv_1 * deriv_1 - deriv_2 + endif + +end + +double precision function pot_j_mu(x,mu) + implicit none + double precision, intent(in) :: x,mu + BEGIN_DOC + ! effective scalar potential associated with the correlation factor + ! + ! 1/x( 1 - 2 * d/dx j_erf(x,mu)) - d^2/dx^2 j_erf(x,mu)) - d/dx d/dx (j_erf(x,mu))^2 + END_DOC + double precision :: j_mu_deriv_2,j_mu_deriv + double precision :: deriv_1, deriv_2 + pot_j_mu = 0.d0 + if(x.ne.0.d0)then + deriv_1 = j_mu_deriv(x,mu) + deriv_2 = j_mu_deriv_2(x,mu) + pot_j_mu= 1.d0/x * (1.d0 - 2.d0 * deriv_1) - deriv_1 * deriv_1 - deriv_2 + endif + +end diff --git a/plugins/local/non_h_ints_mu/print_jastrow_psi.irp.f b/plugins/local/non_h_ints_mu/print_jastrow_psi.irp.f new file mode 100644 index 000000000..740743cb8 --- /dev/null +++ b/plugins/local/non_h_ints_mu/print_jastrow_psi.irp.f @@ -0,0 +1,15 @@ +program print_j_psi + implicit none + integer :: i,j,a,b + do i = 1, elec_beta_num ! r2 + do j = 1, elec_alpha_num ! r1 + do a = elec_beta_num+1, mo_num ! r2 + do b = elec_alpha_num+1, mo_num ! r1 + print*,b,a,j,i + print*,c_ij_ab_jastrow(b,a,j,i) + enddo + enddo + enddo + enddo + +end diff --git a/plugins/local/non_h_ints_mu/qmckl.irp.f b/plugins/local/non_h_ints_mu/qmckl.irp.f index 1df80457f..de440f14b 100644 --- a/plugins/local/non_h_ints_mu/qmckl.irp.f +++ b/plugins/local/non_h_ints_mu/qmckl.irp.f @@ -75,3 +75,107 @@ endif END_PROVIDER + + + BEGIN_PROVIDER [ double precision, aos_in_r_array_qmckl, (ao_num,n_points_final_grid)] +&BEGIN_PROVIDER [ double precision, aos_grad_in_r_array_qmckl, (ao_num,n_points_final_grid,3)] +&BEGIN_PROVIDER [ double precision, aos_lapl_in_r_array_qmckl, (ao_num, n_points_final_grid)] + implicit none + BEGIN_DOC + ! AOS computed with qmckl + END_DOC + use qmckl + + integer*8 :: qmckl_ctx + integer(qmckl_exit_code) :: rc + + qmckl_ctx = qmckl_context_create() + + rc = qmckl_trexio_read(qmckl_ctx, trexio_file, 1_8*len(trim(trexio_filename))) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in read_trexio' + rc = qmckl_check(qmckl_ctx, rc) + stop -1 + endif + + rc = qmckl_set_point(qmckl_ctx, 'N', n_points_final_grid*1_8, final_grid_points, n_points_final_grid*3_8) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in set_electron_point' + rc = qmckl_check(qmckl_ctx, rc) + stop -1 + endif + + double precision, allocatable :: vgl(:,:,:) + allocate( vgl(ao_num,5,n_points_final_grid)) + rc = qmckl_get_ao_basis_ao_vgl_inplace(qmckl_ctx, vgl, n_points_final_grid*ao_num*5_8) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in get_ao_vgl' + rc = qmckl_check(qmckl_ctx, rc) + stop -1 + endif + + integer :: i,k + do k=1,n_points_final_grid + do i=1,ao_num + aos_in_r_array_qmckl(i,k) = vgl(i,1,k) + aos_grad_in_r_array_qmckl(i,k,1) = vgl(i,2,k) + aos_grad_in_r_array_qmckl(i,k,2) = vgl(i,3,k) + aos_grad_in_r_array_qmckl(i,k,3) = vgl(i,4,k) + aos_lapl_in_r_array_qmckl(i,k) = vgl(i,5,k) + enddo + enddo + +END_PROVIDER + + + BEGIN_PROVIDER [ double precision, mos_in_r_array_qmckl, (mo_num,n_points_final_grid)] +&BEGIN_PROVIDER [ double precision, mos_grad_in_r_array_qmckl, (mo_num,n_points_final_grid,3)] +&BEGIN_PROVIDER [ double precision, mos_lapl_in_r_array_qmckl, (mo_num, n_points_final_grid)] + implicit none + BEGIN_DOC + ! moS computed with qmckl + END_DOC + use qmckl + + integer*8 :: qmckl_ctx + integer(qmckl_exit_code) :: rc + + qmckl_ctx = qmckl_context_create() + + rc = qmckl_trexio_read(qmckl_ctx, trexio_file, 1_8*len(trim(trexio_filename))) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in read_trexio' + rc = qmckl_check(qmckl_ctx, rc) + stop -1 + endif + + rc = qmckl_set_point(qmckl_ctx, 'N', n_points_final_grid*1_8, final_grid_points, n_points_final_grid*3_8) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in set_electron_point' + rc = qmckl_check(qmckl_ctx, rc) + stop -1 + endif + + double precision, allocatable :: vgl(:,:,:) + allocate( vgl(mo_num,5,n_points_final_grid)) + rc = qmckl_get_mo_basis_mo_vgl(qmckl_ctx, vgl, n_points_final_grid*mo_num*5_8) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in get_mo_vgl' + rc = qmckl_check(qmckl_ctx, rc) + stop -1 + endif + + integer :: i,k + do k=1,n_points_final_grid + do i=1,mo_num + mos_in_r_array_qmckl(i,k) = vgl(i,1,k) + mos_grad_in_r_array_qmckl(i,k,1) = vgl(i,2,k) + mos_grad_in_r_array_qmckl(i,k,2) = vgl(i,3,k) + mos_grad_in_r_array_qmckl(i,k,3) = vgl(i,4,k) + mos_lapl_in_r_array_qmckl(i,k) = vgl(i,5,k) + enddo + enddo + +END_PROVIDER + + diff --git a/plugins/local/non_h_ints_mu/tc_integ.irp.f b/plugins/local/non_h_ints_mu/tc_integ.irp.f index 775a9e4c7..ab88dd416 100644 --- a/plugins/local/non_h_ints_mu/tc_integ.irp.f +++ b/plugins/local/non_h_ints_mu/tc_integ.irp.f @@ -44,14 +44,92 @@ elseif(tc_integ_type .eq. "numeric") then print *, ' Numerical integration over r1 and r2 will be performed' - - ! TODO combine 1shot & int2_grad1_u12_ao_num - PROVIDE int2_grad1_u12_ao_num - int2_grad1_u12_ao = int2_grad1_u12_ao_num + if(tc_save_mem) then + + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision :: mem, n_double + double precision, allocatable :: tmp(:,:,:), xx(:) + double precision, allocatable :: tmp_grad1_u12(:,:,:) + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - !PROVIDE int2_grad1_u12_ao_num_1shot - !int2_grad1_u12_ao = int2_grad1_u12_ao_num_1shot + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num), xx(n_points_extra_final_grid)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO COLLAPSE(2) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call total_memory(mem) + mem = max(1.d0, qp_max_mem - mem) + n_double = mem * 1.d8 + n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + call write_int(6, n_pass, 'Number of passes') + call write_int(6, n_blocks, 'Size of the blocks') + call write_int(6, n_rest, 'Size of the last block') + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,3)) + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, final_grid_points, xx, tmp_grad1_u12) + !$OMP DO + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1), tmp_grad1_u12(1,i_blocks,2), tmp_grad1_u12(1,i_blocks,3), xx(1)) + enddo + !$OMP END DO + !$OMP END PARALLEL + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + enddo + deallocate(tmp_grad1_u12) + if(n_rest .gt. 0) then + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,3)) + ii = n_pass*n_blocks + 1 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, final_grid_points, xx, tmp_grad1_u12) + !$OMP DO + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1), tmp_grad1_u12(1,i_rest,2), tmp_grad1_u12(1,i_rest,3), xx(1)) + enddo + !$OMP END DO + !$OMP END PARALLEL + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + deallocate(tmp_grad1_u12) + endif + deallocate(tmp,xx) + + else + ! TODO combine 1shot & int2_grad1_u12_ao_num + PROVIDE int2_grad1_u12_ao_num + int2_grad1_u12_ao = int2_grad1_u12_ao_num +! PROVIDE int2_grad1_u12_ao_num_1shot +! int2_grad1_u12_ao = int2_grad1_u12_ao_num_1shot + endif elseif(tc_integ_type .eq. "semi-analytic") then @@ -126,7 +204,7 @@ print*, ' Writing int2_grad1_u12_ao in ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao' open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") - call ezfio_set_work_empty(.False.) + call ezfio_set_work_empty(.False.) write(11) int2_grad1_u12_ao close(11) call ezfio_set_tc_keywords_io_tc_integ('Read') @@ -177,13 +255,88 @@ print *, ' Numerical integration over r1 and r2 will be performed' - ! TODO combine 1shot & int2_grad1_u12_square_ao_num + if(tc_save_mem) then - PROVIDE int2_grad1_u12_square_ao_num - int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision :: mem, n_double + double precision, allocatable :: tmp(:,:,:), xx(:,:,:) + double precision, allocatable :: tmp_grad1_u12_squared(:,:) + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO COLLAPSE(2) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call total_memory(mem) + mem = max(1.d0, qp_max_mem - mem) + n_double = mem * 1.d8 + n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + call write_int(6, n_pass, 'Number of passes') + call write_int(6, n_blocks, 'Size of the blocks') + call write_int(6, n_rest, 'Size of the last block') + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_blocks), xx(n_points_extra_final_grid,n_blocks,3)) + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, xx, final_grid_points, tmp_grad1_u12_squared) + !$OMP DO + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, xx(1,i_blocks,1), xx(1,i_blocks,2), xx(1,i_blocks,3), tmp_grad1_u12_squared(1,i_blocks)) + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) + enddo + deallocate(tmp_grad1_u12_squared, xx) + if(n_rest .gt. 0) then + ii = n_pass*n_blocks + 1 + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_rest), xx(n_points_extra_final_grid,n_rest,3)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, xx, final_grid_points, tmp_grad1_u12_squared) + !$OMP DO + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, xx(1,i_rest,1), xx(1,i_rest,2), xx(1,i_rest,3), tmp_grad1_u12_squared(1,i_rest)) + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) + deallocate(tmp_grad1_u12_squared, xx) + endif + deallocate(tmp) + + else - !PROVIDE int2_grad1_u12_square_ao_num_1shot - !int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num_1shot + ! TODO combine 1shot & int2_grad1_u12_square_ao_num + PROVIDE int2_grad1_u12_square_ao_num + int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num + !PROVIDE int2_grad1_u12_square_ao_num_1shot + !int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num_1shot + endif elseif(tc_integ_type .eq. "semi-analytic") then diff --git a/plugins/local/non_h_ints_mu/tc_integ_num.irp.f b/plugins/local/non_h_ints_mu/tc_integ_num.irp.f index e5d75c3d7..9d9601c04 100644 --- a/plugins/local/non_h_ints_mu/tc_integ_num.irp.f +++ b/plugins/local/non_h_ints_mu/tc_integ_num.irp.f @@ -63,12 +63,10 @@ do i_pass = 1, n_pass ii = (i_pass-1)*n_blocks + 1 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i_blocks, ipoint) & - !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, & - !$OMP final_grid_points, tmp_grad1_u12, & - !$OMP tmp_grad1_u12_squared) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12, tmp_grad1_u12_squared) !$OMP DO do i_blocks = 1, n_blocks ipoint = ii - 1 + i_blocks ! r1 @@ -99,12 +97,10 @@ ii = n_pass*n_blocks + 1 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i_rest, ipoint) & - !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, & - !$OMP final_grid_points, tmp_grad1_u12, & - !$OMP tmp_grad1_u12_squared) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12, tmp_grad1_u12_squared) !$OMP DO do i_rest = 1, n_rest ipoint = ii - 1 + i_rest ! r1 @@ -131,7 +127,7 @@ deallocate(tmp) call wall_time(time1) - print*, ' wall time for int2_grad1_u12_ao_num & int2_grad1_u12_square_ao_num =', time1-time0 + print*, ' wall time for int2_grad1_u12_ao_num & int2_grad1_u12_square_ao_num = (min)', (time1-time0) / 60.d0 call print_memory_usage() END_PROVIDER diff --git a/plugins/local/non_h_ints_mu/test_mu_of_r_tc.irp.f b/plugins/local/non_h_ints_mu/test_mu_of_r_tc.irp.f new file mode 100644 index 000000000..79e21d1bd --- /dev/null +++ b/plugins/local/non_h_ints_mu/test_mu_of_r_tc.irp.f @@ -0,0 +1,157 @@ +program test_mu_of_r_tc + implicit none + BEGIN_DOC +! TODO + END_DOC + ! You specify that you want to avoid any contribution from + ! orbitals coming from core + call test_grad_f_mean_field + call test_grad_mu_mf + call plot_mu_of_r_mf +end + + +subroutine test_grad_f_mean_field + implicit none + integer :: i_point,k + double precision :: weight,r(3) + double precision :: grad_f_mf_ab(3), grad_two_bod_dens(3) + double precision :: grad_dm_a(3), grad_dm_b(3) + double precision :: f_mf_ab,two_bod_dens, dm_a, dm_b + + double precision :: num_grad_f_mf_ab(3), num_grad_two_bod_dens(3) + double precision :: num_grad_dm_a(3), num_grad_dm_b(3) + double precision :: f_mf_ab_p,f_mf_ab_m + double precision :: two_bod_dens_p, two_bod_dens_m + double precision :: dm_a_p, dm_a_m + double precision :: dm_b_p, dm_b_m + double precision :: rbis(3), dr + double precision :: accu_grad_f_mf_ab(3),accu_grad_two_bod_dens(3) + double precision :: accu_grad_dm_a(3),accu_grad_dm_b(3) + double precision :: accu_f_mf_ab, accu_two_bod_dens, accu_dm_a, accu_dm_b + dr = 0.00001d0 + accu_f_mf_ab = 0.d0 + accu_two_bod_dens = 0.d0 + accu_dm_a = 0.d0 + accu_dm_b = 0.d0 + + accu_grad_f_mf_ab = 0.d0 + accu_grad_two_bod_dens = 0.d0 + accu_grad_dm_a = 0.d0 + accu_grad_dm_b = 0.d0 + do i_point = 1, n_points_final_grid + r(1:3) = final_grid_points(1:3,i_point) + weight = final_weight_at_r_vector(i_point) + call get_grad_f_mf_ab(r,grad_f_mf_ab, grad_two_bod_dens,f_mf_ab,two_bod_dens, dm_a, dm_b,grad_dm_a, grad_dm_b) + call get_f_mf_ab(r,f_mf_ab_p,two_bod_dens_p, dm_a_p, dm_b_p) + accu_f_mf_ab += weight * dabs(f_mf_ab - f_mf_ab_p) + accu_two_bod_dens += weight * dabs(two_bod_dens - two_bod_dens_p) + accu_dm_a += weight*dabs(dm_a - dm_a_p) + accu_dm_b += weight*dabs(dm_b - dm_b_p) + do k = 1, 3 + rbis = r + rbis(k) += dr + call get_f_mf_ab(rbis,f_mf_ab_p,two_bod_dens_p, dm_a_p, dm_b_p) + rbis = r + rbis(k) -= dr + call get_f_mf_ab(rbis,f_mf_ab_m,two_bod_dens_m, dm_a_m, dm_b_m) + num_grad_f_mf_ab(k) = (f_mf_ab_p - f_mf_ab_m)/(2.d0*dr) + num_grad_two_bod_dens(k) = (two_bod_dens_p - two_bod_dens_m)/(2.d0*dr) + num_grad_dm_a(k) = (dm_a_p - dm_a_m)/(2.d0*dr) + num_grad_dm_b(k) = (dm_b_p - dm_b_m)/(2.d0*dr) + enddo + do k = 1, 3 + accu_grad_f_mf_ab(k) += weight * dabs(grad_f_mf_ab(k) - num_grad_f_mf_ab(k)) + accu_grad_two_bod_dens(k) += weight * dabs(grad_two_bod_dens(k) - num_grad_two_bod_dens(k)) + accu_grad_dm_a(k) += weight * dabs(grad_dm_a(k) - num_grad_dm_a(k)) + accu_grad_dm_b(k) += weight * dabs(grad_dm_b(k) - num_grad_dm_b(k)) + enddo + enddo + print*,'accu_f_mf_ab = ',accu_f_mf_ab + print*,'accu_two_bod_dens = ',accu_two_bod_dens + print*,'accu_dm_a = ',accu_dm_a + print*,'accu_dm_b = ',accu_dm_b + print*,'accu_grad_f_mf_ab = ' + print*,accu_grad_f_mf_ab + print*,'accu_grad_two_bod_dens = ' + print*,accu_grad_two_bod_dens + print*,'accu_dm_a = ' + print*,accu_grad_dm_a + print*,'accu_dm_b = ' + print*,accu_grad_dm_b + +end + +subroutine test_grad_mu_mf + implicit none + integer :: i_point,k + double precision :: weight,r(3),rbis(3) + double precision :: mu_mf, dm,grad_mu_mf(3), grad_dm(3) + double precision :: mu_mf_p, mu_mf_m, dm_m, dm_p, num_grad_mu_mf(3),dr, num_grad_dm(3) + double precision :: accu_mu, accu_dm, accu_grad_dm(3), accu_grad_mu_mf(3) + dr = 0.00001d0 + accu_grad_mu_mf = 0.d0 + accu_mu = 0.d0 + accu_grad_dm = 0.d0 + accu_dm = 0.d0 + do i_point = 1, n_points_final_grid + r(1:3) = final_grid_points(1:3,i_point) + weight = final_weight_at_r_vector(i_point) + call grad_mu_of_r_mean_field(r,mu_mf, dm, grad_mu_mf, grad_dm) + call mu_of_r_mean_field(r,mu_mf_p, dm_p) + accu_mu += weight*dabs(mu_mf_p - mu_mf) + accu_dm += weight*dabs(dm_p - dm) + do k = 1, 3 + rbis = r + rbis(k) += dr + call mu_of_r_mean_field(rbis,mu_mf_p, dm_p) + rbis = r + rbis(k) -= dr + call mu_of_r_mean_field(rbis,mu_mf_m, dm_m) + + num_grad_mu_mf(k) = (mu_mf_p - mu_mf_m)/(2.d0*dr) + num_grad_dm(k) = (dm_p - dm_m)/(2.d0*dr) + enddo + do k = 1, 3 + accu_grad_dm(k)+= weight *dabs(num_grad_dm(k) - grad_dm(k)) + accu_grad_mu_mf(k)+= weight *dabs(num_grad_mu_mf(k) - grad_mu_mf(k)) + enddo + enddo + print*,'accu_mu = ',accu_mu + print*,'accu_dm = ',accu_dm + print*,'accu_grad_dm = ' + print*, accu_grad_dm + print*,'accu_grad_mu_mf = ' + print*, accu_grad_mu_mf + +end + +subroutine plot_mu_of_r_mf + implicit none + include 'constants.include.F' + integer :: ipoint,npoint + double precision :: dx,r(3),xmax,xmin + double precision :: accu_mu,accu_nelec,mu_mf, dm,mu_mf_tc + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + output=trim(ezfio_filename)//'.mu_mf' + i_unit_output = getUnitAndOpen(output,'w') + xmax = 5.D0 + xmin = 0.d0 + npoint = 10000 + dx = (xmax - xmin)/dble(npoint) + r = 0.d0 + r(1) = xmin + accu_mu = 0.d0 + accu_nelec = 0.d0 + do ipoint = 1, npoint + call mu_of_r_mean_field(r,mu_mf, dm) + call mu_of_r_mean_field_tc(r,mu_mf_tc, dm) + write(i_unit_output,'(100(F16.10,X))')r(1),mu_mf,mu_mf_tc,dm + accu_mu += mu_mf * dm * r(1)**2*dx*4.D0*pi + accu_nelec += dm * r(1)**2*dx*4.D0*pi + r(1) += dx + enddo + print*,'nelec = ',accu_nelec + print*,'mu average = ',accu_mu/accu_nelec +end diff --git a/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f b/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f index 464a1c1f3..4c63dec43 100644 --- a/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f +++ b/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f @@ -1125,6 +1125,7 @@ subroutine test_fit_coef_A1() double precision :: accu, norm, diff double precision, allocatable :: A1(:,:) double precision, allocatable :: A2(:,:,:,:), tmp(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:,:) ! --- @@ -1165,16 +1166,17 @@ subroutine test_fit_coef_A1() call wall_time(t1) - allocate(tmp(ao_num,ao_num,n_points_final_grid)) + allocate(tmp1(ao_num,ao_num,n_points_final_grid), tmp2(ao_num,ao_num,n_points_final_grid)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp) + !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp1, tmp2) !$OMP DO COLLAPSE(2) do j = 1, ao_num do i = 1, ao_num do ipoint = 1, n_points_final_grid - tmp(i,j,ipoint) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) + tmp1(i,j,ipoint) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) + tmp2(i,j,ipoint) = aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) enddo enddo enddo @@ -1184,9 +1186,9 @@ subroutine test_fit_coef_A1() allocate(A2(ao_num,ao_num,ao_num,ao_num)) call dgemm( "N", "T", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , tmp(1,1,1), ao_num*ao_num, tmp(1,1,1), ao_num*ao_num & + , tmp1(1,1,1), ao_num*ao_num, tmp2(1,1,1), ao_num*ao_num & , 0.d0, A2(1,1,1,1), ao_num*ao_num) - deallocate(tmp) + deallocate(tmp1, tmp2) call wall_time(t2) print*, ' WALL TIME FOR A2 (min) =', (t2-t1)/60.d0 @@ -1238,6 +1240,7 @@ subroutine test_fit_coef_inv() double precision, allocatable :: A1(:,:), A1_inv(:,:), A1_tmp(:,:) double precision, allocatable :: A2(:,:,:,:), tmp(:,:,:), A2_inv(:,:,:,:) double precision, allocatable :: U(:,:), D(:), Vt(:,:), work(:), A2_tmp(:,:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:,:) cutoff_svd = 5d-8 @@ -1286,16 +1289,17 @@ subroutine test_fit_coef_inv() call wall_time(t1) - allocate(tmp(n_points_final_grid,ao_num,ao_num)) + allocate(tmp1(n_points_final_grid,ao_num,ao_num), tmp2(n_points_final_grid,ao_num,ao_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp) + !$OMP SHARED (n_points_final_grid, ao_num, final_weight_at_r_vector, aos_in_r_array_transp, tmp1, tmp2) !$OMP DO COLLAPSE(2) do j = 1, ao_num do i = 1, ao_num do ipoint = 1, n_points_final_grid - tmp(ipoint,i,j) = dsqrt(final_weight_at_r_vector(ipoint)) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) + tmp1(ipoint,i,j) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) + tmp2(ipoint,i,j) = aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) enddo enddo enddo @@ -1304,11 +1308,11 @@ subroutine test_fit_coef_inv() allocate(A2(ao_num,ao_num,ao_num,ao_num)) - call dgemm( "T", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , tmp(1,1,1), n_points_final_grid, tmp(1,1,1), n_points_final_grid & + call dgemm( "T", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , tmp1(1,1,1), n_points_final_grid, tmp2(1,1,1), n_points_final_grid & , 0.d0, A2(1,1,1,1), ao_num*ao_num) - deallocate(tmp) + deallocate(tmp1, tmp2) call wall_time(t2) print*, ' WALL TIME FOR A2 (min) =', (t2-t1)/60.d0 diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f index 9d3cf565e..634d7e87d 100644 --- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f +++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f @@ -33,8 +33,10 @@ double precision :: weight1, ao_k_r, ao_i_r double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq double precision :: time0, time1 - double precision, allocatable :: b_mat(:,:,:,:), c_mat(:,:,:) + double precision, allocatable :: c_mat(:,:,:) + logical, external :: ao_two_e_integral_zero double precision, external :: get_ao_two_e_integral + double precision, external :: ao_two_e_integral PROVIDe tc_integ_type PROVIDE env_type @@ -53,7 +55,9 @@ print*, ' Reading ao_two_e_tc_tot from ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot' open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="read") - read(11) ao_two_e_tc_tot + do i = 1, ao_num + read(11) ao_two_e_tc_tot(:,:,:,i) + enddo close(11) else @@ -65,27 +69,59 @@ PROVIDE int2_grad1_u12_square_ao - allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + if(tc_save_mem_loops) then - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & - !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) - do i = 1, ao_num - do k = 1, ao_num - do ipoint = 1, n_points_final_grid - c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + print*, ' LOOPS are used to evaluate Hermitian part of ao_two_e_tc_tot ...' + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l, ipoint, ao_i_r, ao_k_r, weight1) & + !$OMP SHARED (ao_num, n_points_final_grid, ao_two_e_tc_tot, & + !$OMP aos_in_r_array_transp, final_weight_at_r_vector, int2_grad1_u12_square_ao) + !$OMP DO COLLAPSE(3) + do i = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + do j = 1, ao_num + ao_two_e_tc_tot(j,l,k,i) = 0.d0 + do ipoint = 1, n_points_final_grid + weight1 = final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + ao_two_e_tc_tot(j,l,k,i) = ao_two_e_tc_tot(j,l,k,i) + int2_grad1_u12_square_ao(j,l,ipoint) * weight1 * ao_i_r * ao_k_r + enddo + enddo + enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & - , 0.d0, ao_two_e_tc_tot, ao_num*ao_num) + else + print*, ' DGEMM are used to evaluate Hermitian part of ao_two_e_tc_tot ...' + + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 0.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) + deallocate(c_mat) + endif + FREE int2_grad1_u12_square_ao if( (tc_integ_type .eq. "semi-analytic") .and. & @@ -96,6 +132,7 @@ ! an additional term is added here directly instead of ! being added in int2_grad1_u12_square_ao for performance + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) PROVIDE int2_u2_env2 !$OMP PARALLEL & @@ -127,10 +164,13 @@ , int2_u2_env2(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) + deallocate(c_mat) FREE int2_u2_env2 endif ! use_ipp - deallocate(c_mat) + call wall_time(time1) + print*, ' done with Hermitian part after (min) ', (time1 - time0) / 60.d0 + call print_memory_usage() ! --- @@ -138,39 +178,71 @@ PROVIDE int2_grad1_u12_ao - allocate(b_mat(n_points_final_grid,ao_num,ao_num,3)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & - !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & - !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) - do i = 1, ao_num - do k = 1, ao_num - do ipoint = 1, n_points_final_grid + if(tc_save_mem_loops) then - weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) - ao_i_r = aos_in_r_array_transp(ipoint,i) - ao_k_r = aos_in_r_array_transp(ipoint,k) + print*, ' LOOPS are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...' - b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) - b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) - b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l, ipoint, ao_i_r, ao_k_r, weight1) & + !$OMP SHARED (ao_num, n_points_final_grid, ao_two_e_tc_tot, & + !$OMP aos_in_r_array_transp, final_weight_at_r_vector, & + !$OMP int2_grad1_u12_ao, aos_grad_in_r_array_transp_bis) + !$OMP DO COLLAPSE(3) + do i = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + do j = 1, ao_num + do ipoint = 1, n_points_final_grid + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + ao_two_e_tc_tot(j,l,k,i) = ao_two_e_tc_tot(j,l,k,i) & + - weight1 * int2_grad1_u12_ao(j,l,ipoint,1) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) & + - weight1 * int2_grad1_u12_ao(j,l,ipoint,2) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) & + - weight1 * int2_grad1_u12_ao(j,l,ipoint,3) * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) + enddo + enddo + enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL + + else + + print*, ' DGEMM are used to evaluate non-Hermitian part of ao_two_e_tc_tot ...' + + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + do m = 1, 3 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, c_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector, m) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + c_mat(ipoint,k,i) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,m) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,m)) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - do m = 1, 3 - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & - , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & - , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) - enddo - deallocate(b_mat) + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & + , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 1.d0, ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) + enddo + deallocate(c_mat) - FREE int2_grad1_u12_ao + end if if(tc_integ_type .eq. "semi-analytic") then FREE int2_grad1_u2e_ao @@ -178,30 +250,72 @@ endif ! var_tc + call wall_time(time1) + print*, ' done with non-Hermitian part after (min) ', (time1 - time0) / 60.d0 + call print_memory_usage() + ! --- call sum_A_At(ao_two_e_tc_tot(1,1,1,1), ao_num*ao_num) - PROVIDE ao_integrals_map - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & - !$OMP PRIVATE(i, j, k, l) - !$OMP DO - do j = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do k = 1, ao_num - ! < 1:i, 2:j | 1:k, 2:l > - ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + ! --- + + logical :: integ_zero + double precision :: integ_val + + print*, ' adding ERI to ao_two_e_tc_tot ...' + + if(tc_save_mem) then + print*, ' ao_integrals_map will not be used' + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i, j, k, l, integ_zero, integ_val) & + !$OMP SHARED(ao_num, ao_two_e_tc_tot) + !$OMP DO COLLAPSE(3) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + integ_zero = ao_two_e_integral_zero(i,j,k,l) + if(.not. integ_zero) then + ! i,k : r1 j,l : r2 + integ_val = ao_two_e_integral(i,k,j,l) + ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + integ_val + endif + enddo enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL + else +! print*, ' ao_integrals_map will be used' +! PROVIDE ao_integrals_map + print*,'Cholesky vectors will be used ' + double precision :: get_ao_integ_chol,eri + eri = get_ao_integ_chol(1,1,1,1) ! FOR OPENMP + !$OMP PARALLEL DEFAULT(NONE) & +!!! !$OMP SHARED(ao_num, ao_two_e_tc_tot, ao_integrals_map) & + !$OMP SHARED(ao_num, ao_two_e_tc_tot) & + !$OMP PRIVATE(i, j, k, l,eri) + !$OMP DO COLLAPSE(3) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ! < 1:i, 2:j | 1:k, 2:l > +! eri = get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + eri = get_ao_integ_chol(i,k,j,l) + ao_two_e_tc_tot(k,i,l,j) = ao_two_e_tc_tot(k,i,l,j) + eri + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL +! FREE ao_integrals_map + endif - if(tc_integ_type .eq. "numeric") then + if((tc_integ_type .eq. "numeric") .and. (.not. tc_save_mem)) then FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num endif @@ -211,7 +325,9 @@ print*, ' Saving ao_two_e_tc_tot in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot' open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="write") call ezfio_set_work_empty(.False.) - write(11) ao_two_e_tc_tot + do i = 1, ao_num + write(11) ao_two_e_tc_tot(:,:,:,i) + enddo close(11) call ezfio_set_tc_keywords_io_tc_integ('Read') endif diff --git a/plugins/local/non_hermit_dav/biorthog.irp.f b/plugins/local/non_hermit_dav/biorthog.irp.f index 2229e17d1..4b6182287 100644 --- a/plugins/local/non_hermit_dav/biorthog.irp.f +++ b/plugins/local/non_hermit_dav/biorthog.irp.f @@ -1,254 +1,3 @@ -subroutine non_hrmt_diag_split_degen(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors - ! - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:) - - integer :: i, j, n_degen,k , iteration - integer :: n_good - double precision :: shift,shift_current - double precision :: r,thr - integer, allocatable :: list_good(:), iorder_origin(:),iorder(:) - double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:) - double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:) - double precision, allocatable :: im_part(:),re_part(:) - - - print*,'Computing the left/right eigenvectors ...' - print*,'Using the degeneracy splitting algorithm' - - - ! pre-processing the matrix :: sorting by diagonal elements - allocate(reigvec_tmp(n,n), leigvec_tmp(n,n)) - allocate(diag_elem(n),iorder_origin(n),A_save(n,n)) - do i = 1, n - iorder_origin(i) = i - diag_elem(i) = A(i,i) - enddo - call dsort(diag_elem, iorder_origin, n) - do i = 1, n - do j = 1, n - A_save(j,i) = A(iorder_origin(j),iorder_origin(i)) - enddo - enddo - - shift = 1.d-15 - shift_current = shift - iteration = 1 - logical :: good_ortho - good_ortho = .False. - do while(n_real_eigv.ne.n.or. .not.good_ortho) - if(shift.gt.1.d-3)then - print*,'shift > 1.d-3 !!' - print*,'Your matrix intrinsically contains complex eigenvalues' - stop - endif - print*,'***** iteration = ',iteration - print*,'shift = ',shift - allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) - Aw = A_save - do i = 1, n - do j = 1, n - if(dabs(Aw(j,i)).lt.shift)then - Aw(j,i) = 0.d0 - endif - enddo - enddo - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - allocate(im_part(n),iorder(n)) - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - - shift_current = max(10.d0 * dabs(im_part(1)),shift) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - Aw = A_save - call split_matrix_degen(Aw,n,shift_current) - deallocate( im_part, iorder ) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - ! You track the real eigenvalues - n_good = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_good += 1 - else - print*,'Found an imaginary component to eigenvalue' - print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - allocate( list_good(n_good), iorder(n_good) ) - n_good = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_good += 1 - list_good(n_good) = i - eigval(n_good) = WR(i) - endif - enddo - deallocate( WR, WI ) - - n_real_eigv = n_good - do i = 1, n_good - iorder(i) = i - enddo - - ! You sort the real eigenvalues - call dsort(eigval, iorder, n_good) - - reigvec(:,:) = 0.d0 - leigvec(:,:) = 0.d0 - do i = 1, n_real_eigv - do j = 1, n - reigvec_tmp(j,i) = VR(j,list_good(iorder(i))) - leigvec_tmp(j,i) = Vl(j,list_good(iorder(i))) - enddo - enddo - - if(n_real_eigv == n)then - allocate(S(n,n)) - call check_bi_ortho(reigvec_tmp,leigvec_tmp,n,S,accu_nd) - print*,'accu_nd = ',accu_nd - double precision :: accu_nd - good_ortho = accu_nd .lt. 1.d-10 - deallocate(S) - endif - - deallocate( list_good, iorder ) - deallocate( VL, VR, Aw) - shift *= 10.d0 - iteration += 1 - enddo - do i = 1, n - do j = 1, n - reigvec(iorder_origin(j),i) = reigvec_tmp(j,i) - leigvec(iorder_origin(j),i) = leigvec_tmp(j,i) - enddo - enddo - -end - -! --- - -subroutine non_hrmt_real_diag_new(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors - ! - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - - integer :: i, j - integer :: n_good - double precision :: shift,shift_current - double precision :: r,thr - integer, allocatable :: list_good(:), iorder(:) - double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:) - double precision, allocatable :: Aw(:,:) - double precision, allocatable :: im_part(:) - - - print*,'Computing the left/right eigenvectors ...' - - ! Eigvalue(n) = WR(n) + i * WI(n) - shift = 1.d-10 - do while(n_real_eigv.ne.n.or.shift.gt.1.d-3) - allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) - Aw = A - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - allocate(im_part(n), iorder(n)) - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - shift_current = max(10.d0 * dabs(im_part(1)),shift) - print*,'adding random number of magnitude ',shift_current - Aw = A - do i = 1, n - call RANDOM_NUMBER(r) - Aw(i,i) += shift_current * r - enddo - deallocate( im_part, iorder ) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - - ! You track the real eigenvalues - thr = 1.d-10 - n_good = 0 - do i = 1, n - if(dabs(WI(i)).lt.thr)then - n_good += 1 - else - print*,'Found an imaginary component to eigenvalue' - print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - - allocate( list_good(n_good), iorder(n_good) ) - n_good = 0 - do i = 1, n - if(dabs(WI(i)).lt.thr)then - n_good += 1 - list_good(n_good) = i - eigval(n_good) = WR(i) - endif - enddo - - deallocate( WR, WI ) - - n_real_eigv = n_good - do i = 1, n_good - iorder(i) = i - enddo - - ! You sort the real eigenvalues - call dsort(eigval, iorder, n_good) - - reigvec(:,:) = 0.d0 - leigvec(:,:) = 0.d0 - do i = 1, n_real_eigv - do j = 1, n - reigvec(j,i) = VR(j,list_good(iorder(i))) - leigvec(j,i) = Vl(j,list_good(iorder(i))) - enddo - enddo - - deallocate( list_good, iorder ) - deallocate( VL, VR, Aw) - shift *= 10.d0 - enddo - if(shift.gt.1.d-3)then - print*,'shift > 1.d-3 !!' - print*,'Your matrix intrinsically contains complex eigenvalues' - endif - -end ! --- @@ -282,126 +31,20 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei allocate(phi_1_tilde(n),phi_2_tilde(n),chi_1_tilde(n),chi_2_tilde(n)) - - ! ------------------------------------------------------------------------------------- - ! - - !print *, ' ' - !print *, ' Computing the left/right eigenvectors ...' - !print *, ' ' - allocate(WR(n), WI(n), VL(n,n), VR(n,n)) - - !print *, ' fock matrix' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') A(i,:) - !enddo - - !thr_cut = 1.d-15 - !call cancel_small_elmts(A, n, thr_cut) - !call lapack_diag_non_sym_right(n, A, WR, WI, VR) call lapack_diag_non_sym(n, A, WR, WI, VL, VR) - !call lapack_diag_non_sym_new(n, A, WR, WI, VL, VR) - - - - !print *, ' ' - !print *, ' eigenvalues' - i = 1 - do while(i .le. n) - !write(*, '(I3,X,1000(F16.10,X))')i, WR(i), WI(i) - if(.false.)then - if(WI(i).ne.0.d0)then - print*,'*****************' - print*,'WARNING ! IMAGINARY EIGENVALUES !!!' - write(*, '(1000(F16.10,X))') WR(i), WI(i+1) - ! phi = VR(:,i), psi = VR(:,i+1), |Phi_i> = phi + j psi , |Phi_i+1> = phi - j psi - ! chi = VL(:,i), xhi = VL(:,i+1), |Chi_i> = chi + j xhi , |Chi_i+1> = chi - j xhi - ! - accu_chi_phi = 0.d0 - accu_xhi_psi = 0.d0 - accu_chi_psi = 0.d0 - accu_xhi_phi = 0.d0 - double precision :: accu_chi_phi, accu_xhi_psi, accu_chi_psi, accu_xhi_phi - double precision :: mat_ovlp(2,2),eigval_tmp(2),eigvec(2,2),mat_ovlp_orig(2,2) - do j = 1, n - accu_chi_phi += VL(j,i) * VR(j,i) - accu_xhi_psi += VL(j,i+1) * VR(j,i+1) - accu_chi_psi += VL(j,i) * VR(j,i+1) - accu_xhi_phi += VL(j,i+1) * VR(j,i) - enddo - mat_ovlp_orig(1,1) = accu_chi_phi - mat_ovlp_orig(2,1) = accu_xhi_phi - mat_ovlp_orig(1,2) = accu_chi_psi - mat_ovlp_orig(2,2) = accu_xhi_psi - print*,'old overlap matrix ' - write(*,'(100(F16.10,X))')mat_ovlp_orig(1:2,1) - write(*,'(100(F16.10,X))')mat_ovlp_orig(1:2,2) - - - mat_ovlp(1,1) = accu_xhi_phi - mat_ovlp(2,1) = accu_chi_phi - mat_ovlp(1,2) = accu_xhi_psi - mat_ovlp(2,2) = accu_chi_psi - !print*,'accu_chi_phi = ',accu_chi_phi - !print*,'accu_xhi_psi = ',accu_xhi_psi - !print*,'accu_chi_psi = ',accu_chi_psi - !print*,'accu_xhi_phi = ',accu_xhi_phi - print*,'new overlap matrix ' - write(*,'(100(F16.10,X))')mat_ovlp(1:2,1) - write(*,'(100(F16.10,X))')mat_ovlp(1:2,2) - call lapack_diag(eigval_tmp,eigvec,mat_ovlp,2,2) - print*,'eigval_tmp(1) = ',eigval_tmp(1) - print*,'eigvec(1) = ',eigvec(1:2,1) - print*,'eigval_tmp(2) = ',eigval_tmp(2) - print*,'eigvec(2) = ',eigvec(1:2,2) - print*,'*****************' - phi_1_tilde = 0.d0 - phi_2_tilde = 0.d0 - chi_1_tilde = 0.d0 - chi_2_tilde = 0.d0 - do j = 1, n - phi_1_tilde(j) += VR(j,i) * eigvec(1,1) + VR(j,i+1) * eigvec(2,1) - phi_2_tilde(j) += VR(j,i) * eigvec(1,2) + VR(j,i+1) * eigvec(2,2) - chi_1_tilde(j) += VL(j,i+1) * eigvec(1,1) + VL(j,i) * eigvec(2,1) - chi_2_tilde(j) += VL(j,i+1) * eigvec(1,2) + VL(j,i) * eigvec(2,2) - enddo - VR(1:n,i) = phi_1_tilde(1:n) - VR(1:n,i+1) = phi_2_tilde(1:n) -! Vl(1:n,i) = -chi_1_tilde(1:n) -! Vl(1:n,i+1) = chi_2_tilde(1:n) - i+=1 - endif - endif - i+=1 - enddo - !print *, ' right eigenvect bef' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') VR(:,i) - !enddo - !print *, ' left eigenvect bef' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') VL(:,i) - !enddo thr_diag = 1d-06 thr_norm = 1d+10 - !call check_EIGVEC(n, n, A, WR, VL, VR, thr_diag, thr_norm, .false.) - - ! - ! ------------------------------------------------------------------------------------- ! --- - ! ------------------------------------------------------------------------------------- - ! track & sort the real eigenvalues + ! track & sort the real eigenvalues n_good = 0 - !thr = 100d0 - thr = Im_thresh_tcscf + thr = Im_thresh_tc do i = 1, n - !print*, 'Re(i) + Im(i)', WR(i), WI(i) if(dabs(WI(i)) .lt. thr) then n_good += 1 else @@ -410,11 +53,12 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei endif enddo - if(n_good.ne.n)then - print*,'there are some imaginary eigenvalues ' - thr_diag = 1d-03 - n_good = n + if(n_good.ne.n) then + print*,'there are some imaginary eigenvalues ' + thr_diag = 1d-03 + n_good = n endif + allocate(list_good(n_good), iorder(n_good)) n_good = 0 @@ -446,26 +90,9 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei ASSERT(n==n_real_eigv) - !print *, ' eigenvalues' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') eigval(i) - !enddo - !print *, ' right eigenvect aft ord' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') reigvec(:,i) - !enddo - !print *, ' left eigenvect aft ord' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') leigvec(:,i) - !enddo - - ! - ! ------------------------------------------------------------------------------------- - ! --- - ! ------------------------------------------------------------------------------------- - ! check bi-orthogonality + ! check bi-orthogonality thr_diag = 10.d0 thr_norm = 1d+10 @@ -495,8 +122,6 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei print *, ' lapack vectors are not normalized neither bi-orthogonalized' - ! --- - allocate(deg_num(n)) call reorder_degen_eigvec(n, deg_num, eigval, leigvec, reigvec) call impose_biorthog_degen_eigvec(n, deg_num, eigval, leigvec, reigvec) @@ -508,700 +133,36 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei endif call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .true.) - !call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.) - deallocate(S) endif - ! - ! ------------------------------------------------------------------------------------- - return end ! --- -subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigval) +subroutine check_bi_ortho(reigvec, leigvec, n, S, accu_nd) BEGIN_DOC - ! - ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" + ! retunrs the overlap matrix S = Leigvec^T Reigvec ! + ! and the square root of the sum of the squared off-diagonal elements of S END_DOC implicit none integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - - integer :: i, j - integer :: n_good - double precision :: thr - double precision :: accu_nd - - integer, allocatable :: list_good(:), iorder(:) - double precision, allocatable :: Aw(:,:) - double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) - double precision, allocatable :: S(:,:) - double precision :: r + double precision, intent(in) :: reigvec(n,n), leigvec(n,n) + double precision, intent(out) :: S(n,n), accu_nd - - ! ------------------------------------------------------------------------------------- - ! - - print *, 'Computing the left/right eigenvectors ...' - allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n) ) - - Aw(:,:) = A(:,:) - call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR) - - thr = 1.d-12 - double precision, allocatable :: im_part(:) - n_good = 0 - do i = 1, n - if( dabs(WI(i)).lt.thr ) then - n_good += 1 - else - print*, 'Found an imaginary component to eigenvalue on i = ', i - print*, 'Re(i) + Im(i)', WR(i), WI(i) - endif - enddo - print*,'n_good = ',n_good - if(n_good .lt. n)then - print*,'Removing degeneracies to remove imaginary parts' - allocate(im_part(n),iorder(n)) - r = 0.d0 - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part,iorder,n) - thr = 10.d0 * dabs(im_part(1)) - print*,'adding random numbers on the diagonal of magnitude ',thr - Aw(:,:) = A(:,:) - do i = 1, n - call RANDOM_NUMBER(r) - print*,'r = ',r*thr - Aw(i,i) += thr * r - enddo - print*,'Rediagonalizing the matrix with random numbers' - call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR) - deallocate(im_part,iorder) - endif - deallocate( Aw ) - - ! - ! ------------------------------------------------------------------------------------- - - ! --- - - ! ------------------------------------------------------------------------------------- - ! track & sort the real eigenvalues - - n_good = 0 - thr = 1.d-5 - do i = 1, n - if( dabs(WI(i)).lt.thr ) then - n_good += 1 - else - print*, 'Found an imaginary component to eigenvalue on i = ', i - print*, 'Re(i) + Im(i)', WR(i), WI(i) - endif - enddo - print*,'n_good = ',n_good - allocate( list_good(n_good), iorder(n_good) ) - - n_good = 0 - do i = 1, n - if( dabs(WI(i)).lt.thr ) then - n_good += 1 - list_good(n_good) = i - eigval(n_good) = WR(i) - endif - enddo - - deallocate( WR, WI ) - - n_real_eigv = n_good - do i = 1, n_good - iorder(i) = i - enddo - call dsort(eigval, iorder, n_good) - - reigvec(:,:) = 0.d0 - leigvec(:,:) = 0.d0 - do i = 1, n_real_eigv - do j = 1, n - reigvec(j,i) = VR(j,list_good(iorder(i))) - leigvec(j,i) = VL(j,list_good(iorder(i))) - enddo - enddo - - deallocate( list_good, iorder ) - deallocate( VL, VR ) - - ! - ! ------------------------------------------------------------------------------------- - - ! --- - - ! ------------------------------------------------------------------------------------- - ! check bi-orthogonality - - allocate( S(n_real_eigv,n_real_eigv) ) + integer :: i,j - ! S = VL x VR - call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n, 1.d0 & - , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & - , 0.d0, S, size(S, 1) ) - - accu_nd = 0.d0 - do i = 1, n_real_eigv - do j = 1, n_real_eigv - if(i==j) cycle - accu_nd = accu_nd + S(j,i) * S(j,i) - enddo - enddo - accu_nd = dsqrt(accu_nd) - - if(accu_nd .lt. thresh_biorthog_nondiag) then - ! L x R is already bi-orthogonal - - print *, ' L & T bi-orthogonality: ok' - deallocate( S ) - return - - else - ! impose bi-orthogonality - - print *, ' L & T bi-orthogonality: not imposed yet' - print *, ' accu_nd = ', accu_nd - call impose_biorthog_qr(n, n_real_eigv, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec) - deallocate( S ) - - endif - - ! - ! ------------------------------------------------------------------------------------- - - return - -end - -! --- - -subroutine non_hrmt_real_im(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the EIGENVALUES sorted the REAL part and corresponding LEFT/RIGHT eigenvetors - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - - integer :: i, j - integer :: n_bad - double precision :: thr - double precision :: accu_nd - - integer, allocatable :: iorder(:) - double precision, allocatable :: Aw(:,:) - double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) - double precision, allocatable :: S(:,:) - double precision :: r - - ! ------------------------------------------------------------------------------------- - ! - - print *, 'Computing the left/right eigenvectors ...' - allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), iorder(n)) - - Aw(:,:) = A(:,:) - do i = 1, n - call RANDOM_NUMBER(r) - Aw(i,i) += 10.d-10* r - enddo - call lapack_diag_non_sym(n, Aw, WR, WI, VL, VR) - - ! ------------------------------------------------------------------------------------- - ! track & sort the real eigenvalues - - i = 1 - thr = 1.d-15 - n_real_eigv = 0 - do while (i.le.n) -! print*,i,dabs(WI(i)) - if( dabs(WI(i)).gt.thr ) then - print*, 'Found an imaginary component to eigenvalue on i = ', i - print*, 'Re(i) , Im(i) ', WR(i), WI(i) - iorder(i) = i - eigval(i) = WR(i) - i+=1 - print*, 'Re(i+1),Im(i+1)',WR(i), WI(i) - iorder(i) = i - eigval(i) = WR(i) - i+=1 - else - n_real_eigv += 1 - iorder(i) = i - eigval(i) = WR(i) - i+=1 - endif - enddo - call dsort(eigval, iorder, n) - reigvec(:,:) = 0.d0 - leigvec(:,:) = 0.d0 - do i = 1, n - do j = 1, n - reigvec(j,i) = VR(j,iorder(i)) - leigvec(j,i) = VL(j,iorder(i)) - enddo - enddo - - deallocate( iorder ) - deallocate( VL, VR ) - - ! - ! ------------------------------------------------------------------------------------- - - ! --- - - ! ------------------------------------------------------------------------------------- - ! check bi-orthogonality - - allocate( S(n,n) ) - - ! S = VL x VR - call dgemm( 'T', 'N', n, n, n, 1.d0 & - , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & - , 0.d0, S, size(S, 1) ) - - accu_nd = 0.d0 - do i = 1, n - do j = 1, n - if(i==j) cycle - accu_nd = accu_nd + S(j,i) * S(j,i) - enddo - enddo - accu_nd = dsqrt(accu_nd) - - deallocate( S ) - -end - -! --- - -subroutine non_hrmt_generalized_real_im(n, A, B, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the EIGENVALUES sorted the REAL part and corresponding LEFT/RIGHT eigenvetors - ! for A R = lambda B R and A^\dagger L = lambda B^\dagger L - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - integer, intent(in) :: n - double precision, intent(in) :: A(n,n),B(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - - integer :: i, j - integer :: n_bad - double precision :: thr - double precision :: accu_nd - - integer, allocatable :: iorder(:) - double precision, allocatable :: Aw(:,:),Bw(:,:) - double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:), beta(:) - double precision, allocatable :: S(:,:) - double precision :: r - - ! ------------------------------------------------------------------------------------- - ! - - print *, 'Computing the left/right eigenvectors ...' - allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), Bw(n,n),iorder(n),beta(n)) - - Aw(:,:) = A(:,:) - Bw(:,:) = B(:,:) - call lapack_diag_general_non_sym(n,Aw,Bw,WR,beta,WI,VL,VR) - - ! ------------------------------------------------------------------------------------- - ! track & sort the real eigenvalues - - i = 1 - thr = 1.d-10 - n_real_eigv = 0 - do while (i.le.n) - if( dabs(WI(i)).gt.thr ) then - print*, 'Found an imaginary component to eigenvalue on i = ', i - print*, 'Re(i) , Im(i) ', WR(i), WI(i) - iorder(i) = i - eigval(i) = WR(i)/(beta(i) + 1.d-10) - i+=1 - print*, 'Re(i+1),Im(i+1)',WR(i), WI(i) - iorder(i) = i - eigval(i) = WR(i)/(beta(i) + 1.d-10) - i+=1 - else - n_real_eigv += 1 - iorder(i) = i - eigval(i) = WR(i)/(beta(i) + 1.d-10) - i+=1 - endif - enddo - call dsort(eigval, iorder, n) - reigvec(:,:) = 0.d0 - leigvec(:,:) = 0.d0 - do i = 1, n - do j = 1, n - reigvec(j,i) = VR(j,iorder(i)) - leigvec(j,i) = VL(j,iorder(i)) - enddo - enddo - - deallocate( iorder ) - deallocate( VL, VR ) - - ! - ! ------------------------------------------------------------------------------------- - - ! --- - - ! ------------------------------------------------------------------------------------- - ! check bi-orthogonality - - allocate( S(n,n) ) - - ! S = VL x VR - call dgemm( 'T', 'N', n, n, n, 1.d0 & - , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & - , 0.d0, S, size(S, 1) ) - - accu_nd = 0.d0 - do i = 1, n - do j = 1, n - if(i==j) cycle - accu_nd = accu_nd + S(j,i) * S(j,i) - enddo - enddo - accu_nd = dsqrt(accu_nd) - - deallocate( S ) - -end - -! --- - -subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - - integer :: i, j - integer :: n_good - double precision :: thr - double precision :: accu_nd - - integer, allocatable :: iorder(:) - double precision, allocatable :: Aw(:,:) - double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) - double precision, allocatable :: S(:,:) - double precision, allocatable :: eigval_sorted(:) - - - ! ------------------------------------------------------------------------------------- - ! - - print *, 'Computing the left/right eigenvectors ...' - - allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n) ) - Aw(:,:) = A(:,:) - - call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR) - - deallocate( Aw ) - - ! - ! ------------------------------------------------------------------------------------- - - ! --- - - ! ------------------------------------------------------------------------------------- - ! track & sort the real eigenvalues - - allocate( eigval_sorted(n), iorder(n) ) - - n_good = 0 - thr = 1.d-10 - - do i = 1, n - - iorder(i) = i - eigval_sorted(i) = WR(i) - - if(dabs(WI(i)) .gt. thr) then - print*, ' Found an imaginary component to eigenvalue on i = ', i - print*, ' Re(i) + Im(i)', WR(i), WI(i) - else - n_good += 1 - endif - - enddo - - n_real_eigv = n_good - - call dsort(eigval_sorted, iorder, n) - - reigvec(:,:) = 0.d0 - leigvec(:,:) = 0.d0 - do i = 1, n - eigval(i) = WR(i) - do j = 1, n - reigvec(j,i) = VR(j,iorder(i)) - leigvec(j,i) = VL(j,iorder(i)) - enddo - enddo - - deallocate( eigval_sorted, iorder ) - deallocate( WR, WI ) - deallocate( VL, VR ) - - ! - ! ------------------------------------------------------------------------------------- - - ! --- - - ! ------------------------------------------------------------------------------------- - ! check bi-orthogonality - - allocate( S(n,n) ) - - ! S = VL x VR - call dgemm( 'T', 'N', n, n, n, 1.d0 & - , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & - , 0.d0, S, size(S, 1) ) - - accu_nd = 0.d0 - do i = 1, n - do j = 1, n - if(i==j) cycle - accu_nd = accu_nd + S(j,i) * S(j,i) - enddo - enddo - accu_nd = dsqrt(accu_nd) - - if(accu_nd .lt. thresh_biorthog_nondiag) then - ! L x R is already bi-orthogonal - - !print *, ' L & T bi-orthogonality: ok' - deallocate( S ) - return - - else - ! impose bi-orthogonality - - !print *, ' L & T bi-orthogonality: not imposed yet' - !print *, ' accu_nd = ', accu_nd - call impose_biorthog_qr(n, n, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec) - deallocate( S ) - - endif - - ! - ! ------------------------------------------------------------------------------------- - - return - -end - -! --- - - -subroutine split_matrix_degen(aw,n,shift) - implicit none - BEGIN_DOC - ! subroutines that splits the degeneracies of a matrix by adding a splitting of magnitude thr * n_degen/2 - ! - ! WARNING !! THE MATRIX IS ASSUMED TO BE PASSED WITH INCREASING DIAGONAL ELEMENTS - END_DOC - double precision,intent(inout) :: Aw(n,n) - double precision,intent(in) :: shift - integer, intent(in) :: n - integer :: i,j,n_degen - logical :: keep_on - i=1 - do while(i.lt.n) - if(dabs(Aw(i,i)-Aw(i+1,i+1)).lt.shift)then - j=1 - keep_on = .True. - do while(keep_on) - if(i+j.gt.n)then - keep_on = .False. - exit - endif - if(dabs(Aw(i,i)-Aw(i+j,i+j)).lt.shift)then - j+=1 - else - keep_on=.False. - exit - endif - enddo - n_degen = j - j=0 - keep_on = .True. - do while(keep_on) - if(i+j+1.gt.n)then - keep_on = .False. - exit - endif - if(dabs(Aw(i+j,i+j)-Aw(i+j+1,i+j+1)).lt.shift)then - Aw(i+j,i+j) += (j-n_degen/2) * shift - j+=1 - else - keep_on = .False. - exit - endif - enddo - Aw(i+n_degen-1,i+n_degen-1) += (n_degen-1-n_degen/2) * shift - i+=n_degen - else - i+=1 - endif - enddo - -end - -subroutine give_degen(a,n,shift,list_degen,n_degen_list) - implicit none - BEGIN_DOC - ! returns n_degen_list :: the number of degenerated SET of elements (i.e. with |A(i)-A(i+1)| below shift) - ! - ! for each of these sets, list_degen(1,i) = first degenerate element of the set i, - ! - ! list_degen(2,i) = last degenerate element of the set i. - END_DOC - double precision,intent(in) :: A(n) - double precision,intent(in) :: shift - integer, intent(in) :: n - integer, intent(out) :: list_degen(2,n),n_degen_list - integer :: i,j,n_degen,k - logical :: keep_on - double precision,allocatable :: Aw(:) - list_degen = -1 - allocate(Aw(n)) - Aw = A - i=1 - k = 0 - do while(i.lt.n) - if(dabs(Aw(i)-Aw(i+1)).lt.shift)then - k+=1 - j=1 - list_degen(1,k) = i - keep_on = .True. - do while(keep_on) - if(i+j.gt.n)then - keep_on = .False. - exit - endif - if(dabs(Aw(i)-Aw(i+j)).lt.shift)then - j+=1 - else - keep_on=.False. - exit - endif - enddo - n_degen = j - list_degen(2,k) = list_degen(1,k)-1 + n_degen - j=0 - keep_on = .True. - do while(keep_on) - if(i+j+1.gt.n)then - keep_on = .False. - exit - endif - if(dabs(Aw(i+j)-Aw(i+j+1)).lt.shift)then - Aw(i+j) += (j-n_degen/2) * shift - j+=1 - else - keep_on = .False. - exit - endif - enddo - Aw(i+n_degen-1) += (n_degen-1-n_degen/2) * shift - i+=n_degen - else - i+=1 - endif - enddo - n_degen_list = k - -end - -subroutine cancel_small_elmts(aw,n,shift) - implicit none - BEGIN_DOC - ! subroutines that splits the degeneracies of a matrix by adding a splitting of magnitude thr * n_degen/2 - ! - ! WARNING !! THE MATRIX IS ASSUMED TO BE PASSED WITH INCREASING DIAGONAL ELEMENTS - END_DOC - double precision,intent(inout) :: Aw(n,n) - double precision,intent(in) :: shift - integer, intent(in) :: n - integer :: i,j - do i = 1, n - do j = 1, n - if(dabs(Aw(j,i)).lt.shift)then - Aw(j,i) = 0.d0 - endif - enddo - enddo -end - -subroutine check_bi_ortho(reigvec,leigvec,n,S,accu_nd) - implicit none - integer, intent(in) :: n - double precision,intent(in) :: reigvec(n,n),leigvec(n,n) - double precision, intent(out) :: S(n,n),accu_nd - BEGIN_DOC -! retunrs the overlap matrix S = Leigvec^T Reigvec -! -! and the square root of the sum of the squared off-diagonal elements of S - END_DOC - integer :: i,j ! S = VL x VR call dgemm( 'T', 'N', n, n, n, 1.d0 & , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) & , 0.d0, S, size(S, 1) ) + accu_nd = 0.d0 do i = 1, n do j = 1, n @@ -1213,3 +174,5 @@ subroutine check_bi_ortho(reigvec,leigvec,n,S,accu_nd) accu_nd = dsqrt(accu_nd) end + + diff --git a/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f b/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f index cb38347eb..2c053ac88 100644 --- a/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f +++ b/plugins/local/non_hermit_dav/lapack_diag_non_hermit.irp.f @@ -273,60 +273,6 @@ subroutine lapack_diag_non_sym_new(n, A, WR, WI, VL, VR) ! --- -subroutine lapack_diag_non_sym_right(n, A, WR, WI, VR) - - implicit none - - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - double precision, intent(out) :: WR(n), WI(n), VR(n,n) - - integer :: i, lda, ldvl, ldvr, LWORK, INFO - double precision, allocatable :: Atmp(:,:), WORK(:), VL(:,:) - - lda = n - ldvl = 1 - ldvr = n - - allocate( Atmp(n,n), VL(1,1) ) - Atmp(1:n,1:n) = A(1:n,1:n) - - allocate(WORK(1)) - LWORK = -1 - call dgeev('N', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO) - if(INFO.gt.0)then - print*,'dgeev failed !!',INFO - stop - endif - - LWORK = max(int(WORK(1)), 1) ! this is the optimal size of WORK - deallocate(WORK) - - allocate(WORK(LWORK)) - - ! Actual diagonalization - call dgeev('N', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO) - if(INFO.ne.0) then - print*,'dgeev failed !!', INFO - stop - endif - - deallocate(Atmp, WORK, VL) - -! print *, ' JOBL = F' -! print *, ' eigenvalues' -! do i = 1, n -! write(*, '(1000(F16.10,X))') WR(i), WI(i) -! enddo -! print *, ' right eigenvect' -! do i = 1, n -! write(*, '(1000(F16.10,X))') VR(:,i) -! enddo - -end - -! --- - subroutine non_hrmt_real_diag(n, A, leigvec, reigvec, n_real_eigv, eigval) BEGIN_DOC @@ -1780,70 +1726,6 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot) ! --- -subroutine check_weighted_biorthog(n, m, W, Vl, Vr, thr_d, thr_nd, accu_d, accu_nd, S, stop_ifnot) - - implicit none - - integer, intent(in) :: n, m - double precision, intent(in) :: Vl(n,m), Vr(n,m), W(n,n) - double precision, intent(in) :: thr_d, thr_nd - logical, intent(in) :: stop_ifnot - double precision, intent(out) :: accu_d, accu_nd, S(m,m) - - integer :: i, j - double precision, allocatable :: SS(:,:), tmp(:,:) - - print *, ' check weighted bi-orthogonality' - - ! --- - - allocate(tmp(m,n)) - call dgemm( 'T', 'N', m, n, n, 1.d0 & - , Vl, size(Vl, 1), W, size(W, 1) & - , 0.d0, tmp, size(tmp, 1) ) - call dgemm( 'N', 'N', m, m, n, 1.d0 & - , tmp, size(tmp, 1), Vr, size(Vr, 1) & - , 0.d0, S, size(S, 1) ) - deallocate(tmp) - - !print *, ' overlap matrix:' - !do i = 1, m - ! write(*,'(1000(F16.10,X))') S(i,:) - !enddo - - accu_d = 0.d0 - accu_nd = 0.d0 - do i = 1, m - do j = 1, m - if(i==j) then - accu_d = accu_d + dabs(S(i,i)) - else - accu_nd = accu_nd + S(j,i) * S(j,i) - endif - enddo - enddo - accu_nd = dsqrt(accu_nd) - - print *, ' accu_nd = ', accu_nd - print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) - - ! --- - - if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then - print *, ' non bi-orthogonal vectors !' - print *, ' accu_nd = ', accu_nd - print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) - !print *, ' overlap matrix:' - !do i = 1, m - ! write(*,'(1000(F16.10,X))') S(i,:) - !enddo - stop - endif - -end - -! --- - subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_ifnot) implicit none @@ -2144,6 +2026,7 @@ subroutine impose_biorthog_degen_eigvec(n, deg_num, e0, L0, R0) enddo !print*,' accu_nd after = ', accu_nd if(accu_nd .gt. 1d-12) then + print*, ' accu_nd =', accu_nd print*, ' your strategy for degenerates orbitals failed !' print*, m, 'deg on', i stop diff --git a/plugins/local/non_hermit_dav/new_routines.irp.f b/plugins/local/non_hermit_dav/new_routines.irp.f deleted file mode 100644 index 8db044d31..000000000 --- a/plugins/local/non_hermit_dav/new_routines.irp.f +++ /dev/null @@ -1,670 +0,0 @@ -subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors - ! - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:) - - integer :: i, j, n_degen,k , iteration - double precision :: shift_current - double precision :: r,thr,accu_d, accu_nd - integer, allocatable :: iorder_origin(:),iorder(:) - double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:) - double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:) - double precision, allocatable :: im_part(:),re_part(:) - double precision :: accu,thr_cut, thr_norm=1d0 - - - thr_cut = 1.d-15 - print*,'Computing the left/right eigenvectors ...' - print*,'Using the degeneracy splitting algorithm' - ! initialization - shift_current = 1.d-15 - iteration = 0 - print*,'***** iteration = ',iteration - - - ! pre-processing the matrix :: sorting by diagonal elements - allocate(reigvec_tmp(n,n), leigvec_tmp(n,n)) - allocate(diag_elem(n),iorder_origin(n),A_save(n,n)) -! print*,'Aw' - do i = 1, n - iorder_origin(i) = i - diag_elem(i) = A(i,i) -! write(*,'(100(F16.10,X))')A(:,i) - enddo - call dsort(diag_elem, iorder_origin, n) - do i = 1, n - do j = 1, n - A_save(j,i) = A(iorder_origin(j),iorder_origin(i)) - enddo - enddo - - allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) - allocate(im_part(n),iorder(n)) - allocate( S(n,n) ) - - - Aw = A_save - call cancel_small_elmts(aw,n,thr_cut) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - n_real_eigv = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_real_eigv += 1 - else -! print*,'Found an imaginary component to eigenvalue' -! print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - if(n_real_eigv.ne.n)then - shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - else - print*,'All eigenvalues are real !' - endif - - - do while(n_real_eigv.ne.n) - iteration += 1 - print*,'***** iteration = ',iteration - if(shift_current.gt.1.d-3)then - print*,'shift_current > 1.d-3 !!' - print*,'Your matrix intrinsically contains complex eigenvalues' - stop - endif - Aw = A_save - call cancel_small_elmts(Aw,n,thr_cut) - call split_matrix_degen(Aw,n,shift_current) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - n_real_eigv = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_real_eigv+= 1 - else -! print*,'Found an imaginary component to eigenvalue' -! print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - if(n_real_eigv.ne.n)then - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - else - print*,'All eigenvalues are real !' - endif - enddo - !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES - do i = 1, n - eigval(i) = WR(i) - iorder(i) = i - enddo - call dsort(eigval,iorder,n) - do i = 1, n -! print*,'eigval(i) = ',eigval(i) - reigvec_tmp(:,i) = VR(:,iorder(i)) - leigvec_tmp(:,i) = Vl(:,iorder(i)) - enddo - -!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY - ! check bi-orthogonality - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print *, ' accu_nd bi-orthog = ', accu_nd - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print *, ' ' - print *, ' bi-orthogonality: not imposed yet' - print *, ' ' - print *, ' ' - print *, ' orthog between degen eigenvect' - print *, ' ' - double precision, allocatable :: S_nh_inv_half(:,:) - allocate(S_nh_inv_half(n,n)) - logical :: complex_root - deallocate(S_nh_inv_half) - call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) - call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'New vectors not bi-orthonormals at ',accu_nd - call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'New vectors not bi-orthonormals at ',accu_nd - print*,'Must be a deep problem ...' - stop - endif - endif - endif - - !! EIGENVECTORS SORTED AND BI-ORTHONORMAL - do i = 1, n - do j = 1, n - VR(iorder_origin(j),i) = reigvec_tmp(j,i) - VL(iorder_origin(j),i) = leigvec_tmp(j,i) - enddo - enddo - - !! RECOMPUTING THE EIGENVALUES - eigval = 0.d0 - do i = 1, n - iorder(i) = i - accu = 0.d0 - do j = 1, n - accu += VL(j,i) * VR(j,i) - do k = 1, n - eigval(i) += VL(j,i) * A(j,k) * VR(k,i) - enddo - enddo - eigval(i) *= 1.d0/accu -! print*,'eigval(i) = ',eigval(i) - enddo - !! RESORT JUST TO BE SURE - call dsort(eigval, iorder, n) - do i = 1, n - do j = 1, n - reigvec(j,i) = VR(j,iorder(i)) - leigvec(j,i) = VL(j,iorder(i)) - enddo - enddo - print*,'Checking for final reigvec/leigvec' - shift_current = max(1.d-10,shift_current) - print*,'Thr for eigenvectors = ',shift_current - call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.) - call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print *, ' accu_nd bi-orthog = ', accu_nd - - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog' - print*,'Eigenvectors are not bi orthonormal ..' - print*,'accu_nd = ',accu_nd - stop - endif - -end - - - -subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors - ! - ! of a non hermitian matrix A(n,n) - ! - ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" - ! - END_DOC - - implicit none - - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:) - - integer :: i, j, n_degen,k , iteration - double precision :: shift_current - double precision :: r,thr,accu_d, accu_nd - integer, allocatable :: iorder_origin(:),iorder(:) - double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:) - double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:) - double precision, allocatable :: im_part(:),re_part(:) - double precision :: accu,thr_cut, thr_norm=1.d0 - double precision, allocatable :: S_nh_inv_half(:,:) - logical :: complex_root - - - thr_cut = 1.d-15 - print*,'Computing the left/right eigenvectors ...' - print*,'Using the degeneracy splitting algorithm' - ! initialization - shift_current = 1.d-15 - iteration = 0 - print*,'***** iteration = ',iteration - - - ! pre-processing the matrix :: sorting by diagonal elements - allocate(reigvec_tmp(n,n), leigvec_tmp(n,n)) - allocate(diag_elem(n),iorder_origin(n),A_save(n,n)) -! print*,'Aw' - do i = 1, n - iorder_origin(i) = i - diag_elem(i) = A(i,i) -! write(*,'(100(F16.10,X))')A(:,i) - enddo - call dsort(diag_elem, iorder_origin, n) - do i = 1, n - do j = 1, n - A_save(j,i) = A(iorder_origin(j),iorder_origin(i)) - enddo - enddo - - allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) - allocate(im_part(n),iorder(n)) - allocate( S(n,n) ) - allocate(S_nh_inv_half(n,n)) - - - Aw = A_save - call cancel_small_elmts(aw,n,thr_cut) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - n_real_eigv = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_real_eigv += 1 - else -! print*,'Found an imaginary component to eigenvalue' -! print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - if(n_real_eigv.ne.n)then - shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - else - print*,'All eigenvalues are real !' - endif - - - do while(n_real_eigv.ne.n) - iteration += 1 - print*,'***** iteration = ',iteration - if(shift_current.gt.1.d-3)then - print*,'shift_current > 1.d-3 !!' - print*,'Your matrix intrinsically contains complex eigenvalues' - stop - endif - Aw = A_save -! thr_cut = shift_current - call cancel_small_elmts(Aw,n,thr_cut) - call split_matrix_degen(Aw,n,shift_current) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - n_real_eigv = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_real_eigv+= 1 - else -! print*,'Found an imaginary component to eigenvalue' -! print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - if(n_real_eigv.ne.n)then - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - else - print*,'All eigenvalues are real !' - endif - enddo - !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES - do i = 1, n - eigval(i) = WR(i) - iorder(i) = i - enddo - call dsort(eigval,iorder,n) - do i = 1, n -! print*,'eigval(i) = ',eigval(i) - reigvec_tmp(:,i) = VR(:,iorder(i)) - leigvec_tmp(:,i) = Vl(:,iorder(i)) - enddo - -!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY - ! check bi-orthogonality - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print *, ' accu_nd bi-orthog = ', accu_nd - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print *, ' ' - print *, ' bi-orthogonality: not imposed yet' - if(complex_root) then - print *, ' ' - print *, ' ' - print *, ' orthog between degen eigenvect' - print *, ' ' - ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right - call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) ! orthogonalization of reigvec - call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) ! orthogonalization of leigvec - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'New vectors not bi-orthonormals at ', accu_nd - call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half, complex_root) - if(complex_root)then - call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) ! bi-orthonormalization using QR - else - print*,'S^{-1/2} exists !!' - call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization - endif - endif - else ! the matrix S^{-1/2} exists - print*,'S^{-1/2} exists !!' - call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization - endif - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'New vectors not bi-orthonormals at ',accu_nd - print*,'Must be a deep problem ...' - stop - endif - endif - - !! EIGENVECTORS SORTED AND BI-ORTHONORMAL - do i = 1, n - do j = 1, n - VR(iorder_origin(j),i) = reigvec_tmp(j,i) - VL(iorder_origin(j),i) = leigvec_tmp(j,i) - enddo - enddo - - !! RECOMPUTING THE EIGENVALUES - eigval = 0.d0 - do i = 1, n - iorder(i) = i - accu = 0.d0 - do j = 1, n - accu += VL(j,i) * VR(j,i) - do k = 1, n - eigval(i) += VL(j,i) * A(j,k) * VR(k,i) - enddo - enddo - eigval(i) *= 1.d0/accu -! print*,'eigval(i) = ',eigval(i) - enddo - !! RESORT JUST TO BE SURE - call dsort(eigval, iorder, n) - do i = 1, n - do j = 1, n - reigvec(j,i) = VR(j,iorder(i)) - leigvec(j,i) = VL(j,iorder(i)) - enddo - enddo - print*,'Checking for final reigvec/leigvec' - shift_current = max(1.d-10,shift_current) - print*,'Thr for eigenvectors = ',shift_current - call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.) - call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print *, ' accu_nd bi-orthog = ', accu_nd - - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog' - print*,'Eigenvectors are not bi orthonormal ..' - print*,'accu_nd = ',accu_nd - stop - endif - -end - - -subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval) - - BEGIN_DOC - ! - ! routine returning the eigenvalues and left/right eigenvectors of the TC fock matrix - ! - END_DOC - - implicit none - - integer, intent(in) :: n - double precision, intent(in) :: A(n,n) - integer, intent(out) :: n_real_eigv - double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:) - - integer :: i, j, n_degen,k , iteration - double precision :: shift_current - double precision :: r,thr,accu_d, accu_nd - integer, allocatable :: iorder_origin(:),iorder(:) - double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:) - double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:) - double precision, allocatable :: im_part(:),re_part(:) - double precision :: accu,thr_cut - double precision, allocatable :: S_nh_inv_half(:,:) - logical :: complex_root - double precision :: thr_norm=1d0 - - - thr_cut = 1.d-15 - print*,'Computing the left/right eigenvectors ...' - print*,'Using the degeneracy splitting algorithm' - ! initialization - shift_current = 1.d-15 - iteration = 0 - print*,'***** iteration = ',iteration - - - ! pre-processing the matrix :: sorting by diagonal elements - allocate(reigvec_tmp(n,n), leigvec_tmp(n,n)) - allocate(diag_elem(n),iorder_origin(n),A_save(n,n)) -! print*,'Aw' - do i = 1, n - iorder_origin(i) = i - diag_elem(i) = A(i,i) -! write(*,'(100(F16.10,X))')A(:,i) - enddo - call dsort(diag_elem, iorder_origin, n) - do i = 1, n - do j = 1, n - A_save(j,i) = A(iorder_origin(j),iorder_origin(i)) - enddo - enddo - - allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) - allocate(im_part(n),iorder(n)) - allocate( S(n,n) ) - allocate(S_nh_inv_half(n,n)) - - - Aw = A_save - call cancel_small_elmts(aw,n,thr_cut) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - n_real_eigv = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_real_eigv += 1 - else -! print*,'Found an imaginary component to eigenvalue' -! print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - if(n_real_eigv.ne.n)then - shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - else - print*,'All eigenvalues are real !' - endif - - - do while(n_real_eigv.ne.n) - iteration += 1 - print*,'***** iteration = ',iteration - if(shift_current.gt.1.d-3)then - print*,'shift_current > 1.d-3 !!' - print*,'Your matrix intrinsically contains complex eigenvalues' - stop - endif - Aw = A_save -! thr_cut = shift_current - call cancel_small_elmts(Aw,n,thr_cut) - call split_matrix_degen(Aw,n,shift_current) - call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR) - n_real_eigv = 0 - do i = 1, n - if(dabs(WI(i)).lt.1.d-20)then - n_real_eigv+= 1 - else -! print*,'Found an imaginary component to eigenvalue' -! print*,'Re(i) + Im(i)',WR(i),WI(i) - endif - enddo - if(n_real_eigv.ne.n)then - do i = 1, n - im_part(i) = -dabs(WI(i)) - iorder(i) = i - enddo - call dsort(im_part, iorder, n) - shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0) - print*,'Largest imaginary part found in eigenvalues = ',im_part(1) - print*,'Splitting the degeneracies by ',shift_current - else - print*,'All eigenvalues are real !' - endif - enddo - !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES - do i = 1, n - eigval(i) = WR(i) - iorder(i) = i - enddo - call dsort(eigval,iorder,n) - do i = 1, n -! print*,'eigval(i) = ',eigval(i) - reigvec_tmp(:,i) = VR(:,iorder(i)) - leigvec_tmp(:,i) = Vl(:,iorder(i)) - enddo - -!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY - ! check bi-orthogonality - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print *, ' accu_nd bi-orthog = ', accu_nd - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print *, ' ' - print *, ' bi-orthogonality: not imposed yet' - print *, ' ' - print *, ' ' - print *, ' Using impose_unique_biorthog_degen_eigvec' - print *, ' ' - ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right - call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, leigvec_tmp, reigvec_tmp) - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print*,'accu_nd = ',accu_nd - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'New vectors not bi-orthonormals at ',accu_nd - call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half,complex_root) - if(complex_root)then - print*,'S^{-1/2} does not exits, using QR bi-orthogonalization' - call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) ! bi-orthonormalization using QR - else - print*,'S^{-1/2} exists !!' - call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization - endif - endif - call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'New vectors not bi-orthonormals at ',accu_nd - print*,'Must be a deep problem ...' - stop - endif - endif - - !! EIGENVECTORS SORTED AND BI-ORTHONORMAL - do i = 1, n - do j = 1, n - VR(iorder_origin(j),i) = reigvec_tmp(j,i) - VL(iorder_origin(j),i) = leigvec_tmp(j,i) - enddo - enddo - - !! RECOMPUTING THE EIGENVALUES - eigval = 0.d0 - do i = 1, n - iorder(i) = i - accu = 0.d0 - do j = 1, n - accu += VL(j,i) * VR(j,i) - do k = 1, n - eigval(i) += VL(j,i) * A(j,k) * VR(k,i) - enddo - enddo - eigval(i) *= 1.d0/accu -! print*,'eigval(i) = ',eigval(i) - enddo - !! RESORT JUST TO BE SURE - call dsort(eigval, iorder, n) - do i = 1, n - do j = 1, n - reigvec(j,i) = VR(j,iorder(i)) - leigvec(j,i) = VL(j,iorder(i)) - enddo - enddo - print*,'Checking for final reigvec/leigvec' - shift_current = max(1.d-10,shift_current) - print*,'Thr for eigenvectors = ',shift_current - call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.) - call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) - print *, ' accu_nd bi-orthog = ', accu_nd - - if(accu_nd .lt. thresh_biorthog_nondiag) then - print *, ' bi-orthogonality: ok' - else - print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog' - print*,'Eigenvectors are not bi orthonormal ..' - print*,'accu_nd = ',accu_nd - stop - endif - -end - - diff --git a/plugins/local/normal_order_old/NEED b/plugins/local/normal_order_old/NEED new file mode 100644 index 000000000..e8c8c478b --- /dev/null +++ b/plugins/local/normal_order_old/NEED @@ -0,0 +1 @@ +tc_scf diff --git a/plugins/local/normal_order_old/README.rst b/plugins/local/normal_order_old/README.rst new file mode 100644 index 000000000..a284fcfd3 --- /dev/null +++ b/plugins/local/normal_order_old/README.rst @@ -0,0 +1,4 @@ +================ +normal_order_old +================ + diff --git a/plugins/local/tc_bi_ortho/normal_ordered.irp.f b/plugins/local/normal_order_old/normal_ordered.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/normal_ordered.irp.f rename to plugins/local/normal_order_old/normal_ordered.irp.f diff --git a/plugins/local/tc_bi_ortho/normal_ordered_contractions.irp.f b/plugins/local/normal_order_old/normal_ordered_contractions.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/normal_ordered_contractions.irp.f rename to plugins/local/normal_order_old/normal_ordered_contractions.irp.f diff --git a/plugins/local/tc_bi_ortho/normal_ordered_old.irp.f b/plugins/local/normal_order_old/normal_ordered_old.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/normal_ordered_old.irp.f rename to plugins/local/normal_order_old/normal_ordered_old.irp.f diff --git a/plugins/local/tc_bi_ortho/normal_ordered_v0.irp.f b/plugins/local/normal_order_old/normal_ordered_v0.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/normal_ordered_v0.irp.f rename to plugins/local/normal_order_old/normal_ordered_v0.irp.f diff --git a/plugins/local/old_delta_tc_qmc/NEED b/plugins/local/old_delta_tc_qmc/NEED new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/plugins/local/old_delta_tc_qmc/NEED @@ -0,0 +1 @@ + diff --git a/plugins/local/old_delta_tc_qmc/README.rst b/plugins/local/old_delta_tc_qmc/README.rst new file mode 100644 index 000000000..1d56f96c0 --- /dev/null +++ b/plugins/local/old_delta_tc_qmc/README.rst @@ -0,0 +1,4 @@ +================ +old_delta_tc_qmc +================ + diff --git a/plugins/local/tc_bi_ortho/compute_deltamu_right.irp.f b/plugins/local/old_delta_tc_qmc/compute_deltamu_right.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/compute_deltamu_right.irp.f rename to plugins/local/old_delta_tc_qmc/compute_deltamu_right.irp.f diff --git a/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f b/plugins/local/old_delta_tc_qmc/dressing_vectors_lr.irp.f similarity index 88% rename from plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f rename to plugins/local/old_delta_tc_qmc/dressing_vectors_lr.irp.f index 0aff9980e..135f9d179 100644 --- a/plugins/local/tc_bi_ortho/dressing_vectors_lr.irp.f +++ b/plugins/local/old_delta_tc_qmc/dressing_vectors_lr.irp.f @@ -27,7 +27,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) call hmat_bi_ortho (psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) delta = 0.d0 @@ -39,7 +39,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I |Htilde | J > - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) ! < I |H | J > call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) @@ -78,7 +78,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -88,7 +88,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I |Htilde | J > - call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_opt_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta(i) = delta(i) + psicoef(j) * htc_tot enddo diff --git a/plugins/local/tc_keywords/tc_keywords.irp.f b/plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f similarity index 82% rename from plugins/local/tc_keywords/tc_keywords.irp.f rename to plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f index 3bc685500..5ff08bd69 100644 --- a/plugins/local/tc_keywords/tc_keywords.irp.f +++ b/plugins/local/old_delta_tc_qmc/old_delta_tc_qmc.irp.f @@ -1,4 +1,4 @@ -program tc_keywords +program old_delta_tc_qmc implicit none BEGIN_DOC ! TODO : Put the documentation of the program here diff --git a/plugins/local/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f b/plugins/local/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f index a3f1b6efd..cb7cdb221 100644 --- a/plugins/local/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f +++ b/plugins/local/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f @@ -183,11 +183,3 @@ subroutine give_integrals_3_body(i,j,m,k,l,n,integral) END_PROVIDER -BEGIN_PROVIDER [ double precision, sqrt_weight_at_r, (n_points_final_grid)] - implicit none - integer :: ipoint - do ipoint = 1, n_points_final_grid - sqrt_weight_at_r(ipoint) = dsqrt(final_weight_at_r_vector(ipoint)) - enddo -END_PROVIDER - diff --git a/plugins/local/slater_tc/NEED b/plugins/local/slater_tc/NEED new file mode 100644 index 000000000..a8669866b --- /dev/null +++ b/plugins/local/slater_tc/NEED @@ -0,0 +1,8 @@ +determinants +normal_order_old +bi_ort_ints +bi_ortho_mos +tc_keywords +non_hermit_dav +dav_general_mat +tc_scf diff --git a/plugins/local/tc_bi_ortho/h_mat_triple.irp.f b/plugins/local/slater_tc/h_mat_triple.irp.f similarity index 53% rename from plugins/local/tc_bi_ortho/h_mat_triple.irp.f rename to plugins/local/slater_tc/h_mat_triple.irp.f index 6f5697a29..9cb4b60a8 100644 --- a/plugins/local/tc_bi_ortho/h_mat_triple.irp.f +++ b/plugins/local/slater_tc/h_mat_triple.irp.f @@ -1,196 +1,3 @@ -subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) - use bitmasks - BEGIN_DOC -! returns the array, for each spin, of holes/particles between key_i and key_j -! -! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> - END_DOC - include 'utils/constants.include.F' - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) - double precision, intent(out) :: phase - integer :: ispin,k,i,pos - integer(bit_kind) :: key_hole, key_particle - integer(bit_kind) :: xorvec(N_int_max,2) - holes_array = -1 - particles_array = -1 - degree_array = 0 - do i = 1, N_int - xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) - xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) - degree_array(1) += popcnt(xorvec(i,1)) - degree_array(2) += popcnt(xorvec(i,2)) - enddo - degree_array(1) = shiftr(degree_array(1),1) - degree_array(2) = shiftr(degree_array(2),1) - - do ispin = 1, 2 - k = 1 - !!! GETTING THE HOLES - do i = 1, N_int - key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) - do while(key_hole .ne.0_bit_kind) - pos = trailz(key_hole) - holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_hole = ibclr(key_hole,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_excitation_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo - do ispin = 1, 2 - k = 1 - !!! GETTING THE PARTICLES - do i = 1, N_int - key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) - do while(key_particle .ne.0_bit_kind) - pos = trailz(key_particle) - particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_particle = ibclr(key_particle,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_excitation_general ' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo - integer :: h,p, i_ok - integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) - integer :: exc(0:2,2,2) - double precision :: phase_tmp - allocate(det_i(Nint,2),det_ip(N_int,2)) - det_i = key_i - phase = 1.d0 - do ispin = 1, 2 - do i = 1, degree_array(ispin) - h = holes_array(i,ispin) - p = particles_array(i,ispin) - det_ip = det_i - call do_single_excitation(det_ip,h,p,ispin,i_ok) - if(i_ok == -1)then - print*,'excitation was not possible ' - stop - endif - call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) - phase *= phase_tmp - det_i = det_ip - enddo - enddo - -end - -subroutine get_holes_general(key_i, key_j,Nint, holes_array) - use bitmasks - BEGIN_DOC -! returns the array, per spin, of holes between key_i and key_j -! -! with the following convention: a_{hole}|key_i> --> |key_j> - END_DOC - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: holes_array(100,2) - integer(bit_kind) :: key_hole - integer :: ispin,k,i,pos - holes_array = -1 - do ispin = 1, 2 - k = 1 - do i = 1, N_int - key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) - do while(key_hole .ne.0_bit_kind) - pos = trailz(key_hole) - holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_hole = ibclr(key_hole,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_holes_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'stoping ...' - stop - endif - enddo - enddo - enddo -end - -subroutine get_particles_general(key_i, key_j,Nint,particles_array) - use bitmasks - BEGIN_DOC -! returns the array, per spin, of particles between key_i and key_j -! -! with the following convention: a^dagger_{particle}|key_i> --> |key_j> - END_DOC - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) - integer, intent(out) :: particles_array(100,2) - integer(bit_kind) :: key_particle - integer :: ispin,k,i,pos - particles_array = -1 - do ispin = 1, 2 - k = 1 - do i = 1, N_int - key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) - do while(key_particle .ne.0_bit_kind) - pos = trailz(key_particle) - particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos - key_particle = ibclr(key_particle,pos) - k += 1 - if(k .gt.100)then - print*,'WARNING in get_holes_general' - print*,'More than a 100-th excitation for spin ',ispin - print*,'Those are the two determinants' - call debug_det(key_i, N_int) - call debug_det(key_j, N_int) - print*,'stoping ...' - stop - endif - enddo - enddo - enddo -end - -subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) - implicit none - integer, intent(in) :: degree(2), Nint - integer(bit_kind), intent(in) :: key_i(Nint,2) - integer, intent(in) :: holes_array(100,2),particles_array(100,2) - double precision, intent(out) :: phase - integer :: i,ispin,h,p, i_ok - integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) - integer :: exc(0:2,2,2) - double precision :: phase_tmp - allocate(det_i(Nint,2),det_ip(N_int,2)) - det_i = key_i - phase = 1.d0 - do ispin = 1, 2 - do i = 1, degree(ispin) - h = holes_array(i,ispin) - p = particles_array(i,ispin) - det_ip = det_i - call do_single_excitation(det_ip,h,p,ispin,i_ok) - if(i_ok == -1)then - print*,'excitation was not possible ' - stop - endif - call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) - phase *= phase_tmp - det_i = det_ip - enddo - enddo - -end - subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) BEGIN_DOC ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS diff --git a/plugins/local/tc_bi_ortho/h_tc_s2_u0.irp.f b/plugins/local/slater_tc/h_tc_s2_u0.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/h_tc_s2_u0.irp.f rename to plugins/local/slater_tc/h_tc_s2_u0.irp.f diff --git a/plugins/local/tc_bi_ortho/slater_tc_opt.irp.f b/plugins/local/slater_tc/slater_tc_opt.irp.f similarity index 79% rename from plugins/local/tc_bi_ortho/slater_tc_opt.irp.f rename to plugins/local/slater_tc/slater_tc_opt.irp.f index 59efc9430..bf9548829 100644 --- a/plugins/local/tc_bi_ortho/slater_tc_opt.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt.irp.f @@ -10,8 +10,6 @@ subroutine provide_all_three_ints_bi_ortho() implicit none double precision :: t1, t2 - PROVIDE ao_two_e_integrals_in_map - print *, ' start provide_all_three_ints_bi_ortho' call wall_time(t1) @@ -131,9 +129,9 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, endif - if(degree==0) then - htot += nuclear_repulsion - endif +! if(degree==0) then +! htot += nuclear_repulsion +! endif end @@ -181,3 +179,48 @@ subroutine htilde_mu_mat_opt_bi_ortho_no_3e(key_j, key_i, Nint, htot) ! --- +subroutine htilde_mu_mat_opt_bi_ortho_no_3e_both(key_j, key_i, Nint, hji,hij) + + BEGIN_DOC + ! + ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis + !! + ! Returns the detail of the matrix element WITHOUT ANY CONTRIBUTION FROM THE THREE ELECTRON TERMS + !! WARNING !! + ! + ! Non hermitian !! + ! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) + double precision, intent(out) :: hji,hij + integer :: degree + + hji = 0.d0 + hij = 0.d0 + + call get_excitation_degree(key_i, key_j, degree, Nint) + if(degree.gt.2) return + + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_i,hji) + hij = hji + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint,key_j, key_i , hji,hij) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hji,hij) + endif + + if(degree==0) then + hji += nuclear_repulsion + hij += nuclear_repulsion + endif + +end + +! --- + diff --git a/plugins/local/tc_bi_ortho/slater_tc_opt_diag.irp.f b/plugins/local/slater_tc/slater_tc_opt_diag.irp.f similarity index 61% rename from plugins/local/tc_bi_ortho/slater_tc_opt_diag.irp.f rename to plugins/local/slater_tc/slater_tc_opt_diag.irp.f index 78f9dc66a..3c5a5d12f 100644 --- a/plugins/local/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_diag.irp.f @@ -19,13 +19,13 @@ PROVIDE HF_bitmask PROVIDE mo_l_coef mo_r_coef - call diag_htilde_mu_mat_bi_ortho_slow(N_int, HF_bitmask, hmono, htwoe, htot) + call diag_htc_bi_orth_2e_brute(N_int, HF_bitmask, hmono, htwoe, htot) ref_tc_energy_1e = hmono ref_tc_energy_2e = htwoe if(three_body_h_tc) then - call diag_htilde_three_body_ints_bi_ort_slow(N_int, HF_bitmask, hthree) + call diag_htc_bi_orth_3e_brute(N_int, HF_bitmask, hthree) ref_tc_energy_3e = hthree else ref_tc_energy_3e = 0.d0 @@ -524,3 +524,310 @@ subroutine a_tc_operator_no_3e(iorb,ispin,key,hmono,htwoe,Nint,na,nb) ! --- +subroutine diag_htc_bi_orth_2e_brute(Nint, key_i, hmono, htwoe, htot) + + BEGIN_DOC + ! + ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS + ! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + double precision, intent(out) :: hmono,htwoe,htot + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk + double precision :: get_mo_two_e_integral_tc_int + integer(bit_kind) :: key_i_core(Nint,2) + + PROVIDE mo_bi_ortho_tc_two_e + + hmono = 0.d0 + htwoe = 0.d0 + htot = 0.d0 + + call bitstring_to_list_ab(key_i, occ, Ne, Nint) + + do ispin = 1, 2 + do i = 1, Ne(ispin) + ii = occ(i,ispin) + hmono += mo_bi_ortho_tc_one_e(ii,ii) + enddo + enddo + + ! alpha/beta two-body + ispin = 1 + jspin = 2 + do i = 1, Ne(ispin) ! electron 1 (so it can be associated to mu(r1)) + ii = occ(i,ispin) + do j = 1, Ne(jspin) ! electron 2 + jj = occ(j,jspin) + htwoe += mo_bi_ortho_tc_two_e(jj,ii,jj,ii) + enddo + enddo + + ! alpha/alpha two-body + do i = 1, Ne(ispin) + ii = occ(i,ispin) + do j = i+1, Ne(ispin) + jj = occ(j,ispin) + htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) + enddo + enddo + + ! beta/beta two-body + do i = 1, Ne(jspin) + ii = occ(i,jspin) + do j = i+1, Ne(jspin) + jj = occ(j,jspin) + htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) + enddo + enddo + + htot = hmono + htwoe + +end + +! --- + +subroutine diag_htc_bi_orth_3e_brute(Nint, key_i, hthree) + + BEGIN_DOC + ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + double precision, intent(out) :: hthree + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2),i,j,ii,jj,ispin,jspin,m,mm + integer(bit_kind) :: key_i_core(Nint,2) + double precision :: direct_int, exchange_int, ref + double precision, external :: sym_3_e_int_from_6_idx_tensor + double precision, external :: three_e_diag_parrallel_spin + + PROVIDE mo_l_coef mo_r_coef + + if(core_tc_op) then + do i = 1, Nint + key_i_core(i,1) = xor(key_i(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(key_i(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, Nint) + else + call bitstring_to_list_ab(key_i, occ, Ne, Nint) + endif + + hthree = 0.d0 + + if((Ne(1)+Ne(2)) .ge. 3) then + + ! alpha/alpha/beta three-body + do i = 1, Ne(1) + ii = occ(i,1) + do j = i+1, Ne(1) + jj = occ(j,1) + do m = 1, Ne(2) + mm = occ(m,2) + !direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) !uses the 6-idx tensor + !exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) !uses the 6-idx tensor + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) !uses 3-idx tensor + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) !uses 3-idx tensor + hthree += direct_int - exchange_int + enddo + enddo + enddo + + ! beta/beta/alpha three-body + do i = 1, Ne(2) + ii = occ(i,2) + do j = i+1, Ne(2) + jj = occ(j,2) + do m = 1, Ne(1) + mm = occ(m,1) + !direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) !uses the 6-idx tensor + !exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) !uses the 6-idx tensor + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) + hthree += direct_int - exchange_int + enddo + enddo + enddo + + ! alpha/alpha/alpha three-body + do i = 1, Ne(1) + ii = occ(i,1) ! 1 + do j = i+1, Ne(1) + jj = occ(j,1) ! 2 + do m = j+1, Ne(1) + mm = occ(m,1) ! 3 + !hthree += sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) !uses the 6 idx tensor + hthree += three_e_diag_parrallel_spin(mm,jj,ii) !uses only 3-idx tensors + enddo + enddo + enddo + + ! beta/beta/beta three-body + do i = 1, Ne(2) + ii = occ(i,2) ! 1 + do j = i+1, Ne(2) + jj = occ(j,2) ! 2 + do m = j+1, Ne(2) + mm = occ(m,2) ! 3 + !hthree += sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) !uses the 6 idx tensor + hthree += three_e_diag_parrallel_spin(mm,jj,ii) !uses only 3-idx tensors + enddo + enddo + enddo + + endif + +end + + + +BEGIN_PROVIDER [ double precision, three_e_diag_parrallel_spin_prov, (mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator ON A BI ORTHONORMAL BASIS + ! + ! three_e_diag_parrallel_spin_prov(m,j,i) = All combinations of the form for same spin matrix elements + ! + ! notice the -1 sign: in this way three_e_diag_parrallel_spin_prov can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, m + double precision :: integral, wall1, wall0, three_e_diag_parrallel_spin + + three_e_diag_parrallel_spin_prov = 0.d0 + print *, ' Providing the three_e_diag_parrallel_spin_prov ...' + + integral = three_e_diag_parrallel_spin(1,1,1) ! to provide all stuffs + call wall_time(wall0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,m,integral) & + !$OMP SHARED (mo_num,three_e_diag_parrallel_spin_prov) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do j = 1, mo_num + do m = j, mo_num + three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin(m,j,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do i = 1, mo_num + do j = 1, mo_num + do m = 1, j + three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin_prov(j,m,i) + enddo + enddo + enddo + + call wall_time(wall1) + print *, ' wall time for three_e_diag_parrallel_spin_prov', wall1 - wall0 + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, three_e_single_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_single_parrallel_spin_prov(m,j,k,i) = All combination of for same spin matrix elements + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0, three_e_single_parrallel_spin + + three_e_single_parrallel_spin_prov = 0.d0 + print *, ' Providing the three_e_single_parrallel_spin_prov ...' + + integral = three_e_single_parrallel_spin(1,1,1,1) + call wall_time(wall0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_single_parrallel_spin_prov) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_single_parrallel_spin_prov(m,j,k,i) = three_e_single_parrallel_spin(m,j,k,i) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_single_parrallel_spin_prov', wall1 - wall0 + +END_PROVIDER + + +! --- + +BEGIN_PROVIDER [ double precision, three_e_double_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_double_parrallel_spin_prov(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0, three_e_double_parrallel_spin + + three_e_double_parrallel_spin_prov = 0.d0 + print *, ' Providing the three_e_double_parrallel_spin_prov ...' + call wall_time(wall0) + + integral = three_e_double_parrallel_spin(1,1,1,1,1) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_double_parrallel_spin_prov) + !$OMP DO SCHEDULE (dynamic) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + three_e_double_parrallel_spin_prov(m,l,j,k,i) = three_e_double_parrallel_spin(m,l,j,k,i) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_double_parrallel_spin_prov', wall1 - wall0 + +END_PROVIDER + diff --git a/plugins/local/tc_bi_ortho/slater_tc_opt_double.irp.f b/plugins/local/slater_tc/slater_tc_opt_double.irp.f similarity index 90% rename from plugins/local/tc_bi_ortho/slater_tc_opt_double.irp.f rename to plugins/local/slater_tc/slater_tc_opt_double.irp.f index 4067473c9..181ae11da 100644 --- a/plugins/local/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_double.irp.f @@ -505,3 +505,63 @@ subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot) end +subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hji,hij) + + BEGIN_DOC + ! and for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + !! + !! WARNING !! + ! + ! Non hermitian !! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) + double precision, intent(out) :: hji,hij + double precision :: hmono, htwoe_ji, htwoe_ij + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk + integer :: degree,exc(0:2,2,2) + integer :: h1, p1, h2, p2, s1, s2 + double precision :: get_mo_two_e_integral_tc_int,phase + + + call get_excitation_degree(key_i, key_j, degree, Nint) + + hmono = 0.d0 + htwoe_ji = 0.d0 + htwoe_ij = 0.d0 + hji = 0.d0 + hij = 0.d0 + + if(degree.ne.2)then + return + endif + integer :: degree_i,degree_j + call get_excitation_degree(ref_bitmask,key_i,degree_i,N_int) + call get_excitation_degree(ref_bitmask,key_j,degree_j,N_int) + call get_double_excitation(key_i, key_j, exc, phase, Nint) + call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) + + if(s1.ne.s2)then + ! opposite spin two-body + htwoe_ji = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + htwoe_ij = mo_bi_ortho_tc_two_e_transp(p2,p1,h2,h1) + else + ! same spin two-body + ! direct terms + htwoe_ji = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + htwoe_ij = mo_bi_ortho_tc_two_e_transp(p2,p1,h2,h1) + ! exchange terms + htwoe_ji -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) + htwoe_ij -= mo_bi_ortho_tc_two_e_transp(p1,p2,h2,h1) + endif + htwoe_ji *= phase + hji = htwoe_ji + htwoe_ij *= phase + hij = htwoe_ij + +end diff --git a/plugins/local/tc_bi_ortho/slater_tc_opt_single.irp.f b/plugins/local/slater_tc/slater_tc_opt_single.irp.f similarity index 81% rename from plugins/local/tc_bi_ortho/slater_tc_opt_single.irp.f rename to plugins/local/slater_tc/slater_tc_opt_single.irp.f index e57cb05c5..47bcbe34c 100644 --- a/plugins/local/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/plugins/local/slater_tc/slater_tc_opt_single.irp.f @@ -618,3 +618,145 @@ subroutine get_single_excitation_from_fock_tc_no_3e(Nint, key_i, key_j, h, p, sp end + +subroutine single_htilde_mu_mat_fock_bi_ortho_no_3e_both(Nint, key_j, key_i, hji,hij) + + BEGIN_DOC + ! and for single excitation ONLY FOR ONE- AND TWO-BODY TERMS + !! + !! WARNING !! + ! + ! Non hermitian !! + END_DOC + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) + double precision, intent(out) :: hji,hij + + double precision :: hmono, htwoe + integer :: occ(Nint*bit_kind_size,2) + integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk + integer :: degree,exc(0:2,2,2) + integer :: h1, p1, h2, p2, s1, s2 + double precision :: get_mo_two_e_integral_tc_int, phase + double precision :: direct_int, exchange_int_12, exchange_int_23, exchange_int_13 + integer :: other_spin(2) + integer(bit_kind) :: key_j_core(Nint,2), key_i_core(Nint,2) + + other_spin(1) = 2 + other_spin(2) = 1 + + hmono = 0.d0 + htwoe = 0.d0 + hji = 0.d0 + hij = 0.d0 + call get_excitation_degree(key_i, key_j, degree, Nint) + if(degree.ne.1)then + return + endif + call bitstring_to_list_ab(key_i, occ, Ne, Nint) + + call get_single_excitation(key_i, key_j, exc, phase, Nint) + call decode_exc(exc,1,h1,p1,h2,p2,s1,s2) + call get_single_excitation_from_fock_tc_no_3e_both(Nint, key_i, key_j, h1, p1, s1, phase, hji,hij) + +end + +! --- + +subroutine get_single_excitation_from_fock_tc_no_3e_both(Nint, key_i, key_j, h, p, spin, phase, hji,hij) + + use bitmasks + + implicit none + integer, intent(in) :: Nint + integer, intent(in) :: h, p, spin + double precision, intent(in) :: phase + integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) + double precision, intent(out) :: hji,hij + double precision :: hmono_ji,htwoe_ji + double precision :: hmono_ij,htwoe_ij + + integer(bit_kind) :: differences(Nint,2) + integer(bit_kind) :: hole(Nint,2) + integer(bit_kind) :: partcl(Nint,2) + integer :: occ_hole(Nint*bit_kind_size,2) + integer :: occ_partcl(Nint*bit_kind_size,2) + integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2) + integer :: i0,i + double precision :: buffer_c_ji(mo_num), buffer_x_ji(mo_num) + double precision :: buffer_c_ij(mo_num), buffer_x_ij(mo_num) + + do i = 1, mo_num + buffer_c_ji(i) = tc_2e_3idx_coulomb_integrals(i,p,h) + buffer_x_ji(i) = tc_2e_3idx_exchange_integrals(i,p,h) + buffer_c_ij(i) = tc_2e_3idx_coulomb_integrals_transp(i,p,h) + buffer_x_ij(i) = tc_2e_3idx_exchange_integrals_transp(i,p,h) + enddo + + do i = 1, Nint + differences(i,1) = xor(key_i(i,1),ref_closed_shell_bitmask(i,1)) + differences(i,2) = xor(key_i(i,2),ref_closed_shell_bitmask(i,2)) + hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask(i,1)) + hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask(i,2)) + partcl(i,1) = iand(differences(i,1),key_i(i,1)) + partcl(i,2) = iand(differences(i,2),key_i(i,2)) + enddo + + call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, Nint) + call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, Nint) + hmono_ji = mo_bi_ortho_tc_one_e(p,h) + htwoe_ji = fock_op_2_e_tc_closed_shell(p,h) + hmono_ij = mo_bi_ortho_tc_one_e(h,p) + htwoe_ij = fock_op_2_e_tc_closed_shell(h,p) + + ! holes :: direct terms + do i0 = 1, n_occ_ab_hole(1) + i = occ_hole(i0,1) + htwoe_ji -= buffer_c_ji(i) + htwoe_ij -= buffer_c_ij(i) + enddo + do i0 = 1, n_occ_ab_hole(2) + i = occ_hole(i0,2) + htwoe_ji -= buffer_c_ji(i) + htwoe_ij -= buffer_c_ij(i) + enddo + + ! holes :: exchange terms + do i0 = 1, n_occ_ab_hole(spin) + i = occ_hole(i0,spin) + htwoe_ji += buffer_x_ji(i) + htwoe_ij += buffer_x_ij(i) + enddo + + ! particles :: direct terms + do i0 = 1, n_occ_ab_partcl(1) + i = occ_partcl(i0,1) + htwoe_ji += buffer_c_ji(i) + htwoe_ij += buffer_c_ij(i) + enddo + do i0 = 1, n_occ_ab_partcl(2) + i = occ_partcl(i0,2) + htwoe_ji += buffer_c_ji(i) + htwoe_ij += buffer_c_ij(i) + enddo + + ! particles :: exchange terms + do i0 = 1, n_occ_ab_partcl(spin) + i = occ_partcl(i0,spin) + htwoe_ji -= buffer_x_ji(i) + htwoe_ij -= buffer_x_ij(i) + enddo + htwoe_ji = htwoe_ji * phase + hmono_ji = hmono_ji * phase + hji = htwoe_ji + hmono_ji + + htwoe_ij = htwoe_ij * phase + hmono_ij = hmono_ij * phase + hij = htwoe_ij + hmono_ij + +end + diff --git a/plugins/local/tc_bi_ortho/tc_hmat.irp.f b/plugins/local/slater_tc/tc_hmat.irp.f similarity index 93% rename from plugins/local/tc_bi_ortho/tc_hmat.irp.f rename to plugins/local/slater_tc/tc_hmat.irp.f index abec410df..6323d1290 100644 --- a/plugins/local/tc_bi_ortho/tc_hmat.irp.f +++ b/plugins/local/slater_tc/tc_hmat.irp.f @@ -22,6 +22,7 @@ if(noL_standard) then PROVIDE noL_0e + print*, "noL_0e =", noL_0e PROVIDE noL_1e PROVIDE noL_2e endif @@ -29,7 +30,9 @@ print *, ' PROVIDING htilde_matrix_elmt_bi_ortho ...' call wall_time(t1) - call provide_all_three_ints_bi_ortho() + if(three_body_h_tc)then + call provide_all_three_ints_bi_ortho() + endif i = 1 j = 1 diff --git a/plugins/local/slater_tc_no_opt/.gitignore b/plugins/local/slater_tc_no_opt/.gitignore new file mode 100644 index 000000000..1561915b5 --- /dev/null +++ b/plugins/local/slater_tc_no_opt/.gitignore @@ -0,0 +1,59 @@ +IRPF90_temp/ +IRPF90_man/ +build.ninja +irpf90.make +ezfio_interface.irp.f +irpf90_entities +tags +Makefile +ao_basis +ao_one_e_ints +ao_two_e_erf_ints +ao_two_e_ints +aux_quantities +becke_numerical_grid +bitmask +cis +cisd +cipsi +davidson +davidson_dressed +davidson_undressed +density_for_dft +determinants +dft_keywords +dft_utils_in_r +dft_utils_one_e +dft_utils_two_body +dressing +dummy +electrons +ezfio_files +fci +generators_cas +generators_full +hartree_fock +iterations +kohn_sham +kohn_sham_rs +mo_basis +mo_guess +mo_one_e_ints +mo_two_e_erf_ints +mo_two_e_ints +mpi +mrpt_utils +nuclei +perturbation +pseudo +psiref_cas +psiref_utils +scf_utils +selectors_cassd +selectors_full +selectors_utils +single_ref_method +slave +tools +utils +zmq diff --git a/plugins/local/slater_tc_no_opt/NEED b/plugins/local/slater_tc_no_opt/NEED new file mode 100644 index 000000000..a8669866b --- /dev/null +++ b/plugins/local/slater_tc_no_opt/NEED @@ -0,0 +1,8 @@ +determinants +normal_order_old +bi_ort_ints +bi_ortho_mos +tc_keywords +non_hermit_dav +dav_general_mat +tc_scf diff --git a/plugins/local/slater_tc_no_opt/README.rst b/plugins/local/slater_tc_no_opt/README.rst new file mode 100644 index 000000000..90679e4c5 --- /dev/null +++ b/plugins/local/slater_tc_no_opt/README.rst @@ -0,0 +1,4 @@ +================ +slater_tc_no_opt +================ + diff --git a/plugins/local/tc_bi_ortho/h_biortho.irp.f b/plugins/local/slater_tc_no_opt/h_biortho.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/h_biortho.irp.f rename to plugins/local/slater_tc_no_opt/h_biortho.irp.f diff --git a/plugins/local/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/plugins/local/slater_tc_no_opt/h_tc_bi_ortho_psi.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f rename to plugins/local/slater_tc_no_opt/h_tc_bi_ortho_psi.irp.f diff --git a/plugins/local/tc_bi_ortho/slater_tc_3e_slow.irp.f b/plugins/local/slater_tc_no_opt/slater_tc_3e_slow.irp.f similarity index 99% rename from plugins/local/tc_bi_ortho/slater_tc_3e_slow.irp.f rename to plugins/local/slater_tc_no_opt/slater_tc_3e_slow.irp.f index cb33d3435..f79196535 100644 --- a/plugins/local/tc_bi_ortho/slater_tc_3e_slow.irp.f +++ b/plugins/local/slater_tc_no_opt/slater_tc_3e_slow.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) +subroutine diag_htc_bi_orth_3e_brute(Nint, key_i, hthree) BEGIN_DOC ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS diff --git a/plugins/local/slater_tc_no_opt/slater_tc_no_opt.irp.f b/plugins/local/slater_tc_no_opt/slater_tc_no_opt.irp.f new file mode 100644 index 000000000..0fcc587fa --- /dev/null +++ b/plugins/local/slater_tc_no_opt/slater_tc_no_opt.irp.f @@ -0,0 +1,7 @@ +program slater_tc_no_opt + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' +end diff --git a/plugins/local/tc_bi_ortho/slater_tc_slow.irp.f b/plugins/local/slater_tc_no_opt/slater_tc_slow.irp.f similarity index 80% rename from plugins/local/tc_bi_ortho/slater_tc_slow.irp.f rename to plugins/local/slater_tc_no_opt/slater_tc_slow.irp.f index caf7d665e..b06fd12fb 100644 --- a/plugins/local/tc_bi_ortho/slater_tc_slow.irp.f +++ b/plugins/local/slater_tc_no_opt/slater_tc_slow.irp.f @@ -61,7 +61,7 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, if(degree.gt.2) return if(degree == 0) then - call diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) + call diag_htc_bi_orth_2e_brute(Nint, key_i, hmono, htwoe, htot) else if (degree == 1) then call single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) else if(degree == 2) then @@ -76,7 +76,7 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, else if((degree == 1) .and. (elec_num .gt. 2) .and. three_e_4_idx_term) then call single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) else if((degree == 0) .and. (elec_num .gt. 2) .and. three_e_3_idx_term) then - call diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) + call diag_htc_bi_orth_3e_brute(Nint, key_i, hthree) endif endif @@ -95,75 +95,6 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, ! --- -subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) - - BEGIN_DOC - ! - ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS - ! - END_DOC - - use bitmasks - - implicit none - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_i(Nint,2) - double precision, intent(out) :: hmono,htwoe,htot - integer :: occ(Nint*bit_kind_size,2) - integer :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk - double precision :: get_mo_two_e_integral_tc_int - integer(bit_kind) :: key_i_core(Nint,2) - - PROVIDE mo_bi_ortho_tc_two_e - - hmono = 0.d0 - htwoe = 0.d0 - htot = 0.d0 - - call bitstring_to_list_ab(key_i, occ, Ne, Nint) - - do ispin = 1, 2 - do i = 1, Ne(ispin) - ii = occ(i,ispin) - hmono += mo_bi_ortho_tc_one_e(ii,ii) - enddo - enddo - - ! alpha/beta two-body - ispin = 1 - jspin = 2 - do i = 1, Ne(ispin) ! electron 1 (so it can be associated to mu(r1)) - ii = occ(i,ispin) - do j = 1, Ne(jspin) ! electron 2 - jj = occ(j,jspin) - htwoe += mo_bi_ortho_tc_two_e(jj,ii,jj,ii) - enddo - enddo - - ! alpha/alpha two-body - do i = 1, Ne(ispin) - ii = occ(i,ispin) - do j = i+1, Ne(ispin) - jj = occ(j,ispin) - htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) - enddo - enddo - - ! beta/beta two-body - do i = 1, Ne(jspin) - ii = occ(i,jspin) - do j = i+1, Ne(jspin) - jj = occ(j,jspin) - htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) - enddo - enddo - - htot = hmono + htwoe - -end - -! --- - subroutine double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC diff --git a/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f b/plugins/local/slater_tc_no_opt/test_tc_bi_ortho.irp.f similarity index 96% rename from plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f rename to plugins/local/slater_tc_no_opt/test_tc_bi_ortho.irp.f index 369efd155..559c02000 100644 --- a/plugins/local/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/plugins/local/slater_tc_no_opt/test_tc_bi_ortho.irp.f @@ -88,7 +88,7 @@ subroutine test_slater_tc_opt i_count = 0.d0 do i = 1, N_det do j = 1,N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot) if(dabs(htot).gt.1.d-15)then i_count += 1.D0 @@ -124,7 +124,7 @@ subroutine timing_tot do j = 1, N_det ! call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int) i_count += 1.d0 - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -171,7 +171,7 @@ subroutine timing_diag do i = 1, N_det do j = i,i i_count += 1.d0 - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -208,7 +208,7 @@ subroutine timing_single if(degree.ne.1)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo @@ -250,7 +250,7 @@ subroutine timing_double if(degree.ne.2)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo diff --git a/plugins/local/spher_harm/.gitignore b/plugins/local/spher_harm/.gitignore new file mode 100644 index 000000000..1561915b5 --- /dev/null +++ b/plugins/local/spher_harm/.gitignore @@ -0,0 +1,59 @@ +IRPF90_temp/ +IRPF90_man/ +build.ninja +irpf90.make +ezfio_interface.irp.f +irpf90_entities +tags +Makefile +ao_basis +ao_one_e_ints +ao_two_e_erf_ints +ao_two_e_ints +aux_quantities +becke_numerical_grid +bitmask +cis +cisd +cipsi +davidson +davidson_dressed +davidson_undressed +density_for_dft +determinants +dft_keywords +dft_utils_in_r +dft_utils_one_e +dft_utils_two_body +dressing +dummy +electrons +ezfio_files +fci +generators_cas +generators_full +hartree_fock +iterations +kohn_sham +kohn_sham_rs +mo_basis +mo_guess +mo_one_e_ints +mo_two_e_erf_ints +mo_two_e_ints +mpi +mrpt_utils +nuclei +perturbation +pseudo +psiref_cas +psiref_utils +scf_utils +selectors_cassd +selectors_full +selectors_utils +single_ref_method +slave +tools +utils +zmq diff --git a/plugins/local/spher_harm/NEED b/plugins/local/spher_harm/NEED new file mode 100644 index 000000000..92df7f120 --- /dev/null +++ b/plugins/local/spher_harm/NEED @@ -0,0 +1 @@ +dft_utils_in_r diff --git a/plugins/local/spher_harm/README.rst b/plugins/local/spher_harm/README.rst new file mode 100644 index 000000000..9c9b12a6e --- /dev/null +++ b/plugins/local/spher_harm/README.rst @@ -0,0 +1,7 @@ +========== +spher_harm +========== + +Routines for spherical Harmonics evaluation in real space. +The main routine is "spher_harm_func_r3(r,l,m,re_ylm, im_ylm)". +The test routine is "test_spher_harm" where everything is explained in details. diff --git a/plugins/local/spher_harm/assoc_gaus_pol.irp.f b/plugins/local/spher_harm/assoc_gaus_pol.irp.f new file mode 100644 index 000000000..fa790307c --- /dev/null +++ b/plugins/local/spher_harm/assoc_gaus_pol.irp.f @@ -0,0 +1,50 @@ +double precision function plgndr(l,m,x) + integer, intent(in) :: l,m + double precision, intent(in) :: x + BEGIN_DOC + ! associated Legenre polynom P_l,m(x). Used for the Y_lm(theta,phi) + ! Taken from https://iate.oac.uncor.edu/~mario/materia/nr/numrec/f6-8.pdf + END_DOC + integer :: i,ll + double precision :: fact,pll,pmm,pmmp1,somx2 + if(m.lt.0.or.m.gt.l.or.dabs(x).gt.1.d0)then + print*,'bad arguments in plgndr' + pause + endif + pmm=1.d0 + if(m.gt.0) then + somx2=dsqrt((1.d0-x)*(1.d0+x)) + fact=1.d0 + do i=1,m + pmm=-pmm*fact*somx2 + fact=fact+2.d0 + enddo + endif ! m > 0 + if(l.eq.m) then + plgndr=pmm + else + pmmp1=x*(2*m+1)*pmm ! Compute P_m+1^m + if(l.eq.m+1) then + plgndr=pmmp1 + else ! Compute P_l^m, l> m+1 + do ll=m+2,l + pll=(x*dble(2*ll-1)*pmmp1-dble(ll+m-1)*pmm)/(ll-m) + pmm=pmmp1 + pmmp1=pll + enddo + plgndr=pll + endif ! l.eq.m+1 + endif ! l.eq.m + return +end + +double precision function ortho_assoc_gaus_pol(l1,m1,l2) + implicit none + integer, intent(in) :: l1,m1,l2 + double precision :: fact + if(l1.ne.l2)then + ortho_assoc_gaus_pol= 0.d0 + else + ortho_assoc_gaus_pol = 2.d0*fact(l1+m1) / (dble(2*l1+1)*fact(l1-m1)) + endif +end diff --git a/plugins/local/spher_harm/routines_test.irp.f b/plugins/local/spher_harm/routines_test.irp.f new file mode 100644 index 000000000..fe8fc422c --- /dev/null +++ b/plugins/local/spher_harm/routines_test.irp.f @@ -0,0 +1,231 @@ +subroutine test_spher_harm + implicit none + BEGIN_DOC + ! routine to test the generic spherical harmonics routine "spher_harm_func_r3" from R^3 --> C + ! + ! We test = delta_m1,m2 delta_l1,l2 + ! + ! The test is done through the integration on a sphere with the Lebedev grid. + END_DOC + include 'constants.include.F' + integer :: l1,m1,i,l2,m2,lmax + double precision :: r(3),weight,accu_re, accu_im,accu + double precision :: re_ylm_1, im_ylm_1,re_ylm_2, im_ylm_2 + double precision :: theta,phi,r_abs + lmax = 5 ! Maximum angular momentum until which we are going to test orthogonality conditions + do l1 = 0,lmax + do m1 = -l1 ,l1 + do l2 = 0,lmax + do m2 = -l2 ,l2 + accu_re = 0.d0 ! accumulator for the REAL part of + accu_im = 0.d0 ! accumulator for the IMAGINARY part of + accu = 0.d0 ! accumulator for the weights ==> should be \int dOmega == 4 pi + ! = \int dOmega Y_l1,m1^* Y_l2,m2 + ! \approx \sum_i W_i Y_l1,m1^*(r_i) Y_l2,m2(r_i) WITH r_i being on the spher of radius 1 + do i = 1, n_points_integration_angular + r(1:3) = angular_quadrature_points(i,1:3) ! ith Lebedev point (x,y,z) on the sphere of radius 1 + weight = weights_angular_points(i) ! associated Lebdev weight not necessarily positive + +!!!!!!!!!!! Test of the Cartesian --> Spherical coordinates + ! theta MUST belong to [0,pi] and phi to [0,2pi] + ! gets the cartesian to spherical change of coordinates + call cartesian_to_spherical(r,theta,phi,r_abs) + if(theta.gt.pi.or.theta.lt.0.d0)then + print*,'pb with theta, it should be in [0,pi]',theta + print*,r + endif + if(phi.gt.2.d0*pi.or.phi.lt.0.d0)then + print*,'pb with phi, it should be in [0,2 pi]',phi/pi + print*,r + endif + +!!!!!!!!!!! Routines returning the Spherical harmonics on the grid point + call spher_harm_func_r3(r,l1,m1,re_ylm_1, im_ylm_1) + call spher_harm_func_r3(r,l2,m2,re_ylm_2, im_ylm_2) + +!!!!!!!!!!! Integration of Y_l1,m1^*(r) Y_l2,m2(r) + ! = \int dOmega (re_ylm_1 -i im_ylm_1) * (re_ylm_2 +i im_ylm_2) + ! = \int dOmega (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) +i (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + enddo + ! Test that the sum of the weights is 4 pi + if(dabs(accu - dfour_pi).gt.1.d-6)then + print*,'Problem !! The sum of the Lebedev weight is not 4 pi ..' + print*,accu + stop + endif + ! Test for the delta l1,l2 and delta m1,m2 + ! + ! Test for the off-diagonal part of the Kronecker delta + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + ! Test for the diagonal part of the Kronecker delta + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + endif + endif + enddo + enddo + enddo + enddo +end + +subroutine test_cart + implicit none + BEGIN_DOC + ! test for the cartesian --> spherical change of coordinates + ! + ! test the routine "cartesian_to_spherical" such that the polar angle theta ranges in [0,pi] + ! + ! and the asymuthal angle phi ranges in [0,2pi] + END_DOC + include 'constants.include.F' + double precision :: r(3),theta,phi,r_abs + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) = 1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) =-1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi + print*,'' + r = 0.d0 + r(1) = 1.d0 + r(2) =-1.d0 + call cartesian_to_spherical(r,theta,phi,r_abs) + print*,r + print*,phi/pi +end + + +subroutine test_brutal_spheric + implicit none + include 'constants.include.F' + BEGIN_DOC + ! Test for the = delta_m1,m2 delta_l1,l2 using the following two dimentional integration + ! + ! \int_0^2pi d Phi \int_-1^+1 d(cos(Theta)) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) + ! + != \int_0^2pi d Phi \int_0^pi dTheta sin(Theta) Y_l1,m1^*(Theta,Phi) Y_l2,m2(Theta,Phi) + ! + ! Allows to test for the general functions "spher_harm_func_m_pos" with "spher_harm_func_expl" + END_DOC + integer :: itheta, iphi,ntheta,nphi + double precision :: theta_min, theta_max, dtheta,theta + double precision :: phi_min, phi_max, dphi,phi + double precision :: accu_re, accu_im,weight + double precision :: re_ylm_1, im_ylm_1 ,re_ylm_2, im_ylm_2,accu + integer :: l1,m1,i,l2,m2,lmax + phi_min = 0.d0 + phi_max = 2.D0 * pi + theta_min = 0.d0 + theta_max = 1.D0 * pi + ntheta = 1000 + nphi = 1000 + dphi = (phi_max - phi_min)/dble(nphi) + dtheta = (theta_max - theta_min)/dble(ntheta) + + lmax = 2 + do l1 = 0,lmax + do m1 = 0 ,l1 + do l2 = 0,lmax + do m2 = 0 ,l2 + accu_re = 0.d0 + accu_im = 0.d0 + accu = 0.d0 + theta = theta_min + do itheta = 1, ntheta + phi = phi_min + do iphi = 1, nphi +! call spher_harm_func_expl(l1,m1,theta,phi,re_ylm_1, im_ylm_1) +! call spher_harm_func_expl(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + call spher_harm_func_m_pos(l1,m1,theta,phi,re_ylm_1, im_ylm_1) + call spher_harm_func_m_pos(l2,m2,theta,phi,re_ylm_2, im_ylm_2) + weight = dtheta * dphi * dsin(theta) + accu_re += weight * (re_ylm_1*re_ylm_2 + im_ylm_1*im_ylm_2) + accu_im += weight * (im_ylm_2*re_ylm_1 - im_ylm_1*re_ylm_2) + accu += weight + phi += dphi + enddo + theta += dtheta + enddo + print*,'l1,m1,l2,m2',l1,m1,l2,m2 + print*,'accu_re = ',accu_re + print*,'accu_im = ',accu_im + print*,'accu = ',accu + if(l1.ne.l2.or.m1.ne.m2)then + if(dabs(accu_re).gt.1.d-6.or.dabs(accu_im).gt.1.d-6)then + print*,'pb OFF DIAG !!!!! ' + endif + endif + if(l1==l2.and.m1==m2)then + if(dabs(accu_re-1.d0).gt.1.d-5.or.dabs(accu_im).gt.1.d-6)then + print*,'pb DIAG !!!!! ' + endif + endif + enddo + enddo + enddo + enddo + + +end + +subroutine test_assoc_leg_pol + implicit none + BEGIN_DOC +! Test for the associated Legendre Polynoms. The test is done through the orthogonality condition. + END_DOC + print *, 'Hello world' + integer :: l1,m1,ngrid,i,l2,m2 + l1 = 0 + m1 = 0 + l2 = 2 + m2 = 0 + double precision :: x, dx,xmax,accu,xmin + double precision :: plgndr,func_1,func_2,ortho_assoc_gaus_pol + ngrid = 100000 + xmax = 1.d0 + xmin = -1.d0 + dx = (xmax-xmin)/dble(ngrid) + do l2 = 0,10 + x = xmin + accu = 0.d0 + do i = 1, ngrid + func_1 = plgndr(l1,m1,x) + func_2 = plgndr(l2,m2,x) + write(33,*)x, func_1,func_2 + accu += func_1 * func_2 * dx + x += dx + enddo + print*,'l2 = ',l2 + print*,'accu = ',accu + print*,ortho_assoc_gaus_pol(l1,m1,l2) + enddo +end diff --git a/plugins/local/spher_harm/spher_harm.irp.f b/plugins/local/spher_harm/spher_harm.irp.f new file mode 100644 index 000000000..e8deafb92 --- /dev/null +++ b/plugins/local/spher_harm/spher_harm.irp.f @@ -0,0 +1,7 @@ +program spher_harm + implicit none + call test_spher_harm +! call test_cart +! call test_brutal_spheric +end + diff --git a/plugins/local/spher_harm/spher_harm_func.irp.f b/plugins/local/spher_harm/spher_harm_func.irp.f new file mode 100644 index 000000000..f12c8fb90 --- /dev/null +++ b/plugins/local/spher_harm/spher_harm_func.irp.f @@ -0,0 +1,164 @@ +subroutine spher_harm_func_r3(r,l,m,re_ylm, im_ylm) + implicit none + integer, intent(in) :: l,m + double precision, intent(in) :: r(3) + double precision, intent(out) :: re_ylm, im_ylm + + double precision :: theta, phi,r_abs + call cartesian_to_spherical(r,theta,phi,r_abs) + call spher_harm_func(l,m,theta,phi,re_ylm, im_ylm) +! call spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm) +end + + +subroutine spher_harm_func_m_pos(l,m,theta,phi,re_ylm, im_ylm) + include 'constants.include.F' + implicit none + BEGIN_DOC +! Y_lm(theta,phi) with m >0 +! + END_DOC + double precision, intent(in) :: theta, phi + integer, intent(in) :: l,m + double precision, intent(out):: re_ylm,im_ylm + double precision :: prefact,fact,cos_theta,plgndr,p_lm + double precision :: tmp + prefact = dble(2*l+1)*fact(l-m)/(dfour_pi * fact(l+m)) + prefact = dsqrt(prefact) + cos_theta = dcos(theta) + p_lm = plgndr(l,m,cos_theta) + tmp = prefact * p_lm + re_ylm = dcos(dble(m)*phi) * tmp + im_ylm = dsin(dble(m)*phi) * tmp +end + +subroutine spher_harm_func(l,m,theta,phi,re_ylm, im_ylm) + implicit none + BEGIN_DOC + ! Y_lm(theta,phi) with -l l in spher_harm_func !! stopping ...' + stop + endif + if(m.ge.0)then + call spher_harm_func_m_pos(l,m,theta,phi,re_ylm_pos, im_ylm_pos) + re_ylm = re_ylm_pos + im_ylm = im_ylm_pos + else + minus_m = -m !> 0 + call spher_harm_func_m_pos(l,minus_m,theta,phi,re_ylm_pos, im_ylm_pos) + tmp = (-1)**minus_m + re_ylm = tmp * re_ylm_pos + im_ylm = -tmp * im_ylm_pos ! complex conjugate + endif +end + +subroutine cartesian_to_spherical(r,theta,phi,r_abs) + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out):: theta, phi,r_abs + double precision :: r_2,x_2_y_2,tmp + include 'constants.include.F' + x_2_y_2 = r(1)*r(1) + r(2)*r(2) + r_2 = x_2_y_2 + r(3)*r(3) + r_abs = dsqrt(r_2) + + if(r_abs.gt.1.d-20)then + theta = dacos(r(3)/r_abs) + else + theta = 0.d0 + endif + + if(.true.)then + if(dabs(r(1)).gt.0.d0)then + tmp = datan(r(2)/r(1)) +! phi = datan2(r(2),r(1)) + endif + ! From Wikipedia on Spherical Harmonics + if(r(1).gt.0.d0)then + phi = tmp + else if(r(1).lt.0.d0.and.r(2).ge.0.d0)then + phi = tmp + pi + else if(r(1).lt.0.d0.and.r(2).lt.0.d0)then + phi = tmp - pi + else if(r(1)==0.d0.and.r(2).gt.0.d0)then + phi = 0.5d0*pi + else if(r(1)==0.d0.and.r(2).lt.0.d0)then + phi =-0.5d0*pi + else if(r(1)==0.d0.and.r(2)==0.d0)then + phi = 0.d0 + endif + if(r(2).lt.0.d0.and.r(1).le.0.d0)then + tmp = pi - dabs(phi) + phi = pi + tmp + else if(r(2).lt.0.d0.and.r(1).gt.0.d0)then + phi = dtwo_pi + phi + endif + endif + + if(.false.)then + x_2_y_2 = dsqrt(x_2_y_2) + if(dabs(x_2_y_2).gt.1.d-20.and.dabs(r(2)).gt.1.d-20)then + phi = dabs(r(2))/r(2) * dacos(r(1)/x_2_y_2) + else + phi = 0.d0 + endif + endif +end + + +subroutine spher_harm_func_expl(l,m,theta,phi,re_ylm, im_ylm) + implicit none + BEGIN_DOC + ! Y_lm(theta,phi) with -l for same spin matrix elements - ! - ! notice the -1 sign: in this way three_e_diag_parrallel_spin_prov can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, m - double precision :: integral, wall1, wall0, three_e_diag_parrallel_spin - - three_e_diag_parrallel_spin_prov = 0.d0 - print *, ' Providing the three_e_diag_parrallel_spin_prov ...' - - integral = three_e_diag_parrallel_spin(1,1,1) ! to provide all stuffs - call wall_time(wall0) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,m,integral) & - !$OMP SHARED (mo_num,three_e_diag_parrallel_spin_prov) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do j = 1, mo_num - do m = j, mo_num - three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin(m,j,i) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - do i = 1, mo_num - do j = 1, mo_num - do m = 1, j - three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin_prov(j,m,i) - enddo - enddo - enddo - - call wall_time(wall1) - print *, ' wall time for three_e_diag_parrallel_spin_prov', wall1 - wall0 - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, three_e_single_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_single_parrallel_spin_prov(m,j,k,i) = All combination of for same spin matrix elements - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0, three_e_single_parrallel_spin - - three_e_single_parrallel_spin_prov = 0.d0 - print *, ' Providing the three_e_single_parrallel_spin_prov ...' - - integral = three_e_single_parrallel_spin(1,1,1,1) - call wall_time(wall0) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_single_parrallel_spin_prov) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_single_parrallel_spin_prov(m,j,k,i) = three_e_single_parrallel_spin(m,j,k,i) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_single_parrallel_spin_prov', wall1 - wall0 - -END_PROVIDER - - -! --- - -BEGIN_PROVIDER [ double precision, three_e_double_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_double_parrallel_spin_prov(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0, three_e_double_parrallel_spin - - three_e_double_parrallel_spin_prov = 0.d0 - print *, ' Providing the three_e_double_parrallel_spin_prov ...' - call wall_time(wall0) - - integral = three_e_double_parrallel_spin(1,1,1,1,1) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_double_parrallel_spin_prov) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - three_e_double_parrallel_spin_prov(m,l,j,k,i) = three_e_double_parrallel_spin(m,l,j,k,i) - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_double_parrallel_spin_prov', wall1 - wall0 - -END_PROVIDER - diff --git a/plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f b/plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f deleted file mode 100644 index d4c8c55dd..000000000 --- a/plugins/local/tc_bi_ortho/tc_cisd_sc2.irp.f +++ /dev/null @@ -1,36 +0,0 @@ - -! --- - -program tc_cisd_sc2 - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - call test - -end - -! --- - -subroutine test() - implicit none -! double precision, allocatable :: dressing_dets(:),e_corr_dets(:) -! allocate(dressing_dets(N_det),e_corr_dets(N_det)) -! e_corr_dets = 0.d0 -! call get_cisd_sc2_dressing(psi_det,e_corr_dets,N_det,dressing_dets) - provide eigval_tc_cisd_sc2_bi_ortho -end diff --git a/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f deleted file mode 100644 index 4c3c07882..000000000 --- a/plugins/local/tc_bi_ortho/tc_cisd_sc2_utils.irp.f +++ /dev/null @@ -1,145 +0,0 @@ - BEGIN_PROVIDER [ double precision, reigvec_tc_cisd_sc2_bi_ortho, (N_det,N_states)] -&BEGIN_PROVIDER [ double precision, leigvec_tc_cisd_sc2_bi_ortho, (N_det,N_states)] -&BEGIN_PROVIDER [ double precision, eigval_tc_cisd_sc2_bi_ortho, (N_states)] - implicit none - integer :: it,n_real,degree,i,istate - double precision :: e_before, e_current,thr, hmono,htwoe,hthree,accu - double precision, allocatable :: e_corr_dets(:),h0j(:), h_sc2(:,:), dressing_dets(:) - double precision, allocatable :: leigvec_tc_bi_orth_tmp(:,:),reigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) - allocate(leigvec_tc_bi_orth_tmp(N_det,N_det),reigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det)) - allocate(e_corr_dets(N_det),h0j(N_det),h_sc2(N_det,N_det),dressing_dets(N_det)) - allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),eigval_tmp(N_states)) - dressing_dets = 0.d0 - do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) - call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) - if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) - endif - enddo - reigvec_tc_bi_orth_tmp = 0.d0 - do i = 1, N_det - reigvec_tc_bi_orth_tmp(i,1) = psi_r_coef_bi_ortho(i,1) - enddo - vec_tmp = 0.d0 - do istate = 1, N_states - vec_tmp(:,istate) = reigvec_tc_bi_orth_tmp(:,istate) - enddo - do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 - enddo - print*,'Diagonalizing the TC CISD ' - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) - do i = 1, N_det - e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) - enddo - E_before = eigval_tmp(1) - print*,'Starting from ',E_before - - e_current = 10.d0 - thr = 1.d-5 - it = 0 - dressing_dets = 0.d0 - double precision, allocatable :: H_jj(:),vec_tmp(:,:),eigval_tmp(:) - external htc_bi_ortho_calc_tdav_slow - external htcdag_bi_ortho_calc_tdav_slow - logical :: converged - do while (dabs(E_before-E_current).gt.thr) - it += 1 - E_before = E_current -! h_sc2 = htilde_matrix_elmt_bi_ortho - call get_cisd_sc2_dressing(psi_det,e_corr_dets,N_det,dressing_dets) - do i = 1, N_det -! print*,'dressing_dets(i) = ',dressing_dets(i) - h_sc2(i,i) += dressing_dets(i) - enddo - print*,'********************' - print*,'iteration ',it -! call non_hrmt_real_diag(N_det,h_sc2,& -! leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& -! n_real,eigval_right_tmp) -! print*,'eigval_right_tmp(1)',eigval_right_tmp(1) - vec_tmp = 0.d0 - do istate = 1, N_states - vec_tmp(:,istate) = reigvec_tc_bi_orth_tmp(:,istate) - enddo - do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 - enddo - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) - print*,'outside Davidson' - print*,'eigval_tmp(1) = ',eigval_tmp(1) - do i = 1, N_det - reigvec_tc_bi_orth_tmp(i,1) = vec_tmp(i,1) - e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) - enddo -! E_current = eigval_right_tmp(1) - E_current = eigval_tmp(1) - print*,'it, E(SC)^2 = ',it,E_current - enddo - eigval_tc_cisd_sc2_bi_ortho(1:N_states) = eigval_right_tmp(1:N_states) - reigvec_tc_cisd_sc2_bi_ortho(1:N_det,1:N_states) = reigvec_tc_bi_orth_tmp(1:N_det,1:N_states) - leigvec_tc_cisd_sc2_bi_ortho(1:N_det,1:N_states) = leigvec_tc_bi_orth_tmp(1:N_det,1:N_states) - -END_PROVIDER - -subroutine get_cisd_sc2_dressing(dets,e_corr_dets,ndet,dressing_dets) - implicit none - use bitmasks - integer, intent(in) :: ndet - integer(bit_kind), intent(in) :: dets(N_int,2,ndet) - double precision, intent(in) :: e_corr_dets(ndet) - double precision, intent(out) :: dressing_dets(ndet) - integer, allocatable :: degree(:),hole(:,:),part(:,:),spin(:,:) - integer(bit_kind), allocatable :: hole_part(:,:,:) - integer :: i,j,k, exc(0:2,2,2),h1,p1,h2,p2,s1,s2 - integer(bit_kind) :: xorvec(2,N_int) - - double precision :: phase - dressing_dets = 0.d0 - allocate(degree(ndet),hole(2,ndet),part(2,ndet), spin(2,ndet),hole_part(N_int,2,ndet)) - do i = 2, ndet - call get_excitation_degree(HF_bitmask,dets(1,1,i),degree(i),N_int) - do j = 1, N_int - hole_part(j,1,i) = xor( HF_bitmask(j,1), dets(j,1,i)) - hole_part(j,2,i) = xor( HF_bitmask(j,2), dets(j,2,i)) - enddo - if(degree(i) == 1)then - call get_single_excitation(HF_bitmask,psi_det(1,1,i),exc,phase,N_int) - else if(degree(i) == 2)then - call get_double_excitation(HF_bitmask,psi_det(1,1,i),exc,phase,N_int) - endif - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) - hole(1,i) = h1 - hole(2,i) = h2 - part(1,i) = p1 - part(2,i) = p2 - spin(1,i) = s1 - spin(2,i) = s2 - enddo - - integer :: same - if(elec_alpha_num+elec_beta_num<3)return - do i = 2, ndet - do j = i+1, ndet - same = 0 - if(degree(i) == degree(j) .and. degree(i)==1)cycle - do k = 1, N_int - xorvec(k,1) = iand(hole_part(k,1,i),hole_part(k,1,j)) - xorvec(k,2) = iand(hole_part(k,2,i),hole_part(k,2,j)) - same += popcnt(xorvec(k,1)) + popcnt(xorvec(k,2)) - enddo -! print*,'i,j',i,j -! call debug_det(dets(1,1,i),N_int) -! call debug_det(hole_part(1,1,i),N_int) -! call debug_det(dets(1,1,j),N_int) -! call debug_det(hole_part(1,1,j),N_int) -! print*,'same = ',same - if(same.eq.0)then - dressing_dets(i) += e_corr_dets(j) - dressing_dets(j) += e_corr_dets(i) - endif - enddo - enddo - -end diff --git a/plugins/local/tc_bi_ortho/tc_h_eigvectors.irp.f b/plugins/local/tc_bi_ortho/tc_h_eigvectors.irp.f index 6bf3d99ec..168442210 100644 --- a/plugins/local/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/plugins/local/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -326,7 +326,13 @@ subroutine diagonalize_CI_tc() enddo double precision, allocatable :: buffer(:,:) - allocate(buffer(N_det,N_states)) + allocate(buffer(psi_det_size,N_states)) +! print*,N_det,N_states +! print*,size(psi_l_coef_bi_ortho,1),size(psi_l_coef_bi_ortho,2) +! print*,size(leigvec_tc_bi_orth,1),size(leigvec_tc_bi_orth,2) +! print*,size(reigvec_tc_bi_orth,1),size(reigvec_tc_bi_orth,2) +! print*,size(psi_r_coef_bi_ortho,1),size(psi_r_coef_bi_ortho,2) + buffer = 0.d0 do k = 1, N_states do i = 1, N_det psi_l_coef_bi_ortho(i,k) = leigvec_tc_bi_orth(i,k) diff --git a/plugins/local/tc_bi_ortho/tc_utils.irp.f b/plugins/local/tc_bi_ortho/tc_utils.irp.f index 53fe58845..067e87195 100644 --- a/plugins/local/tc_bi_ortho/tc_utils.irp.f +++ b/plugins/local/tc_bi_ortho/tc_utils.irp.f @@ -2,12 +2,71 @@ subroutine write_tc_energy() implicit none - integer :: i, j, k - double precision :: hmono, htwoe, hthree, htot - double precision :: E_TC, O_TC - double precision :: E_1e, E_2e, E_3e + integer :: i, j, k + double precision :: hmono, htwoe, hthree, htot + double precision :: E_TC, O_TC + double precision :: E_1e, E_2e, E_3e + double precision, allocatable :: E_TC_tmp(:), E_1e_tmp(:), E_2e_tmp(:), E_3e_tmp(:) + + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot) + + ! GS + ! --- + + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot) + + allocate(E_TC_tmp(N_det), E_1e_tmp(N_det), E_2e_tmp(N_det), E_3e_tmp(N_det)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE(i, j, hmono, htwoe, hthree, htot) & + !$OMP SHARED(N_det, psi_det, N_int, psi_l_coef_bi_ortho, psi_r_coef_bi_ortho, & + !$OMP E_TC_tmp, E_1e_tmp, E_2e_tmp, E_3e_tmp) + !$OMP DO + do i = 1, N_det + E_TC_tmp(i) = 0.d0 + E_1e_tmp(i) = 0.d0 + E_2e_tmp(i) = 0.d0 + E_3e_tmp(i) = 0.d0 + do j = 1, N_det + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + E_TC_tmp(i) = E_TC_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * htot + E_1e_tmp(i) = E_1e_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * hmono + E_2e_tmp(i) = E_2e_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * htwoe + E_3e_tmp(i) = E_3e_tmp(i) + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * hthree + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + E_1e = 0.d0 + E_2e = 0.d0 + E_3e = 0.d0 + E_TC = 0.d0 + O_TC = 0.d0 + do i = 1, N_det + E_1e = E_1e + E_1e_tmp(i) + E_2e = E_2e + E_2e_tmp(i) + E_3e = E_3e + E_3e_tmp(i) + E_TC = E_TC + E_TC_tmp(i) + O_TC = O_TC + psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(i,1) + enddo - do k = 1, n_states + print *, ' state :', 1 + print *, " E_TC = ", E_TC / O_TC + print *, " E_1e = ", E_1e / O_TC + print *, " E_2e = ", E_2e / O_TC + print *, " E_3e = ", E_3e / O_TC + print *, " O_TC = ", O_TC + + call ezfio_set_tc_bi_ortho_tc_gs_energy(E_TC/O_TC) + + ! --- + + ! ES + ! --- + + do k = 2, n_states E_TC = 0.d0 E_1e = 0.d0 @@ -15,7 +74,7 @@ subroutine write_tc_energy() E_3e = 0.d0 do i = 1, N_det do j = 1, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot E_1e = E_1e + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * hmono E_2e = E_2e + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htwoe @@ -37,6 +96,8 @@ subroutine write_tc_energy() enddo + deallocate(E_TC_tmp, E_1e_tmp, E_2e_tmp, E_3e_tmp) + end ! --- @@ -52,8 +113,8 @@ subroutine write_tc_var() SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) SIGMA_TC = SIGMA_TC + htot_1j * htot_j1 enddo @@ -66,3 +127,25 @@ subroutine write_tc_var() ! --- +subroutine write_tc_gs_var_HF() + + implicit none + integer :: i, j, k + double precision :: hmono, htwoe, hthree, htot + double precision :: SIGMA_TC + + SIGMA_TC = 0.d0 + do j = 2, N_det + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot) + SIGMA_TC = SIGMA_TC + htot * htot + enddo + + print *, " SIGMA_TC = ", SIGMA_TC + + call ezfio_set_tc_bi_ortho_tc_gs_var(SIGMA_TC) + +end + +! --- + + diff --git a/plugins/local/tc_bi_ortho/test_natorb.irp.f b/plugins/local/tc_bi_ortho/test_natorb.irp.f deleted file mode 100644 index 5b8801f74..000000000 --- a/plugins/local/tc_bi_ortho/test_natorb.irp.f +++ /dev/null @@ -1,64 +0,0 @@ - -! --- - -program test_natorb - - BEGIN_DOC - ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. - END_DOC - - implicit none - - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - call routine() - ! call test() - -end - -! --- - -subroutine routine() - - implicit none - double precision, allocatable :: fock_diag(:),eigval(:),leigvec(:,:),reigvec(:,:),mat_ref(:,:) - allocate(eigval(mo_num),leigvec(mo_num,mo_num),reigvec(mo_num,mo_num),fock_diag(mo_num),mat_ref(mo_num, mo_num)) - double precision, allocatable :: eigval_ref(:),leigvec_ref(:,:),reigvec_ref(:,:) - allocate(eigval_ref(mo_num),leigvec_ref(mo_num,mo_num),reigvec_ref(mo_num,mo_num)) - - double precision :: thr_deg - integer :: i,n_real,j - print*,'fock_matrix' - do i = 1, mo_num - fock_diag(i) = Fock_matrix_mo(i,i) - print*,i,fock_diag(i) - enddo - thr_deg = 1.d-6 - mat_ref = -one_e_dm_mo - print*,'diagonalization by block' - call diag_mat_per_fock_degen(fock_diag,mat_ref,mo_num,thr_deg,leigvec,reigvec,eigval) - call non_hrmt_bieig( mo_num, mat_ref& - , leigvec_ref, reigvec_ref& - , n_real, eigval_ref) - print*,'TEST ***********************************' - double precision :: accu_l, accu_r - do i = 1, mo_num - accu_l = 0.d0 - accu_r = 0.d0 - do j = 1, mo_num - accu_r += reigvec_ref(j,i) * reigvec(j,i) - accu_l += leigvec_ref(j,i) * leigvec(j,i) - enddo - print*,i - write(*,'(I3,X,100(F16.10,X))')i,eigval(i),eigval_ref(i),accu_l,accu_r - enddo -end diff --git a/plugins/local/tc_bi_ortho/test_normal_order.irp.f b/plugins/local/tc_bi_ortho/test_normal_order.irp.f deleted file mode 100644 index 0cf27396e..000000000 --- a/plugins/local/tc_bi_ortho/test_normal_order.irp.f +++ /dev/null @@ -1,173 +0,0 @@ - -! --- - -program test_normal_order - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - call provide_all_three_ints_bi_ortho() - call test() - -end - -! --- - -subroutine test - implicit none - use bitmasks ! you need to include the bitmasks_module.f90 features - integer :: h1,h2,p1,p2,s1,s2,i_ok,degree,Ne(2) - integer :: exc(0:2,2,2) - integer(bit_kind), allocatable :: det_i(:,:) - double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal,hthree_tmp - integer, allocatable :: occ(:,:) - allocate( occ(N_int*bit_kind_size,2) ) - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - allocate(det_i(N_int,2)) - s1 = 1 - s2 = 2 - accu = 0.d0 - do h1 = 1, elec_beta_num - do p1 = elec_alpha_num+1, mo_num - do h2 = 1, elec_beta_num - do p2 = elec_beta_num+1, mo_num - hthree = 0.d0 - - det_i = ref_bitmask - s1 = 1 - s2 = 2 - call do_single_excitation(det_i,h1,p1,s1,i_ok) - if(i_ok.ne.1)cycle - call do_single_excitation(det_i,h2,p2,s2,i_ok) - if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) - call get_excitation_degree(ref_bitmask,det_i,degree,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - hthree_tmp *= phase - hthree += 0.5d0 * hthree_tmp - det_i = ref_bitmask - s1 = 2 - s2 = 1 - call do_single_excitation(det_i,h1,p1,s1,i_ok) - if(i_ok.ne.1)cycle - call do_single_excitation(det_i,h2,p2,s2,i_ok) - if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) - call get_excitation_degree(ref_bitmask,det_i,degree,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - hthree_tmp *= phase - hthree += 0.5d0 * hthree_tmp - - -! normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1) - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, normal) - if(dabs(hthree).lt.1.d-10)cycle - if(dabs(hthree-normal).gt.1.d-10)then -! print*,pp2,pp1,hh2,hh1 - print*,p2,p1,h2,h1 - print*,hthree,normal,dabs(hthree-normal) - stop - endif -! call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal) -! normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1) - accu += dabs(hthree-normal) - enddo - enddo - enddo - enddo -print*,'accu opposite spin = ',accu -stop - -! p2=6 -! p1=5 -! h2=2 -! h1=1 - -s1 = 1 -s2 = 1 -accu = 0.d0 -do h1 = 1, elec_alpha_num - do p1 = elec_alpha_num+1, mo_num - do p2 = p1+1, mo_num - do h2 = h1+1, elec_alpha_num - det_i = ref_bitmask - call do_single_excitation(det_i,h1,p1,s1,i_ok) - if(i_ok.ne.1)cycle - call do_single_excitation(det_i,h2,p2,s2,i_ok) - if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call get_excitation_degree(ref_bitmask,det_i,degree,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - integer :: hh1, pp1, hh2, pp2, ss1, ss2 - call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) - hthree *= phase - normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) -! normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1) - if(dabs(hthree).lt.1.d-10)cycle - if(dabs(hthree-normal).gt.1.d-10)then - print*,pp2,pp1,hh2,hh1 - print*,p2,p1,h2,h1 - print*,hthree,normal,dabs(hthree-normal) - stop - endif -! print*,hthree,normal,dabs(hthree-normal) - accu += dabs(hthree-normal) - enddo - enddo - enddo -enddo -print*,'accu same spin alpha = ',accu - - -s1 = 2 -s2 = 2 -accu = 0.d0 -do h1 = 1, elec_beta_num - do p1 = elec_beta_num+1, mo_num - do p2 = p1+1, mo_num - do h2 = h1+1, elec_beta_num - det_i = ref_bitmask - call do_single_excitation(det_i,h1,p1,s1,i_ok) - if(i_ok.ne.1)cycle - call do_single_excitation(det_i,h2,p2,s2,i_ok) - if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call get_excitation_degree(ref_bitmask,det_i,degree,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) - hthree *= phase -! normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) - normal = eff_2_e_from_3_e_bb(p2,p1,h2,h1) - if(dabs(hthree).lt.1.d-10)cycle - if(dabs(hthree-normal).gt.1.d-10)then - print*,pp2,pp1,hh2,hh1 - print*,p2,p1,h2,h1 - print*,hthree,normal,dabs(hthree-normal) - stop - endif -! print*,hthree,normal,dabs(hthree-normal) - accu += dabs(hthree-normal) - enddo - enddo - enddo -enddo -print*,'accu same spin beta = ',accu - - -end - - diff --git a/plugins/local/tc_bi_ortho/test_s2_tc.irp.f b/plugins/local/tc_bi_ortho/test_s2_tc.irp.f deleted file mode 100644 index 7c70b1199..000000000 --- a/plugins/local/tc_bi_ortho/test_s2_tc.irp.f +++ /dev/null @@ -1,170 +0,0 @@ - -! --- - -program test_tc - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - call provide_all_three_ints_bi_ortho() - call routine_h_triple_left - call routine_h_triple_right -! call routine_test_s2_davidson - -end - -subroutine routine_h_triple_right - implicit none - logical :: do_right - integer :: sze ,i, N_st, j - double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 - double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) - double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) - sze = N_det - N_st = 1 - allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) - print*,'Checking first the Right ' - do i = 1, sze - u_0(i,1) = psi_r_coef_bi_ortho(i,1) - enddo - double precision :: wall0,wall1 - call wall_time(wall0) - call H_tc_s2_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) - call wall_time(wall1) - print*,'time for omp',wall1 - wall0 - call wall_time(wall0) - call H_tc_s2_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) - call wall_time(wall1) - print*,'time serial ',wall1 - wall0 - accu_e = 0.d0 - accu_s = 0.d0 - do i = 1, sze - accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) - accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) - enddo - print*,'accu_e = ',accu_e - print*,'accu_s = ',accu_s - -end - -subroutine routine_h_triple_left - implicit none - logical :: do_right - integer :: sze ,i, N_st, j - double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 - double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) - double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) - sze = N_det - N_st = 1 - allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) - print*,'Checking the Left ' - do i = 1, sze - u_0(i,1) = psi_l_coef_bi_ortho(i,1) - enddo - double precision :: wall0,wall1 - call wall_time(wall0) - call H_tc_s2_dagger_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) - call wall_time(wall1) - print*,'time for omp',wall1 - wall0 - call wall_time(wall0) - call H_tc_s2_dagger_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) - call wall_time(wall1) - print*,'time serial ',wall1 - wall0 - accu_e = 0.d0 - accu_s = 0.d0 - do i = 1, sze - accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) - accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) - enddo - print*,'accu_e = ',accu_e - print*,'accu_s = ',accu_s - -end - - -subroutine routine_test_s2_davidson - implicit none - double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:) - integer :: i,istate - logical :: converged - external H_tc_s2_dagger_u_0_opt - external H_tc_s2_u_0_opt - allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) - do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) - enddo - ! Preparing the left-eigenvector - print*,'Computing the left-eigenvector ' - vec_tmp = 0.d0 - do istate = 1, N_states - vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) - enddo - do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 - enddo - do istate = 1, N_states - leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) - enddo - integer :: n_it_max - n_it_max = 1 - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) - double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) - integer :: sze,N_st - logical :: do_right - sze = N_det - N_st = 1 - do_right = .False. - allocate(s_0_new(N_det,1),v_0_new(N_det,1)) - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) - double precision :: accu_e_0, accu_s_0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 - do i = 1, sze - accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) - accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) - enddo - print*,'energies = ',energies - print*,'s2 = ',s2 - print*,'accu_e_0',accu_e_0 - print*,'accu_s_0',accu_s_0 - - ! Preparing the right-eigenvector - print*,'Computing the right-eigenvector ' - vec_tmp = 0.d0 - do istate = 1, N_states - vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate) - enddo - do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 - enddo - do istate = 1, N_states - leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) - enddo - n_it_max = 1 - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) - sze = N_det - N_st = 1 - do_right = .True. - v_0_new = 0.d0 - s_0_new = 0.d0 - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 - do i = 1, sze - accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) - accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) - enddo - print*,'energies = ',energies - print*,'s2 = ',s2 - print*,'accu_e_0',accu_e_0 - print*,'accu_s_0',accu_s_0 - -end diff --git a/plugins/local/tc_bi_ortho/test_tc_fock.irp.f b/plugins/local/tc_bi_ortho/test_tc_fock.irp.f deleted file mode 100644 index f1a7cc0ab..000000000 --- a/plugins/local/tc_bi_ortho/test_tc_fock.irp.f +++ /dev/null @@ -1,204 +0,0 @@ - -! --- - -program test_tc_fock - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, 'Hello world' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - read_wf = .True. - touch read_wf - - !call routine_1 - !call routine_2 -! call routine_3() - -! call test_3e - call routine_tot - -end - -! --- - -subroutine test_3e - implicit none - double precision :: integral_aaa,integral_aab,integral_abb,integral_bbb,accu - double precision :: hmono, htwoe, hthree, htot - call htilde_mu_mat_bi_ortho_slow(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) - print*,'hmono = ',hmono - print*,'htwoe = ',htwoe - print*,'hthree= ',hthree - print*,'htot = ',htot - print*,'' - print*,'' - print*,'TC_one= ',tc_hf_one_e_energy - print*,'TC_two= ',TC_HF_two_e_energy - print*,'TC_3e = ',diag_three_elem_hf - print*,'TC_tot= ',TC_HF_energy - print*,'' - print*,'' - call give_aaa_contrib(integral_aaa) - print*,'integral_aaa = ',integral_aaa - call give_aab_contrib(integral_aab) - print*,'integral_aab = ',integral_aab - call give_abb_contrib(integral_abb) - print*,'integral_abb = ',integral_abb - call give_bbb_contrib(integral_bbb) - print*,'integral_bbb = ',integral_bbb - accu = integral_aaa + integral_aab + integral_abb + integral_bbb - print*,'accu = ',accu - print*,'delta = ',hthree - accu - -end - -subroutine routine_3() - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i, a, i_ok, s1 - double precision :: hmono, htwoe, hthree, htilde_ij - double precision :: err_ai, err_tot, ref, new - integer(bit_kind), allocatable :: det_i(:,:) - - allocate(det_i(N_int,2)) - - err_tot = 0.d0 - - do s1 = 1, 2 - - det_i = ref_bitmask - call debug_det(det_i, N_int) - print*, ' HF det' - call debug_det(det_i, N_int) - - do i = 1, elec_num_tab(s1) - do a = elec_num_tab(s1)+1, mo_num ! virtual - - - det_i = ref_bitmask - call do_single_excitation(det_i, i, a, s1, i_ok) - if(i_ok == -1) then - print*, 'PB !!' - print*, i, a - stop - endif - print*, ' excited det' - call debug_det(det_i, N_int) - - call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) - if(dabs(hthree).lt.1.d-10)cycle - ref = hthree - if(s1 == 1)then - new = fock_a_tot_3e_bi_orth(a,i) - else if(s1 == 2)then - new = fock_b_tot_3e_bi_orth(a,i) - endif - err_ai = dabs(dabs(ref) - dabs(new)) - if(err_ai .gt. 1d-7) then - print*,'s1 = ',s1 - print*, ' warning on', i, a - print*, ref,new,err_ai - endif - print*, ref,new,err_ai - err_tot += err_ai - - write(22, *) htilde_ij - enddo - enddo - enddo - - print *, ' err_tot = ', err_tot - - deallocate(det_i) - -end subroutine routine_3 - -! --- -subroutine routine_tot() - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i, a, i_ok, s1,other_spin(2) - double precision :: hmono, htwoe, hthree, htilde_ij - double precision :: err_ai, err_tot, ref, new - integer(bit_kind), allocatable :: det_i(:,:) - - allocate(det_i(N_int,2)) - other_spin(1) = 2 - other_spin(2) = 1 - - err_tot = 0.d0 - -! do s1 = 1, 2 - s1 = 2 - det_i = ref_bitmask - call debug_det(det_i, N_int) - print*, ' HF det' - call debug_det(det_i, N_int) - -! do i = 1, elec_num_tab(s1) -! do a = elec_num_tab(s1)+1, mo_num ! virtual - do i = 1, elec_beta_num - do a = elec_beta_num+1, mo_num! virtual - print*,i,a - - det_i = ref_bitmask - call do_single_excitation(det_i, i, a, s1, i_ok) - if(i_ok == -1) then - print*, 'PB !!' - print*, i, a - stop - endif - - call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) - print*,htilde_ij -! if(dabs(htilde_ij).lt.1.d-10)cycle - print*, ' excited det' - call debug_det(det_i, N_int) - - if(s1 == 1)then - new = Fock_matrix_tc_mo_alpha(a,i) - else - new = Fock_matrix_tc_mo_beta(a,i) - endif - ref = htilde_ij -! if(s1 == 1)then -! new = fock_a_tot_3e_bi_orth(a,i) -! else if(s1 == 2)then -! new = fock_b_tot_3e_bi_orth(a,i) -! endif - err_ai = dabs(dabs(ref) - dabs(new)) - if(err_ai .gt. 1d-7) then - print*,'---------' - print*,'s1 = ',s1 - print*, ' warning on', i, a - print*, ref,new,err_ai - print*,hmono, htwoe, hthree - print*,'---------' - endif - print*, ref,new,err_ai - err_tot += err_ai - - write(22, *) htilde_ij - enddo - enddo -! enddo - - print *, ' err_tot = ', err_tot - - deallocate(det_i) - -end subroutine routine_3 diff --git a/plugins/local/tc_int/EZFIO.cfg b/plugins/local/tc_int/EZFIO.cfg new file mode 100644 index 000000000..5615ce4b4 --- /dev/null +++ b/plugins/local/tc_int/EZFIO.cfg @@ -0,0 +1,36 @@ +[nxBlocks] +type: integer +doc: nb of x blocks in the Grid +interface: ezfio,provider,ocaml +default: 10 + +[nyBlocks] +type: integer +doc: nb of y blocks in the Grid +interface: ezfio,provider,ocaml +default: 1 + +[nzBlocks] +type: integer +doc: nb of z blocks in the Grid +interface: ezfio,provider,ocaml +default: 1 + +[blockxSize] +type: integer +doc: size of x blocks +interface: ezfio,provider,ocaml +default: 32 + +[blockySize] +type: integer +doc: size of y blocks +interface: ezfio,provider,ocaml +default: 1 + +[blockzSize] +type: integer +doc: size of z blocks +interface: ezfio,provider,ocaml +default: 1 + diff --git a/plugins/local/tc_int/LIB b/plugins/local/tc_int/LIB new file mode 100644 index 000000000..9f7e2d9f8 --- /dev/null +++ b/plugins/local/tc_int/LIB @@ -0,0 +1 @@ +-lcutcint diff --git a/plugins/local/tc_int/NEED b/plugins/local/tc_int/NEED new file mode 100644 index 000000000..362314dad --- /dev/null +++ b/plugins/local/tc_int/NEED @@ -0,0 +1,7 @@ +gpu +tc_keywords +jastrow +qmckl +becke_numerical_grid +dft_utils_in_r +bi_ortho_mos diff --git a/plugins/local/tc_int/README.rst b/plugins/local/tc_int/README.rst new file mode 100644 index 000000000..bc9e84838 --- /dev/null +++ b/plugins/local/tc_int/README.rst @@ -0,0 +1,4 @@ +====== +tc_int +====== + diff --git a/plugins/local/tc_int/compute_int_2e_ao_cpu.irp.f b/plugins/local/tc_int/compute_int_2e_ao_cpu.irp.f new file mode 100644 index 000000000..9ff5431d9 --- /dev/null +++ b/plugins/local/tc_int/compute_int_2e_ao_cpu.irp.f @@ -0,0 +1,233 @@ + +! --- + +program compute_int_2e_ao_cpu + + implicit none + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + + call main() + +end + +! --- + +subroutine main() + + use cutc_module + + implicit none + + integer :: m + integer :: i, j, k, l + integer :: ipoint, jpoint + + double precision :: weight1, ao_i_r, ao_k_r + + double precision :: time0, time1 + double precision :: wall_time0, wall_time1 + double precision :: wall_ttime0, wall_ttime1 + double precision :: tt1, tt2 + + double precision, allocatable :: rn(:,:), aos_data1(:,:,:), aos_data2(:,:,:) + double precision, allocatable :: grad1_u12(:,:,:), int_fct_long_range(:,:,:), c_mat(:,:,:) + double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:) + double precision, allocatable :: int_2e_ao(:,:,:,:) + + + call wall_time(time0) + print*, ' start compute_int_2e_ao_cpu' + + + ! --- + + allocate(rn(3,nucl_num)) + allocate(aos_data1(n_points_final_grid,ao_num,4)) + allocate(aos_data2(n_points_extra_final_grid,ao_num,4)) + + do k = 1, nucl_num + rn(1,k) = nucl_coord(k,1) + rn(2,k) = nucl_coord(k,2) + rn(3,k) = nucl_coord(k,3) + enddo + + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + aos_data1(ipoint,k,1) = aos_in_r_array(k,ipoint) + aos_data1(ipoint,k,2) = aos_grad_in_r_array(k,ipoint,1) + aos_data1(ipoint,k,3) = aos_grad_in_r_array(k,ipoint,2) + aos_data1(ipoint,k,4) = aos_grad_in_r_array(k,ipoint,3) + enddo + enddo + + do k = 1, ao_num + do ipoint = 1, n_points_extra_final_grid + aos_data2(ipoint,k,1) = aos_in_r_array_extra(k,ipoint) + aos_data2(ipoint,k,2) = aos_grad_in_r_array_extra(k,ipoint,1) + aos_data2(ipoint,k,3) = aos_grad_in_r_array_extra(k,ipoint,2) + aos_data2(ipoint,k,4) = aos_grad_in_r_array_extra(k,ipoint,3) + enddo + enddo + + + allocate(int_fct_long_range(n_points_extra_final_grid,ao_num,ao_num)) + allocate(grad1_u12(n_points_extra_final_grid,n_points_final_grid,4)) + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,4)) + allocate(int_2e_ao(ao_num,ao_num,ao_num,ao_num)) + + call wall_time(wall_time0) + + call wall_time(wall_ttime0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (int_fct_long_range, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + int_fct_long_range(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(wall_ttime1) + write(*,"(A,2X,F15.7)") ' wall time for int_long_range (sec) = ', (wall_ttime1 - wall_ttime0) + + + call wall_time(wall_ttime0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, grad1_u12) + !$OMP DO + do ipoint = 1, n_points_final_grid + call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, grad1_u12(1,ipoint,1) & + , grad1_u12(1,ipoint,2) & + , grad1_u12(1,ipoint,3) & + , grad1_u12(1,ipoint,4) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(wall_ttime1) + write(*,"(A,2X,F15.7)") ' wall time for tc_int_bh (sec) = ', (wall_ttime1 - wall_ttime0) + + + call wall_time(wall_ttime0) + do m = 1, 4 + call dgemm("T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 & + , int_fct_long_range(1,1,1), n_points_extra_final_grid, grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) + enddo + call wall_time(wall_ttime1) + write(*,"(A,2X,F15.7)") ' wall time for DGEMM of integ over r2 (sec) = ', (wall_ttime1 - wall_ttime0) + + + call wall_time(wall_ttime0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(wall_ttime1) + write(*,"(A,2X,F15.7)") ' wall time of Hermitian part (sec) = ', (wall_ttime1 - wall_ttime0) + + + call wall_time(wall_ttime0) + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_ao(1,1,1,4), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 0.d0, int_2e_ao(1,1,1,1), ao_num*ao_num) + call wall_time(wall_ttime1) + write(*,"(A,2X,F15.7)") ' wall time for DGEMM of Hermitian part (sec) = ', (wall_ttime1 - wall_ttime0) + + + tt1 = 0.d0 + tt2 = 0.d0 + do m = 1, 3 + + call wall_time(wall_ttime0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, c_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector, m) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + c_mat(ipoint,k,i) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,m) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,m)) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(wall_ttime1) + tt1 += wall_ttime1 - wall_ttime0 + + call wall_time(wall_ttime0) + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -0.5d0 & + , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 1.d0, int_2e_ao(1,1,1,1), ao_num*ao_num) + call wall_time(wall_ttime1) + tt2 += wall_ttime1 - wall_ttime0 + enddo + write(*,"(A,2X,F15.7)") ' wall time of non-Hermitian part (sec) = ', tt1 + write(*,"(A,2X,F15.7)") ' wall time for DGEMM of non Hermitian part (sec) = ', tt2 + + + call wall_time(wall_ttime0) + call sum_A_At(int_2e_ao(1,1,1,1), ao_num*ao_num) + call wall_time(wall_ttime1) + write(*,"(A,2X,F15.7)") ' wall time of A + A.T (sec) = ', wall_ttime1 - wall_ttime0 + + + call wall_time(wall_time1) + write(*,"(A,2X,F15.7)") ' wall time on cpu (sec) = ', (wall_time1 - wall_time0) + + + deallocate(int_fct_long_range, grad1_u12, c_mat) + deallocate(int_2e_ao, int2_grad1_u12_ao) + deallocate(rn, aos_data1, aos_data2) + + call wall_time(time1) + write(*,"(A,2X,F15.7)") ' wall time for compute_int_2e_ao_cpu (sec) = ', (time1 - time0) + + return +end diff --git a/plugins/local/tc_int/compute_int_2e_ao_gpu.irp.f b/plugins/local/tc_int/compute_int_2e_ao_gpu.irp.f new file mode 100644 index 000000000..4ffdc1cbd --- /dev/null +++ b/plugins/local/tc_int/compute_int_2e_ao_gpu.irp.f @@ -0,0 +1,117 @@ + +! --- + +program compute_int_2e_ao_gpu + + implicit none + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + + call main() + +end + +! --- + +subroutine main() + + use cutc_module + + implicit none + + integer :: i, j, k, l + integer :: ipoint + + double precision :: time0, time1 + + double precision, allocatable :: rn(:,:), aos_data1(:,:,:), aos_data2(:,:,:) + double precision, allocatable :: int2_grad1_u12_ao_gpu(:,:,:,:) + double precision, allocatable :: int_2e_ao_gpu(:,:,:,:) + + + call wall_time(time0) + print*, ' start compute_int_2e_ao_gpu' + + + ! --- + + allocate(rn(3,nucl_num)) + allocate(aos_data1(n_points_final_grid,ao_num,4)) + allocate(aos_data2(n_points_extra_final_grid,ao_num,4)) + + do k = 1, nucl_num + rn(1,k) = nucl_coord(k,1) + rn(2,k) = nucl_coord(k,2) + rn(3,k) = nucl_coord(k,3) + enddo + + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + aos_data1(ipoint,k,1) = aos_in_r_array(k,ipoint) + aos_data1(ipoint,k,2) = aos_grad_in_r_array(k,ipoint,1) + aos_data1(ipoint,k,3) = aos_grad_in_r_array(k,ipoint,2) + aos_data1(ipoint,k,4) = aos_grad_in_r_array(k,ipoint,3) + enddo + enddo + + do k = 1, ao_num + do ipoint = 1, n_points_extra_final_grid + aos_data2(ipoint,k,1) = aos_in_r_array_extra(k,ipoint) + aos_data2(ipoint,k,2) = aos_grad_in_r_array_extra(k,ipoint,1) + aos_data2(ipoint,k,3) = aos_grad_in_r_array_extra(k,ipoint,2) + aos_data2(ipoint,k,4) = aos_grad_in_r_array_extra(k,ipoint,3) + enddo + enddo + + ! --- + + integer :: nB + integer :: sB + + PROVIDE nxBlocks nyBlocks nzBlocks + PROVIDE blockxSize blockySize blockzSize + + sB = 32 + nB = (n_points_final_grid + sB - 1) / sB + + call ezfio_set_tc_int_blockxSize(sB) + call ezfio_set_tc_int_nxBlocks(nB) + + allocate(int2_grad1_u12_ao_gpu(ao_num,ao_num,n_points_final_grid,3)) + allocate(int_2e_ao_gpu(ao_num,ao_num,ao_num,ao_num)) + + call cutc_int(nxBlocks, nyBlocks, nzBlocks, blockxSize, blockySize, blockzSize, & + n_points_final_grid, n_points_extra_final_grid, ao_num, nucl_num, jBH_size, & + final_grid_points, final_weight_at_r_vector, & + final_grid_points_extra, final_weight_at_r_vector_extra, & + rn, aos_data1, aos_data2, jBH_c, jBH_m, jBH_n, jBH_o, & + int2_grad1_u12_ao_gpu, int_2e_ao_gpu) + + deallocate(int_2e_ao_gpu, int2_grad1_u12_ao_gpu) + deallocate(rn, aos_data1, aos_data2) + + call wall_time(time1) + write(*,"(A,2X,F15.7)") ' wall time for compute_int_2e_ao_gpu (sec) = ', (time1 - time0) + + return +end diff --git a/plugins/local/tc_int/compute_no_cpu.irp.f b/plugins/local/tc_int/compute_no_cpu.irp.f new file mode 100644 index 000000000..54ba37ce4 --- /dev/null +++ b/plugins/local/tc_int/compute_no_cpu.irp.f @@ -0,0 +1,143 @@ + +! --- + +program compute_no_cpu + + implicit none + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + + call main() + +end + +! --- + +subroutine main() + + use cutc_module + + implicit none + + integer :: i, j, k, l, ipoint + double precision :: time0, time1 + double precision :: tt0, tt1 + double precision :: acc_thr, err_tot, nrm_tot, err_loc + double precision :: noL_0e + double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:) + double precision, allocatable :: tmp(:,:,:,:) + double precision, allocatable :: int2_grad1_u12_bimo_t(:,:,:,:) + double precision, allocatable :: noL_1e (:,:) + double precision, allocatable :: noL_2e (:,:,:,:) + + PROVIDE mo_l_coef mo_r_coef + PROVIDE mos_l_in_r_array_transp mos_r_in_r_array_transp + + + call wall_time(time0) + print*, ' start compute_no_cpu' + + + + allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,3)) + print*, ' Reading int2_grad1_u12_ao from ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao' + call wall_time(tt0) + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") + read(11) int2_grad1_u12_ao + close(11) + call wall_time(tt1) + write(*,"(A,2X,F15.7)") ' wall time for reading (sec) = ', (tt1 - tt0) + + allocate(tmp(mo_num,mo_num,n_points_final_grid,3)) + allocate(int2_grad1_u12_bimo_t(n_points_final_grid,3,mo_num,mo_num)) + + call wall_time(tt0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (ao_num, mo_num, n_points_final_grid, int2_grad1_u12_ao, tmp) + !$OMP DO SCHEDULE (dynamic) + do ipoint = 1, n_points_final_grid + call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,1), ao_num, tmp(1,1,ipoint,1), mo_num) + call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,2), ao_num, tmp(1,1,ipoint,2), mo_num) + call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,3), ao_num, tmp(1,1,ipoint,3), mo_num) + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(int2_grad1_u12_ao) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, tmp, int2_grad1_u12_bimo_t) + !$OMP DO COLLAPSE(2) SCHEDULE (dynamic) + do ipoint = 1, n_points_final_grid + do i = 1, mo_num + do j = 1, mo_num + int2_grad1_u12_bimo_t(ipoint,1,j,i) = tmp(j,i,ipoint,1) + int2_grad1_u12_bimo_t(ipoint,2,j,i) = tmp(j,i,ipoint,2) + int2_grad1_u12_bimo_t(ipoint,3,j,i) = tmp(j,i,ipoint,3) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tt1) + write(*,"(A,2X,F15.7)") ' wall time for 3e-tensor (sec) = ', (tt1 - tt0) + + deallocate(tmp) + + allocate(noL_2e(mo_num,mo_num,mo_num,mo_num)) + allocate(noL_1e(mo_num,mo_num)) + + call provide_no_2e(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, & + final_weight_at_r_vector(1), & + mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), & + int2_grad1_u12_bimo_t(1,1,1,1), noL_2e(1,1,1,1)) + + call provide_no_1e(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, & + final_weight_at_r_vector(1), & + mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), & + int2_grad1_u12_bimo_t(1,1,1,1), noL_1e(1,1)) + + call provide_no_0e(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, & + final_weight_at_r_vector(1), & + mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), & + int2_grad1_u12_bimo_t(1,1,1,1), noL_0e) + + deallocate(int2_grad1_u12_bimo_t) + deallocate(noL_2e) + deallocate(noL_1e) + + + call wall_time(time1) + write(*,"(A,2X,F15.7)") ' wall time for compute_no_cpu (sec) = ', (time1 - time0) + + return + +end + +! --- + + diff --git a/plugins/local/tc_int/compute_no_gpu.irp.f b/plugins/local/tc_int/compute_no_gpu.irp.f new file mode 100644 index 000000000..9e4d90cc9 --- /dev/null +++ b/plugins/local/tc_int/compute_no_gpu.irp.f @@ -0,0 +1,132 @@ + +! --- + +program compute_no_gpu + + implicit none + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + + call main() + +end + +! --- + +subroutine main() + + use cutc_module + + implicit none + + integer :: i, j, k, l, ipoint + double precision :: time0, time1 + double precision :: tt0, tt1 + double precision :: acc_thr, err_tot, nrm_tot, err_loc + double precision :: noL_0e_gpu(1) + double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:) + double precision, allocatable :: tmp(:,:,:,:) + double precision, allocatable :: int2_grad1_u12_bimo_t(:,:,:,:) + double precision, allocatable :: noL_1e_gpu(:,:) + double precision, allocatable :: noL_2e_gpu(:,:,:,:) + + PROVIDE mo_l_coef mo_r_coef + PROVIDE mos_l_in_r_array_transp mos_r_in_r_array_transp + + + call wall_time(time0) + print*, ' start compute_no_gpu' + + + + allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,3)) + print*, ' Reading int2_grad1_u12_ao from ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao' + call wall_time(tt0) + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") + read(11) int2_grad1_u12_ao + close(11) + call wall_time(tt1) + write(*,"(A,2X,F15.7)") ' wall time for reading (sec) = ', (tt1 - tt0) + + allocate(tmp(mo_num,mo_num,n_points_final_grid,3)) + allocate(int2_grad1_u12_bimo_t(n_points_final_grid,3,mo_num,mo_num)) + + call wall_time(tt0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (ao_num, mo_num, n_points_final_grid, int2_grad1_u12_ao, tmp) + !$OMP DO SCHEDULE (dynamic) + do ipoint = 1, n_points_final_grid + call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,1), ao_num, tmp(1,1,ipoint,1), mo_num) + call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,2), ao_num, tmp(1,1,ipoint,2), mo_num) + call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,3), ao_num, tmp(1,1,ipoint,3), mo_num) + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(int2_grad1_u12_ao) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, tmp, int2_grad1_u12_bimo_t) + !$OMP DO COLLAPSE(2) SCHEDULE (dynamic) + do ipoint = 1, n_points_final_grid + do i = 1, mo_num + do j = 1, mo_num + int2_grad1_u12_bimo_t(ipoint,1,j,i) = tmp(j,i,ipoint,1) + int2_grad1_u12_bimo_t(ipoint,2,j,i) = tmp(j,i,ipoint,2) + int2_grad1_u12_bimo_t(ipoint,3,j,i) = tmp(j,i,ipoint,3) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tt1) + write(*,"(A,2X,F15.7)") ' wall time for 3e-tensor (sec) = ', (tt1 - tt0) + + deallocate(tmp) + + allocate(noL_2e_gpu(mo_num,mo_num,mo_num,mo_num)) + allocate(noL_1e_gpu(mo_num,mo_num)) + + call cutc_no(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, & + final_weight_at_r_vector(1), & + mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), & + int2_grad1_u12_bimo_t(1,1,1,1), noL_2e_gpu(1,1,1,1), noL_1e_gpu(1,1), noL_0e_gpu(1)) + + deallocate(int2_grad1_u12_bimo_t) + deallocate(noL_2e_gpu) + deallocate(noL_1e_gpu) + + call wall_time(time1) + write(*,"(A,2X,F15.7)") ' wall time for compute_no_gpu (sec) = ', (time1 - time0) + + return + +end + +! --- + + diff --git a/plugins/local/tc_int/compute_tc_int.irp.f b/plugins/local/tc_int/compute_tc_int.irp.f new file mode 100644 index 000000000..60c0d53b8 --- /dev/null +++ b/plugins/local/tc_int/compute_tc_int.irp.f @@ -0,0 +1,301 @@ + +! --- + +subroutine provide_int2_grad1_u12_ao() + + BEGIN_DOC + ! + ! int2_grad1_u12_ao(i,j,ipoint,1) = \int dr2 [\grad1 u(r1,r2)]_x1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,2) = \int dr2 [\grad1 u(r1,r2)]_y1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,3) = \int dr2 [\grad1 u(r1,r2)]_z1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,4) = \int dr2 [-(1/2) [\grad1 u(r1,r2)]^2] \chi_i(r2) \chi_j(r2) + ! + ! + ! tc_int_2e_ao(k,i,l,j) = (ki|V^TC(r_12)|lj) + ! = where V^TC(r_12) is the total TC operator + ! = tc_grad_and_lapl_ao(k,i,l,j) + tc_grad_square_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) + ! where: + ! + ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij > + ! = -1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 (-1) \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! + ! tc_grad_square_ao(k,i,l,j) = -1/2 + ! + ! ao_two_e_coul(k,i,l,j) = < l k | 1/r12 | j i > = ( k i | 1/r12 | l j ) + ! + END_DOC + + implicit none + + integer :: i, j, k, l, m, ipoint, jpoint + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision :: mem, n_double + double precision :: weight1, ao_k_r, ao_i_r + double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq + double precision :: time0, time1, time2, tc1, tc2, tc + double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:) + double precision, allocatable :: tmp_grad1_u12(:,:,:), tmp(:,:,:) + double precision, allocatable :: c_mat(:,:,:), tc_int_2e_ao(:,:,:,:) + + double precision, external :: get_ao_two_e_integral + + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE final_weight_at_r_vector aos_grad_in_r_array_transp_bis final_weight_at_r_vector aos_in_r_array_transp + + + + print*, ' start provide_int2_grad1_u12_ao ...' + call wall_time(time0) + + call total_memory(mem) + mem = max(1.d0, qp_max_mem - mem) + n_double = mem * 1.d8 + n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + + call write_int(6, n_pass, 'Number of passes') + call write_int(6, n_blocks, 'Size of the blocks') + call write_int(6, n_rest, 'Size of the last block') + + ! --- + ! --- + ! --- + + allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,4)) + + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,4)) + + tc = 0.d0 + + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + + call wall_time(tc1) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, tmp_grad1_u12) + !$OMP DO + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1), tmp_grad1_u12(1,i_blocks,2), & + tmp_grad1_u12(1,i_blocks,3), tmp_grad1_u12(1,i_blocks,4)) + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tc2) + tc = tc + tc2 - tc1 + + do m = 1, 4 + call dgemm("T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + enddo + + if(n_rest .gt. 0) then + + ii = n_pass*n_blocks + 1 + + call wall_time(tc1) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, tmp_grad1_u12) + !$OMP DO + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1), tmp_grad1_u12(1,i_rest,2), & + tmp_grad1_u12(1,i_rest,3), tmp_grad1_u12(1,i_rest,4)) + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tc2) + tc = tc + tc2 - tc1 + + do m = 1, 4 + call dgemm("T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + + endif + deallocate(tmp_grad1_u12) + + deallocate(tmp) + + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao (min) = ', (time1-time0) / 60.d0 + print*, ' wall time Jastrow derivatives (min) = ', tc / 60.d0 + + call print_memory_usage() + + ! --- + ! --- + ! --- + + + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + allocate(tc_int_2e_ao(ao_num,ao_num,ao_num,ao_num)) + + call wall_time(time1) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_ao(1,1,1,4), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 0.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num) + + call wall_time(time2) + print*, ' wall time of Hermitian part of tc_int_2e_ao (min) ', (time2 - time1) / 60.d0 + + call print_memory_usage() + + ! --- + + call wall_time(time1) + + do m = 1, 3 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, c_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector, m) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + c_mat(ipoint,k,i) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,m) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,m)) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -0.5d0 & + , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 1.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num) + enddo + + call wall_time(time2) + print*, ' wall time of non-Hermitian part of tc_int_2e_ao (min) ', (time2 - time1) / 60.d0 + + deallocate(c_mat) + + call print_memory_usage() + + ! --- + + call wall_time(time1) + + call sum_A_At(tc_int_2e_ao(1,1,1,1), ao_num*ao_num) + + call wall_time(time2) + print*, ' lower- and upper-triangle of tc_int_2e_ao (min) ', (time2 - time1) / 60.d0 + + call print_memory_usage() + + ! --- + + double precision :: tmp_omp + + call wall_time(time1) + + PROVIDE ao_integrals_map + tmp_omp = get_ao_two_e_integral(1, 1, 1, 1, ao_integrals_map) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(ao_num, tc_int_2e_ao, ao_integrals_map) & + !$OMP PRIVATE(i, j, k, l) + !$OMP DO COLLAPSE(3) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ! < 1:i, 2:j | 1:k, 2:l > + tc_int_2e_ao(k,i,l,j) = tc_int_2e_ao(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(time2) + print*, ' wall time of Coulomb part of tc_int_2e_ao (min) ', (time2 - time1) / 60.d0 + call print_memory_usage() + + ! --- + + print*, ' Writing int2_grad1_u12_ao in ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao' + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao(:,:,:,1:3) + close(11) + deallocate(int2_grad1_u12_ao) + + print*, ' Saving tc_int_2e_ao in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot' + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="write") + call ezfio_set_work_empty(.False.) + do i = 1, ao_num + write(11) tc_int_2e_ao(:,:,:,i) + enddo + close(11) + + ! ---- + + deallocate(tc_int_2e_ao) + + call wall_time(time2) + print*, ' wall time for tc_int_2e_ao (min) = ', (time2-time1) / 60.d0 + call print_memory_usage() + + ! --- + + call wall_time(time1) + print*, ' wall time for TC-integrals (min) = ', (time1-time0) / 60.d0 + + return +end + +! --- + diff --git a/plugins/local/tc_int/compute_tc_int_gpu.irp.f b/plugins/local/tc_int/compute_tc_int_gpu.irp.f new file mode 100644 index 000000000..c2653ac67 --- /dev/null +++ b/plugins/local/tc_int/compute_tc_int_gpu.irp.f @@ -0,0 +1,315 @@ + +! --- + +subroutine provide_int2_grad1_u12_ao_gpu() + + use gpu + + BEGIN_DOC + ! + ! int2_grad1_u12_ao(i,j,ipoint,1) = \int dr2 [\grad1 u(r1,r2)]_x1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,2) = \int dr2 [\grad1 u(r1,r2)]_y1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,3) = \int dr2 [\grad1 u(r1,r2)]_z1 \chi_i(r2) \chi_j(r2) + ! int2_grad1_u12_ao(i,j,ipoint,4) = \int dr2 [-(1/2) [\grad1 u(r1,r2)]^2] \chi_i(r2) \chi_j(r2) + ! + ! + ! tc_int_2e_ao(k,i,l,j) = (ki|V^TC(r_12)|lj) + ! = where V^TC(r_12) is the total TC operator + ! = tc_grad_and_lapl_ao(k,i,l,j) + tc_grad_square_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) + ! where: + ! + ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij > + ! = -1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 (-1) \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! + ! tc_grad_square_ao(k,i,l,j) = -1/2 + ! + ! ao_two_e_coul(k,i,l,j) = < l k | 1/r12 | j i > = ( k i | 1/r12 | l j ) + ! + END_DOC + + implicit none + + integer :: i, j, k, l, m, ipoint, jpoint + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision :: mem, n_double + double precision :: weight1, ao_k_r, ao_i_r + double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq + double precision :: time0, time1, time2, tc1, tc2, tc + type(gpu_double4) :: int2_grad1_u12_ao + type(gpu_double3) :: tmp_grad1_u12, tmp_grad1_u12p, tmp + double precision, allocatable :: c_mat(:,:,:), tc_int_2e_ao(:,:,:,:) + + double precision, external :: get_ao_two_e_integral + + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE final_weight_at_r_vector aos_grad_in_r_array_transp_bis final_weight_at_r_vector aos_in_r_array_transp + + + + print*, ' start provide_int2_grad1_u12_ao ...' + call wall_time(time0) + + call total_memory(mem) + mem = max(1.d0, qp_max_mem - mem) + mem = 6 + n_double = mem * 1.d8 + n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + + call write_int(6, n_pass, 'Number of passes') + call write_int(6, n_blocks, 'Size of the blocks') + call write_int(6, n_rest, 'Size of the last block') + + ! --- + ! --- + ! --- + + call gpu_allocate(int2_grad1_u12_ao, ao_num,ao_num,n_points_final_grid,4) + + call gpu_allocate(tmp,n_points_extra_final_grid,ao_num,ao_num) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp%f(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call gpu_allocate(tmp_grad1_u12,n_points_extra_final_grid,n_blocks,4) + call gpu_allocate(tmp_grad1_u12p,n_points_extra_final_grid,n_blocks,4) + + tc = 0.d0 + + type(gpu_stream) :: stream(4) + do i=1,4 + call gpu_stream_create(stream(i)) + enddo + + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + + call wall_time(tc1) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12) + !$OMP DO + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12%f(1,i_blocks,1), tmp_grad1_u12%f(1,i_blocks,2), & + tmp_grad1_u12%f(1,i_blocks,3), tmp_grad1_u12%f(1,i_blocks,4)) + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tc2) + tc = tc + tc2 - tc1 + + call gpu_synchronize() + call gpu_copy(tmp_grad1_u12,tmp_grad1_u12p) + do m = 1, 4 + call gpu_set_stream(blas_handle, stream(m)) + call gpu_dgemm(blas_handle, "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & + , tmp%f(1,1,1), n_points_extra_final_grid, tmp_grad1_u12p%f(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao%f(1,1,ii,m), ao_num*ao_num) + enddo + enddo + + if(n_rest .gt. 0) then + + ii = n_pass*n_blocks + 1 + + call wall_time(tc1) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, final_grid_points, tmp_grad1_u12) + !$OMP DO + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, tmp_grad1_u12%f(1,i_rest,1), tmp_grad1_u12%f(1,i_rest,2), & + tmp_grad1_u12%f(1,i_rest,3), tmp_grad1_u12%f(1,i_rest,4)) + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tc2) + tc = tc + tc2 - tc1 + + do m = 1, 4 + call gpu_set_stream(blas_handle, stream(m)) + call gpu_dgemm(blas_handle, "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & + , tmp%f(1,1,1), n_points_extra_final_grid, tmp_grad1_u12%f(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao%f(1,1,ii,m), ao_num*ao_num) + enddo + + endif + call gpu_synchronize() + call gpu_deallocate(tmp_grad1_u12) + call gpu_deallocate(tmp_grad1_u12p) + + do i=1,4 + call gpu_stream_destroy(stream(i)) + enddo + + + call gpu_deallocate(tmp) + + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao (min) = ', (time1-time0) / 60.d0 + print*, ' wall time Jastrow derivatives (min) = ', tc / 60.d0 + call print_memory_usage() + +!TODO +stop + ! --- + ! --- + ! --- + + + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + allocate(tc_int_2e_ao(ao_num,ao_num,ao_num,ao_num)) + + call wall_time(time1) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_ao%f(1,1,1,4), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 0.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num) + + call wall_time(time2) + print*, ' wall time of Hermitian part of tc_int_2e_ao (min) ', (time2 - time1) / 60.d0 + call print_memory_usage() + + ! --- + + call wall_time(time1) + + do m = 1, 3 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, c_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector, m) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + c_mat(ipoint,k,i) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,m) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,m)) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0 & + , int2_grad1_u12_ao%f(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 1.d0, tc_int_2e_ao(1,1,1,1), ao_num*ao_num) + enddo + + call wall_time(time2) + print*, ' wall time of non-Hermitian part of tc_int_2e_ao (min) ', (time2 - time1) / 60.d0 + call print_memory_usage() + + deallocate(c_mat) + + ! --- + + call wall_time(time1) + + call sum_A_At(tc_int_2e_ao(1,1,1,1), ao_num*ao_num) + + call wall_time(time2) + print*, ' lower- and upper-triangle of tc_int_2e_ao (min) ', (time2 - time1) / 60.d0 + call print_memory_usage() + + ! --- + + call wall_time(time1) + + PROVIDE ao_integrals_map + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(ao_num, tc_int_2e_ao, ao_integrals_map) & + !$OMP PRIVATE(i, j, k, l) + !$OMP DO COLLAPSE(3) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ! < 1:i, 2:j | 1:k, 2:l > + tc_int_2e_ao(k,i,l,j) = tc_int_2e_ao(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(time2) + print*, ' wall time of Coulomb part of tc_int_2e_ao (min) ', (time2 - time1) / 60.d0 + call print_memory_usage() + + ! --- + + print*, ' Writing int2_grad1_u12_ao in ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao' + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao%f(:,:,:,1:3) + close(11) + + print*, ' Saving tc_int_2e_ao in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot' + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="write") + call ezfio_set_work_empty(.False.) + do i = 1, ao_num + write(11) tc_int_2e_ao(:,:,:,i) + enddo + close(11) + + ! ---- + + call gpu_deallocate(int2_grad1_u12_ao) + deallocate(tc_int_2e_ao) + + call wall_time(time2) + print*, ' wall time for tc_int_2e_ao (min) = ', (time2-time1) / 60.d0 + call print_memory_usage() + + ! --- + + call wall_time(time1) + print*, ' wall time for TC-integrals (min) = ', (time1-time0) / 60.d0 + + return +end + +! --- + diff --git a/plugins/local/tc_int/cutc_module.F90 b/plugins/local/tc_int/cutc_module.F90 new file mode 100644 index 000000000..0f5f6c781 --- /dev/null +++ b/plugins/local/tc_int/cutc_module.F90 @@ -0,0 +1,70 @@ + +module cutc_module + + use, intrinsic :: iso_c_binding + + implicit none + + interface + + ! --- + + subroutine cutc_int(nxBlocks, nyBlocks, nzBlocks, & + blockxSize, blockySize, blockzSize, & + n_grid1, n_grid2, n_ao, n_nuc, size_bh, & + r1, wr1, r2, wr2, rn, & + aos_data1, aos_data2, & + c_bh, m_bh, n_bh, o_bh, & + int2_grad1_u12_ao, int_2e_ao) bind(C, name = "cutc_int") + + import c_int, c_double, c_ptr + integer(c_int), intent(in), value :: nxBlocks, blockxSize + integer(c_int), intent(in), value :: nyBlocks, blockySize + integer(c_int), intent(in), value :: nzBlocks, blockzSize + integer(c_int), intent(in), value :: n_grid1, n_grid2 + integer(c_int), intent(in), value :: n_ao + integer(c_int), intent(in), value :: n_nuc + integer(c_int), intent(in), value :: size_bh + real(c_double), intent(in) :: r1(3,n_grid1), wr1(n_grid1) + real(c_double), intent(in) :: r2(3,n_grid2), wr2(n_grid2) + real(c_double), intent(in) :: rn(3,n_nuc) + real(c_double), intent(in) :: aos_data1(n_grid1,n_ao,4) + real(c_double), intent(in) :: aos_data2(n_grid2,n_ao,4) + real(c_double), intent(in) :: c_bh(size_bh,n_nuc) + integer(c_int), intent(in) :: m_bh(size_bh,n_nuc) + integer(c_int), intent(in) :: n_bh(size_bh,n_nuc) + integer(c_int), intent(in) :: o_bh(size_bh,n_nuc) + real(c_double), intent(out) :: int2_grad1_u12_ao(n_ao,n_ao,n_grid1,3) + real(c_double), intent(out) :: int_2e_ao(n_ao,n_ao,n_ao,n_ao) + + end subroutine cutc_int + + ! --- + + subroutine cutc_no(n_grid1, n_mo, ne_a, ne_b, & + wr1, mos_l_in_r, mos_r_in_r, int2_grad1_u12, & + no_2e, no_1e, no_0e) bind(C, name = "cutc_no") + + import c_int, c_double, c_ptr + + integer(c_int), intent(in), value :: n_grid1 + integer(c_int), intent(in), value :: n_mo + integer(c_int), intent(in), value :: ne_a + integer(c_int), intent(in), value :: ne_b + real(c_double), intent(in) :: wr1(n_grid1) + real(c_double), intent(in) :: mos_l_in_r(n_grid1,n_mo) + real(c_double), intent(in) :: mos_r_in_r(n_grid1,n_mo) + real(c_double), intent(in) :: int2_grad1_u12(n_grid1,3,n_mo,n_mo) + real(c_double), intent(out) :: no_2e(n_mo,n_mo,n_mo,n_mo) + real(c_double), intent(out) :: no_1e(n_mo,n_mo) + real(c_double), intent(out) :: no_0e(1) + + end subroutine cutc_no + + ! --- + + end interface + +end module cutc_module + + diff --git a/plugins/local/tc_int/deb_int_2e_ao_gpu.irp.f b/plugins/local/tc_int/deb_int_2e_ao_gpu.irp.f new file mode 100644 index 000000000..4bb44af92 --- /dev/null +++ b/plugins/local/tc_int/deb_int_2e_ao_gpu.irp.f @@ -0,0 +1,311 @@ + +! --- + +program deb_int_2e_ao_gpu + + implicit none + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + + call main() + +end + +! --- + +subroutine main() + + use cutc_module + + implicit none + + integer :: m + integer :: i, j, k, l + integer :: ipoint, jpoint + + double precision :: weight1, ao_i_r, ao_k_r + + double precision :: acc_thr, err_tot, nrm_tot, err_loc + + double precision :: time0, time1 + double precision :: wall_time0, wall_time1 + double precision :: wall_ttime0, wall_ttime1 + double precision :: tt1, tt2 + + double precision, allocatable :: rn(:,:), aos_data1(:,:,:), aos_data2(:,:,:) + double precision, allocatable :: grad1_u12(:,:,:), int_fct_long_range(:,:,:), c_mat(:,:,:) + double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:) + double precision, allocatable :: int2_grad1_u12_ao_gpu(:,:,:,:) + double precision, allocatable :: int_2e_ao(:,:,:,:) + double precision, allocatable :: int_2e_ao_gpu(:,:,:,:) + + + call wall_time(time0) + print*, ' start deb_int_2e_ao_gpu' + + + ! --- + + allocate(rn(3,nucl_num)) + allocate(aos_data1(n_points_final_grid,ao_num,4)) + allocate(aos_data2(n_points_extra_final_grid,ao_num,4)) + + do k = 1, nucl_num + rn(1,k) = nucl_coord(k,1) + rn(2,k) = nucl_coord(k,2) + rn(3,k) = nucl_coord(k,3) + enddo + + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + aos_data1(ipoint,k,1) = aos_in_r_array(k,ipoint) + aos_data1(ipoint,k,2) = aos_grad_in_r_array(k,ipoint,1) + aos_data1(ipoint,k,3) = aos_grad_in_r_array(k,ipoint,2) + aos_data1(ipoint,k,4) = aos_grad_in_r_array(k,ipoint,3) + enddo + enddo + + do k = 1, ao_num + do ipoint = 1, n_points_extra_final_grid + aos_data2(ipoint,k,1) = aos_in_r_array_extra(k,ipoint) + aos_data2(ipoint,k,2) = aos_grad_in_r_array_extra(k,ipoint,1) + aos_data2(ipoint,k,3) = aos_grad_in_r_array_extra(k,ipoint,2) + aos_data2(ipoint,k,4) = aos_grad_in_r_array_extra(k,ipoint,3) + enddo + enddo + + ! --- + + integer :: nB + integer :: sB + + PROVIDE nxBlocks nyBlocks nzBlocks + PROVIDE blockxSize blockySize blockzSize + + sB = 32 + nB = (n_points_final_grid + sB - 1) / sB + + call ezfio_set_tc_int_blockxSize(sB) + call ezfio_set_tc_int_nxBlocks(nB) + + allocate(int2_grad1_u12_ao_gpu(ao_num,ao_num,n_points_final_grid,3)) + allocate(int_2e_ao_gpu(ao_num,ao_num,ao_num,ao_num)) + + call cutc_int(nxBlocks, nyBlocks, nzBlocks, blockxSize, blockySize, blockzSize, & + n_points_final_grid, n_points_extra_final_grid, ao_num, nucl_num, jBH_size, & + final_grid_points, final_weight_at_r_vector, & + final_grid_points_extra, final_weight_at_r_vector_extra, & + rn, aos_data1, aos_data2, jBH_c, jBH_m, jBH_n, jBH_o, & + int2_grad1_u12_ao_gpu, int_2e_ao_gpu) + + ! --- + + allocate(int_fct_long_range(n_points_extra_final_grid,ao_num,ao_num)) + allocate(grad1_u12(n_points_extra_final_grid,n_points_final_grid,4)) + allocate(c_mat(n_points_final_grid,ao_num,ao_num)) + allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,4)) + allocate(int_2e_ao(ao_num,ao_num,ao_num,ao_num)) + + call wall_time(wall_time0) + + call wall_time(wall_ttime0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (int_fct_long_range, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + int_fct_long_range(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(wall_ttime1) + write(*,"(A,2X,F15.7)") ' wall time for int_long_range (sec) = ', (wall_ttime1 - wall_ttime0) + + + call wall_time(wall_ttime0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, grad1_u12) + !$OMP DO + do ipoint = 1, n_points_final_grid + call get_grad1_u12_for_tc(ipoint, n_points_extra_final_grid, grad1_u12(1,ipoint,1) & + , grad1_u12(1,ipoint,2) & + , grad1_u12(1,ipoint,3) & + , grad1_u12(1,ipoint,4) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(wall_ttime1) + write(*,"(A,2X,F15.7)") ' wall time for tc_int_bh (sec) = ', (wall_ttime1 - wall_ttime0) + + + call wall_time(wall_ttime0) + do m = 1, 4 + call dgemm("T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 & + , int_fct_long_range(1,1,1), n_points_extra_final_grid, grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) + enddo + call wall_time(wall_ttime1) + write(*,"(A,2X,F15.7)") ' wall time for DGEMM of integ over r2 (sec) = ', (wall_ttime1 - wall_ttime0) + + + call wall_time(wall_ttime0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(wall_ttime1) + write(*,"(A,2X,F15.7)") ' wall time of Hermitian part (sec) = ', (wall_ttime1 - wall_ttime0) + + + call wall_time(wall_ttime0) + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_ao(1,1,1,4), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 0.d0, int_2e_ao(1,1,1,1), ao_num*ao_num) + call wall_time(wall_ttime1) + write(*,"(A,2X,F15.7)") ' wall time for DGEMM of Hermitian part (sec) = ', (wall_ttime1 - wall_ttime0) + + + tt1 = 0.d0 + tt2 = 0.d0 + do m = 1, 3 + + call wall_time(wall_ttime0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, c_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector, m) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + c_mat(ipoint,k,i) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,m) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,m)) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(wall_ttime1) + tt1 += wall_ttime1 - wall_ttime0 + + call wall_time(wall_ttime0) + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -0.5d0 & + , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid & + , 1.d0, int_2e_ao(1,1,1,1), ao_num*ao_num) + call wall_time(wall_ttime1) + tt2 += wall_ttime1 - wall_ttime0 + enddo + write(*,"(A,2X,F15.7)") ' wall time of non-Hermitian part (sec) = ', tt1 + write(*,"(A,2X,F15.7)") ' wall time for DGEMM of non Hermitian part (sec) = ', tt2 + + + call wall_time(wall_ttime0) + call sum_A_At(int_2e_ao(1,1,1,1), ao_num*ao_num) + call wall_time(wall_ttime1) + write(*,"(A,2X,F15.7)") ' wall time of A + A.T (sec) = ', wall_ttime1 - wall_ttime0 + + + call wall_time(wall_time1) + write(*,"(A,2X,F15.7)") ' wall time on cpu (sec) = ', (wall_time1 - wall_time0) + + ! --- + + acc_thr = 1d-12 + + err_tot = 0.d0 + nrm_tot = 0.d0 + do m = 1, 3 + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + err_loc = dabs(int2_grad1_u12_ao(i,j,ipoint,m) - int2_grad1_u12_ao_gpu(i,j,ipoint,m)) + if(err_loc > acc_thr) then + print*, " error on", i, j, ipoint, m + print*, " CPU res", int2_grad1_u12_ao (i,j,ipoint,m) + print*, " GPU res", int2_grad1_u12_ao_gpu(i,j,ipoint,m) + stop + endif + err_tot = err_tot + err_loc + nrm_tot = nrm_tot + dabs(int2_grad1_u12_ao(i,j,ipoint,m)) + enddo + enddo + enddo + enddo + print *, ' absolute accuracy on int2_grad1_u12_ao (%) =', 100.d0 * err_tot / nrm_tot + + + err_tot = 0.d0 + nrm_tot = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + err_loc = dabs(int_2e_ao(l,k,j,i) - int_2e_ao_gpu(l,k,j,i)) + if(err_loc > acc_thr) then + print*, " error on", l, k, j, i + print*, " CPU res", int_2e_ao (l,k,j,i) + print*, " GPU res", int_2e_ao_gpu(l,k,j,i) + stop + endif + err_tot = err_tot + err_loc + nrm_tot = nrm_tot + dabs(int_2e_ao(l,k,j,i)) + enddo + enddo + enddo + enddo + print *, ' absolute accuracy on int_2e_ao (%) =', 100.d0 * err_tot / nrm_tot + + ! --- + + deallocate(int_fct_long_range, grad1_u12, c_mat) + deallocate(int_2e_ao, int2_grad1_u12_ao) + deallocate(int_2e_ao_gpu, int2_grad1_u12_ao_gpu) + deallocate(rn, aos_data1, aos_data2) + + call wall_time(time1) + write(*,"(A,2X,F15.7)") ' wall time for deb_int_2e_ao_gpu (sec) = ', (time1 - time0) + + return +end diff --git a/plugins/local/tc_int/deb_no_gpu.irp.f b/plugins/local/tc_int/deb_no_gpu.irp.f new file mode 100644 index 000000000..de8adb977 --- /dev/null +++ b/plugins/local/tc_int/deb_no_gpu.irp.f @@ -0,0 +1,218 @@ + +! --- + +program deb_no_gpu + + implicit none + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + + call main() + +end + +! --- + +subroutine main() + + use cutc_module + + implicit none + + integer :: i, j, k, l, ipoint + double precision :: time0, time1 + double precision :: tt0, tt1 + double precision :: acc_thr, err_tot, nrm_tot, err_loc + double precision :: noL_0e + double precision :: noL_0e_gpu(1) + double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:) + double precision, allocatable :: tmp(:,:,:,:) + double precision, allocatable :: int2_grad1_u12_bimo_t(:,:,:,:) + double precision, allocatable :: noL_1e (:,:) + double precision, allocatable :: noL_1e_gpu(:,:) + double precision, allocatable :: noL_2e (:,:,:,:) + double precision, allocatable :: noL_2e_gpu(:,:,:,:) + + + PROVIDE mo_l_coef mo_r_coef + PROVIDE mos_l_in_r_array_transp mos_r_in_r_array_transp + + + call wall_time(time0) + print*, ' start deb_no_gpu' + + + + allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,3)) + print*, ' Reading int2_grad1_u12_ao from ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao' + call wall_time(tt0) + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") + read(11) int2_grad1_u12_ao + close(11) + call wall_time(tt1) + write(*,"(A,2X,F15.7)") ' wall time for reading (sec) = ', (tt1 - tt0) + + allocate(tmp(mo_num,mo_num,n_points_final_grid,3)) + allocate(int2_grad1_u12_bimo_t(n_points_final_grid,3,mo_num,mo_num)) + + call wall_time(tt0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (ao_num, mo_num, n_points_final_grid, int2_grad1_u12_ao, tmp) + !$OMP DO SCHEDULE (dynamic) + do ipoint = 1, n_points_final_grid + call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,1), ao_num, tmp(1,1,ipoint,1), mo_num) + call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,2), ao_num, tmp(1,1,ipoint,2), mo_num) + call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,3), ao_num, tmp(1,1,ipoint,3), mo_num) + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(int2_grad1_u12_ao) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, tmp, int2_grad1_u12_bimo_t) + !$OMP DO COLLAPSE(2) SCHEDULE (dynamic) + do ipoint = 1, n_points_final_grid + do i = 1, mo_num + do j = 1, mo_num + int2_grad1_u12_bimo_t(ipoint,1,j,i) = tmp(j,i,ipoint,1) + int2_grad1_u12_bimo_t(ipoint,2,j,i) = tmp(j,i,ipoint,2) + int2_grad1_u12_bimo_t(ipoint,3,j,i) = tmp(j,i,ipoint,3) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(tt1) + write(*,"(A,2X,F15.7)") ' wall time for 3e-tensor (sec) = ', (tt1 - tt0) + + deallocate(tmp) + + ! --- + + allocate(noL_2e_gpu(mo_num,mo_num,mo_num,mo_num)) + allocate(noL_1e_gpu(mo_num,mo_num)) + + call cutc_no(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, & + final_weight_at_r_vector(1), & + mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), & + int2_grad1_u12_bimo_t(1,1,1,1), noL_2e_gpu(1,1,1,1), noL_1e_gpu(1,1), noL_0e_gpu(1)) + + ! --- + + allocate(noL_2e(mo_num,mo_num,mo_num,mo_num)) + allocate(noL_1e(mo_num,mo_num)) + + call provide_no_2e(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, & + final_weight_at_r_vector(1), & + mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), & + int2_grad1_u12_bimo_t(1,1,1,1), noL_2e(1,1,1,1)) + + call provide_no_1e(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, & + final_weight_at_r_vector(1), & + mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), & + int2_grad1_u12_bimo_t(1,1,1,1), noL_1e(1,1)) + + call provide_no_0e(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, & + final_weight_at_r_vector(1), & + mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), & + int2_grad1_u12_bimo_t(1,1,1,1), noL_0e) + + ! --- + + deallocate(int2_grad1_u12_bimo_t) + + acc_thr = 1d-12 + + ! --- + + err_tot = 0.d0 + nrm_tot = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + err_loc = dabs(noL_2e(l,k,j,i) - noL_2e_gpu(l,k,j,i)) + if(err_loc > acc_thr) then + print*, " error on", l, k, j, i + print*, " CPU res", noL_2e (l,k,j,i) + print*, " GPU res", noL_2e_gpu(l,k,j,i) + stop + endif + err_tot = err_tot + err_loc + nrm_tot = nrm_tot + dabs(noL_2e(l,k,j,i)) + enddo + enddo + enddo + enddo + print *, ' absolute accuracy on noL_2e (%) =', 100.d0 * err_tot / nrm_tot + + deallocate(noL_2e) + deallocate(noL_2e_gpu) + + ! --- + + err_tot = 0.d0 + nrm_tot = 0.d0 + do k = 1, mo_num + do l = 1, mo_num + err_loc = dabs(noL_1e(l,k) - noL_1e_gpu(l,k)) + if(err_loc > acc_thr) then + print*, " error on", l, k + print*, " CPU res", noL_1e (l,k) + print*, " GPU res", noL_1e_gpu(l,k) + stop + endif + err_tot = err_tot + err_loc + nrm_tot = nrm_tot + dabs(noL_1e(l,k)) + enddo + enddo + print *, ' absolute accuracy on noL_1e (%) =', 100.d0 * err_tot / nrm_tot + + deallocate(noL_1e) + deallocate(noL_1e_gpu) + + ! --- + + print *, 'noL_0e CPU = ', noL_0e + print *, 'noL_0e GPU = ', noL_0e_gpu(1) + + err_tot = dabs(noL_0e - noL_0e_gpu(1)) + nrm_tot = dabs(noL_0e) + print *, ' absolute accuracy on noL_0e (%) =', 100.d0 * err_tot / nrm_tot + + + call wall_time(time1) + write(*,"(A,2X,F15.7)") ' wall time for deb_no_gpu (sec) = ', (time1 - time0) + + return + +end + +! --- + + diff --git a/plugins/local/tc_int/install b/plugins/local/tc_int/install new file mode 100755 index 000000000..be779c3ac --- /dev/null +++ b/plugins/local/tc_int/install @@ -0,0 +1,18 @@ +#!/bin/bash + +# Check if the QP_ROOT environment variable is set. +if [[ -z ${QP_ROOT} ]] +then + print "The QP_ROOT environment variable is not set." + print "Please reload the quantum_package.rc file." + exit -1 +fi + +git clone https://github.com/AbdAmmar/CuTC +cd CuTC +source config/env.rc +make +cd .. + +ln -s ${PWD}/CuTC/build/libcutcint.so ${QP_ROOT}/lib + diff --git a/plugins/local/tc_int/jast_grad_full.irp.f b/plugins/local/tc_int/jast_grad_full.irp.f new file mode 100644 index 000000000..943d85671 --- /dev/null +++ b/plugins/local/tc_int/jast_grad_full.irp.f @@ -0,0 +1,188 @@ + +! --- + +subroutine get_grad1_u12_for_tc(ipoint, n_grid2, resx, resy, resz, res) + + BEGIN_DOC + ! + ! resx(ipoint) = [grad1 u(r1,r2)]_x1 + ! resy(ipoint) = [grad1 u(r1,r2)]_y1 + ! resz(ipoint) = [grad1 u(r1,r2)]_z1 + ! res (ipoint) = -0.5 [grad1 u(r1,r2)]^2 + ! + ! We use: + ! grid for r1 + ! extra_grid for r2 + ! + END_DOC + + include 'constants.include.F' + + implicit none + integer, intent(in) :: ipoint, n_grid2 + double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2), res(n_grid2) + + integer :: jpoint, i_nucl, p, mpA, npA, opA, pp + integer :: powmax1, powmax, powmax2 + double precision :: r1(3), r2(3) + double precision :: tmp, tmp1, tmp2, tmp11, tmp22 + double precision :: rn(3), f1A, grad1_f1A(3), f2A, grad2_f2A(3), g12, grad1_g12(3) + double precision, allocatable :: f1A_power(:), f2A_power(:), double_p(:), g12_power(:) + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + call grad1_j12_r1_seq(r1, n_grid2, resx, resy, resz) + + do jpoint = 1, n_grid2 ! r2 + res(jpoint) = -0.5d0 * (resx(jpoint) * resx(jpoint) + resy(jpoint) * resy(jpoint) + resz(jpoint) * resz(jpoint)) + enddo + + return +end + +! --- + +subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz) + + include 'constants.include.F' + + implicit none + integer , intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: gradx(n_grid2) + double precision, intent(out) :: grady(n_grid2) + double precision, intent(out) :: gradz(n_grid2) + + integer :: jpoint, i_nucl, p, mpA, npA, opA + double precision :: r2(3) + double precision :: dx, dy, dz, r12, tmp + double precision :: rn(3), f1A, grad1_f1A(3), f2A, g12, grad1_g12(3) + double precision :: tmp1, tmp2, dist + integer :: powmax1, powmax, powmax2 + double precision, allocatable :: f1A_power(:), f2A_power(:), double_p(:), g12_power(:) + + powmax1 = max(maxval(jBH_m), maxval(jBH_n)) + powmax2 = maxval(jBH_o) + powmax = max(powmax1, powmax2) + + allocate(f1A_power(-1:powmax), f2A_power(-1:powmax), g12_power(-1:powmax), double_p(0:powmax)) + + do p = 0, powmax + double_p(p) = dble(p) + enddo + + f1A_power(-1) = 0.d0 + f2A_power(-1) = 0.d0 + g12_power(-1) = 0.d0 + + f1A_power(0) = 1.d0 + f2A_power(0) = 1.d0 + g12_power(0) = 1.d0 + + do jpoint = 1, n_grid2 ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 + + dist = (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) + if(dist .ge. 1d-15) then + dist = dsqrt(dist) + tmp1 = 1.d0 / (1.d0 + dist) + g12 = dist * tmp1 + tmp2 = tmp1 * tmp1 / dist + grad1_g12(1) = tmp2 * (r1(1) - r2(1)) + grad1_g12(2) = tmp2 * (r1(2) - r2(2)) + grad1_g12(3) = tmp2 * (r1(3) - r2(3)) + do p = 1, powmax2 + g12_power(p) = g12_power(p-1) * g12 + enddo + else + grad1_g12(1) = 0.d0 + grad1_g12(2) = 0.d0 + grad1_g12(3) = 0.d0 + g12 = 0.d0 + do p = 1, powmax2 + g12_power(p) = 0.d0 + enddo + endif + + do i_nucl = 1, nucl_num + + rn(1) = nucl_coord(i_nucl,1) + rn(2) = nucl_coord(i_nucl,2) + rn(3) = nucl_coord(i_nucl,3) + + dist = (r1(1) - rn(1)) * (r1(1) - rn(1)) & + + (r1(2) - rn(2)) * (r1(2) - rn(2)) & + + (r1(3) - rn(3)) * (r1(3) - rn(3)) + if (dist > 1.d-15) then + dist = dsqrt(dist) + tmp1 = 1.d0 / (1.d0 + dist) + f1A = dist * tmp1 + tmp2 = tmp1 * tmp1 / dist + grad1_f1A(1) = tmp2 * (r1(1) - rn(1)) + grad1_f1A(2) = tmp2 * (r1(2) - rn(2)) + grad1_f1A(3) = tmp2 * (r1(3) - rn(3)) + do p = 1, powmax1 + f1A_power(p) = f1A_power(p-1) * f1A + enddo + else + grad1_f1A(1) = 0.d0 + grad1_f1A(2) = 0.d0 + grad1_f1A(3) = 0.d0 + f1A = 0.d0 + do p = 1, powmax1 + f1A_power(p) = 0.d0 + enddo + endif + + dist = (r2(1) - rn(1)) * (r2(1) - rn(1)) & + + (r2(2) - rn(2)) * (r2(2) - rn(2)) & + + (r2(3) - rn(3)) * (r2(3) - rn(3)) + if (dist > 1.d-15) then + dist = dsqrt(dist) + f2A = dist / (1.d0 + dist) + do p = 1, powmax1 + f2A_power(p) = f2A_power(p-1) * f2A + enddo + else + f2A = 0.d0 + do p = 1, powmax1 + f2A_power(p) = 0.d0 + enddo + endif + + do p = 1, jBH_size + + tmp = jBH_c(p,i_nucl) + if (dabs(tmp) <= 1.d-10) cycle + + mpA = jBH_m(p,i_nucl) + npA = jBH_n(p,i_nucl) + opA = jBH_o(p,i_nucl) + + tmp1 = double_p(mpA) * f1A_power(mpA-1) * f2A_power(npA) + double_p(npA) * f1A_power(npA-1) * f2A_power(mpA) + tmp1 = tmp1 * g12_power(opA) * tmp + tmp2 = double_p(opA) * g12_power(opA-1) * (f1A_power(mpA) * f2A_power(npA) + f1A_power(npA) * f2A_power(mpA)) * tmp + + gradx(jpoint) = gradx(jpoint) + tmp1 * grad1_f1A(1) + tmp2 * grad1_g12(1) + grady(jpoint) = grady(jpoint) + tmp1 * grad1_f1A(2) + tmp2 * grad1_g12(2) + gradz(jpoint) = gradz(jpoint) + tmp1 * grad1_f1A(3) + tmp2 * grad1_g12(3) + enddo ! p + enddo ! i_nucl + enddo ! jpoint + + return +end + +! --- + diff --git a/plugins/local/tc_int/no_0e.irp.f b/plugins/local/tc_int/no_0e.irp.f new file mode 100644 index 000000000..830b91a83 --- /dev/null +++ b/plugins/local/tc_int/no_0e.irp.f @@ -0,0 +1,407 @@ + +! --- + +subroutine provide_no_0e(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r, int2_grad1_u12, noL_0e) + + implicit none + + integer, intent(in) :: n_grid, n_mo + integer, intent(in) :: ne_a, ne_b + double precision, intent(in) :: wr1(n_grid) + double precision, intent(in) :: mos_l_in_r(n_grid,n_mo) + double precision, intent(in) :: mos_r_in_r(n_grid,n_mo) + double precision, intent(in) :: int2_grad1_u12(n_grid,3,n_mo,n_mo) + double precision, intent(out) :: noL_0e + + integer :: i, j, k, ipoint + double precision :: t0, t1 + double precision, allocatable :: tmp(:) + double precision, allocatable :: tmpL(:,:), tmpR(:,:) + double precision, allocatable :: tmpM(:,:), tmpS(:), tmpO(:), tmpJ(:,:) + double precision, allocatable :: tmpM_priv(:,:), tmpS_priv(:), tmpO_priv(:), tmpJ_priv(:,:) + + + call wall_time(t0) + + + if(ne_a .eq. ne_b) then + + allocate(tmp(ne_b)) + allocate(tmpL(n_grid,3), tmpR(n_grid,3)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(j, i, ipoint, tmpL, tmpR) & + !$OMP SHARED(ne_b, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, wr1, & + !$OMP int2_grad1_u12, tmp) + + !$OMP DO + do j = 1, ne_b + + tmpL = 0.d0 + tmpR = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + + tmpL(ipoint,1) = tmpL(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,2) = tmpL(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,3) = tmpL(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) + + tmpR(ipoint,1) = tmpR(ipoint,1) + int2_grad1_u12(ipoint,1,i,j) * mos_r_in_r(ipoint,i) + tmpR(ipoint,2) = tmpR(ipoint,2) + int2_grad1_u12(ipoint,2,i,j) * mos_r_in_r(ipoint,i) + tmpR(ipoint,3) = tmpR(ipoint,3) + int2_grad1_u12(ipoint,3,i,j) * mos_r_in_r(ipoint,i) + enddo + enddo + + tmp(j) = 0.d0 + do ipoint = 1, n_grid + tmp(j) = tmp(j) + wr1(ipoint) * (tmpL(ipoint,1)*tmpR(ipoint,1) + tmpL(ipoint,2)*tmpR(ipoint,2) + tmpL(ipoint,3)*tmpR(ipoint,3)) + enddo + enddo ! j + !$OMP END DO + !$OMP END PARALLEL + + noL_0e = -2.d0 * sum(tmp) + + deallocate(tmp) + deallocate(tmpL, tmpR) + + ! --- + + allocate(tmpO(n_grid), tmpJ(n_grid,3)) + tmpO = 0.d0 + tmpJ = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmpO_priv, tmpJ_priv) & + !$OMP SHARED(ne_b, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpO, tmpJ) + + allocate(tmpO_priv(n_grid), tmpJ_priv(n_grid,3)) + tmpO_priv = 0.d0 + tmpJ_priv = 0.d0 + + !$OMP DO + do i = 1, ne_b + do ipoint = 1, n_grid + tmpO_priv(ipoint) = tmpO_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i) + tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i) + tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmpO = tmpO + tmpO_priv + tmpJ = tmpJ + tmpJ_priv + !$OMP END CRITICAL + + deallocate(tmpO_priv, tmpJ_priv) + !$OMP END PARALLEL + + allocate(tmpM(n_grid,3), tmpS(n_grid)) + tmpM = 0.d0 + tmpS = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmpM_priv, tmpS_priv) & + !$OMP SHARED(ne_b, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpM, tmpS) + + allocate(tmpM_priv(n_grid,3), tmpS_priv(n_grid)) + tmpM_priv = 0.d0 + tmpS_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, ne_b + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + + tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmpM = tmpM + tmpM_priv + tmpS = tmpS + tmpS_priv + !$OMP END CRITICAL + + deallocate(tmpM_priv, tmpS_priv) + !$OMP END PARALLEL + + allocate(tmp(n_grid)) + + do ipoint = 1, n_grid + + tmpS(ipoint) = 2.d0 * (tmpJ(ipoint,1)*tmpJ(ipoint,1) + tmpJ(ipoint,2)*tmpJ(ipoint,2) + tmpJ(ipoint,3)*tmpJ(ipoint,3)) - tmpS(ipoint) + + tmp(ipoint) = wr1(ipoint) * ( tmpO(ipoint) * tmpS(ipoint) - 2.d0 * ( tmpJ(ipoint,1) * tmpM(ipoint,1) & + + tmpJ(ipoint,2) * tmpM(ipoint,2) & + + tmpJ(ipoint,3) * tmpM(ipoint,3) ) ) + enddo + + noL_0e = noL_0e - 2.d0 * (sum(tmp)) + + deallocate(tmp) + + else + + allocate(tmp(ne_a)) + allocate(tmpL(n_grid,3), tmpR(n_grid,3)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(j, i, ipoint, tmpL, tmpR) & + !$OMP SHARED(ne_b, ne_a, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmp, wr1) + + !$OMP DO + do j = 1, ne_b + + tmpL = 0.d0 + tmpR = 0.d0 + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + + tmpL(ipoint,1) = tmpL(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,2) = tmpL(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,3) = tmpL(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) + + tmpR(ipoint,1) = tmpR(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * mos_r_in_r(ipoint,i) + tmpR(ipoint,2) = tmpR(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * mos_r_in_r(ipoint,i) + tmpR(ipoint,3) = tmpR(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * mos_r_in_r(ipoint,i) + enddo + enddo + + tmp(j) = 0.d0 + do ipoint = 1, n_grid + tmp(j) = tmp(j) + wr1(ipoint) * (tmpL(ipoint,1)*tmpR(ipoint,1) + tmpL(ipoint,2)*tmpR(ipoint,2) + tmpL(ipoint,3)*tmpR(ipoint,3)) + enddo + + do i = 1, ne_b + do ipoint = 1, n_grid + + tmpL(ipoint,1) = tmpL(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,2) = tmpL(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,3) = tmpL(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) + + tmpR(ipoint,1) = tmpR(ipoint,1) + int2_grad1_u12(ipoint,1,i,j) * mos_r_in_r(ipoint,i) + tmpR(ipoint,2) = tmpR(ipoint,2) + int2_grad1_u12(ipoint,2,i,j) * mos_r_in_r(ipoint,i) + tmpR(ipoint,3) = tmpR(ipoint,3) + int2_grad1_u12(ipoint,3,i,j) * mos_r_in_r(ipoint,i) + enddo + enddo + + do ipoint = 1, n_grid + tmp(j) = tmp(j) + wr1(ipoint) * (tmpL(ipoint,1)*tmpR(ipoint,1) + tmpL(ipoint,2)*tmpR(ipoint,2) + tmpL(ipoint,3)*tmpR(ipoint,3)) + enddo + enddo ! j + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(j, i, ipoint, tmpL, tmpR) & + !$OMP SHARED(ne_b, ne_a, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmp, wr1) + + !$OMP DO + do j = ne_b+1, ne_a + + tmpL = 0.d0 + tmpR = 0.d0 + do i = 1, ne_a + do ipoint = 1, n_grid + tmpL(ipoint,1) = tmpL(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,2) = tmpL(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,3) = tmpL(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) + + tmpR(ipoint,1) = tmpR(ipoint,1) + int2_grad1_u12(ipoint,1,i,j) * mos_r_in_r(ipoint,i) + tmpR(ipoint,2) = tmpR(ipoint,2) + int2_grad1_u12(ipoint,2,i,j) * mos_r_in_r(ipoint,i) + tmpR(ipoint,3) = tmpR(ipoint,3) + int2_grad1_u12(ipoint,3,i,j) * mos_r_in_r(ipoint,i) + enddo + enddo + + tmp(j) = 0.d0 + do ipoint = 1, n_grid + tmp(j) = tmp(j) + 0.5d0 * wr1(ipoint) * (tmpL(ipoint,1)*tmpR(ipoint,1) + tmpL(ipoint,2)*tmpR(ipoint,2) + tmpL(ipoint,3)*tmpR(ipoint,3)) + enddo + enddo ! j + !$OMP END DO + !$OMP END PARALLEL + + noL_0e = -2.d0 * sum(tmp) + + deallocate(tmp) + deallocate(tmpL, tmpR) + + ! --- + + allocate(tmpO(n_grid), tmpJ(n_grid,3)) + tmpO = 0.d0 + tmpJ = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmpO_priv, tmpJ_priv) & + !$OMP SHARED(ne_b, ne_a, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpO, tmpJ) + + allocate(tmpO_priv(n_grid), tmpJ_priv(n_grid,3)) + tmpO_priv = 0.d0 + tmpJ_priv = 0.d0 + + !$OMP DO + do i = 1, ne_b + do ipoint = 1, n_grid + tmpO_priv(ipoint) = tmpO_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i) + tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i) + tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + tmpO_priv(ipoint) = tmpO_priv(ipoint) + 0.5d0 * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,i) + tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,i) + tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmpO = tmpO + tmpO_priv + tmpJ = tmpJ + tmpJ_priv + !$OMP END CRITICAL + + deallocate(tmpO_priv, tmpJ_priv) + !$OMP END PARALLEL + + ! --- + + allocate(tmpM(n_grid,3), tmpS(n_grid)) + tmpM = 0.d0 + tmpS = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmpM_priv, tmpS_priv) & + !$OMP SHARED(ne_b, ne_a, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpM, tmpS) + + allocate(tmpM_priv(n_grid,3), tmpS_priv(n_grid)) + tmpM_priv = 0.d0 + tmpS_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, ne_b + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + + tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = ne_b+1, ne_a + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i) + + tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = ne_b+1, ne_a + do j = ne_b+1, ne_a + do ipoint = 1, n_grid + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + + tmpS_priv(ipoint) = tmpS_priv(ipoint) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) & + + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) & + + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmpM = tmpM + tmpM_priv + tmpS = tmpS + tmpS_priv + !$OMP END CRITICAL + + deallocate(tmpM_priv, tmpS_priv) + !$OMP END PARALLEL + + allocate(tmp(n_grid)) + + do ipoint = 1, n_grid + + tmpS(ipoint) = 2.d0 * (tmpJ(ipoint,1)*tmpJ(ipoint,1) + tmpJ(ipoint,2)*tmpJ(ipoint,2) + tmpJ(ipoint,3)*tmpJ(ipoint,3)) - tmpS(ipoint) + + tmp(ipoint) = wr1(ipoint) * ( tmpO(ipoint) * tmpS(ipoint) - 2.d0 * ( tmpJ(ipoint,1) * tmpM(ipoint,1) & + + tmpJ(ipoint,2) * tmpM(ipoint,2) & + + tmpJ(ipoint,3) * tmpM(ipoint,3) ) ) + enddo + + noL_0e = noL_0e - 2.d0 * (sum(tmp)) + + deallocate(tmp) + + endif + + + call wall_time(t1) + write(*,"(A,2X,F15.7)") ' wall time for noL_0e (sec) = ', (t1 - t0) + + return +end + +! --- + diff --git a/plugins/local/tc_int/no_1e.irp.f b/plugins/local/tc_int/no_1e.irp.f new file mode 100644 index 000000000..5a9798f03 --- /dev/null +++ b/plugins/local/tc_int/no_1e.irp.f @@ -0,0 +1,1179 @@ + +! --- + +subroutine provide_no_1e(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r, int2_grad1_u12, noL_1e) + + implicit none + + integer, intent(in) :: n_grid, n_mo + integer, intent(in) :: ne_a, ne_b + double precision, intent(in) :: wr1(n_grid) + double precision, intent(in) :: mos_l_in_r(n_grid,n_mo) + double precision, intent(in) :: mos_r_in_r(n_grid,n_mo) + double precision, intent(in) :: int2_grad1_u12(n_grid,3,n_mo,n_mo) + double precision, intent(out) :: noL_1e(n_mo,n_mo) + + integer :: p, s, i, j, ipoint + double precision :: t0, t1 + double precision, allocatable :: tmpC(:,:,:,:), tmpD(:,:), tmpE(:,:,:), tmpF(:,:,:) + double precision, allocatable :: tmpL(:,:,:), tmpR(:,:,:), tmpM(:,:), tmpS(:), tmpO(:), tmpJ(:,:) + double precision, allocatable :: tmpL0(:,:,:), tmpR0(:,:,:) + double precision, allocatable :: tmpM_priv(:,:), tmpS_priv(:), tmpO_priv(:), tmpJ_priv(:,:) + + + call wall_time(t0) + + + if(ne_a .eq. ne_b) then + + allocate(tmpO(n_grid), tmpJ(n_grid,3)) + tmpO = 0.d0 + tmpJ = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmpO_priv, tmpJ_priv) & + !$OMP SHARED(ne_b, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpO, tmpJ) + + allocate(tmpO_priv(n_grid), tmpJ_priv(n_grid,3)) + tmpO_priv = 0.d0 + tmpJ_priv = 0.d0 + + !$OMP DO + do i = 1, ne_b + do ipoint = 1, n_grid + tmpO_priv(ipoint) = tmpO_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i) + tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i) + tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmpO = tmpO + tmpO_priv + tmpJ = tmpJ + tmpJ_priv + !$OMP END CRITICAL + + deallocate(tmpO_priv, tmpJ_priv) + !$OMP END PARALLEL + + ! --- + + allocate(tmpM(n_grid,3), tmpS(n_grid)) + tmpM = 0.d0 + tmpS = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmpM_priv, tmpS_priv) & + !$OMP SHARED(ne_b, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpM, tmpS) + + allocate(tmpM_priv(n_grid,3), tmpS_priv(n_grid)) + tmpM_priv = 0.d0 + tmpS_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, ne_b + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + + tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmpM = tmpM + tmpM_priv + tmpS = tmpS + tmpS_priv + !$OMP END CRITICAL + + deallocate(tmpM_priv, tmpS_priv) + !$OMP END PARALLEL + + ! --- + + allocate(tmpC(n_grid,4,n_mo,n_mo)) + allocate(tmpD(n_grid,4)) + + do ipoint = 1, n_grid + + tmpD(ipoint,1) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,1) - tmpM(ipoint,1)) + tmpD(ipoint,2) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,2) - tmpM(ipoint,2)) + tmpD(ipoint,3) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,3) - tmpM(ipoint,3)) + tmpD(ipoint,4) = -wr1(ipoint) * tmpO(ipoint) + + tmpS(ipoint) = 2.d0 * (tmpJ(ipoint,1) * tmpJ(ipoint,1) + tmpJ(ipoint,2) * tmpJ(ipoint,2) + tmpJ(ipoint,3) * tmpJ(ipoint,3)) - tmpS(ipoint) + enddo + + deallocate(tmpO, tmpM) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, s, i, ipoint) & + !$OMP SHARED(n_mo, ne_b, n_grid, & + !$OMP int2_grad1_u12, tmpC) + + !$OMP DO COLLAPSE(2) + do s = 1, n_mo + do p = 1, n_mo + + do ipoint = 1, n_grid + tmpC(ipoint,1,p,s) = int2_grad1_u12(ipoint,1,p,s) + tmpC(ipoint,2,p,s) = int2_grad1_u12(ipoint,2,p,s) + tmpC(ipoint,3,p,s) = int2_grad1_u12(ipoint,3,p,s) + enddo + + tmpC(:,4,p,s) = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpC(ipoint,4,p,s) = tmpC(ipoint,4,p,s) + int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s) + enddo + enddo + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + call dgemv( 'T', 4*n_grid, n_mo*n_mo, 2.d0 & + , tmpC(1,1,1,1), size(tmpC, 1) * size(tmpC, 2) & + , tmpD(1,1), 1 & + , 0.d0, noL_1e(1,1), 1) + + deallocate(tmpC, tmpD) + + ! --- + + allocate(tmpL(n_grid,3,n_mo)) + allocate(tmpR(n_grid,3,n_mo)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, ipoint) & + !$OMP SHARED(ne_b, n_grid, n_mo, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpL, tmpR) + + !$OMP DO + do p = 1, n_mo + + tmpL(:,1:3,p) = 0.d0 + tmpR(:,1:3,p) = 0.d0 + + do i = 1, ne_b + do ipoint = 1, n_grid + + tmpL(ipoint,1,p) = tmpL(ipoint,1,p) + int2_grad1_u12(ipoint,1,p,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,2,p) = tmpL(ipoint,2,p) + int2_grad1_u12(ipoint,2,p,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,3,p) = tmpL(ipoint,3,p) + int2_grad1_u12(ipoint,3,p,i) * mos_l_in_r(ipoint,i) + + tmpR(ipoint,1,p) = tmpR(ipoint,1,p) + int2_grad1_u12(ipoint,1,i,p) * mos_r_in_r(ipoint,i) + tmpR(ipoint,2,p) = tmpR(ipoint,2,p) + int2_grad1_u12(ipoint,2,i,p) * mos_r_in_r(ipoint,i) + tmpR(ipoint,3,p) = tmpR(ipoint,3,p) + int2_grad1_u12(ipoint,3,i,p) * mos_r_in_r(ipoint,i) + enddo + enddo + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + allocate(tmpE(n_grid,5,n_mo)) + allocate(tmpF(n_grid,5,n_mo)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, j, ipoint) & + !$OMP SHARED(ne_b, n_grid, n_mo, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, wr1, & + !$OMP tmpL, tmpR, tmpJ, tmpS, tmpE, tmpF) + + !$OMP DO + do p = 1, n_mo + + do ipoint = 1, n_grid + + tmpE(ipoint,1,p) = wr1(ipoint) * mos_l_in_r(ipoint,p) + tmpE(ipoint,2,p) = -2.d0 * (tmpL(ipoint,1,p) * tmpJ(ipoint,1) + tmpL(ipoint,2,p) * tmpJ(ipoint,2) + tmpL(ipoint,3,p) * tmpJ(ipoint,3)) + tmpE(ipoint,3,p) = wr1(ipoint) * tmpL(ipoint,1,p) + tmpE(ipoint,4,p) = wr1(ipoint) * tmpL(ipoint,2,p) + tmpE(ipoint,5,p) = wr1(ipoint) * tmpL(ipoint,3,p) + + tmpF(ipoint,1,p) = -2.d0 * (tmpR(ipoint,1,p) * tmpJ(ipoint,1) + tmpR(ipoint,2,p) * tmpJ(ipoint,2) + tmpR(ipoint,3,p) * tmpJ(ipoint,3)) & + + mos_r_in_r(ipoint,p) * tmpS(ipoint) + tmpF(ipoint,2,p) = wr1(ipoint) * mos_r_in_r(ipoint,p) + tmpF(ipoint,3,p) = tmpR(ipoint,1,p) + tmpF(ipoint,4,p) = tmpR(ipoint,2,p) + tmpF(ipoint,5,p) = tmpR(ipoint,3,p) + enddo + + do i = 1, ne_b + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpE(ipoint,2,p) = tmpE(ipoint,2,p) + mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) ) + + tmpF(ipoint,1,p) = tmpF(ipoint,1,p) + mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) ) + enddo ! ipoint + enddo ! j + enddo ! i + + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmpL, tmpR, tmpJ, tmpS) + + call dgemm( 'T', 'N', n_mo, n_mo, 5*n_grid, 1.d0 & + , tmpE(1,1,1), 5*n_grid, tmpF(1,1,1), 5*n_grid & + , 1.d0, noL_1e(1,1), n_mo) + + deallocate(tmpE, tmpF) + + ! --- + + else + + allocate(tmpO(n_grid), tmpJ(n_grid,3)) + tmpO = 0.d0 + tmpJ = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmpO_priv, tmpJ_priv) & + !$OMP SHARED(ne_b, ne_a, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpO, tmpJ) + + allocate(tmpO_priv(n_grid), tmpJ_priv(n_grid,3)) + tmpO_priv = 0.d0 + tmpJ_priv = 0.d0 + + !$OMP DO + do i = 1, ne_b + do ipoint = 1, n_grid + tmpO_priv(ipoint) = tmpO_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i) + tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i) + tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + tmpO_priv(ipoint) = tmpO_priv(ipoint) + 0.5d0 * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,i) + tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,i) + tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmpO = tmpO + tmpO_priv + tmpJ = tmpJ + tmpJ_priv + !$OMP END CRITICAL + + deallocate(tmpO_priv, tmpJ_priv) + !$OMP END PARALLEL + + ! --- + + allocate(tmpM(n_grid,3), tmpS(n_grid)) + tmpM = 0.d0 + tmpS = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmpM_priv, tmpS_priv) & + !$OMP SHARED(ne_b, ne_a, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpM, tmpS) + + allocate(tmpM_priv(n_grid,3), tmpS_priv(n_grid)) + tmpM_priv = 0.d0 + tmpS_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, ne_b + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + + tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = ne_b+1, ne_a + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i) + + tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = ne_b+1, ne_a + do j = ne_b+1, ne_a + do ipoint = 1, n_grid + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + + tmpS_priv(ipoint) = tmpS_priv(ipoint) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) & + + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) & + + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmpM = tmpM + tmpM_priv + tmpS = tmpS + tmpS_priv + !$OMP END CRITICAL + + deallocate(tmpM_priv, tmpS_priv) + !$OMP END PARALLEL + + ! --- + + allocate(tmpC(n_grid,4,n_mo,n_mo)) + allocate(tmpD(n_grid,4)) + + do ipoint = 1, n_grid + + tmpD(ipoint,1) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,1) - tmpM(ipoint,1)) + tmpD(ipoint,2) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,2) - tmpM(ipoint,2)) + tmpD(ipoint,3) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,3) - tmpM(ipoint,3)) + tmpD(ipoint,4) = -wr1(ipoint) * tmpO(ipoint) + + tmpS(ipoint) = 2.d0 * (tmpJ(ipoint,1) * tmpJ(ipoint,1) + tmpJ(ipoint,2) * tmpJ(ipoint,2) + tmpJ(ipoint,3) * tmpJ(ipoint,3)) - tmpS(ipoint) + enddo + + deallocate(tmpO, tmpM) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, s, i, ipoint) & + !$OMP SHARED(n_mo, ne_b, n_grid, & + !$OMP ne_a, int2_grad1_u12, tmpC) + + !$OMP DO COLLAPSE(2) + do s = 1, n_mo + do p = 1, n_mo + + do ipoint = 1, n_grid + tmpC(ipoint,1,p,s) = int2_grad1_u12(ipoint,1,p,s) + tmpC(ipoint,2,p,s) = int2_grad1_u12(ipoint,2,p,s) + tmpC(ipoint,3,p,s) = int2_grad1_u12(ipoint,3,p,s) + enddo + + tmpC(:,4,p,s) = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpC(ipoint,4,p,s) = tmpC(ipoint,4,p,s) + int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s) + enddo + enddo + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + tmpC(ipoint,4,p,s) = tmpC(ipoint,4,p,s) + 0.5d0 * int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) & + + 0.5d0 * int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) & + + 0.5d0 * int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s) + enddo + enddo + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + call dgemv( 'T', 4*n_grid, n_mo*n_mo, 2.d0 & + , tmpC(1,1,1,1), size(tmpC, 1) * size(tmpC, 2) & + , tmpD(1,1), 1 & + , 0.d0, noL_1e(1,1), 1) + + deallocate(tmpC, tmpD) + + ! --- + + allocate(tmpL(n_grid,3,n_mo), tmpL0(n_grid,3,n_mo)) + allocate(tmpR(n_grid,3,n_mo), tmpR0(n_grid,3,n_mo)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, ipoint) & + !$OMP SHARED(ne_b, ne_a, n_grid, n_mo, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpL0, tmpR0, tmpL, tmpR) + + !$OMP DO + do p = 1, n_mo + + tmpL0(:,1:3,p) = 0.d0 + tmpR0(:,1:3,p) = 0.d0 + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + + tmpL0(ipoint,1,p) = tmpL0(ipoint,1,p) + 0.5d0 * int2_grad1_u12(ipoint,1,p,i) * mos_l_in_r(ipoint,i) + tmpL0(ipoint,2,p) = tmpL0(ipoint,2,p) + 0.5d0 * int2_grad1_u12(ipoint,2,p,i) * mos_l_in_r(ipoint,i) + tmpL0(ipoint,3,p) = tmpL0(ipoint,3,p) + 0.5d0 * int2_grad1_u12(ipoint,3,p,i) * mos_l_in_r(ipoint,i) + + tmpR0(ipoint,1,p) = tmpR0(ipoint,1,p) + 0.5d0 * int2_grad1_u12(ipoint,1,i,p) * mos_r_in_r(ipoint,i) + tmpR0(ipoint,2,p) = tmpR0(ipoint,2,p) + 0.5d0 * int2_grad1_u12(ipoint,2,i,p) * mos_r_in_r(ipoint,i) + tmpR0(ipoint,3,p) = tmpR0(ipoint,3,p) + 0.5d0 * int2_grad1_u12(ipoint,3,i,p) * mos_r_in_r(ipoint,i) + enddo + enddo + + tmpL(:,1:3,p) = tmpL0(:,1:3,p) + tmpR(:,1:3,p) = tmpR0(:,1:3,p) + do i = 1, ne_b + do ipoint = 1, n_grid + + tmpL(ipoint,1,p) = tmpL(ipoint,1,p) + int2_grad1_u12(ipoint,1,p,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,2,p) = tmpL(ipoint,2,p) + int2_grad1_u12(ipoint,2,p,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,3,p) = tmpL(ipoint,3,p) + int2_grad1_u12(ipoint,3,p,i) * mos_l_in_r(ipoint,i) + + tmpR(ipoint,1,p) = tmpR(ipoint,1,p) + int2_grad1_u12(ipoint,1,i,p) * mos_r_in_r(ipoint,i) + tmpR(ipoint,2,p) = tmpR(ipoint,2,p) + int2_grad1_u12(ipoint,2,i,p) * mos_r_in_r(ipoint,i) + tmpR(ipoint,3,p) = tmpR(ipoint,3,p) + int2_grad1_u12(ipoint,3,i,p) * mos_r_in_r(ipoint,i) + enddo + enddo + + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + allocate(tmpE(n_grid,8,n_mo)) + allocate(tmpF(n_grid,8,n_mo)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, j, ipoint) & + !$OMP SHARED(ne_b, ne_a, n_grid, n_mo, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, wr1, & + !$OMP tmpL, tmpL0, tmpR, tmpR0, tmpJ, tmpS, tmpE, tmpF) + + !$OMP DO + do p = 1, n_mo + + do ipoint = 1, n_grid + + tmpE(ipoint,1,p) = wr1(ipoint) * mos_l_in_r(ipoint,p) + tmpE(ipoint,2,p) = -2.d0 * (tmpL(ipoint,1,p) * tmpJ(ipoint,1) + tmpL(ipoint,2,p) * tmpJ(ipoint,2) + tmpL(ipoint,3,p) * tmpJ(ipoint,3)) + tmpE(ipoint,3,p) = wr1(ipoint) * tmpL(ipoint,1,p) + tmpE(ipoint,4,p) = wr1(ipoint) * tmpL(ipoint,2,p) + tmpE(ipoint,5,p) = wr1(ipoint) * tmpL(ipoint,3,p) + tmpE(ipoint,6,p) = wr1(ipoint) * tmpL0(ipoint,1,p) + tmpE(ipoint,7,p) = wr1(ipoint) * tmpL0(ipoint,2,p) + tmpE(ipoint,8,p) = wr1(ipoint) * tmpL0(ipoint,3,p) + + tmpF(ipoint,1,p) = -2.d0 * (tmpR(ipoint,1,p) * tmpJ(ipoint,1) + tmpR(ipoint,2,p) * tmpJ(ipoint,2) + tmpR(ipoint,3,p) * tmpJ(ipoint,3)) & + + mos_r_in_r(ipoint,p) * tmpS(ipoint) + tmpF(ipoint,2,p) = wr1(ipoint) * mos_r_in_r(ipoint,p) + tmpF(ipoint,3,p) = tmpR(ipoint,1,p) + tmpF(ipoint,4,p) = tmpR(ipoint,2,p) + tmpF(ipoint,5,p) = tmpR(ipoint,3,p) + tmpF(ipoint,6,p) = tmpR0(ipoint,1,p) + tmpF(ipoint,7,p) = tmpR0(ipoint,2,p) + tmpF(ipoint,8,p) = tmpR0(ipoint,3,p) + enddo + + do i = 1, ne_b + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpE(ipoint,2,p) = tmpE(ipoint,2,p) + mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) ) + + tmpF(ipoint,1,p) = tmpF(ipoint,1,p) + mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) ) + enddo ! ipoint + enddo ! j + enddo ! i + + do i = ne_b+1, ne_a + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpE(ipoint,2,p) = tmpE(ipoint,2,p) + 0.5d0 * mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) ) + tmpE(ipoint,2,p) = tmpE(ipoint,2,p) + 0.5d0 * mos_l_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,p,j) * int2_grad1_u12(ipoint,1,j,i) & + + int2_grad1_u12(ipoint,2,p,j) * int2_grad1_u12(ipoint,2,j,i) & + + int2_grad1_u12(ipoint,3,p,j) * int2_grad1_u12(ipoint,3,j,i) ) + + tmpF(ipoint,1,p) = tmpF(ipoint,1,p) + 0.5d0 * mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) ) + tmpF(ipoint,1,p) = tmpF(ipoint,1,p) + 0.5d0 * mos_r_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,j,i) * int2_grad1_u12(ipoint,1,i,p) & + + int2_grad1_u12(ipoint,2,j,i) * int2_grad1_u12(ipoint,2,i,p) & + + int2_grad1_u12(ipoint,3,j,i) * int2_grad1_u12(ipoint,3,i,p) ) + enddo ! ipoint + enddo ! j + enddo ! i + + do i = ne_b+1, ne_a + do j = ne_b+1, ne_a + do ipoint = 1, n_grid + + tmpE(ipoint,2,p) = tmpE(ipoint,2,p) + 0.5d0 * mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) ) + + tmpF(ipoint,1,p) = tmpF(ipoint,1,p) + 0.5d0 * mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) ) + enddo ! ipoint + enddo ! j + enddo ! i + + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmpL0, tmpL, tmpR0, tmpR, tmpJ, tmpS) + + call dgemm( 'T', 'N', n_mo, n_mo, 8*n_grid, 1.d0 & + , tmpE(1,1,1), 8*n_grid, tmpF(1,1,1), 8*n_grid & + , 1.d0, noL_1e(1,1), n_mo) + + deallocate(tmpE, tmpF) + + endif + + + call wall_time(t1) + write(*,"(A,2X,F15.7)") ' wall time for noL_1e (sec) = ', (t1 - t0) + + return +end + +! --- + +subroutine provide_no_1e_tmp(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r, int2_grad1_u12, & + tmpO, tmpJ, tmpM, tmpS, tmpC, tmpD, tmpL, tmpR, tmpE, tmpF, noL_1e) + + + implicit none + + integer, intent(in) :: n_grid, n_mo + integer, intent(in) :: ne_a, ne_b + double precision, intent(in) :: wr1(n_grid) + double precision, intent(in) :: mos_l_in_r(n_grid,n_mo) + double precision, intent(in) :: mos_r_in_r(n_grid,n_mo) + double precision, intent(in) :: int2_grad1_u12(n_grid,3,n_mo,n_mo) + double precision, intent(out) :: tmpO(n_grid), tmpJ(n_grid,3) + double precision, intent(out) :: tmpM(n_grid,3), tmpS(n_grid) + double precision, intent(out) :: tmpC(n_grid,4,n_mo,n_mo), tmpD(n_grid,4) + double precision, intent(out) :: tmpL(n_grid,3,n_mo), tmpR(n_grid,3,n_mo) + double precision, intent(out) :: tmpE(n_grid,5,n_mo), tmpF(n_grid,5,n_mo) + double precision, intent(out) :: noL_1e(n_mo,n_mo) + + integer :: p, s, i, j, ipoint + double precision :: t0, t1 + double precision, allocatable :: tmpM_priv(:,:), tmpS_priv(:), tmpO_priv(:), tmpJ_priv(:,:) + double precision, allocatable :: tmpL0(:,:,:), tmpR0(:,:,:) + double precision, allocatable :: tmpE_os(:,:,:), tmpF_os(:,:,:) + + + call wall_time(t0) + + + if(ne_a .eq. ne_b) then + + tmpO = 0.d0 + tmpJ = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmpO_priv, tmpJ_priv) & + !$OMP SHARED(ne_b, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpO, tmpJ) + + allocate(tmpO_priv(n_grid), tmpJ_priv(n_grid,3)) + tmpO_priv = 0.d0 + tmpJ_priv = 0.d0 + + !$OMP DO + do i = 1, ne_b + do ipoint = 1, n_grid + tmpO_priv(ipoint) = tmpO_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i) + tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i) + tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmpO = tmpO + tmpO_priv + tmpJ = tmpJ + tmpJ_priv + !$OMP END CRITICAL + + deallocate(tmpO_priv, tmpJ_priv) + !$OMP END PARALLEL + + ! --- + + tmpM = 0.d0 + tmpS = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmpM_priv, tmpS_priv) & + !$OMP SHARED(ne_b, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpM, tmpS) + + allocate(tmpM_priv(n_grid,3), tmpS_priv(n_grid)) + tmpM_priv = 0.d0 + tmpS_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, ne_b + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + + tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmpM = tmpM + tmpM_priv + tmpS = tmpS + tmpS_priv + !$OMP END CRITICAL + + deallocate(tmpM_priv, tmpS_priv) + !$OMP END PARALLEL + + ! --- + + do ipoint = 1, n_grid + + tmpD(ipoint,1) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,1) - tmpM(ipoint,1)) + tmpD(ipoint,2) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,2) - tmpM(ipoint,2)) + tmpD(ipoint,3) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,3) - tmpM(ipoint,3)) + tmpD(ipoint,4) = -wr1(ipoint) * tmpO(ipoint) + + tmpS(ipoint) = 2.d0 * (tmpJ(ipoint,1) * tmpJ(ipoint,1) + tmpJ(ipoint,2) * tmpJ(ipoint,2) + tmpJ(ipoint,3) * tmpJ(ipoint,3)) - tmpS(ipoint) + enddo + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, s, i, ipoint) & + !$OMP SHARED(n_mo, ne_b, n_grid, & + !$OMP int2_grad1_u12, tmpC) + + !$OMP DO COLLAPSE(2) + do s = 1, n_mo + do p = 1, n_mo + + do ipoint = 1, n_grid + tmpC(ipoint,1,p,s) = int2_grad1_u12(ipoint,1,p,s) + tmpC(ipoint,2,p,s) = int2_grad1_u12(ipoint,2,p,s) + tmpC(ipoint,3,p,s) = int2_grad1_u12(ipoint,3,p,s) + enddo + + tmpC(:,4,p,s) = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpC(ipoint,4,p,s) = tmpC(ipoint,4,p,s) + int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s) + enddo + enddo + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + call dgemv( 'T', 4*n_grid, n_mo*n_mo, 2.d0 & + , tmpC(1,1,1,1), size(tmpC, 1) * size(tmpC, 2) & + , tmpD(1,1), 1 & + , 0.d0, noL_1e(1,1), 1) + + ! --- + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, ipoint) & + !$OMP SHARED(ne_b, n_grid, n_mo, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpL, tmpR) + + !$OMP DO + do p = 1, n_mo + + tmpL(:,1:3,p) = 0.d0 + tmpR(:,1:3,p) = 0.d0 + + do i = 1, ne_b + do ipoint = 1, n_grid + + tmpL(ipoint,1,p) = tmpL(ipoint,1,p) + int2_grad1_u12(ipoint,1,p,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,2,p) = tmpL(ipoint,2,p) + int2_grad1_u12(ipoint,2,p,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,3,p) = tmpL(ipoint,3,p) + int2_grad1_u12(ipoint,3,p,i) * mos_l_in_r(ipoint,i) + + tmpR(ipoint,1,p) = tmpR(ipoint,1,p) + int2_grad1_u12(ipoint,1,i,p) * mos_r_in_r(ipoint,i) + tmpR(ipoint,2,p) = tmpR(ipoint,2,p) + int2_grad1_u12(ipoint,2,i,p) * mos_r_in_r(ipoint,i) + tmpR(ipoint,3,p) = tmpR(ipoint,3,p) + int2_grad1_u12(ipoint,3,i,p) * mos_r_in_r(ipoint,i) + enddo + enddo + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, j, ipoint) & + !$OMP SHARED(ne_b, n_grid, n_mo, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, wr1, & + !$OMP tmpL, tmpR, tmpJ, tmpS, tmpE, tmpF) + + !$OMP DO + do p = 1, n_mo + + do ipoint = 1, n_grid + + tmpE(ipoint,1,p) = wr1(ipoint) * mos_l_in_r(ipoint,p) + tmpE(ipoint,2,p) = -2.d0 * (tmpL(ipoint,1,p) * tmpJ(ipoint,1) + tmpL(ipoint,2,p) * tmpJ(ipoint,2) + tmpL(ipoint,3,p) * tmpJ(ipoint,3)) + tmpE(ipoint,3,p) = wr1(ipoint) * tmpL(ipoint,1,p) + tmpE(ipoint,4,p) = wr1(ipoint) * tmpL(ipoint,2,p) + tmpE(ipoint,5,p) = wr1(ipoint) * tmpL(ipoint,3,p) + + tmpF(ipoint,1,p) = -2.d0 * (tmpR(ipoint,1,p) * tmpJ(ipoint,1) + tmpR(ipoint,2,p) * tmpJ(ipoint,2) + tmpR(ipoint,3,p) * tmpJ(ipoint,3)) & + + mos_r_in_r(ipoint,p) * tmpS(ipoint) + tmpF(ipoint,2,p) = wr1(ipoint) * mos_r_in_r(ipoint,p) + tmpF(ipoint,3,p) = tmpR(ipoint,1,p) + tmpF(ipoint,4,p) = tmpR(ipoint,2,p) + tmpF(ipoint,5,p) = tmpR(ipoint,3,p) + enddo + + do i = 1, ne_b + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpE(ipoint,2,p) = tmpE(ipoint,2,p) + mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) ) + + tmpF(ipoint,1,p) = tmpF(ipoint,1,p) + mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) ) + enddo ! ipoint + enddo ! j + enddo ! i + + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', n_mo, n_mo, 5*n_grid, 1.d0 & + , tmpE(1,1,1), 5*n_grid, tmpF(1,1,1), 5*n_grid & + , 1.d0, noL_1e(1,1), n_mo) + + ! --- + + else + + tmpO = 0.d0 + tmpJ = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmpO_priv, tmpJ_priv) & + !$OMP SHARED(ne_b, ne_a, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpO, tmpJ) + + allocate(tmpO_priv(n_grid), tmpJ_priv(n_grid,3)) + tmpO_priv = 0.d0 + tmpJ_priv = 0.d0 + + !$OMP DO + do i = 1, ne_b + do ipoint = 1, n_grid + tmpO_priv(ipoint) = tmpO_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i) + tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i) + tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + tmpO_priv(ipoint) = tmpO_priv(ipoint) + 0.5d0 * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,i) + tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,i) + tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmpO = tmpO + tmpO_priv + tmpJ = tmpJ + tmpJ_priv + !$OMP END CRITICAL + + deallocate(tmpO_priv, tmpJ_priv) + !$OMP END PARALLEL + + ! --- + + tmpM = 0.d0 + tmpS = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmpM_priv, tmpS_priv) & + !$OMP SHARED(ne_b, ne_a, n_grid, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpM, tmpS) + + allocate(tmpM_priv(n_grid,3), tmpS_priv(n_grid)) + tmpM_priv = 0.d0 + tmpS_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, ne_b + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + + tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = ne_b+1, ne_a + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i) + + tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = ne_b+1, ne_a + do j = ne_b+1, ne_a + do ipoint = 1, n_grid + + tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j) + + tmpS_priv(ipoint) = tmpS_priv(ipoint) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) & + + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) & + + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmpM = tmpM + tmpM_priv + tmpS = tmpS + tmpS_priv + !$OMP END CRITICAL + + deallocate(tmpM_priv, tmpS_priv) + !$OMP END PARALLEL + + ! --- + + do ipoint = 1, n_grid + + tmpD(ipoint,1) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,1) - tmpM(ipoint,1)) + tmpD(ipoint,2) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,2) - tmpM(ipoint,2)) + tmpD(ipoint,3) = wr1(ipoint) * (2.d0 * tmpO(ipoint) * tmpJ(ipoint,3) - tmpM(ipoint,3)) + tmpD(ipoint,4) = -wr1(ipoint) * tmpO(ipoint) + + tmpS(ipoint) = 2.d0 * (tmpJ(ipoint,1) * tmpJ(ipoint,1) + tmpJ(ipoint,2) * tmpJ(ipoint,2) + tmpJ(ipoint,3) * tmpJ(ipoint,3)) - tmpS(ipoint) + enddo + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, s, i, ipoint) & + !$OMP SHARED(n_mo, ne_b, n_grid, & + !$OMP ne_a, int2_grad1_u12, tmpC) + + !$OMP DO COLLAPSE(2) + do s = 1, n_mo + do p = 1, n_mo + + do ipoint = 1, n_grid + tmpC(ipoint,1,p,s) = int2_grad1_u12(ipoint,1,p,s) + tmpC(ipoint,2,p,s) = int2_grad1_u12(ipoint,2,p,s) + tmpC(ipoint,3,p,s) = int2_grad1_u12(ipoint,3,p,s) + enddo + + tmpC(:,4,p,s) = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpC(ipoint,4,p,s) = tmpC(ipoint,4,p,s) + int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s) + enddo + enddo + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + tmpC(ipoint,4,p,s) = tmpC(ipoint,4,p,s) + 0.5d0 * int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) & + + 0.5d0 * int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) & + + 0.5d0 * int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s) + enddo + enddo + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + call dgemv( 'T', 4*n_grid, n_mo*n_mo, 2.d0 & + , tmpC(1,1,1,1), size(tmpC, 1) * size(tmpC, 2) & + , tmpD(1,1), 1 & + , 0.d0, noL_1e(1,1), 1) + + ! --- + + allocate(tmpL0(n_grid,3,n_mo)) + allocate(tmpR0(n_grid,3,n_mo)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, ipoint) & + !$OMP SHARED(ne_b, ne_a, n_grid, n_mo, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, tmpL0, tmpR0, tmpL, tmpR) + + !$OMP DO + do p = 1, n_mo + + tmpL0(:,1:3,p) = 0.d0 + tmpR0(:,1:3,p) = 0.d0 + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + + tmpL0(ipoint,1,p) = tmpL0(ipoint,1,p) + 0.5d0 * int2_grad1_u12(ipoint,1,p,i) * mos_l_in_r(ipoint,i) + tmpL0(ipoint,2,p) = tmpL0(ipoint,2,p) + 0.5d0 * int2_grad1_u12(ipoint,2,p,i) * mos_l_in_r(ipoint,i) + tmpL0(ipoint,3,p) = tmpL0(ipoint,3,p) + 0.5d0 * int2_grad1_u12(ipoint,3,p,i) * mos_l_in_r(ipoint,i) + + tmpR0(ipoint,1,p) = tmpR0(ipoint,1,p) + 0.5d0 * int2_grad1_u12(ipoint,1,i,p) * mos_r_in_r(ipoint,i) + tmpR0(ipoint,2,p) = tmpR0(ipoint,2,p) + 0.5d0 * int2_grad1_u12(ipoint,2,i,p) * mos_r_in_r(ipoint,i) + tmpR0(ipoint,3,p) = tmpR0(ipoint,3,p) + 0.5d0 * int2_grad1_u12(ipoint,3,i,p) * mos_r_in_r(ipoint,i) + enddo + enddo + + tmpL(:,1:3,p) = tmpL0(:,1:3,p) + tmpR(:,1:3,p) = tmpR0(:,1:3,p) + do i = 1, ne_b + do ipoint = 1, n_grid + + tmpL(ipoint,1,p) = tmpL(ipoint,1,p) + int2_grad1_u12(ipoint,1,p,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,2,p) = tmpL(ipoint,2,p) + int2_grad1_u12(ipoint,2,p,i) * mos_l_in_r(ipoint,i) + tmpL(ipoint,3,p) = tmpL(ipoint,3,p) + int2_grad1_u12(ipoint,3,p,i) * mos_l_in_r(ipoint,i) + + tmpR(ipoint,1,p) = tmpR(ipoint,1,p) + int2_grad1_u12(ipoint,1,i,p) * mos_r_in_r(ipoint,i) + tmpR(ipoint,2,p) = tmpR(ipoint,2,p) + int2_grad1_u12(ipoint,2,i,p) * mos_r_in_r(ipoint,i) + tmpR(ipoint,3,p) = tmpR(ipoint,3,p) + int2_grad1_u12(ipoint,3,i,p) * mos_r_in_r(ipoint,i) + enddo + enddo + + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + allocate(tmpE_os(n_grid,8,n_mo)) + allocate(tmpF_os(n_grid,8,n_mo)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, j, ipoint) & + !$OMP SHARED(ne_b, ne_a, n_grid, n_mo, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, wr1, & + !$OMP tmpL, tmpL0, tmpR, tmpR0, tmpJ, tmpS, tmpE_os, tmpF_os) + + !$OMP DO + do p = 1, n_mo + + do ipoint = 1, n_grid + + tmpE_os(ipoint,1,p) = wr1(ipoint) * mos_l_in_r(ipoint,p) + tmpE_os(ipoint,2,p) = -2.d0 * (tmpL(ipoint,1,p) * tmpJ(ipoint,1) + tmpL(ipoint,2,p) * tmpJ(ipoint,2) + tmpL(ipoint,3,p) * tmpJ(ipoint,3)) + tmpE_os(ipoint,3,p) = wr1(ipoint) * tmpL(ipoint,1,p) + tmpE_os(ipoint,4,p) = wr1(ipoint) * tmpL(ipoint,2,p) + tmpE_os(ipoint,5,p) = wr1(ipoint) * tmpL(ipoint,3,p) + tmpE_os(ipoint,6,p) = wr1(ipoint) * tmpL0(ipoint,1,p) + tmpE_os(ipoint,7,p) = wr1(ipoint) * tmpL0(ipoint,2,p) + tmpE_os(ipoint,8,p) = wr1(ipoint) * tmpL0(ipoint,3,p) + + tmpF_os(ipoint,1,p) = -2.d0 * (tmpR(ipoint,1,p) * tmpJ(ipoint,1) + tmpR(ipoint,2,p) * tmpJ(ipoint,2) + tmpR(ipoint,3,p) * tmpJ(ipoint,3)) & + + mos_r_in_r(ipoint,p) * tmpS(ipoint) + tmpF_os(ipoint,2,p) = wr1(ipoint) * mos_r_in_r(ipoint,p) + tmpF_os(ipoint,3,p) = tmpR(ipoint,1,p) + tmpF_os(ipoint,4,p) = tmpR(ipoint,2,p) + tmpF_os(ipoint,5,p) = tmpR(ipoint,3,p) + tmpF_os(ipoint,6,p) = tmpR0(ipoint,1,p) + tmpF_os(ipoint,7,p) = tmpR0(ipoint,2,p) + tmpF_os(ipoint,8,p) = tmpR0(ipoint,3,p) + enddo + + do i = 1, ne_b + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpE_os(ipoint,2,p) = tmpE_os(ipoint,2,p) + mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) ) + + tmpF_os(ipoint,1,p) = tmpF_os(ipoint,1,p) + mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) ) + enddo ! ipoint + enddo ! j + enddo ! i + + do i = ne_b+1, ne_a + do j = 1, ne_b + do ipoint = 1, n_grid + + tmpE_os(ipoint,2,p) = tmpE_os(ipoint,2,p) + 0.5d0 * mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) ) + tmpE_os(ipoint,2,p) = tmpE_os(ipoint,2,p) + 0.5d0 * mos_l_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,p,j) * int2_grad1_u12(ipoint,1,j,i) & + + int2_grad1_u12(ipoint,2,p,j) * int2_grad1_u12(ipoint,2,j,i) & + + int2_grad1_u12(ipoint,3,p,j) * int2_grad1_u12(ipoint,3,j,i) ) + + tmpF_os(ipoint,1,p) = tmpF_os(ipoint,1,p) + 0.5d0 * mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) ) + tmpF_os(ipoint,1,p) = tmpF_os(ipoint,1,p) + 0.5d0 * mos_r_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,j,i) * int2_grad1_u12(ipoint,1,i,p) & + + int2_grad1_u12(ipoint,2,j,i) * int2_grad1_u12(ipoint,2,i,p) & + + int2_grad1_u12(ipoint,3,j,i) * int2_grad1_u12(ipoint,3,i,p) ) + enddo ! ipoint + enddo ! j + enddo ! i + + do i = ne_b+1, ne_a + do j = ne_b+1, ne_a + do ipoint = 1, n_grid + + tmpE_os(ipoint,2,p) = tmpE_os(ipoint,2,p) + 0.5d0 * mos_l_in_r(ipoint,j) * ( int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,j) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,j) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,j) ) + + tmpF_os(ipoint,1,p) = tmpF_os(ipoint,1,p) + 0.5d0 * mos_r_in_r(ipoint,i) * ( int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,p) & + + int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,p) & + + int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,p) ) + enddo ! ipoint + enddo ! j + enddo ! i + + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmpL0, tmpR0) + + call dgemm( 'T', 'N', n_mo, n_mo, 8*n_grid, 1.d0 & + , tmpE_os(1,1,1), 8*n_grid, tmpF_os(1,1,1), 8*n_grid & + , 1.d0, noL_1e(1,1), n_mo) + + deallocate(tmpE_os, tmpF_os) + + endif + + + call wall_time(t1) + write(*,"(A,2X,F15.7)") ' wall time for noL_1e (sec) = ', (t1 - t0) + + return +end + +! --- + diff --git a/plugins/local/tc_int/no_2e.irp.f b/plugins/local/tc_int/no_2e.irp.f new file mode 100644 index 000000000..6d31d6a09 --- /dev/null +++ b/plugins/local/tc_int/no_2e.irp.f @@ -0,0 +1,605 @@ + +! --- + +subroutine provide_no_2e(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r, int2_grad1_u12, noL_2e) + + implicit none + + integer, intent(in) :: n_grid, n_mo + integer, intent(in) :: ne_a, ne_b + double precision, intent(in) :: wr1(n_grid) + double precision, intent(in) :: mos_l_in_r(n_grid,n_mo) + double precision, intent(in) :: mos_r_in_r(n_grid,n_mo) + double precision, intent(in) :: int2_grad1_u12(n_grid,3,n_mo,n_mo) + double precision, intent(out) :: noL_2e(n_mo,n_mo,n_mo,n_mo) + + integer :: p, q, s, t, i, ipoint + double precision :: t0, t1 + double precision, allocatable :: tmpO(:), tmpJ(:,:) + double precision, allocatable :: tmpA(:,:,:), tmpB(:,:,:) + double precision, allocatable :: tmpC(:,:,:,:), tmpD(:,:,:,:) + double precision, allocatable :: tmpE(:,:,:,:) + + + call wall_time(t0) + + if(ne_a .eq. ne_b) then + + allocate(tmpO(n_grid), tmpJ(n_grid,3)) + allocate(tmpA(n_grid,3,n_mo), tmpB(n_grid,3,n_mo)) + allocate(tmpC(n_grid,4,n_mo,n_mo), tmpD(n_grid,4,n_mo,n_mo)) + allocate(tmpE(n_mo,n_mo,n_mo,n_mo)) + + tmpO = 0.d0 + tmpJ = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpO(ipoint) = tmpO(ipoint) + wr1(ipoint) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ(ipoint,1) = tmpJ(ipoint,1) + wr1(ipoint) * int2_grad1_u12(ipoint,1,i,i) + tmpJ(ipoint,2) = tmpJ(ipoint,2) + wr1(ipoint) * int2_grad1_u12(ipoint,2,i,i) + tmpJ(ipoint,3) = tmpJ(ipoint,3) + wr1(ipoint) * int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, ipoint) & + !$OMP SHARED(n_mo, ne_b, n_grid, & + !$OMP wr1, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, & + !$OMP tmpA, tmpB) + + !$OMP DO + do p = 1, n_mo + + tmpA(:,:,p) = 0.d0 + tmpB(:,:,p) = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpA(ipoint,1,p) = tmpA(ipoint,1,p) + wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,1,p,i) + tmpA(ipoint,2,p) = tmpA(ipoint,2,p) + wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,2,p,i) + tmpA(ipoint,3,p) = tmpA(ipoint,3,p) + wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,3,p,i) + tmpB(ipoint,1,p) = tmpB(ipoint,1,p) + wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,1,i,p) + tmpB(ipoint,2,p) = tmpB(ipoint,2,p) + wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,2,i,p) + tmpB(ipoint,3,p) = tmpB(ipoint,3,p) + wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,3,i,p) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, s, i, ipoint) & + !$OMP SHARED(n_mo, ne_b, n_grid, & + !$OMP wr1, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, & + !$OMP tmpA, tmpB, tmpO, tmpJ, tmpC, tmpD) + + !$OMP DO COLLAPSE(2) + do s = 1, n_mo + do p = 1, n_mo + + do ipoint = 1, n_grid + + tmpC(ipoint,1,p,s) = mos_r_in_r(ipoint,s) * tmpA(ipoint,1,p) & + + mos_l_in_r(ipoint,p) * tmpB(ipoint,1,s) & + - tmpO(ipoint) * int2_grad1_u12(ipoint,1,p,s) & + - 2.d0 * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) * tmpJ(ipoint,1) + tmpC(ipoint,2,p,s) = mos_r_in_r(ipoint,s) * tmpA(ipoint,2,p) & + + mos_l_in_r(ipoint,p) * tmpB(ipoint,2,s) & + - tmpO(ipoint) * int2_grad1_u12(ipoint,2,p,s) & + - 2.d0 * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) * tmpJ(ipoint,2) + tmpC(ipoint,3,p,s) = mos_r_in_r(ipoint,s) * tmpA(ipoint,3,p) & + + mos_l_in_r(ipoint,p) * tmpB(ipoint,3,s) & + - tmpO(ipoint) * int2_grad1_u12(ipoint,3,p,s) & + - 2.d0 * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) * tmpJ(ipoint,3) + + tmpD(ipoint,1,p,s) = int2_grad1_u12(ipoint,1,p,s) + tmpD(ipoint,2,p,s) = int2_grad1_u12(ipoint,2,p,s) + tmpD(ipoint,3,p,s) = int2_grad1_u12(ipoint,3,p,s) + tmpD(ipoint,4,p,s) = wr1(ipoint) * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) + + enddo ! ipoint + + tmpC(:,4,p,s) = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpC(ipoint,4,p,s) += int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s) + enddo ! ipoint + enddo ! i + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmpO, tmpJ, tmpA, tmpB) + + + call dgemm( 'T', 'N', n_mo*n_mo, n_mo*n_mo, 4*n_grid, 0.5d0 & + , tmpC(1,1,1,1), 4*n_grid, tmpD(1,1,1,1), 4*n_grid & + , 0.d0, tmpE(1,1,1,1), n_mo*n_mo) + + deallocate(tmpC, tmpD) + + call sum_a_at(tmpE, n_mo*n_mo) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(t, s, q, p) & + !$OMP SHARED(n_mo, tmpE, noL_2e) + + !$OMP DO COLLAPSE(3) + do t = 1, n_mo + do s = 1, n_mo + do q = 1, n_mo + do p = 1, n_mo + noL_2e(p,q,s,t) = tmpE(p,s,q,t) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmpE) + + else + + allocate(tmpO(n_grid), tmpJ(n_grid,3)) + allocate(tmpA(n_grid,3,n_mo), tmpB(n_grid,3,n_mo)) + allocate(tmpC(n_grid,4,n_mo,n_mo), tmpD(n_grid,4,n_mo,n_mo)) + allocate(tmpE(n_mo,n_mo,n_mo,n_mo)) + + tmpO = 0.d0 + tmpJ = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpO(ipoint) = tmpO(ipoint) + wr1(ipoint) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ(ipoint,1) = tmpJ(ipoint,1) + wr1(ipoint) * int2_grad1_u12(ipoint,1,i,i) + tmpJ(ipoint,2) = tmpJ(ipoint,2) + wr1(ipoint) * int2_grad1_u12(ipoint,2,i,i) + tmpJ(ipoint,3) = tmpJ(ipoint,3) + wr1(ipoint) * int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + tmpO(ipoint) = tmpO(ipoint) + 0.5d0 * wr1(ipoint) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ(ipoint,1) = tmpJ(ipoint,1) + 0.5d0 * wr1(ipoint) * int2_grad1_u12(ipoint,1,i,i) + tmpJ(ipoint,2) = tmpJ(ipoint,2) + 0.5d0 * wr1(ipoint) * int2_grad1_u12(ipoint,2,i,i) + tmpJ(ipoint,3) = tmpJ(ipoint,3) + 0.5d0 * wr1(ipoint) * int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, ipoint) & + !$OMP SHARED(n_mo, ne_a, ne_b, n_grid, & + !$OMP wr1, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, & + !$OMP tmpA, tmpB) + + !$OMP DO + do p = 1, n_mo + + tmpA(:,:,p) = 0.d0 + tmpB(:,:,p) = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpA(ipoint,1,p) = tmpA(ipoint,1,p) + wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,1,p,i) + tmpA(ipoint,2,p) = tmpA(ipoint,2,p) + wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,2,p,i) + tmpA(ipoint,3,p) = tmpA(ipoint,3,p) + wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,3,p,i) + tmpB(ipoint,1,p) = tmpB(ipoint,1,p) + wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,1,i,p) + tmpB(ipoint,2,p) = tmpB(ipoint,2,p) + wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,2,i,p) + tmpB(ipoint,3,p) = tmpB(ipoint,3,p) + wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,3,i,p) + enddo + enddo + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + tmpA(ipoint,1,p) = tmpA(ipoint,1,p) + 0.5d0 * wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,1,p,i) + tmpA(ipoint,2,p) = tmpA(ipoint,2,p) + 0.5d0 * wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,2,p,i) + tmpA(ipoint,3,p) = tmpA(ipoint,3,p) + 0.5d0 * wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,3,p,i) + tmpB(ipoint,1,p) = tmpB(ipoint,1,p) + 0.5d0 * wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,1,i,p) + tmpB(ipoint,2,p) = tmpB(ipoint,2,p) + 0.5d0 * wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,2,i,p) + tmpB(ipoint,3,p) = tmpB(ipoint,3,p) + 0.5d0 * wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,3,i,p) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, s, i, ipoint) & + !$OMP SHARED(n_mo, ne_a, ne_b, n_grid, & + !$OMP wr1, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, & + !$OMP tmpA, tmpB, tmpO, tmpJ, tmpC, tmpD) + + !$OMP DO COLLAPSE(2) + do s = 1, n_mo + do p = 1, n_mo + + do ipoint = 1, n_grid + + tmpC(ipoint,1,p,s) = mos_r_in_r(ipoint,s) * tmpA(ipoint,1,p) & + + mos_l_in_r(ipoint,p) * tmpB(ipoint,1,s) & + - tmpO(ipoint) * int2_grad1_u12(ipoint,1,p,s) & + - 2.d0 * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) * tmpJ(ipoint,1) + tmpC(ipoint,2,p,s) = mos_r_in_r(ipoint,s) * tmpA(ipoint,2,p) & + + mos_l_in_r(ipoint,p) * tmpB(ipoint,2,s) & + - tmpO(ipoint) * int2_grad1_u12(ipoint,2,p,s) & + - 2.d0 * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) * tmpJ(ipoint,2) + tmpC(ipoint,3,p,s) = mos_r_in_r(ipoint,s) * tmpA(ipoint,3,p) & + + mos_l_in_r(ipoint,p) * tmpB(ipoint,3,s) & + - tmpO(ipoint) * int2_grad1_u12(ipoint,3,p,s) & + - 2.d0 * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) * tmpJ(ipoint,3) + + tmpD(ipoint,1,p,s) = int2_grad1_u12(ipoint,1,p,s) + tmpD(ipoint,2,p,s) = int2_grad1_u12(ipoint,2,p,s) + tmpD(ipoint,3,p,s) = int2_grad1_u12(ipoint,3,p,s) + tmpD(ipoint,4,p,s) = wr1(ipoint) * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) + + enddo ! ipoint + + tmpC(:,4,p,s) = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpC(ipoint,4,p,s) += int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s) + enddo ! ipoint + enddo ! i + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + tmpC(ipoint,4,p,s) += 0.5d0 * int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) & + + 0.5d0 * int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) & + + 0.5d0 * int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s) + enddo ! ipoint + enddo ! i + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmpO, tmpJ, tmpA, tmpB) + + + call dgemm( 'T', 'N', n_mo*n_mo, n_mo*n_mo, 4*n_grid, 0.5d0 & + , tmpC(1,1,1,1), 4*n_grid, tmpD(1,1,1,1), 4*n_grid & + , 0.d0, tmpE(1,1,1,1), n_mo*n_mo) + + deallocate(tmpC, tmpD) + + call sum_a_at(tmpE, n_mo*n_mo) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(t, s, q, p) & + !$OMP SHARED(n_mo, tmpE, noL_2e) + + !$OMP DO COLLAPSE(3) + do t = 1, n_mo + do s = 1, n_mo + do q = 1, n_mo + do p = 1, n_mo + noL_2e(p,q,s,t) = tmpE(p,s,q,t) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmpE) + + endif + + call wall_time(t1) + write(*,"(A,2X,F15.7)") ' wall time for noL_2e (sec) = ', (t1 - t0) + + return +end + +! --- + +subroutine provide_no_2e_tmp(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r, int2_grad1_u12, & + tmpO, tmpJ, tmpA, tmpB, tmpC, tmpD, tmpE, noL_2e) + + implicit none + + integer, intent(in) :: n_grid, n_mo + integer, intent(in) :: ne_a, ne_b + double precision, intent(in) :: wr1(n_grid) + double precision, intent(in) :: mos_l_in_r(n_grid,n_mo) + double precision, intent(in) :: mos_r_in_r(n_grid,n_mo) + double precision, intent(in) :: int2_grad1_u12(n_grid,3,n_mo,n_mo) + double precision, intent(out) :: tmpO(n_grid), tmpJ(n_grid,3) + double precision, intent(out) :: tmpA(n_grid,3,n_mo), tmpB(n_grid,3,n_mo) + double precision, intent(out) :: tmpC(n_grid,4,n_mo,n_mo), tmpD(n_grid,4,n_mo,n_mo) + double precision, intent(out) :: tmpE(n_mo,n_mo,n_mo,n_mo) + double precision, intent(out) :: noL_2e(n_mo,n_mo,n_mo,n_mo) + + integer :: p, q, s, t, i, ipoint + double precision :: t0, t1 + + + call wall_time(t0) + + if(ne_a .eq. ne_b) then + + tmpO = 0.d0 + tmpJ = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpO(ipoint) = tmpO(ipoint) + wr1(ipoint) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ(ipoint,1) = tmpJ(ipoint,1) + wr1(ipoint) * int2_grad1_u12(ipoint,1,i,i) + tmpJ(ipoint,2) = tmpJ(ipoint,2) + wr1(ipoint) * int2_grad1_u12(ipoint,2,i,i) + tmpJ(ipoint,3) = tmpJ(ipoint,3) + wr1(ipoint) * int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, ipoint) & + !$OMP SHARED(n_mo, ne_b, n_grid, & + !$OMP wr1, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, & + !$OMP tmpA, tmpB) + + !$OMP DO + do p = 1, n_mo + + tmpA(:,:,p) = 0.d0 + tmpB(:,:,p) = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpA(ipoint,1,p) = tmpA(ipoint,1,p) + wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,1,p,i) + tmpA(ipoint,2,p) = tmpA(ipoint,2,p) + wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,2,p,i) + tmpA(ipoint,3,p) = tmpA(ipoint,3,p) + wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,3,p,i) + tmpB(ipoint,1,p) = tmpB(ipoint,1,p) + wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,1,i,p) + tmpB(ipoint,2,p) = tmpB(ipoint,2,p) + wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,2,i,p) + tmpB(ipoint,3,p) = tmpB(ipoint,3,p) + wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,3,i,p) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, s, i, ipoint) & + !$OMP SHARED(n_mo, ne_b, n_grid, & + !$OMP wr1, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, & + !$OMP tmpA, tmpB, tmpO, tmpJ, tmpC, tmpD) + + !$OMP DO COLLAPSE(2) + do s = 1, n_mo + do p = 1, n_mo + + do ipoint = 1, n_grid + + tmpC(ipoint,1,p,s) = mos_r_in_r(ipoint,s) * tmpA(ipoint,1,p) & + + mos_l_in_r(ipoint,p) * tmpB(ipoint,1,s) & + - tmpO(ipoint) * int2_grad1_u12(ipoint,1,p,s) & + - 2.d0 * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) * tmpJ(ipoint,1) + tmpC(ipoint,2,p,s) = mos_r_in_r(ipoint,s) * tmpA(ipoint,2,p) & + + mos_l_in_r(ipoint,p) * tmpB(ipoint,2,s) & + - tmpO(ipoint) * int2_grad1_u12(ipoint,2,p,s) & + - 2.d0 * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) * tmpJ(ipoint,2) + tmpC(ipoint,3,p,s) = mos_r_in_r(ipoint,s) * tmpA(ipoint,3,p) & + + mos_l_in_r(ipoint,p) * tmpB(ipoint,3,s) & + - tmpO(ipoint) * int2_grad1_u12(ipoint,3,p,s) & + - 2.d0 * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) * tmpJ(ipoint,3) + + tmpD(ipoint,1,p,s) = int2_grad1_u12(ipoint,1,p,s) + tmpD(ipoint,2,p,s) = int2_grad1_u12(ipoint,2,p,s) + tmpD(ipoint,3,p,s) = int2_grad1_u12(ipoint,3,p,s) + tmpD(ipoint,4,p,s) = wr1(ipoint) * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) + + enddo ! ipoint + + tmpC(:,4,p,s) = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpC(ipoint,4,p,s) += int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s) + enddo ! ipoint + enddo ! i + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + + call dgemm( 'T', 'N', n_mo*n_mo, n_mo*n_mo, 4*n_grid, 0.5d0 & + , tmpC(1,1,1,1), 4*n_grid, tmpD(1,1,1,1), 4*n_grid & + , 0.d0, tmpE(1,1,1,1), n_mo*n_mo) + + call sum_a_at(tmpE, n_mo*n_mo) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(t, s, q, p) & + !$OMP SHARED(n_mo, tmpE, noL_2e) + + !$OMP DO COLLAPSE(3) + do t = 1, n_mo + do s = 1, n_mo + do q = 1, n_mo + do p = 1, n_mo + noL_2e(p,q,s,t) = tmpE(p,s,q,t) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + else + + tmpO = 0.d0 + tmpJ = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpO(ipoint) = tmpO(ipoint) + wr1(ipoint) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ(ipoint,1) = tmpJ(ipoint,1) + wr1(ipoint) * int2_grad1_u12(ipoint,1,i,i) + tmpJ(ipoint,2) = tmpJ(ipoint,2) + wr1(ipoint) * int2_grad1_u12(ipoint,2,i,i) + tmpJ(ipoint,3) = tmpJ(ipoint,3) + wr1(ipoint) * int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + tmpO(ipoint) = tmpO(ipoint) + 0.5d0 * wr1(ipoint) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i) + tmpJ(ipoint,1) = tmpJ(ipoint,1) + 0.5d0 * wr1(ipoint) * int2_grad1_u12(ipoint,1,i,i) + tmpJ(ipoint,2) = tmpJ(ipoint,2) + 0.5d0 * wr1(ipoint) * int2_grad1_u12(ipoint,2,i,i) + tmpJ(ipoint,3) = tmpJ(ipoint,3) + 0.5d0 * wr1(ipoint) * int2_grad1_u12(ipoint,3,i,i) + enddo + enddo + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, ipoint) & + !$OMP SHARED(n_mo, ne_a, ne_b, n_grid, & + !$OMP wr1, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, & + !$OMP tmpA, tmpB) + + !$OMP DO + do p = 1, n_mo + + tmpA(:,:,p) = 0.d0 + tmpB(:,:,p) = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpA(ipoint,1,p) = tmpA(ipoint,1,p) + wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,1,p,i) + tmpA(ipoint,2,p) = tmpA(ipoint,2,p) + wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,2,p,i) + tmpA(ipoint,3,p) = tmpA(ipoint,3,p) + wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,3,p,i) + tmpB(ipoint,1,p) = tmpB(ipoint,1,p) + wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,1,i,p) + tmpB(ipoint,2,p) = tmpB(ipoint,2,p) + wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,2,i,p) + tmpB(ipoint,3,p) = tmpB(ipoint,3,p) + wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,3,i,p) + enddo + enddo + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + tmpA(ipoint,1,p) = tmpA(ipoint,1,p) + 0.5d0 * wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,1,p,i) + tmpA(ipoint,2,p) = tmpA(ipoint,2,p) + 0.5d0 * wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,2,p,i) + tmpA(ipoint,3,p) = tmpA(ipoint,3,p) + 0.5d0 * wr1(ipoint) * mos_l_in_r(ipoint,i) * int2_grad1_u12(ipoint,3,p,i) + tmpB(ipoint,1,p) = tmpB(ipoint,1,p) + 0.5d0 * wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,1,i,p) + tmpB(ipoint,2,p) = tmpB(ipoint,2,p) + 0.5d0 * wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,2,i,p) + tmpB(ipoint,3,p) = tmpB(ipoint,3,p) + 0.5d0 * wr1(ipoint) * mos_r_in_r(ipoint,i) * int2_grad1_u12(ipoint,3,i,p) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, s, i, ipoint) & + !$OMP SHARED(n_mo, ne_a, ne_b, n_grid, & + !$OMP wr1, & + !$OMP mos_l_in_r, mos_r_in_r, & + !$OMP int2_grad1_u12, & + !$OMP tmpA, tmpB, tmpO, tmpJ, tmpC, tmpD) + + !$OMP DO COLLAPSE(2) + do s = 1, n_mo + do p = 1, n_mo + + do ipoint = 1, n_grid + + tmpC(ipoint,1,p,s) = mos_r_in_r(ipoint,s) * tmpA(ipoint,1,p) & + + mos_l_in_r(ipoint,p) * tmpB(ipoint,1,s) & + - tmpO(ipoint) * int2_grad1_u12(ipoint,1,p,s) & + - 2.d0 * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) * tmpJ(ipoint,1) + tmpC(ipoint,2,p,s) = mos_r_in_r(ipoint,s) * tmpA(ipoint,2,p) & + + mos_l_in_r(ipoint,p) * tmpB(ipoint,2,s) & + - tmpO(ipoint) * int2_grad1_u12(ipoint,2,p,s) & + - 2.d0 * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) * tmpJ(ipoint,2) + tmpC(ipoint,3,p,s) = mos_r_in_r(ipoint,s) * tmpA(ipoint,3,p) & + + mos_l_in_r(ipoint,p) * tmpB(ipoint,3,s) & + - tmpO(ipoint) * int2_grad1_u12(ipoint,3,p,s) & + - 2.d0 * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) * tmpJ(ipoint,3) + + tmpD(ipoint,1,p,s) = int2_grad1_u12(ipoint,1,p,s) + tmpD(ipoint,2,p,s) = int2_grad1_u12(ipoint,2,p,s) + tmpD(ipoint,3,p,s) = int2_grad1_u12(ipoint,3,p,s) + tmpD(ipoint,4,p,s) = wr1(ipoint) * mos_l_in_r(ipoint,p) * mos_r_in_r(ipoint,s) + + enddo ! ipoint + + tmpC(:,4,p,s) = 0.d0 + do i = 1, ne_b + do ipoint = 1, n_grid + tmpC(ipoint,4,p,s) += int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) & + + int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) & + + int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s) + enddo ! ipoint + enddo ! i + do i = ne_b+1, ne_a + do ipoint = 1, n_grid + tmpC(ipoint,4,p,s) += 0.5d0 * int2_grad1_u12(ipoint,1,p,i) * int2_grad1_u12(ipoint,1,i,s) & + + 0.5d0 * int2_grad1_u12(ipoint,2,p,i) * int2_grad1_u12(ipoint,2,i,s) & + + 0.5d0 * int2_grad1_u12(ipoint,3,p,i) * int2_grad1_u12(ipoint,3,i,s) + enddo ! ipoint + enddo ! i + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + + call dgemm( 'T', 'N', n_mo*n_mo, n_mo*n_mo, 4*n_grid, 0.5d0 & + , tmpC(1,1,1,1), 4*n_grid, tmpD(1,1,1,1), 4*n_grid & + , 0.d0, tmpE(1,1,1,1), n_mo*n_mo) + + call sum_a_at(tmpE, n_mo*n_mo) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(t, s, q, p) & + !$OMP SHARED(n_mo, tmpE, noL_2e) + + !$OMP DO COLLAPSE(3) + do t = 1, n_mo + do s = 1, n_mo + do q = 1, n_mo + do p = 1, n_mo + noL_2e(p,q,s,t) = tmpE(p,s,q,t) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + endif + + call wall_time(t1) + write(*,"(A,2X,F15.7)") ' wall time for noL_2e & tmp tensors (sec) = ', (t1 - t0) + + return +end + +! --- + + diff --git a/plugins/local/tc_int/uninstall b/plugins/local/tc_int/uninstall new file mode 100755 index 000000000..e37a54919 --- /dev/null +++ b/plugins/local/tc_int/uninstall @@ -0,0 +1,13 @@ +#!/bin/bash + +# Check if the QP_ROOT environment variable is set. +if [[ -z ${QP_ROOT} ]] +then + print "The QP_ROOT environment variable is not set." + print "Please reload the quantum_package.rc file." + exit -1 +fi + +rm -rf ${PWD}/CuTC +rm ${QP_ROOT}/lib/libcutcint.so + diff --git a/plugins/local/tc_int/write_tc_int.irp.f b/plugins/local/tc_int/write_tc_int.irp.f new file mode 100644 index 000000000..9f25a6fd9 --- /dev/null +++ b/plugins/local/tc_int/write_tc_int.irp.f @@ -0,0 +1,56 @@ +! --- + +program write_tc_int + + implicit none + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + + call main() + +end + +! --- + +subroutine main() + + implicit none + + PROVIDE io_tc_integ + + print*, 'io_tc_integ = ', io_tc_integ + + if(io_tc_integ .ne. "Write") then + print*, 'io_tc_integ != Write' + print*, io_tc_integ + stop + endif + + call provide_int2_grad1_u12_ao() + + call ezfio_set_tc_keywords_io_tc_integ('Read') + +end + +! --- + diff --git a/plugins/local/tc_int/write_tc_int_cuda.irp.f b/plugins/local/tc_int/write_tc_int_cuda.irp.f new file mode 100644 index 000000000..756630b8f --- /dev/null +++ b/plugins/local/tc_int/write_tc_int_cuda.irp.f @@ -0,0 +1,194 @@ +! --- + +program write_tc_int_cuda + + implicit none + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + + call main() + +end + +! --- + +subroutine main() + + implicit none + + PROVIDE io_tc_integ + + print*, 'io_tc_integ = ', io_tc_integ + + if(io_tc_integ .ne. "Write") then + print*, 'io_tc_integ != Write' + print*, io_tc_integ + stop + endif + + call do_work_on_gpu() + + call ezfio_set_tc_keywords_io_tc_integ('Read') + +end + +! --- + +subroutine do_work_on_gpu() + + use cutc_module + + implicit none + + integer :: k, ipoint + + double precision, allocatable :: rn(:,:), aos_data1(:,:,:), aos_data2(:,:,:) + double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:) + double precision, allocatable :: int_2e_ao(:,:,:,:) + + double precision :: time0, time1 + double precision :: cuda_time0, cuda_time1 + + call wall_time(time0) + print*, ' start calculation of TC-integrals' + + allocate(rn(3,nucl_num)) + allocate(aos_data1(n_points_final_grid,ao_num,4)) + allocate(aos_data2(n_points_extra_final_grid,ao_num,4)) + allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,3)) + allocate(int_2e_ao(ao_num,ao_num,ao_num,ao_num)) + + + do k = 1, nucl_num + rn(1,k) = nucl_coord(k,1) + rn(2,k) = nucl_coord(k,2) + rn(3,k) = nucl_coord(k,3) + enddo + + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + aos_data1(ipoint,k,1) = aos_in_r_array(k,ipoint) + aos_data1(ipoint,k,2) = aos_grad_in_r_array(k,ipoint,1) + aos_data1(ipoint,k,3) = aos_grad_in_r_array(k,ipoint,2) + aos_data1(ipoint,k,4) = aos_grad_in_r_array(k,ipoint,3) + enddo + + do ipoint = 1, n_points_extra_final_grid + aos_data2(ipoint,k,1) = aos_in_r_array_extra(k,ipoint) + aos_data2(ipoint,k,2) = aos_grad_in_r_array_extra(k,ipoint,1) + aos_data2(ipoint,k,3) = aos_grad_in_r_array_extra(k,ipoint,2) + aos_data2(ipoint,k,4) = aos_grad_in_r_array_extra(k,ipoint,3) + enddo + enddo + + ! --- + + integer :: nB + integer :: sB + + PROVIDE nxBlocks nyBlocks nzBlocks + PROVIDE blockxSize blockySize blockzSize + + sB = 32 + nB = (n_points_final_grid + sB - 1) / sB + + call ezfio_set_tc_int_blockxSize(sB) + call ezfio_set_tc_int_nxBlocks(nB) + + + + call wall_time(cuda_time0) + print*, ' start CUDA kernel' + + call cutc_int(nxBlocks, nyBlocks, nzBlocks, blockxSize, blockySize, blockzSize, & + n_points_final_grid, n_points_extra_final_grid, ao_num, nucl_num, jBH_size, & + final_grid_points, final_weight_at_r_vector, & + final_grid_points_extra, final_weight_at_r_vector_extra, & + rn, aos_data1, aos_data2, jBH_c, jBH_m, jBH_n, jBH_o, & + int2_grad1_u12_ao, int_2e_ao) + + call wall_time(cuda_time1) + print*, ' wall time for CUDA kernel (min) = ', (cuda_time1-cuda_time0) / 60.d0 + + deallocate(aos_data1, aos_data2) + + ! --- + + integer :: i, j, l + double precision :: t1, t2 + double precision :: tmp + double precision, external :: get_ao_two_e_integral + + call wall_time(t1) + + PROVIDE ao_integrals_map + tmp = get_ao_two_e_integral(1, 1, 1, 1, ao_integrals_map) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(ao_num, int_2e_ao, ao_integrals_map) & + !$OMP PRIVATE(i, j, k, l) + !$OMP DO COLLAPSE(3) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ! < 1:i, 2:j | 1:k, 2:l > + int_2e_ao(k,i,l,j) = int_2e_ao(k,i,l,j) + get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(t2) + print*, ' wall time of Coulomb part of tc_int_2e_ao (min) ', (t2 - t1) / 60.d0 + + ! --- + + print*, ' Writing int2_grad1_u12_ao in ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao' + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao + close(11) + deallocate(int2_grad1_u12_ao) + + print*, ' Saving tc_int_2e_ao in ', trim(ezfio_filename) // '/work/ao_two_e_tc_tot' + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/ao_two_e_tc_tot', action="write") + call ezfio_set_work_empty(.False.) + do k = 1, ao_num + write(11) int_2e_ao(:,:,:,k) + enddo + close(11) + deallocate(int_2e_ao) + + ! ---- + + + call wall_time(time1) + print*, ' wall time for TC-integrals (min) = ', (time1-time0) / 60.d0 + + return +end + +! --- diff --git a/plugins/local/tc_int/write_tc_int_gpu.irp.f b/plugins/local/tc_int/write_tc_int_gpu.irp.f new file mode 100644 index 000000000..c0dd9c908 --- /dev/null +++ b/plugins/local/tc_int/write_tc_int_gpu.irp.f @@ -0,0 +1,56 @@ +! --- + +program write_tc_int_gpu + + implicit none + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + + call main() + +end + +! --- + +subroutine main() + + implicit none + + PROVIDE io_tc_integ + + print*, 'io_tc_integ = ', io_tc_integ + + if(io_tc_integ .ne. "Write") then + print*, 'io_tc_integ != Write' + print*, io_tc_integ + stop + endif + + call provide_int2_grad1_u12_ao_gpu() + + call ezfio_set_tc_keywords_io_tc_integ('Read') + +end + +! --- + diff --git a/plugins/local/tc_keywords/EZFIO.cfg b/plugins/local/tc_keywords/EZFIO.cfg index e07761363..b858fa5b5 100644 --- a/plugins/local/tc_keywords/EZFIO.cfg +++ b/plugins/local/tc_keywords/EZFIO.cfg @@ -14,7 +14,7 @@ default: False type: logical doc: If |true|, three-body terms are included interface: ezfio,provider,ocaml -default: True +default: False [three_e_3_idx_term] type: logical @@ -50,7 +50,7 @@ default: False type: logical doc: If |true|, standard normal-ordering for L (to be used with three_body_h_tc |false|) interface: ezfio,provider,ocaml -default: False +default: True [core_tc_op] type: logical @@ -100,30 +100,12 @@ doc: If |true|, the states are re-ordered to match the input states default: False interface: ezfio,provider,ocaml -[bi_ortho] -type: logical -doc: If |true|, the MO basis is assumed to be bi-orthonormal -interface: ezfio,provider,ocaml -default: True - [symmetric_fock_tc] type: logical doc: If |true|, using F+F^t as Fock TC interface: ezfio,provider,ocaml default: False -[thresh_tcscf] -type: Threshold -doc: Threshold on the convergence of the Hartree Fock energy. -interface: ezfio,provider,ocaml -default: 1.e-8 - -[n_it_tcscf_max] -type: Strictly_positive_int -doc: Maximum number of SCF iterations -interface: ezfio,provider,ocaml -default: 50 - [selection_tc] type: integer doc: if +1: only positive is selected, -1: only negative is selected, :0 both positive and negative @@ -160,30 +142,6 @@ doc: If |true|, maximize the overlap between orthogonalized left- and right eige interface: ezfio,provider,ocaml default: False -[max_dim_diis_tcscf] -type: integer -doc: Maximum size of the DIIS extrapolation procedure -interface: ezfio,provider,ocaml -default: 15 - -[level_shift_tcscf] -type: Positive_float -doc: Energy shift on the virtual MOs to improve TCSCF convergence -interface: ezfio,provider,ocaml -default: 0. - -[tcscf_algorithm] -type: character*(32) -doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS] -interface: ezfio,provider,ocaml -default: DIIS - -[im_thresh_tcscf] -type: Threshold -doc: Thresholds on the Imag part of energy -interface: ezfio,provider,ocaml -default: 1.e-7 - [test_cycle_tc] type: logical doc: If |true|, the integrals of the three-body jastrow are computed with cycles @@ -226,12 +184,6 @@ doc: Read/Write normal_two_body_bi_orth from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None -[debug_tc_pt2] -type: integer -doc: If :: 1 then you compute the TC-PT2 the old way, :: 2 then you check with the new version but without three-body -interface: ezfio,provider,ocaml -default: -1 - [only_spin_tc_right] type: logical doc: If |true|, only the right part of WF is used to compute spin dens @@ -278,10 +230,46 @@ default: 70 type: character*(32) doc: approach used to evaluate TC integrals [ analytic | numeric | semi-analytic ] interface: ezfio,ocaml,provider -default: semi-analytic +default: numeric [minimize_lr_angles] type: logical doc: If |true|, you minimize the angle between the left and right vectors associated to degenerate orbitals interface: ezfio,provider,ocaml default: False + +[thresh_de_tc_angles] +type: Threshold +doc: Thresholds on delta E for changing angles between orbitals +interface: ezfio,provider,ocaml +default: 1.e-6 + +[ao_to_mo_tc_n3] +type: logical +doc: If |true|, memory scale of TC ao -> mo: O(N3) +interface: ezfio,provider,ocaml +default: False + +[tc_save_mem_loops] +type: logical +doc: If |true|, use loops to save memory TC +interface: ezfio,provider,ocaml +default: False + +[tc_save_mem] +type: logical +doc: If |true|, more calc but less mem +interface: ezfio,provider,ocaml +default: False + +[im_thresh_tc] +type: Threshold +doc: Thresholds on the Imag part of TC energy +interface: ezfio,provider,ocaml +default: 1.e-7 + +[transpose_two_e_int] +type: logical +doc: If |true|, you duplicate the two-electron TC integrals with the transpose matrix. Acceleates the PT2. +interface: ezfio,provider,ocaml +default: False diff --git a/plugins/local/tc_progs/NEED b/plugins/local/tc_progs/NEED new file mode 100644 index 000000000..9deb3db48 --- /dev/null +++ b/plugins/local/tc_progs/NEED @@ -0,0 +1 @@ +tc_bi_ortho diff --git a/plugins/local/tc_bi_ortho/print_he_tc_energy.irp.f b/plugins/local/tc_progs/print_he_tc_energy.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_he_tc_energy.irp.f rename to plugins/local/tc_progs/print_he_tc_energy.irp.f diff --git a/plugins/local/tc_bi_ortho/print_tc_dump.irp.f b/plugins/local/tc_progs/print_tc_dump.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_tc_dump.irp.f rename to plugins/local/tc_progs/print_tc_dump.irp.f diff --git a/plugins/local/tc_progs/print_tc_energy.irp.f b/plugins/local/tc_progs/print_tc_energy.irp.f new file mode 100644 index 000000000..979d792bf --- /dev/null +++ b/plugins/local/tc_progs/print_tc_energy.irp.f @@ -0,0 +1,53 @@ +program print_tc_energy + + BEGIN_DOC + ! TODO : Put the documentation of the program here + END_DOC + + implicit none + + read_wf = .True. + touch read_wf + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + if(tc_integ_type .eq. "numeric") then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + endif + + call main() + +end + +! --- + +subroutine main() + + implicit none + + PROVIDE j2e_type + PROVIDE j1e_type + PROVIDE env_type + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + call write_tc_energy() + +end + diff --git a/plugins/local/tc_bi_ortho/print_tc_spin_dens.irp.f b/plugins/local/tc_progs/print_tc_spin_dens.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/print_tc_spin_dens.irp.f rename to plugins/local/tc_progs/print_tc_spin_dens.irp.f diff --git a/plugins/local/tc_bi_ortho/print_tc_var.irp.f b/plugins/local/tc_progs/print_tc_var.irp.f similarity index 70% rename from plugins/local/tc_bi_ortho/print_tc_var.irp.f rename to plugins/local/tc_progs/print_tc_var.irp.f index bec34f18c..6743cd117 100644 --- a/plugins/local/tc_bi_ortho/print_tc_var.irp.f +++ b/plugins/local/tc_progs/print_tc_var.irp.f @@ -6,7 +6,8 @@ program print_tc_var implicit none - print *, 'Hello world' + print *, ' TC VAR is available only for HF REF WF' + print *, ' DO NOT FORGET TO RUN A CISD CALCULATION BEF' my_grid_becke = .True. PROVIDE tc_grid1_a tc_grid1_r @@ -17,7 +18,7 @@ program print_tc_var read_wf = .True. touch read_wf - call write_tc_var() + call write_tc_gs_var_HF() end diff --git a/plugins/local/tc_bi_ortho/print_tc_wf.irp.f b/plugins/local/tc_progs/print_tc_wf.irp.f similarity index 88% rename from plugins/local/tc_bi_ortho/print_tc_wf.irp.f rename to plugins/local/tc_progs/print_tc_wf.irp.f index 2b88bc5bb..3e010e014 100644 --- a/plugins/local/tc_bi_ortho/print_tc_wf.irp.f +++ b/plugins/local/tc_progs/print_tc_wf.irp.f @@ -37,7 +37,7 @@ subroutine write_l_r_wf integer :: i print*,'Writing the left-right wf' do i = 1, N_det - write(i_unit_output,*)i, psi_coef_sorted_tc(i,1)/psi_coef_sorted_tc(i,1) & + write(i_unit_output,'(I8,X,10(F16.10,X))')i, psi_coef_sorted_tc(i,1),psi_coef_sorted_tc(i,1)/psi_coef_sorted_tc(1,1)& , psi_l_coef_sorted_bi_ortho_left(i)/psi_l_coef_sorted_bi_ortho_left(1) & , psi_r_coef_sorted_bi_ortho_right(i)/psi_r_coef_sorted_bi_ortho_right(1) enddo @@ -61,12 +61,12 @@ subroutine routine do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_opt_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) contrib_pt = coef_pt1 * htilde_ij e_pt2 += contrib_pt diff --git a/plugins/local/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f b/plugins/local/tc_progs/save_bitcpsileft_for_qmcchem.irp.f similarity index 91% rename from plugins/local/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f rename to plugins/local/tc_progs/save_bitcpsileft_for_qmcchem.irp.f index efa4aa2c5..ac90f737b 100644 --- a/plugins/local/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f +++ b/plugins/local/tc_progs/save_bitcpsileft_for_qmcchem.irp.f @@ -38,9 +38,9 @@ subroutine main() call ezfio_has_cisd_energy(exists) if(.not.exists) then - call ezfio_has_tc_scf_bitc_energy(exists) + call ezfio_has_tc_scf_tcscf_energy(exists) if(exists) then - call ezfio_get_tc_scf_bitc_energy(e_ref) + call ezfio_get_tc_scf_tcscf_energy(e_ref) endif else @@ -59,7 +59,7 @@ subroutine main() close(iunit) -end subroutine main +end ! -- @@ -89,7 +89,7 @@ subroutine write_lr_spindeterminants() call ezfio_set_spindeterminants_psi_left_coef_matrix_values(buffer) deallocate(buffer) -end subroutine write_lr_spindeterminants +end ! --- diff --git a/plugins/local/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f b/plugins/local/tc_progs/save_tc_bi_ortho_nat.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f rename to plugins/local/tc_progs/save_tc_bi_ortho_nat.irp.f diff --git a/plugins/local/tc_bi_ortho/select_dets_bi_ortho.irp.f b/plugins/local/tc_progs/select_dets_bi_ortho.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/select_dets_bi_ortho.irp.f rename to plugins/local/tc_progs/select_dets_bi_ortho.irp.f diff --git a/plugins/local/tc_bi_ortho/tc_bi_ortho_prop.irp.f b/plugins/local/tc_progs/tc_bi_ortho_prop.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/tc_bi_ortho_prop.irp.f rename to plugins/local/tc_progs/tc_bi_ortho_prop.irp.f diff --git a/plugins/local/tc_bi_ortho/tc_som.irp.f b/plugins/local/tc_progs/tc_som.irp.f similarity index 82% rename from plugins/local/tc_bi_ortho/tc_som.irp.f rename to plugins/local/tc_progs/tc_som.irp.f index 1d11c81b5..6bdcc1f0b 100644 --- a/plugins/local/tc_bi_ortho/tc_som.irp.f +++ b/plugins/local/tc_progs/tc_som.irp.f @@ -49,8 +49,8 @@ subroutine main() U_SOM = 0.d0 do i = 1, N_det if(i == i_HF) cycle - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) - call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) U_SOM += htot_1 * htot_2 enddo U_SOM = 0.5d0 * U_SOM diff --git a/plugins/local/tc_bi_ortho/test_tc_two_rdm.irp.f b/plugins/local/tc_progs/test_tc_two_rdm.irp.f similarity index 100% rename from plugins/local/tc_bi_ortho/test_tc_two_rdm.irp.f rename to plugins/local/tc_progs/test_tc_two_rdm.irp.f diff --git a/plugins/local/tc_scf/EZFIO.cfg b/plugins/local/tc_scf/EZFIO.cfg index 3dfa9a716..e3d243389 100644 --- a/plugins/local/tc_scf/EZFIO.cfg +++ b/plugins/local/tc_scf/EZFIO.cfg @@ -1,6 +1,6 @@ -[bitc_energy] +[tcscf_energy] type: Threshold -doc: Energy bi-tc HF +doc: TC-SCF ENERGY interface: ezfio [converged_tcscf] @@ -9,3 +9,33 @@ doc: If |true|, tc-scf has converged interface: ezfio,provider,ocaml default: False +[max_dim_diis_tcscf] +type: integer +doc: Maximum size of the DIIS extrapolation procedure +interface: ezfio,provider,ocaml +default: 15 + +[level_shift_tcscf] +type: Positive_float +doc: Energy shift on the virtual MOs to improve TCSCF convergence +interface: ezfio,provider,ocaml +default: 0. + +[thresh_tcscf] +type: Threshold +doc: Threshold on the convergence of the Hartree Fock energy. +interface: ezfio,provider,ocaml +default: 1.e-8 + +[n_it_tcscf_max] +type: Strictly_positive_int +doc: Maximum number of SCF iterations +interface: ezfio,provider,ocaml +default: 50 + +[tc_Brillouin_Right] +type: logical +doc: If |true|, impose only right-Brillouin condition +interface: ezfio,provider,ocaml +default: False + diff --git a/plugins/local/tc_scf/combine_lr_tcscf.irp.f b/plugins/local/tc_scf/combine_lr_tcscf.irp.f deleted file mode 100644 index a22614ba3..000000000 --- a/plugins/local/tc_scf/combine_lr_tcscf.irp.f +++ /dev/null @@ -1,75 +0,0 @@ - -! --- - -program combine_lr_tcscf - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - bi_ortho = .True. - touch bi_ortho - - call comb_orbitals() - -end - -! --- - -subroutine comb_orbitals() - - implicit none - integer :: i, m, n, nn, mm - double precision :: accu_d, accu_nd - double precision, allocatable :: R(:,:), L(:,:), Rnew(:,:), tmp(:,:), S(:,:) - - n = ao_num - m = mo_num - nn = elec_alpha_num - mm = m - nn - - allocate(L(n,m), R(n,m), Rnew(n,m), S(m,m)) - L = mo_l_coef - R = mo_r_coef - - call check_weighted_biorthog(n, m, ao_overlap, L, R, accu_d, accu_nd, S, .true.) - - allocate(tmp(n,nn)) - do i = 1, nn - tmp(1:n,i) = R(1:n,i) - enddo - call impose_weighted_orthog_svd(n, nn, ao_overlap, tmp) - do i = 1, nn - Rnew(1:n,i) = tmp(1:n,i) - enddo - deallocate(tmp) - - allocate(tmp(n,mm)) - do i = 1, mm - tmp(1:n,i) = L(1:n,i+nn) - enddo - call impose_weighted_orthog_svd(n, mm, ao_overlap, tmp) - do i = 1, mm - Rnew(1:n,i+nn) = tmp(1:n,i) - enddo - deallocate(tmp) - - call check_weighted_biorthog(n, m, ao_overlap, Rnew, Rnew, accu_d, accu_nd, S, .true.) - - mo_r_coef = Rnew - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - - deallocate(L, R, Rnew, S) - -end subroutine comb_orbitals - -! --- - diff --git a/plugins/local/tc_scf/diago_vartcfock.irp.f b/plugins/local/tc_scf/diago_vartcfock.irp.f deleted file mode 100644 index 0c881dcb1..000000000 --- a/plugins/local/tc_scf/diago_vartcfock.irp.f +++ /dev/null @@ -1,96 +0,0 @@ - -! --- - -BEGIN_PROVIDER [ double precision, fock_vartc_eigvec_mo, (mo_num, mo_num)] - - implicit none - - integer :: i, j - integer :: liwork, lwork, n, info - integer, allocatable :: iwork(:) - double precision, allocatable :: work(:), F(:,:), F_save(:,:) - double precision, allocatable :: diag(:) - - PROVIDE mo_r_coef - PROVIDE Fock_matrix_vartc_mo_tot - - allocate( F(mo_num,mo_num), F_save(mo_num,mo_num) ) - allocate (diag(mo_num) ) - - do j = 1, mo_num - do i = 1, mo_num - F(i,j) = Fock_matrix_vartc_mo_tot(i,j) - enddo - enddo - - ! Insert level shift here - do i = elec_beta_num+1, elec_alpha_num - F(i,i) += 0.5d0 * level_shift_tcscf - enddo - do i = elec_alpha_num+1, mo_num - F(i,i) += level_shift_tcscf - enddo - - n = mo_num - lwork = 1+6*n + 2*n*n - liwork = 3 + 5*n - - allocate(work(lwork)) - allocate(iwork(liwork) ) - - lwork = -1 - liwork = -1 - - F_save = F - call dsyevd('V', 'U', mo_num, F, size(F, 1), diag, work, lwork, iwork, liwork, info) - - if (info /= 0) then - print *, irp_here//' DSYEVD failed : ', info - stop 1 - endif - lwork = int(work(1)) - liwork = iwork(1) - deallocate(iwork) - deallocate(work) - - allocate(work(lwork)) - allocate(iwork(liwork) ) - call dsyevd('V', 'U', mo_num, F, size(F, 1), diag, work, lwork, iwork, liwork, info) - deallocate(iwork) - - if (info /= 0) then - F = F_save - call dsyev('V', 'L', mo_num, F, size(F, 1), diag, work, lwork, info) - - if (info /= 0) then - print *, irp_here//' DSYEV failed : ', info - stop 1 - endif - endif - - do i = 1, mo_num - do j = 1, mo_num - fock_vartc_eigvec_mo(j,i) = F(j,i) - enddo - enddo - - deallocate(work, F, F_save, diag) - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, fock_vartc_eigvec_ao, (ao_num, mo_num)] - - implicit none - - PROVIDE mo_r_coef - - call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0 & - , mo_r_coef, size(mo_r_coef, 1), fock_vartc_eigvec_mo, size(fock_vartc_eigvec_mo, 1) & - , 0.d0, fock_vartc_eigvec_ao, size(fock_vartc_eigvec_ao, 1)) - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/diis_tcscf.irp.f b/plugins/local/tc_scf/diis_tcscf.irp.f index 5d7d6b2e0..ccc8eb151 100644 --- a/plugins/local/tc_scf/diis_tcscf.irp.f +++ b/plugins/local/tc_scf/diis_tcscf.irp.f @@ -91,28 +91,14 @@ double precision, allocatable :: tmp(:,:) double precision, allocatable :: F(:,:) - !print *, ' Providing FQS_SQF_ao ...' - !call wall_time(t0) + PROVIDE Fock_matrix_tc_ao_tot allocate(F(ao_num,ao_num)) - if(var_tc) then - - do i = 1, ao_num - do j = 1, ao_num - F(j,i) = Fock_matrix_vartc_ao_tot(j,i) - enddo + do i = 1, ao_num + do j = 1, ao_num + F(j,i) = Fock_matrix_tc_ao_tot(j,i) enddo - - else - - PROVIDE Fock_matrix_tc_ao_tot - do i = 1, ao_num - do j = 1, ao_num - F(j,i) = Fock_matrix_tc_ao_tot(j,i) - enddo - enddo - - endif + enddo allocate(tmp(ao_num,ao_num)) @@ -140,9 +126,6 @@ deallocate(tmp) deallocate(F) - !call wall_time(t1) - !print *, ' Wall time for FQS_SQF_ao =', t1-t0 - END_PROVIDER ! --- @@ -152,61 +135,13 @@ implicit none double precision :: t0, t1 - !print*, ' Providing FQS_SQF_mo ...' - !call wall_time(t0) - PROVIDE mo_r_coef mo_l_coef PROVIDE FQS_SQF_ao call ao_to_mo_bi_ortho( FQS_SQF_ao, size(FQS_SQF_ao, 1) & , FQS_SQF_mo, size(FQS_SQF_mo, 1) ) - !call wall_time(t1) - !print*, ' Wall time for FQS_SQF_mo =', t1-t0 - END_PROVIDER ! --- -! BEGIN_PROVIDER [ double precision, eigenval_Fock_tc_ao, (ao_num) ] -!&BEGIN_PROVIDER [ double precision, eigenvec_Fock_tc_ao, (ao_num,ao_num) ] -! -! BEGIN_DOC -! ! -! ! Eigenvalues and eigenvectors of the Fock matrix over the ao basis -! ! -! ! F' = X.T x F x X where X = ao_overlap^(-1/2) -! ! -! ! F' x Cr' = Cr' x E ==> F Cr = Cr x E with Cr = X x Cr' -! ! F'.T x Cl' = Cl' x E ==> F.T Cl = Cl x E with Cl = X x Cl' -! ! -! END_DOC -! -! implicit none -! double precision, allocatable :: tmp1(:,:), tmp2(:,:) -! -! ! --- -! ! Fock matrix in orthogonal basis: F' = X.T x F x X -! -! allocate(tmp1(ao_num,ao_num)) -! call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0 & -! , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), S_half_inv, size(S_half_inv, 1) & -! , 0.d0, tmp1, size(tmp1, 1) ) -! -! allocate(tmp2(ao_num,ao_num)) -! call dgemm( 'T', 'N', ao_num, ao_num, ao_num, 1.d0 & -! , S_half_inv, size(S_half_inv, 1), tmp1, size(tmp1, 1) & -! , 0.d0, tmp2, size(tmp2, 1) ) -! -! ! --- -! -! ! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues -! ! TODO -! -! ! Back-transform eigenvectors: C =X.C' -! -!END_PROVIDER - -! --- - -~ diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f deleted file mode 100644 index 0b8838650..000000000 --- a/plugins/local/tc_scf/fock_3e_bi_ortho_cs.irp.f +++ /dev/null @@ -1,280 +0,0 @@ - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] - - implicit none - integer :: a, b, i, j, ipoint - double precision :: ti, tf - double precision :: loc_1, loc_2, loc_3 - double precision, allocatable :: Okappa(:), Jkappa(:,:) - double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) - double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) - double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' PROVIDING fock_3e_uhf_mo_cs ...' - !call wall_time(ti) - - ! --- - - allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) - Jkappa = 0.d0 - Okappa = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa) - - allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) - tmp_omp_d2 = 0.d0 - tmp_omp_d1 = 0.d0 - - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) - Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) - Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) - Okappa(ipoint) += tmp_omp_d1(ipoint) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2, tmp_omp_d1) - - !$OMP END PARALLEL - - ! --- - - allocate(tmp_1(n_points_final_grid,4)) - - do ipoint = 1, n_points_final_grid - - loc_1 = 2.d0 * Okappa(ipoint) - - tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1) - tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2) - tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3) - - tmp_1(ipoint,4) = Okappa(ipoint) - enddo - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_1) - - allocate(tmp_omp_d2(n_points_final_grid,3)) - tmp_omp_d2 = 0.d0 - - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - - tmp_omp_d2(ipoint,1) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) - tmp_omp_d2(ipoint,2) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) - tmp_omp_d2(ipoint,3) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) - tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) - tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - tmp_2(:,4,b,a) = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & - , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) - - deallocate(tmp_1, tmp_2) - - ! --- - - allocate(tmp_3(n_points_final_grid,5,mo_num), tmp_4(n_points_final_grid,5,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, Jkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - tmp_3(:,:,b) = 0.d0 - tmp_4(:,:,b) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,1,b) = -2.d0 * mos_r_in_r_array_transp(ipoint,b) * ( Jkappa(ipoint,1) * Jkappa(ipoint,1) & - + Jkappa(ipoint,2) * Jkappa(ipoint,2) & - + Jkappa(ipoint,3) * Jkappa(ipoint,3) ) - tmp_4(ipoint,5,b) = mos_r_in_r_array_transp(ipoint,b) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP Jkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_2 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - tmp_3(ipoint,5,b) += 2.d0 * loc_1 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - tmp_4(ipoint,1,b) += 2.d0 * loc_2 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = mos_r_in_r_array_transp(ipoint,b) - loc_3 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,5,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - - tmp_4(ipoint,1,b) += ( loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) & - - loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemm( 'T', 'N', mo_num, mo_num, 5*n_points_final_grid, 1.d0 & - , tmp_3(1,1,1), 5*n_points_final_grid & - , tmp_4(1,1,1), 5*n_points_final_grid & - , 1.d0, fock_3e_uhf_mo_cs(1,1), mo_num) - - deallocate(tmp_3, tmp_4) - deallocate(Jkappa, Okappa) - - ! --- - - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_os.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_os.irp.f deleted file mode 100644 index 4bbce7203..000000000 --- a/plugins/local/tc_scf/fock_3e_bi_ortho_os.irp.f +++ /dev/null @@ -1,536 +0,0 @@ - -! --- - - BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a_os, (mo_num, mo_num)] -&BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b_os, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! Open Shell part of the Fock matrix from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - integer :: a, b, i, j, ipoint - double precision :: loc_1, loc_2, loc_3, loc_4 - double precision :: ti, tf - double precision, allocatable :: Okappa(:), Jkappa(:,:), Obarkappa(:), Jbarkappa(:,:) - double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) - double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) - double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' Providing fock_3e_uhf_mo_a_os and fock_3e_uhf_mo_b_os ...' - !call wall_time(ti) - - ! --- - - allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) - allocate(Jbarkappa(n_points_final_grid,3), Obarkappa(n_points_final_grid)) - Jkappa = 0.d0 - Okappa = 0.d0 - Jbarkappa = 0.d0 - Obarkappa = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa, Obarkappa, Jbarkappa) - - allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) - - tmp_omp_d2 = 0.d0 - tmp_omp_d1 = 0.d0 - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) - Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) - Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) - Okappa(ipoint) += tmp_omp_d1(ipoint) - enddo - !$OMP END CRITICAL - - tmp_omp_d2 = 0.d0 - tmp_omp_d1 = 0.d0 - !$OMP DO - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - Jbarkappa(ipoint,1) += tmp_omp_d2(ipoint,1) - Jbarkappa(ipoint,2) += tmp_omp_d2(ipoint,2) - Jbarkappa(ipoint,3) += tmp_omp_d2(ipoint,3) - Obarkappa(ipoint) += tmp_omp_d1(ipoint) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2, tmp_omp_d1) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_1(n_points_final_grid,4)) - - do ipoint = 1, n_points_final_grid - - loc_1 = -2.d0 * Okappa (ipoint) - loc_2 = -2.d0 * Obarkappa(ipoint) - loc_3 = Obarkappa(ipoint) - - tmp_1(ipoint,1) = (loc_1 - loc_3) * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) - tmp_1(ipoint,2) = (loc_1 - loc_3) * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) - tmp_1(ipoint,3) = (loc_1 - loc_3) * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) - - tmp_1(ipoint,4) = Obarkappa(ipoint) - enddo - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, loc_1, loc_2, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_1) - - allocate(tmp_omp_d2(n_points_final_grid,3)) - - tmp_omp_d2 = 0.d0 - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - loc_2 = mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,1,j,i) - tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,2,j,i) - tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) - enddo - !$OMP END CRITICAL - - tmp_omp_d2 = 0.d0 - !$OMP DO COLLAPSE(2) - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - - tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) - tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) - tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) - enddo - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) - tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) - tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - - tmp_2(:,4,b,a) = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,4,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, 1.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & - , 0.d0, fock_3e_uhf_mo_b_os(1,1), 1) - - deallocate(tmp_1, tmp_2) - - ! --- - - allocate(tmp_3(n_points_final_grid,2,mo_num), tmp_4(n_points_final_grid,2,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - tmp_3(:,:,b) = 0.d0 - tmp_4(:,:,b) = 0.d0 - do ipoint = 1, n_points_final_grid - - tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - - loc_1 = -2.0d0 * mos_r_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * (Jkappa(ipoint,1) + 0.25d0 * Jbarkappa(ipoint,1)) & - + Jbarkappa(ipoint,2) * (Jkappa(ipoint,2) + 0.25d0 * Jbarkappa(ipoint,2)) & - + Jbarkappa(ipoint,3) * (Jkappa(ipoint,3) + 0.25d0 * Jbarkappa(ipoint,3)) ) - - tmp_4(ipoint,2,b) = mos_r_in_r_array_transp(ipoint,b) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_2 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,2,b) += loc_1 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmp_4(ipoint,1,b) += loc_2 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_2 = mos_r_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - enddo - enddo - enddo - - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemm( 'T', 'N', mo_num, mo_num, 2*n_points_final_grid, 1.d0 & - , tmp_3(1,1,1), 2*n_points_final_grid & - , tmp_4(1,1,1), 2*n_points_final_grid & - , 1.d0, fock_3e_uhf_mo_b_os(1,1), mo_num) - - deallocate(tmp_3, tmp_4) - - - - - ! --- - - fock_3e_uhf_mo_a_os = fock_3e_uhf_mo_b_os - - allocate(tmp_1(n_points_final_grid,1)) - - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) = Obarkappa(ipoint) + 2.d0 * Okappa(ipoint) - enddo - - allocate(tmp_2(n_points_final_grid,1,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - - tmp_2(:,1,b,a) = 0.d0 - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,1,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemv( 'T', n_points_final_grid, mo_num*mo_num, 1.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & - , 1.d0, fock_3e_uhf_mo_a_os(1,1), 1) - - deallocate(tmp_1, tmp_2) - - ! --- - - allocate(tmp_3(n_points_final_grid,8,mo_num), tmp_4(n_points_final_grid,8,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - tmp_3(:,:,b) = 0.d0 - tmp_4(:,:,b) = 0.d0 - do ipoint = 1, n_points_final_grid - - tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,8,b) = mos_r_in_r_array_transp(ipoint,b) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_2 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - - tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - enddo - enddo - - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_3 = 2.d0 * loc_1 - loc_2 = mos_r_in_r_array_transp(ipoint,i) - loc_4 = 2.d0 * loc_2 - - tmp_3(ipoint,5,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmp_3(ipoint,6,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmp_3(ipoint,7,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - - tmp_3(ipoint,8,b) += loc_3 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmp_4(ipoint,1,b) += loc_4 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - - tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - - tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = mos_r_in_r_array_transp(ipoint,b) - loc_3 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - - tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_3 = mos_r_in_r_array_transp(ipoint,j) - - tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,b,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,b,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - - tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,j,i) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + int2_grad1_u12_bimo_t(ipoint,2,j,i) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + int2_grad1_u12_bimo_t(ipoint,3,j,i) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - enddo - enddo - enddo - - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) - loc_3 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - - tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemm( 'T', 'N', mo_num, mo_num, 8*n_points_final_grid, 1.d0 & - , tmp_3(1,1,1), 8*n_points_final_grid & - , tmp_4(1,1,1), 8*n_points_final_grid & - , 1.d0, fock_3e_uhf_mo_a_os(1,1), mo_num) - - deallocate(tmp_3, tmp_4) - deallocate(Jkappa, Okappa) - - !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_a_os and fock_3e_uhf_mo_b_os =', tf - ti - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f deleted file mode 100644 index 63a1e1625..000000000 --- a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ /dev/null @@ -1,77 +0,0 @@ - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! Fock matrix alpha from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - double precision :: ti, tf - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' Providing fock_3e_uhf_mo_a ...' - !call wall_time(ti) - - ! CLOSED-SHELL PART - PROVIDE fock_3e_uhf_mo_cs - fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs - - if(elec_alpha_num .ne. elec_beta_num) then - - ! OPEN-SHELL PART - PROVIDE fock_3e_uhf_mo_a_os - - fock_3e_uhf_mo_a += fock_3e_uhf_mo_a_os - endif - - !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! Fock matrix beta from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - double precision :: ti, tf - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' Providing and fock_3e_uhf_mo_b ...' - !call wall_time(ti) - - ! CLOSED-SHELL PART - PROVIDE fock_3e_uhf_mo_cs - fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs - - if(elec_alpha_num .ne. elec_beta_num) then - - ! OPEN-SHELL PART - PROVIDE fock_3e_uhf_mo_b_os - - fock_3e_uhf_mo_b += fock_3e_uhf_mo_b_os - endif - - !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_b =', tf - ti - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f b/plugins/local/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f deleted file mode 100644 index 3bf6bd852..000000000 --- a/plugins/local/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f +++ /dev/null @@ -1,490 +0,0 @@ - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs_old, (mo_num, mo_num)] - - implicit none - integer :: a, b, i, j - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf - double precision, allocatable :: tmp(:,:) - - PROVIDE mo_l_coef mo_r_coef - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - - !print *, ' PROVIDING fock_3e_uhf_mo_cs_old ...' - !call wall_time(ti) - - fock_3e_uhf_mo_cs_old = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs_old) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - do j = 1, elec_beta_num - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - 2.d0 * I_bij_aji & - - 2.d0 * I_bij_iaj & - - 2.d0 * I_bij_jia ) - - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_cs_old(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_cs_old =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a_old, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! ALPHA part of the Fock matrix from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - integer :: a, b, i, j, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf - double precision, allocatable :: tmp(:,:) - - PROVIDE mo_l_coef mo_r_coef - PROVIDE fock_3e_uhf_mo_cs - - !print *, ' Providing fock_3e_uhf_mo_a_old ...' - !call wall_time(ti) - - o = elec_beta_num + 1 - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - - PROVIDE fock_3e_uhf_mo_cs_old - fock_3e_uhf_mo_a_old = fock_3e_uhf_mo_cs_old - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a_old) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - ! --- - - do j = o, elec_alpha_num - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - 2.d0 * I_bij_jia ) - - enddo - enddo - - ! --- - - do j = 1, elec_beta_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - 2.d0 * I_bij_iaj & - - I_bij_jia ) - - enddo - enddo - - ! --- - - do j = o, elec_alpha_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - I_bij_jia ) - - enddo - enddo - - ! --- - - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_a_old(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - - !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_a_old =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b_old, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! BETA part of the Fock matrix from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - integer :: a, b, i, j, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf - double precision, allocatable :: tmp(:,:) - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' PROVIDING fock_3e_uhf_mo_b_old ...' - !call wall_time(ti) - - o = elec_beta_num + 1 - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - - PROVIDE fock_3e_uhf_mo_cs_old - fock_3e_uhf_mo_b_old = fock_3e_uhf_mo_cs_old - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_b_old) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - ! --- - - do j = o, elec_alpha_num - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - - I_bij_aji & - - I_bij_iaj ) - - enddo - enddo - - ! --- - - do j = 1, elec_beta_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - - I_bij_aji & - - I_bij_jia ) - - enddo - enddo - - ! --- - - do j = o, elec_alpha_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( I_bij_aij & - - I_bij_aji ) - - enddo - enddo - - ! --- - - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_b_old(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_b_old =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)] - - BEGIN_DOC - ! - ! Equations (B6) and (B7) - ! - ! g <--> gamma - ! d <--> delta - ! e <--> eta - ! k <--> kappa - ! - END_DOC - - implicit none - integer :: g, d, e, k, mu, nu - double precision :: dm_ge_a, dm_ge_b, dm_ge - double precision :: dm_dk_a, dm_dk_b, dm_dk - double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu - double precision :: ti, tf - double precision, allocatable :: f_tmp(:,:) - - !print *, ' PROVIDING fock_3e_uhf_ao_a ...' - !call wall_time(ti) - - fock_3e_uhf_ao_a = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & - !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & - !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a) - - allocate(f_tmp(ao_num,ao_num)) - f_tmp = 0.d0 - - !$OMP DO - do g = 1, ao_num - do e = 1, ao_num - dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) - dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) - dm_ge = dm_ge_a + dm_ge_b - do d = 1, ao_num - do k = 1, ao_num - dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) - dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) - dm_dk = dm_dk_a + dm_dk_b - do mu = 1, ao_num - do nu = 1, ao_num - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) - f_tmp(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & - + dm_ge_a * dm_dk_a * i_mugd_eknu & - + dm_ge_a * dm_dk_a * i_mugd_knue & - - dm_ge_a * dm_dk * i_mugd_enuk & - - dm_ge * dm_dk_a * i_mugd_kenu & - - dm_ge_a * dm_dk_a * i_mugd_nuke & - - dm_ge_b * dm_dk_b * i_mugd_nuke ) - enddo - enddo - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do mu = 1, ao_num - do nu = 1, ao_num - fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu) - enddo - enddo - !$OMP END CRITICAL - - deallocate(f_tmp) - !$OMP END PARALLEL - - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] - - BEGIN_DOC - ! - ! Equations (B6) and (B7) - ! - ! g <--> gamma - ! d <--> delta - ! e <--> eta - ! k <--> kappa - ! - END_DOC - - implicit none - integer :: g, d, e, k, mu, nu - double precision :: dm_ge_a, dm_ge_b, dm_ge - double precision :: dm_dk_a, dm_dk_b, dm_dk - double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu - double precision :: ti, tf - double precision, allocatable :: f_tmp(:,:) - - !print *, ' PROVIDING fock_3e_uhf_ao_b ...' - !call wall_time(ti) - - fock_3e_uhf_ao_b = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & - !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & - !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b) - - allocate(f_tmp(ao_num,ao_num)) - f_tmp = 0.d0 - - !$OMP DO - do g = 1, ao_num - do e = 1, ao_num - dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) - dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) - dm_ge = dm_ge_a + dm_ge_b - do d = 1, ao_num - do k = 1, ao_num - dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) - dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) - dm_dk = dm_dk_a + dm_dk_b - do mu = 1, ao_num - do nu = 1, ao_num - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) - f_tmp(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & - + dm_ge_b * dm_dk_b * i_mugd_eknu & - + dm_ge_b * dm_dk_b * i_mugd_knue & - - dm_ge_b * dm_dk * i_mugd_enuk & - - dm_ge * dm_dk_b * i_mugd_kenu & - - dm_ge_b * dm_dk_b * i_mugd_nuke & - - dm_ge_a * dm_dk_a * i_mugd_nuke ) - enddo - enddo - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do mu = 1, ao_num - do nu = 1, ao_num - fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu) - enddo - enddo - !$OMP END CRITICAL - - deallocate(f_tmp) - !$OMP END PARALLEL - - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/fock_hermit.irp.f b/plugins/local/tc_scf/fock_hermit.irp.f deleted file mode 100644 index 3460157e2..000000000 --- a/plugins/local/tc_scf/fock_hermit.irp.f +++ /dev/null @@ -1,107 +0,0 @@ - -! --- - -BEGIN_PROVIDER [ double precision, good_hermit_tc_fock_mat, (mo_num, mo_num)] - - BEGIN_DOC -! good_hermit_tc_fock_mat = Hermitian Upper triangular Fock matrix -! -! The converged eigenvectors of such matrix yield to orthonormal vectors satisfying the left Brillouin theorem - END_DOC - implicit none - integer :: i, j - - good_hermit_tc_fock_mat = Fock_matrix_tc_mo_tot - do j = 1, mo_num - do i = 1, j-1 - good_hermit_tc_fock_mat(i,j) = Fock_matrix_tc_mo_tot(j,i) - enddo - enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, hermit_average_tc_fock_mat, (mo_num, mo_num)] - - BEGIN_DOC -! hermit_average_tc_fock_mat = (F + F^\dagger)/2 - END_DOC - implicit none - integer :: i, j - - hermit_average_tc_fock_mat = Fock_matrix_tc_mo_tot - do j = 1, mo_num - do i = 1, mo_num - hermit_average_tc_fock_mat(i,j) = 0.5d0 * (Fock_matrix_tc_mo_tot(j,i) + Fock_matrix_tc_mo_tot(i,j)) - enddo - enddo - -END_PROVIDER - - -! --- -BEGIN_PROVIDER [ double precision, grad_hermit] - implicit none - BEGIN_DOC - ! square of gradient of the energy - END_DOC - if(symmetric_fock_tc)then - grad_hermit = grad_hermit_average_tc_fock_mat - else - grad_hermit = grad_good_hermit_tc_fock_mat - endif - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, grad_good_hermit_tc_fock_mat] - implicit none - BEGIN_DOC - ! grad_good_hermit_tc_fock_mat = norm of gradients of the upper triangular TC fock - END_DOC - integer :: i, j - grad_good_hermit_tc_fock_mat = 0.d0 - do i = 1, elec_alpha_num - do j = elec_alpha_num+1, mo_num - grad_good_hermit_tc_fock_mat += dabs(good_hermit_tc_fock_mat(i,j)) - enddo - enddo -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, grad_hermit_average_tc_fock_mat] - implicit none - BEGIN_DOC - ! grad_hermit_average_tc_fock_mat = norm of gradients of the upper triangular TC fock - END_DOC - integer :: i, j - grad_hermit_average_tc_fock_mat = 0.d0 - do i = 1, elec_alpha_num - do j = elec_alpha_num+1, mo_num - grad_hermit_average_tc_fock_mat += dabs(hermit_average_tc_fock_mat(i,j)) - enddo - enddo -END_PROVIDER - - -! --- - -subroutine save_good_hermit_tc_eigvectors() - - implicit none - integer :: sign - character*(64) :: label - logical :: output - - sign = 1 - label = "Canonical" - output = .False. - - if(symmetric_fock_tc)then - call mo_as_eigvectors_of_mo_matrix(hermit_average_tc_fock_mat, mo_num, mo_num, label, sign, output) - else - call mo_as_eigvectors_of_mo_matrix(good_hermit_tc_fock_mat, mo_num, mo_num, label, sign, output) - endif -end subroutine save_good_hermit_tc_eigvectors - -! --- - diff --git a/plugins/local/tc_scf/fock_tc.irp.f b/plugins/local/tc_scf/fock_tc.irp.f index 282f9873b..16bb5c878 100644 --- a/plugins/local/tc_scf/fock_tc.irp.f +++ b/plugins/local/tc_scf/fock_tc.irp.f @@ -1,78 +1,15 @@ -! --- - - BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_alpha, (ao_num, ao_num)] -&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_beta , (ao_num, ao_num)] - - BEGIN_DOC - ! - ! two_e_tc_non_hermit_integral_seq_alpha(k,i) = ON THE AO BASIS - ! - ! where F^tc is the TWO-BODY part of the TC Fock matrix and k,i are AO basis functions - ! - ! works in SEQUENTIAL - END_DOC - - implicit none - integer :: i, j, k, l - double precision :: density, density_a, density_b - double precision :: t0, t1 - - PROVIDE ao_two_e_tc_tot - - !print*, ' providing two_e_tc_non_hermit_integral_seq ...' - !call wall_time(t0) - - two_e_tc_non_hermit_integral_seq_alpha = 0.d0 - two_e_tc_non_hermit_integral_seq_beta = 0.d0 - - do i = 1, ao_num - do k = 1, ao_num - do j = 1, ao_num - do l = 1, ao_num - - density_a = TCSCF_density_matrix_ao_alpha(l,j) - density_b = TCSCF_density_matrix_ao_beta (l,j) - density = density_a + density_b - - !! rho(l,j) * < k l| T | i j> - !two_e_tc_non_hermit_integral_seq_alpha(k,i) += density * ao_two_e_tc_tot(l,j,k,i) - !! rho(l,j) * < k l| T | i j> - !two_e_tc_non_hermit_integral_seq_beta (k,i) += density * ao_two_e_tc_tot(l,j,k,i) - !! rho_a(l,j) * < l k| T | i j> - !two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i) - !! rho_b(l,j) * < l k| T | i j> - !two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i) - - ! rho(l,j) * < k l| T | i j> - two_e_tc_non_hermit_integral_seq_alpha(k,i) += density * ao_two_e_tc_tot(k,i,l,j) - ! rho(l,j) * < k l| T | i j> - two_e_tc_non_hermit_integral_seq_beta (k,i) += density * ao_two_e_tc_tot(k,i,l,j) - ! rho_a(l,j) * < k l| T | j i> - two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i) - ! rho_b(l,j) * < k l| T | j i> - two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i) - - enddo - enddo - enddo - enddo - - !call wall_time(t1) - !print*, ' wall time for two_e_tc_non_hermit_integral_seq after = ', t1 - t0 - -END_PROVIDER ! --- - BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_alpha, (ao_num, ao_num)] -&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_beta , (ao_num, ao_num)] + BEGIN_PROVIDER [ double precision, two_e_tc_integral_alpha, (ao_num, ao_num)] +&BEGIN_PROVIDER [ double precision, two_e_tc_integral_beta , (ao_num, ao_num)] BEGIN_DOC ! - ! two_e_tc_non_hermit_integral_alpha(k,i) = ON THE AO BASIS + ! two_e_tc_integral_alpha(k,i) = ON THE AO BASIS ! - ! where F^tc is the TWO-BODY part of the TC Fock matrix and k,i are AO basis functions + ! where F^tc_2e is the TWO-BODY part of the TC Fock matrix and k,i are AO basis functions ! END_DOC @@ -86,16 +23,13 @@ PROVIDE mo_l_coef mo_r_coef PROVIDE TCSCF_density_matrix_ao_alpha TCSCF_density_matrix_ao_beta - !print*, ' Providing two_e_tc_non_hermit_integral ...' - !call wall_time(t0) - - two_e_tc_non_hermit_integral_alpha = 0.d0 - two_e_tc_non_hermit_integral_beta = 0.d0 + two_e_tc_integral_alpha = 0.d0 + two_e_tc_integral_beta = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli) & !$OMP SHARED (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, & - !$OMP two_e_tc_non_hermit_integral_alpha, two_e_tc_non_hermit_integral_beta) + !$OMP two_e_tc_integral_alpha, two_e_tc_integral_beta) allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num)) tmp_a = 0.d0 @@ -124,8 +58,8 @@ !$OMP CRITICAL do i = 1, ao_num do j = 1, ao_num - two_e_tc_non_hermit_integral_alpha(j,i) += tmp_a(j,i) - two_e_tc_non_hermit_integral_beta (j,i) += tmp_b(j,i) + two_e_tc_integral_alpha(j,i) += tmp_a(j,i) + two_e_tc_integral_beta (j,i) += tmp_b(j,i) enddo enddo !$OMP END CRITICAL @@ -133,9 +67,6 @@ deallocate(tmp_a, tmp_b) !$OMP END PARALLEL - !call wall_time(t1) - !print*, ' Wall time for two_e_tc_non_hermit_integral = ', t1 - t0 - END_PROVIDER ! --- @@ -149,13 +80,7 @@ implicit none double precision :: t0, t1 - !print*, ' Providing Fock_matrix_tc_ao_alpha ...' - !call wall_time(t0) - - Fock_matrix_tc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha - - !call wall_time(t1) - !print*, ' Wall time for Fock_matrix_tc_ao_alpha =', t1-t0 + Fock_matrix_tc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_tc_integral_alpha END_PROVIDER @@ -169,13 +94,13 @@ implicit none - Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_beta + Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot + two_e_tc_integral_beta END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] +BEGIN_PROVIDER [double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num)] BEGIN_DOC ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis @@ -185,31 +110,16 @@ double precision :: t0, t1, tt0, tt1 double precision, allocatable :: tmp(:,:) - !print*, ' Providing Fock_matrix_tc_mo_alpha ...' - !call wall_time(t0) - - if(bi_ortho) then - - PROVIDE mo_l_coef mo_r_coef - - call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & - , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) - - if(three_body_h_tc) then - PROVIDE fock_3e_uhf_mo_a - Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a - endif - - else + PROVIDE mo_l_coef mo_r_coef - call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & - , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) + call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & + , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) + if(three_body_h_tc) then + PROVIDE fock_3e_mo_a + Fock_matrix_tc_mo_alpha += fock_3e_mo_a endif - !call wall_time(t1) - !print*, ' Wall time for Fock_matrix_tc_mo_alpha =', t1-t0 - END_PROVIDER ! --- @@ -223,21 +133,12 @@ implicit none double precision, allocatable :: tmp(:,:) - if(bi_ortho) then - - call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & - , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) - - if(three_body_h_tc) then - PROVIDE fock_3e_uhf_mo_b - Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b - endif - - else - - call ao_to_mo( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & - , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) + call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & + , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) + if(three_body_h_tc) then + PROVIDE fock_3e_mo_b + Fock_matrix_tc_mo_beta += fock_3e_mo_b endif END_PROVIDER @@ -286,20 +187,895 @@ implicit none double precision :: t0, t1 - !print*, ' Providing Fock_matrix_tc_ao_tot ...' - !call wall_time(t0) - PROVIDE mo_l_coef mo_r_coef PROVIDE Fock_matrix_tc_mo_tot call mo_to_ao_bi_ortho( Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) & , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) ) - !call wall_time(t1) - !print*, ' Wall time for Fock_matrix_tc_ao_tot =', t1-t0 - END_PROVIDER ! --- + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_mo_a, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! Fock matrix alpha from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + double precision :: ti, tf + + PROVIDE mo_l_coef mo_r_coef + + ! CLOSED-SHELL PART + PROVIDE fock_3e_mo_cs + fock_3e_mo_a = fock_3e_mo_cs + + if(elec_alpha_num .ne. elec_beta_num) then + + ! OPEN-SHELL PART + PROVIDE fock_3e_mo_a_os + + fock_3e_mo_a += fock_3e_mo_a_os + endif + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_mo_b, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! Fock matrix beta from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + double precision :: ti, tf + + PROVIDE mo_l_coef mo_r_coef + + ! CLOSED-SHELL PART + PROVIDE fock_3e_mo_cs + fock_3e_mo_b = fock_3e_mo_cs + + if(elec_alpha_num .ne. elec_beta_num) then + + ! OPEN-SHELL PART + PROVIDE fock_3e_mo_b_os + + fock_3e_mo_b += fock_3e_mo_b_os + endif + +END_PROVIDER + +! --- + + +! --- + + BEGIN_PROVIDER [double precision, fock_3e_mo_a_os, (mo_num, mo_num)] +&BEGIN_PROVIDER [double precision, fock_3e_mo_b_os, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! Open Shell part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + integer :: a, b, i, j, ipoint + double precision :: loc_1, loc_2, loc_3, loc_4 + double precision :: ti, tf + double precision, allocatable :: Okappa(:), Jkappa(:,:), Obarkappa(:), Jbarkappa(:,:) + double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + + ! --- + + allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) + allocate(Jbarkappa(n_points_final_grid,3), Obarkappa(n_points_final_grid)) + Jkappa = 0.d0 + Okappa = 0.d0 + Jbarkappa = 0.d0 + Obarkappa = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa, Obarkappa, Jbarkappa) + + allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) + + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Okappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + !$OMP DO + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jbarkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jbarkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jbarkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Obarkappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2, tmp_omp_d1) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,4)) + + do ipoint = 1, n_points_final_grid + + loc_1 = -2.d0 * Okappa (ipoint) + loc_2 = -2.d0 * Obarkappa(ipoint) + loc_3 = Obarkappa(ipoint) + + tmp_1(ipoint,1) = (loc_1 - loc_3) * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = (loc_1 - loc_3) * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = (loc_1 - loc_3) * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) + + tmp_1(ipoint,4) = Obarkappa(ipoint) + enddo + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, loc_2, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmp_omp_d2(n_points_final_grid,3)) + + tmp_omp_d2 = 0.d0 + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + loc_2 = mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,1,j,i) + tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,2,j,i) + tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + tmp_omp_d2 = 0.d0 + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + + tmp_2(:,4,b,a) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,4,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_mo_b_os(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,2,mo_num), tmp_4(n_points_final_grid,2,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + loc_1 = -2.0d0 * mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * (Jkappa(ipoint,1) + 0.25d0 * Jbarkappa(ipoint,1)) & + + Jbarkappa(ipoint,2) * (Jkappa(ipoint,2) + 0.25d0 * Jbarkappa(ipoint,2)) & + + Jbarkappa(ipoint,3) * (Jkappa(ipoint,3) + 0.25d0 * Jbarkappa(ipoint,3)) ) + + tmp_4(ipoint,2,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) += loc_1 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,1,b) += loc_2 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_2 = mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 2*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 2*n_points_final_grid & + , tmp_4(1,1,1), 2*n_points_final_grid & + , 1.d0, fock_3e_mo_b_os(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + + ! --- + + fock_3e_mo_a_os = fock_3e_mo_b_os + + allocate(tmp_1(n_points_final_grid,1)) + + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) = Obarkappa(ipoint) + 2.d0 * Okappa(ipoint) + enddo + + allocate(tmp_2(n_points_final_grid,1,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + + tmp_2(:,1,b,a) = 0.d0 + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemv( 'T', n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 1.d0, fock_3e_mo_a_os(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,8,mo_num), tmp_4(n_points_final_grid,8,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,8,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + + tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_3 = 2.d0 * loc_1 + loc_2 = mos_r_in_r_array_transp(ipoint,i) + loc_4 = 2.d0 * loc_2 + + tmp_3(ipoint,5,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,6,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,7,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + + tmp_3(ipoint,8,b) += loc_3 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,1,b) += loc_4 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + + tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_3 = mos_r_in_r_array_transp(ipoint,j) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,b,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,b,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,j,i) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + int2_grad1_u12_bimo_t(ipoint,2,j,i) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + int2_grad1_u12_bimo_t(ipoint,3,j,i) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 8*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 8*n_points_final_grid & + , tmp_4(1,1,1), 8*n_points_final_grid & + , 1.d0, fock_3e_mo_a_os(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + deallocate(Jkappa, Okappa) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_mo_cs, (mo_num, mo_num)] + + implicit none + integer :: a, b, i, j, ipoint + double precision :: ti, tf + double precision :: loc_1, loc_2, loc_3 + double precision, allocatable :: Okappa(:), Jkappa(:,:) + double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:), tmp_22(:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + + ! --- + + allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) + Jkappa = 0.d0 + Okappa = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa) + + allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Okappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2, tmp_omp_d1) + + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,4)) + + do ipoint = 1, n_points_final_grid + loc_1 = 2.d0 * Okappa(ipoint) + tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3) + tmp_1(ipoint,4) = Okappa(ipoint) + enddo + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmp_omp_d2(n_points_final_grid,3)) + tmp_omp_d2 = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_omp_d2(ipoint,1) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmp_omp_d2(ipoint,2) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmp_omp_d2(ipoint,3) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2) + !$OMP END PARALLEL + + ! --- + + if(tc_save_mem) then + + allocate(tmp_22(n_points_final_grid,4,mo_num)) + do a = 1, mo_num + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, a, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_22) + !$OMP DO + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_22(ipoint,1,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_22(ipoint,2,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_22(ipoint,3,b) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + tmp_22(:,4,b) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_22(ipoint,4,b) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemv( 'T', 4*n_points_final_grid, mo_num, -2.d0 & + , tmp_22(1,1,1), size(tmp_22, 1) * size(tmp_22, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_mo_cs(1,a), 1) + enddo + deallocate(tmp_22) + + else + + allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + tmp_2(:,4,b,a) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_mo_cs(1,1), 1) + deallocate(tmp_2) + + endif + + deallocate(tmp_1) + + ! --- + + allocate(tmp_3(n_points_final_grid,5,mo_num), tmp_4(n_points_final_grid,5,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) = -2.d0 * mos_r_in_r_array_transp(ipoint,b) * ( Jkappa(ipoint,1) * Jkappa(ipoint,1) & + + Jkappa(ipoint,2) * Jkappa(ipoint,2) & + + Jkappa(ipoint,3) * Jkappa(ipoint,3) ) + tmp_4(ipoint,5,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmp_3(ipoint,5,b) += 2.d0 * loc_1 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmp_4(ipoint,1,b) += 2.d0 * loc_2 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,5,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) += ( loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) & + - loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 5*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 5*n_points_final_grid & + , tmp_4(1,1,1), 5*n_points_final_grid & + , 1.d0, fock_3e_mo_cs(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + deallocate(Jkappa, Okappa) + + ! --- + +END_PROVIDER + +! --- + diff --git a/plugins/local/tc_scf/fock_tc_mo_tot.irp.f b/plugins/local/tc_scf/fock_tc_mo_tot.irp.f index eb8973fff..fd490af6c 100644 --- a/plugins/local/tc_scf/fock_tc_mo_tot.irp.f +++ b/plugins/local/tc_scf/fock_tc_mo_tot.irp.f @@ -1,4 +1,6 @@ +! --- + BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_tot, (mo_num,mo_num) ] &BEGIN_PROVIDER [ double precision, Fock_matrix_tc_diag_mo_tot, (mo_num)] @@ -23,9 +25,6 @@ integer :: i, j, n double precision :: t0, t1 - !print*, ' Providing Fock_matrix_tc_mo_tot ...' - !call wall_time(t0) - if(elec_alpha_num == elec_beta_num) then PROVIDE Fock_matrix_tc_mo_alpha @@ -133,7 +132,7 @@ enddo endif - if(no_oa_or_av_opt)then + if(no_oa_or_av_opt) then do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_inact_orb @@ -154,12 +153,25 @@ enddo endif - if(.not.bi_ortho .and. three_body_h_tc)then - Fock_matrix_tc_mo_tot += fock_3_mat - endif + if(tc_Brillouin_Right) then - !call wall_time(t1) - !print*, ' Wall time for Fock_matrix_tc_mo_tot =', t1-t0 + double precision, allocatable :: tmp(:,:) + allocate(tmp(mo_num,mo_num)) + + tmp = Fock_matrix_tc_mo_tot + do j = 1, mo_num + do i = 1, j-1 + tmp(i,j) = Fock_matrix_tc_mo_tot(j,i) + enddo + enddo + + Fock_matrix_tc_mo_tot = tmp + deallocate(tmp) + + endif END_PROVIDER +! --- + + diff --git a/plugins/local/tc_scf/fock_three_hermit.irp.f b/plugins/local/tc_scf/fock_three_hermit.irp.f deleted file mode 100644 index 00d47faee..000000000 --- a/plugins/local/tc_scf/fock_three_hermit.irp.f +++ /dev/null @@ -1,771 +0,0 @@ - -! --- - -BEGIN_PROVIDER [ double precision, fock_3_mat, (mo_num, mo_num)] - - implicit none - integer :: i,j - double precision :: contrib - - fock_3_mat = 0.d0 - if(.not.bi_ortho .and. three_body_h_tc) then - - call give_fock_ia_three_e_total(1, 1, contrib) - !! !$OMP PARALLEL & - !! !$OMP DEFAULT (NONE) & - !! !$OMP PRIVATE (i,j,m,integral) & - !! !$OMP SHARED (mo_num,three_body_3_index) - !! !$OMP DO SCHEDULE (guided) COLLAPSE(3) - do i = 1, mo_num - do j = 1, mo_num - call give_fock_ia_three_e_total(j,i,contrib) - fock_3_mat(j,i) = -contrib - enddo - enddo - !else if(bi_ortho.and.three_body_h_tc) then - !! !$OMP END DO - !! !$OMP END PARALLEL - !! do i = 1, mo_num - !! do j = 1, i-1 - !! mat_three(j,i) = mat_three(i,j) - !! enddo - !! enddo - endif - -END_PROVIDER - - -subroutine give_fock_ia_three_e_total(i,a,contrib) - implicit none - BEGIN_DOC -! contrib is the TOTAL (same spins / opposite spins) contribution from the three body term to the Fock operator -! - END_DOC - integer, intent(in) :: i,a - double precision, intent(out) :: contrib - double precision :: int_1, int_2, int_3 - double precision :: mos_i, mos_a, w_ia - double precision :: mos_ia, weight - - integer :: mm, ipoint,k,l - - int_1 = 0.d0 - int_2 = 0.d0 - int_3 = 0.d0 - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - mos_i = mos_in_r_array_transp(ipoint,i) - mos_a = mos_in_r_array_transp(ipoint,a) - mos_ia = mos_a * mos_i - w_ia = x_W_ij_erf_rk(ipoint,mm,i,a) - - int_1 += weight * fock_3_w_kk_sum(ipoint,mm) * (4.d0 * fock_3_rho_beta(ipoint) * w_ia & - + 2.0d0 * mos_ia * fock_3_w_kk_sum(ipoint,mm) & - - 2.0d0 * fock_3_w_ki_mos_k(ipoint,mm,i) * mos_a & - - 2.0d0 * fock_3_w_ki_mos_k(ipoint,mm,a) * mos_i ) - int_2 += weight * (-1.d0) * ( 2.0d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * w_ia & - + 2.0d0 * fock_3_rho_beta(ipoint) * fock_3_w_ki_wk_a(ipoint,mm,i,a) & - + 1.0d0 * mos_ia * fock_3_trace_w_tilde(ipoint,mm) ) - - int_3 += weight * 1.d0 * (fock_3_w_kl_wla_phi_k(ipoint,mm,i) * mos_a + fock_3_w_kl_wla_phi_k(ipoint,mm,a) * mos_i & - +fock_3_w_ki_mos_k(ipoint,mm,i) * fock_3_w_ki_mos_k(ipoint,mm,a) ) - enddo - enddo - contrib = int_1 + int_2 + int_3 - -end - -! --- - -BEGIN_PROVIDER [double precision, diag_three_elem_hf] - - implicit none - integer :: i, j, k, ipoint, mm - double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231 - double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb - double precision, allocatable :: tmp(:) - double precision, allocatable :: tmp_L(:,:), tmp_R(:,:) - double precision, allocatable :: tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:) - double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:) - - PROVIDE mo_l_coef mo_r_coef - - !print *, ' providing diag_three_elem_hf' - - if(.not. three_body_h_tc) then - - if(noL_standard) then - PROVIDE noL_0e - diag_three_elem_hf = noL_0e - else - diag_three_elem_hf = 0.d0 - endif - - else - - if(.not. bi_ortho) then - - ! --- - - one_third = 1.d0/3.d0 - two_third = 2.d0/3.d0 - four_third = 4.d0/3.d0 - diag_three_elem_hf = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do k = 1, elec_beta_num - call give_integrals_3_body(k, j, i, j, i, k, exchange_int_231) - diag_three_elem_hf += two_third * exchange_int_231 - enddo - enddo - enddo - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & - - 2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm) & - - 1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) - contrib *= four_third - contrib += -two_third * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) & - -four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm) - diag_three_elem_hf += weight * contrib - enddo - enddo - - diag_three_elem_hf = - diag_three_elem_hf - - ! --- - - else - - ! ------------ - ! SLOW VERSION - ! ------------ - - !call give_aaa_contrib(integral_aaa) - !call give_aab_contrib(integral_aab) - !call give_abb_contrib(integral_abb) - !call give_bbb_contrib(integral_bbb) - !diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb - - ! ------------ - ! ------------ - - PROVIDE int2_grad1_u12_bimo_t - PROVIDE mos_l_in_r_array_transp - PROVIDE mos_r_in_r_array_transp - - if(elec_alpha_num .eq. elec_beta_num) then - - allocate(tmp(elec_beta_num)) - allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & - !$OMP SHARED(elec_beta_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) - - !$OMP DO - do j = 1, elec_beta_num - - tmp_L = 0.d0 - tmp_R = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - tmp(j) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - enddo ! j - !$OMP END DO - !$OMP END PARALLEL - - diag_three_elem_hf = -2.d0 * sum(tmp) - - deallocate(tmp) - deallocate(tmp_L, tmp_R) - - ! --- - - allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) - tmp_O = 0.d0 - tmp_J = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & - !$OMP SHARED(elec_beta_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) - - allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) - tmp_O_priv = 0.d0 - tmp_J_priv = 0.d0 - - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_O = tmp_O + tmp_O_priv - tmp_J = tmp_J + tmp_J_priv - !$OMP END CRITICAL - - deallocate(tmp_O_priv, tmp_J_priv) - !$OMP END PARALLEL - - allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) - tmp_M = 0.d0 - tmp_S = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & - !$OMP SHARED(elec_beta_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) - - allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) - tmp_M_priv = 0.d0 - tmp_S_priv = 0.d0 - - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_M = tmp_M + tmp_M_priv - tmp_S = tmp_S + tmp_S_priv - !$OMP END CRITICAL - - deallocate(tmp_M_priv, tmp_S_priv) - !$OMP END PARALLEL - - allocate(tmp(n_points_final_grid)) - - do ipoint = 1, n_points_final_grid - - tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) - - tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & - - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & - + tmp_J(ipoint,2) * tmp_M(ipoint,2) & - + tmp_J(ipoint,3) * tmp_M(ipoint,3))) - enddo - - diag_three_elem_hf = diag_three_elem_hf -2.d0 * (sum(tmp)) - - deallocate(tmp) - - else - - allocate(tmp(elec_alpha_num)) - allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) - - !$OMP DO - do j = 1, elec_beta_num - - tmp_L = 0.d0 - tmp_R = 0.d0 - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - tmp_L(ipoint,1) = tmp_L(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - tmp(j) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - enddo ! j - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) - - !$OMP DO - do j = elec_beta_num+1, elec_alpha_num - - tmp_L = 0.d0 - tmp_R = 0.d0 - do i = 1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) - tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) - - tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - - tmp(j) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp(j) = tmp(j) + 0.5d0 * final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) - enddo - enddo ! j - !$OMP END DO - !$OMP END PARALLEL - - diag_three_elem_hf = -2.d0 * sum(tmp) - - deallocate(tmp) - deallocate(tmp_L, tmp_R) - - ! --- - - allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) - tmp_O = 0.d0 - tmp_J = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) - - allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) - tmp_O_priv = 0.d0 - tmp_J_priv = 0.d0 - - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + 0.5d0 * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_O = tmp_O + tmp_O_priv - tmp_J = tmp_J + tmp_J_priv - !$OMP END CRITICAL - - deallocate(tmp_O_priv, tmp_J_priv) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) - tmp_M = 0.d0 - tmp_S = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & - !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) - - allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) - tmp_M_priv = 0.d0 - tmp_S_priv = 0.d0 - - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO COLLAPSE(2) - do i = elec_beta_num+1, elec_alpha_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO COLLAPSE(2) - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - tmp_M = tmp_M + tmp_M_priv - tmp_S = tmp_S + tmp_S_priv - !$OMP END CRITICAL - - deallocate(tmp_M_priv, tmp_S_priv) - !$OMP END PARALLEL - - allocate(tmp(n_points_final_grid)) - - do ipoint = 1, n_points_final_grid - - tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) - - tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & - - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & - + tmp_J(ipoint,2) * tmp_M(ipoint,2) & - + tmp_J(ipoint,3) * tmp_M(ipoint,3))) - enddo - - diag_three_elem_hf = diag_three_elem_hf - 2.d0 * (sum(tmp)) - - deallocate(tmp) - - endif - - - endif - - endif - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, fock_3_mat_a_op_sh, (mo_num, mo_num)] - implicit none - integer :: h,p,i,j - double precision :: direct_int, exch_int, exchange_int_231, exchange_int_312 - double precision :: exchange_int_23, exchange_int_12, exchange_int_13 - - fock_3_mat_a_op_sh = 0.d0 - do h = 1, mo_num - do p = 1, mo_num - !F_a^{ab}(h,p) - do i = 1, elec_beta_num ! beta - do j = elec_beta_num+1, elec_alpha_num ! alpha - call give_integrals_3_body(h,j,i,p,j,i,direct_int) ! - call give_integrals_3_body(h,j,i,j,p,i,exch_int) - fock_3_mat_a_op_sh(h,p) -= direct_int - exch_int - enddo - enddo - !F_a^{aa}(h,p) - do i = 1, elec_beta_num ! alpha - do j = elec_beta_num+1, elec_alpha_num ! alpha - call give_integrals_3_body(h,j,i,p,j,i,direct_int) - call give_integrals_3_body(h,j,i,i,p,j,exchange_int_231) - call give_integrals_3_body(h,j,i,j,i,p,exchange_int_312) - call give_integrals_3_body(h,j,i,p,i,j,exchange_int_23) - call give_integrals_3_body(h,j,i,i,j,p,exchange_int_12) - call give_integrals_3_body(h,j,i,j,p,i,exchange_int_13) - fock_3_mat_a_op_sh(h,p) -= ( direct_int + exchange_int_231 + exchange_int_312 & - - exchange_int_23 & ! i <-> j - - exchange_int_12 & ! p <-> j - - exchange_int_13 )! p <-> i - enddo - enddo - enddo - enddo -! symmetrized -! do p = 1, elec_beta_num -! do h = elec_alpha_num +1, mo_num -! fock_3_mat_a_op_sh(h,p) = fock_3_mat_a_op_sh(p,h) -! enddo -! enddo - -! do h = elec_beta_num+1, elec_alpha_num -! do p = elec_alpha_num +1, mo_num -! !F_a^{bb}(h,p) -! do i = 1, elec_beta_num -! do j = i+1, elec_beta_num -! call give_integrals_3_body(h,j,i,p,j,i,direct_int) -! call give_integrals_3_body(h,j,i,p,i,j,exch_int) -! fock_3_mat_a_op_sh(h,p) -= direct_int - exch_int -! enddo -! enddo -! enddo -! enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_mat_b_op_sh, (mo_num, mo_num)] - implicit none - integer :: h,p,i,j - double precision :: direct_int, exch_int - fock_3_mat_b_op_sh = 0.d0 - do h = 1, elec_beta_num - do p = elec_alpha_num +1, mo_num - !F_b^{aa}(h,p) - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - call give_integrals_3_body(h,j,i,p,j,i,direct_int) - call give_integrals_3_body(h,j,i,p,i,j,exch_int) - fock_3_mat_b_op_sh(h,p) += direct_int - exch_int - enddo - enddo - - !F_b^{ab}(h,p) - do i = elec_beta_num+1, elec_beta_num - do j = 1, elec_beta_num - call give_integrals_3_body(h,j,i,p,j,i,direct_int) - call give_integrals_3_body(h,j,i,j,p,i,exch_int) - fock_3_mat_b_op_sh(h,p) += direct_int - exch_int - enddo - enddo - - enddo - enddo - -END_PROVIDER - - -BEGIN_PROVIDER [ double precision, fock_3_w_kk_sum, (n_points_final_grid,3)] - implicit none - integer :: mm, ipoint,k - double precision :: w_kk - fock_3_w_kk_sum = 0.d0 - do k = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - w_kk = x_W_ij_erf_rk(ipoint,mm,k,k) - fock_3_w_kk_sum(ipoint,mm) += w_kk - enddo - enddo - enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_ki_mos_k, (n_points_final_grid,3,mo_num)] - implicit none - integer :: mm, ipoint,k,i - double precision :: w_ki, mo_k - fock_3_w_ki_mos_k = 0.d0 - do i = 1, mo_num - do k = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - w_ki = x_W_ij_erf_rk(ipoint,mm,k,i) - mo_k = mos_in_r_array(k,ipoint) - fock_3_w_ki_mos_k(ipoint,mm,i) += w_ki * mo_k - enddo - enddo - enddo - enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_kl_w_kl, (n_points_final_grid,3)] - implicit none - integer :: k,j,ipoint,mm - double precision :: w_kj - fock_3_w_kl_w_kl = 0.d0 - do j = 1, elec_beta_num - do k = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - w_kj = x_W_ij_erf_rk(ipoint,mm,k,j) - fock_3_w_kl_w_kl(ipoint,mm) += w_kj * w_kj - enddo - enddo - enddo - enddo - - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_rho_beta, (n_points_final_grid)] - implicit none - integer :: ipoint,k - fock_3_rho_beta = 0.d0 - do ipoint = 1, n_points_final_grid - do k = 1, elec_beta_num - fock_3_rho_beta(ipoint) += mos_in_r_array(k,ipoint) * mos_in_r_array(k,ipoint) - enddo - enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_kl_mo_k_mo_l, (n_points_final_grid,3)] - implicit none - integer :: ipoint,k,l,mm - double precision :: mos_k, mos_l, w_kl - fock_3_w_kl_mo_k_mo_l = 0.d0 - do k = 1, elec_beta_num - do l = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - mos_k = mos_in_r_array_transp(ipoint,k) - mos_l = mos_in_r_array_transp(ipoint,l) - w_kl = x_W_ij_erf_rk(ipoint,mm,l,k) - fock_3_w_kl_mo_k_mo_l(ipoint,mm) += w_kl * mos_k * mos_l - enddo - enddo - enddo - enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_ki_wk_a, (n_points_final_grid,3,mo_num, mo_num)] - implicit none - integer :: ipoint,i,a,k,mm - double precision :: w_ki,w_ka - fock_3_w_ki_wk_a = 0.d0 - do i = 1, mo_num - do a = 1, mo_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - do k = 1, elec_beta_num - w_ki = x_W_ij_erf_rk(ipoint,mm,k,i) - w_ka = x_W_ij_erf_rk(ipoint,mm,k,a) - fock_3_w_ki_wk_a(ipoint,mm,a,i) += w_ki * w_ka - enddo - enddo - enddo - enddo - enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_trace_w_tilde, (n_points_final_grid,3)] - implicit none - integer :: ipoint,k,mm - fock_3_trace_w_tilde = 0.d0 - do k = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - fock_3_trace_w_tilde(ipoint,mm) += fock_3_w_ki_wk_a(ipoint,mm,k,k) - enddo - enddo - enddo - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_3_w_kl_wla_phi_k, (n_points_final_grid,3,mo_num)] - implicit none - integer :: ipoint,a,k,mm,l - double precision :: w_kl,w_la, mo_k - fock_3_w_kl_wla_phi_k = 0.d0 - do a = 1, mo_num - do k = 1, elec_beta_num - do l = 1, elec_beta_num - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - w_kl = x_W_ij_erf_rk(ipoint,mm,l,k) - w_la = x_W_ij_erf_rk(ipoint,mm,l,a) - mo_k = mos_in_r_array_transp(ipoint,k) - fock_3_w_kl_wla_phi_k(ipoint,mm,a) += w_kl * w_la * mo_k - enddo - enddo - enddo - enddo - enddo -END_PROVIDER - - - - - diff --git a/plugins/local/tc_scf/fock_vartc.irp.f b/plugins/local/tc_scf/fock_vartc.irp.f deleted file mode 100644 index 2b4a57e5c..000000000 --- a/plugins/local/tc_scf/fock_vartc.irp.f +++ /dev/null @@ -1,287 +0,0 @@ - -! --- - - BEGIN_PROVIDER [ double precision, two_e_vartc_integral_alpha, (ao_num, ao_num)] -&BEGIN_PROVIDER [ double precision, two_e_vartc_integral_beta , (ao_num, ao_num)] - - implicit none - integer :: i, j, k, l - double precision :: density, density_a, density_b, I_coul, I_kjli - double precision :: t0, t1 - double precision, allocatable :: tmp_a(:,:), tmp_b(:,:) - - two_e_vartc_integral_alpha = 0.d0 - two_e_vartc_integral_beta = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli) & - !$OMP SHARED (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, & - !$OMP two_e_vartc_integral_alpha, two_e_vartc_integral_beta) - - allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num)) - tmp_a = 0.d0 - tmp_b = 0.d0 - - !$OMP DO - do j = 1, ao_num - do l = 1, ao_num - density_a = TCSCF_density_matrix_ao_alpha(l,j) - density_b = TCSCF_density_matrix_ao_beta (l,j) - density = density_a + density_b - do i = 1, ao_num - do k = 1, ao_num - - I_coul = density * ao_two_e_tc_tot(k,i,l,j) - I_kjli = ao_two_e_tc_tot(k,j,l,i) - - tmp_a(k,i) += I_coul - density_a * I_kjli - tmp_b(k,i) += I_coul - density_b * I_kjli - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do i = 1, ao_num - do j = 1, ao_num - two_e_vartc_integral_alpha(j,i) += tmp_a(j,i) - two_e_vartc_integral_beta (j,i) += tmp_b(j,i) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp_a, tmp_b) - !$OMP END PARALLEL - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_ao_alpha, (ao_num, ao_num)] - - implicit none - - Fock_matrix_vartc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_vartc_integral_alpha - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_ao_beta, (ao_num, ao_num)] - - implicit none - - Fock_matrix_vartc_ao_beta = ao_one_e_integrals_tc_tot + two_e_vartc_integral_beta - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_mo_alpha, (mo_num, mo_num) ] - - implicit none - - call ao_to_mo_bi_ortho( Fock_matrix_vartc_ao_alpha, size(Fock_matrix_vartc_ao_alpha, 1) & - , Fock_matrix_vartc_mo_alpha, size(Fock_matrix_vartc_mo_alpha, 1) ) - if(three_body_h_tc) then - Fock_matrix_vartc_mo_alpha += fock_3e_uhf_mo_a - endif - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_mo_beta, (mo_num,mo_num) ] - - implicit none - - call ao_to_mo_bi_ortho( Fock_matrix_vartc_ao_beta, size(Fock_matrix_vartc_ao_beta, 1) & - , Fock_matrix_vartc_mo_beta, size(Fock_matrix_vartc_mo_beta, 1) ) - if(three_body_h_tc) then - Fock_matrix_vartc_mo_beta += fock_3e_uhf_mo_b - endif - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, grad_vartc] - - implicit none - integer :: i, k - double precision :: grad_left, grad_right - - grad_left = 0.d0 - grad_right = 0.d0 - - do i = 1, elec_beta_num ! doc --> SOMO - do k = elec_beta_num+1, elec_alpha_num - grad_left = max(grad_left , dabs(Fock_matrix_vartc_mo_tot(k,i))) - grad_right = max(grad_right, dabs(Fock_matrix_vartc_mo_tot(i,k))) - enddo - enddo - - do i = 1, elec_beta_num ! doc --> virt - do k = elec_alpha_num+1, mo_num - grad_left = max(grad_left , dabs(Fock_matrix_vartc_mo_tot(k,i))) - grad_right = max(grad_right, dabs(Fock_matrix_vartc_mo_tot(i,k))) - enddo - enddo - - do i = elec_beta_num+1, elec_alpha_num ! SOMO --> virt - do k = elec_alpha_num+1, mo_num - grad_left = max(grad_left , dabs(Fock_matrix_vartc_mo_tot(k,i))) - grad_right = max(grad_right, dabs(Fock_matrix_vartc_mo_tot(i,k))) - enddo - enddo - - grad_vartc = grad_left + grad_right - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_ao_tot, (ao_num, ao_num) ] - - implicit none - - call mo_to_ao_bi_ortho( Fock_matrix_vartc_mo_tot, size(Fock_matrix_vartc_mo_tot, 1) & - , Fock_matrix_vartc_ao_tot, size(Fock_matrix_vartc_ao_tot, 1) ) - -END_PROVIDER - -! --- - - BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_mo_tot, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, Fock_matrix_vartc_diag_mo_tot, (mo_num)] - - implicit none - integer :: i, j, n - - if(elec_alpha_num == elec_beta_num) then - Fock_matrix_vartc_mo_tot = Fock_matrix_vartc_mo_alpha - else - - do j = 1, elec_beta_num - ! F-K - do i = 1, elec_beta_num !CC - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))& - - (Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j)) - enddo - ! F+K/2 - do i = elec_beta_num+1, elec_alpha_num !CA - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))& - + 0.5d0*(Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j)) - enddo - ! F - do i = elec_alpha_num+1, mo_num !CV - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j)) - enddo - enddo - - do j = elec_beta_num+1, elec_alpha_num - ! F+K/2 - do i = 1, elec_beta_num !AC - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))& - + 0.5d0*(Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j)) - enddo - ! F - do i = elec_beta_num+1, elec_alpha_num !AA - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j)) - enddo - ! F-K/2 - do i = elec_alpha_num+1, mo_num !AV - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))& - - 0.5d0*(Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j)) - enddo - enddo - - do j = elec_alpha_num+1, mo_num - ! F - do i = 1, elec_beta_num !VC - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j)) - enddo - ! F-K/2 - do i = elec_beta_num+1, elec_alpha_num !VA - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j))& - - 0.5d0*(Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j)) - enddo - ! F+K - do i = elec_alpha_num+1, mo_num !VV - Fock_matrix_vartc_mo_tot(i,j) = 0.5d0*(Fock_matrix_vartc_mo_alpha(i,j)+Fock_matrix_vartc_mo_beta(i,j)) & - + (Fock_matrix_vartc_mo_beta(i,j) - Fock_matrix_vartc_mo_alpha(i,j)) - enddo - enddo - if(three_body_h_tc)then - ! C-O - do j = 1, elec_beta_num - do i = elec_beta_num+1, elec_alpha_num - Fock_matrix_vartc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) - Fock_matrix_vartc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) - enddo - enddo - ! C-V - do j = 1, elec_beta_num - do i = elec_alpha_num+1, mo_num - Fock_matrix_vartc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) - Fock_matrix_vartc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) - enddo - enddo - ! O-V - do j = elec_beta_num+1, elec_alpha_num - do i = elec_alpha_num+1, mo_num - Fock_matrix_vartc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) - Fock_matrix_vartc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) - enddo - enddo - endif - - endif - - do i = 1, mo_num - Fock_matrix_vartc_diag_mo_tot(i) = Fock_matrix_vartc_mo_tot(i,i) - enddo - - if(frozen_orb_scf)then - integer :: iorb, jorb - do i = 1, n_core_orb - iorb = list_core(i) - do j = 1, n_act_orb - jorb = list_act(j) - Fock_matrix_vartc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_vartc_mo_tot(jorb,iorb) = 0.d0 - enddo - enddo - endif - - if(no_oa_or_av_opt)then - do i = 1, n_act_orb - iorb = list_act(i) - do j = 1, n_inact_orb - jorb = list_inact(j) - Fock_matrix_vartc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_vartc_mo_tot(jorb,iorb) = 0.d0 - enddo - do j = 1, n_virt_orb - jorb = list_virt(j) - Fock_matrix_vartc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_vartc_mo_tot(jorb,iorb) = 0.d0 - enddo - do j = 1, n_core_orb - jorb = list_core(j) - Fock_matrix_vartc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_vartc_mo_tot(jorb,iorb) = 0.d0 - enddo - enddo - endif - - !call check_sym(Fock_matrix_vartc_mo_tot, mo_num) - !do i = 1, mo_num - ! write(*,'(100(F15.8, I4))') Fock_matrix_vartc_mo_tot(i,:) - !enddo - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/integrals_in_r_stuff.irp.f b/plugins/local/tc_scf/integrals_in_r_stuff.irp.f deleted file mode 100644 index 3ce85a976..000000000 --- a/plugins/local/tc_scf/integrals_in_r_stuff.irp.f +++ /dev/null @@ -1,391 +0,0 @@ - -! --- - -BEGIN_PROVIDER [ double precision, tc_scf_dm_in_r, (n_points_final_grid) ] - - implicit none - integer :: i, j - - tc_scf_dm_in_r = 0.d0 - do i = 1, n_points_final_grid - do j = 1, elec_beta_num - tc_scf_dm_in_r(i) += mos_r_in_r_array(j,i) * mos_l_in_r_array(j,i) - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, w_sum_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: ipoint, j, xi - - w_sum_in_r = 0.d0 - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - !w_sum_in_r(ipoint,xi) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,j) - w_sum_in_r(ipoint,xi) += x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,j) - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, ww_sum_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: ipoint, j, xi - double precision :: tmp - - ww_sum_in_r = 0.d0 - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - tmp = x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,j) - ww_sum_in_r(ipoint,xi) += tmp * tmp - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_r_in_r, (n_points_final_grid, 3, mo_num)] - - implicit none - integer :: i, j, xi, ipoint - - ! TODO: call lapack - - W1_r_in_r = 0.d0 - do i = 1, mo_num - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_r_in_r(ipoint,xi,i) += mos_r_in_r_array_transp(ipoint,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_l_in_r, (n_points_final_grid, 3, mo_num)] - - implicit none - integer :: i, j, xi, ipoint - - ! TODO: call lapack - - W1_l_in_r = 0.d0 - do i = 1, mo_num - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_l_in_r(ipoint,xi,i) += mos_l_in_r_array_transp(ipoint,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,j) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: j, xi, ipoint - - ! TODO: call lapack - - W1_in_r = 0.d0 - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_in_r(ipoint,xi) += W1_l_in_r(ipoint,xi,j) * mos_r_in_r_array_transp(ipoint,j) - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_diag_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: j, xi, ipoint - - ! TODO: call lapack - - W1_diag_in_r = 0.d0 - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_diag_in_r(ipoint,xi) += mos_r_in_r_array_transp(ipoint,j) * mos_l_in_r_array_transp(ipoint,j) * x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,j) - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, v_sum_in_r, (n_points_final_grid, 3)] - - implicit none - integer :: i, j, xi, ipoint - - ! TODO: call lapack - v_sum_in_r = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - v_sum_in_r(ipoint,xi) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_W1_r_in_r, (n_points_final_grid, 3, mo_num)] - - implicit none - integer :: i, m, xi, ipoint - - ! TODO: call lapack - - W1_W1_r_in_r = 0.d0 - do i = 1, mo_num - do m = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_W1_r_in_r(ipoint,xi,i) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,m,i) * W1_r_in_r(ipoint,xi,m) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, W1_W1_l_in_r, (n_points_final_grid, 3, mo_num)] - - implicit none - integer :: i, j, xi, ipoint - - ! TODO: call lapack - - W1_W1_l_in_r = 0.d0 - do i = 1, mo_num - do j = 1, elec_beta_num - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - W1_W1_l_in_r(ipoint,xi,i) += x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,j) * W1_l_in_r(ipoint,xi,j) - enddo - enddo - enddo - enddo - -END_PROVIDER - -! --- - -subroutine direct_term_imj_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | i m j > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight, tmp - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - !integral += ( mos_l_in_r_array(a,ipoint) * mos_r_in_r_array(i,ipoint) * w_sum_in_r(ipoint,xi) * w_sum_in_r(ipoint,xi) & - ! + 2.d0 * tc_scf_dm_in_r(ipoint) * w_sum_in_r(ipoint,xi) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) ) * weight - - tmp = w_sum_in_r(ipoint,xi) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * tmp * tmp & - + 2.d0 * tc_scf_dm_in_r(ipoint) * tmp * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) & - ) * weight - enddo - enddo - -end - -! --- - -subroutine exch_term_jmi_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | j m i > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi, j - double precision :: weight, tmp - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - tmp = 0.d0 - do j = 1, elec_beta_num - tmp = tmp + x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i) - enddo - - integral += ( mos_l_in_r_array_transp(ipoint,a) * W1_r_in_r(ipoint,xi,i) * w_sum_in_r(ipoint,xi) & - + tc_scf_dm_in_r(ipoint) * tmp & - + mos_r_in_r_array_transp(ipoint,i) * W1_l_in_r(ipoint,xi,a) * w_sum_in_r(ipoint,xi) & - ) * weight - - enddo - enddo - -end - -! --- - -subroutine exch_term_ijm_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | i j m > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * v_sum_in_r(ipoint,xi) & - + 2.d0 * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) * W1_in_r(ipoint,xi) & - ) * weight - - enddo - enddo - -end - -! --- - -subroutine direct_term_ijj_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j = 1, elec_beta_num) < a j j | i j j > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * ww_sum_in_r(ipoint,xi) & - + 2.d0 * W1_diag_in_r(ipoint, xi) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) & - ) * weight - enddo - enddo - -end - -! --- - -subroutine cyclic_term_jim_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | j i m > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * W1_W1_r_in_r(ipoint,xi,i) & - + W1_W1_l_in_r(ipoint,xi,a) * mos_r_in_r_array_transp(ipoint,i) & - + W1_l_in_r(ipoint,xi,a) * W1_r_in_r(ipoint,xi,i) & - ) * weight - - enddo - enddo - -end - -! --- - -subroutine cyclic_term_mji_bi_ortho(a, i, integral) - - BEGIN_DOC - ! computes sum_(j,m = 1, elec_beta_num) < a m j | m j i > with bi ortho mos - END_DOC - - implicit none - integer, intent(in) :: i, a - double precision, intent(out) :: integral - - integer :: ipoint, xi - double precision :: weight - - integral = 0.d0 - do xi = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - - integral += ( mos_l_in_r_array_transp(ipoint,a) * W1_W1_r_in_r(ipoint,xi,i) & - + W1_l_in_r(ipoint,xi,a) * W1_r_in_r(ipoint,xi,i) & - + W1_W1_l_in_r(ipoint,xi,a) * mos_r_in_r_array_transp(ipoint,i) & - ) * weight - - enddo - enddo - -end - -! --- - diff --git a/plugins/local/tc_scf/jast_schmos_90.irp.f b/plugins/local/tc_scf/jast_schmos_90.irp.f deleted file mode 100644 index 5c5e625f9..000000000 --- a/plugins/local/tc_scf/jast_schmos_90.irp.f +++ /dev/null @@ -1,318 +0,0 @@ - BEGIN_PROVIDER [integer , m_max_sm_7] -&BEGIN_PROVIDER [integer , n_max_sm_7] -&BEGIN_PROVIDER [integer , o_max_sm_7] - implicit none - BEGIN_DOC -! maximum value of the "m", "n" and "o" integer in the Jastrow function as in Eq. (4) -! of Schmidt,Moskowitz, JCP, 93, 4172 (1990) for the SM_7 version of Table IV - END_DOC - m_max_sm_7 = 4 - n_max_sm_7 = 0 - o_max_sm_7 = 4 -END_PROVIDER - - BEGIN_PROVIDER [integer , m_max_sm_9] -&BEGIN_PROVIDER [integer , n_max_sm_9] -&BEGIN_PROVIDER [integer , o_max_sm_9] - implicit none - BEGIN_DOC -! maximum value of the "m", "n" and "o" integer in the Jastrow function as in Eq. (4) -! of Schmidt,Moskowitz, JCP, 93, 4172 (1990) for the SM_9 version of Table IV - END_DOC - m_max_sm_9 = 4 - n_max_sm_9 = 2 - o_max_sm_9 = 4 -END_PROVIDER - - - BEGIN_PROVIDER [integer , m_max_sm_17] -&BEGIN_PROVIDER [integer , n_max_sm_17] -&BEGIN_PROVIDER [integer , o_max_sm_17] - implicit none - BEGIN_DOC -! maximum value of the "m", "n" and "o" integer in the Jastrow function as in Eq. (4) -! of Schmidt,Moskowitz, JCP, 93, 4172 (1990) for the SM_17 version of Table IV - END_DOC - m_max_sm_17 = 6 - n_max_sm_17 = 2 - o_max_sm_17 = 6 -END_PROVIDER - - -BEGIN_PROVIDER [ double precision, c_mn_o_sm_7, (0:m_max_sm_7,0:n_max_sm_7,0:o_max_sm_7,2:10)] - implicit none - BEGIN_DOC - ! - !c_mn_o_7(0:4,0:4,2:10) = coefficient for the SM_7 correlation factor as given is Table IV of - ! Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the first index (0:4) is the "m" integer for the 1e part - ! the second index(0:0) is the "n" integer for the 1e part WHICH IS ALWAYS SET TO 0 FOR SM_7 - ! the third index (0:4) is the "o" integer for the 2e part - ! the fourth index (2:10) is the nuclear charge of the atom - END_DOC - c_mn_o_sm_7 = 0.d0 - integer :: i - do i = 2, 10 ! loop over nuclear charge - c_mn_o_sm_7(0,0,1,i) = 0.5d0 ! all the linear terms are set to 1/2 to satisfy the anti-parallel spin condition - enddo - ! He atom - ! two electron terms - c_mn_o_sm_7(0,0,2,2) = 0.50516d0 - c_mn_o_sm_7(0,0,3,2) = -0.19313d0 - c_mn_o_sm_7(0,0,4,2) = 0.30276d0 - ! one-electron terms - c_mn_o_sm_7(2,0,0,2) = -0.16995d0 - c_mn_o_sm_7(3,0,0,2) = -0.34505d0 - c_mn_o_sm_7(4,0,0,2) = -0.54777d0 - ! Ne atom - ! two electron terms - c_mn_o_sm_7(0,0,2,10) = -0.792d0 - c_mn_o_sm_7(0,0,3,10) = 1.05232d0 - c_mn_o_sm_7(0,0,4,10) = -0.65615d0 - ! one-electron terms - c_mn_o_sm_7(2,0,0,10) = -0.13312d0 - c_mn_o_sm_7(3,0,0,10) = -0.00131d0 - c_mn_o_sm_7(4,0,0,10) = 0.09083d0 - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, c_mn_o_sm_9, (0:m_max_sm_9,0:n_max_sm_9,0:o_max_sm_9,2:10)] - implicit none - BEGIN_DOC - ! - !c_mn_o_9(0:4,0:4,2:10) = coefficient for the SM_9 correlation factor as given is Table IV of - ! Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the first index (0:4) is the "m" integer for the 1e part - ! the second index(0:0) is the "n" integer for the 1e part WHICH IS ALWAYS SET TO 0 FOR SM_9 - ! the third index (0:4) is the "o" integer for the 2e part - ! the fourth index (2:10) is the nuclear charge of the atom - END_DOC - c_mn_o_sm_9 = 0.d0 - integer :: i - do i = 2, 10 ! loop over nuclear charge - c_mn_o_sm_9(0,0,1,i) = 0.5d0 ! all the linear terms are set to 1/2 to satisfy the anti-parallel spin condition - enddo - ! He atom - ! two electron terms - c_mn_o_sm_9(0,0,2,2) = 0.50516d0 - c_mn_o_sm_9(0,0,3,2) = -0.19313d0 - c_mn_o_sm_9(0,0,4,2) = 0.30276d0 - ! one-electron terms - c_mn_o_sm_9(2,0,0,2) = -0.16995d0 - c_mn_o_sm_9(3,0,0,2) = -0.34505d0 - c_mn_o_sm_9(4,0,0,2) = -0.54777d0 - ! Ne atom - ! two electron terms - c_mn_o_sm_9(0,0,2,10) = -0.792d0 - c_mn_o_sm_9(0,0,3,10) = 1.05232d0 - c_mn_o_sm_9(0,0,4,10) = -0.65615d0 - ! one-electron terms - c_mn_o_sm_9(2,0,0,10) = -0.13312d0 - c_mn_o_sm_9(3,0,0,10) = -0.00131d0 - c_mn_o_sm_9(4,0,0,10) = 0.09083d0 - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, c_mn_o_sm_17, (0:m_max_sm_17,0:n_max_sm_17,0:o_max_sm_17,2:10)] - implicit none - BEGIN_DOC - ! - !c_mn_o_17(0:4,0:4,2:10) = coefficient for the SM_17 correlation factor as given is Table IV of - ! Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the first index (0:4) is the "m" integer for the 1e part - ! the second index(0:0) is the "n" integer for the 1e part WHICH IS ALWAYS SET TO 0 FOR SM_17 - ! the third index (0:4) is the "o" integer for the 2e part - ! the fourth index (2:10) is the nuclear charge of the atom - END_DOC - c_mn_o_sm_17 = 0.d0 - integer :: i - do i = 2, 10 ! loop over nuclear charge - c_mn_o_sm_17(0,0,1,i) = 0.5d0 ! all the linear terms are set to 1/2 to satisfy the anti-parallel spin condition - enddo - ! He atom - ! two electron terms - c_mn_o_sm_17(0,0,2,2) = 0.09239d0 - c_mn_o_sm_17(0,0,3,2) = -0.38664d0 - c_mn_o_sm_17(0,0,4,2) = 0.95764d0 - ! one-electron terms - c_mn_o_sm_17(2,0,0,2) = 0.23208d0 - c_mn_o_sm_17(3,0,0,2) = -0.45032d0 - c_mn_o_sm_17(4,0,0,2) = 0.82777d0 - c_mn_o_sm_17(2,2,0,2) = -4.15388d0 - ! ee-n terms - c_mn_o_sm_17(2,0,2,2) = 0.80622d0 - c_mn_o_sm_17(2,2,2,2) = 10.19704d0 - c_mn_o_sm_17(4,0,2,2) = -4.96259d0 - c_mn_o_sm_17(2,0,4,2) = -1.35647d0 - c_mn_o_sm_17(4,2,2,2) = -5.90907d0 - c_mn_o_sm_17(6,0,2,2) = 0.90343d0 - c_mn_o_sm_17(4,0,4,2) = 5.50739d0 - c_mn_o_sm_17(2,2,4,2) = -0.03154d0 - c_mn_o_sm_17(2,0,6,2) = -1.1051860 - - - ! Ne atom - ! two electron terms - c_mn_o_sm_17(0,0,2,10) = -0.80909d0 - c_mn_o_sm_17(0,0,3,10) = -0.00219d0 - c_mn_o_sm_17(0,0,4,10) = 0.59188d0 - ! one-electron terms - c_mn_o_sm_17(2,0,0,10) = -0.00567d0 - c_mn_o_sm_17(3,0,0,10) = 0.14011d0 - c_mn_o_sm_17(4,0,0,10) = -0.05671d0 - c_mn_o_sm_17(2,2,0,10) = -3.33767d0 - ! ee-n terms - c_mn_o_sm_17(2,0,2,10) = 1.95067d0 - c_mn_o_sm_17(2,2,2,10) = 6.83340d0 - c_mn_o_sm_17(4,0,2,10) = -3.29231d0 - c_mn_o_sm_17(2,0,4,10) = -2.44998d0 - c_mn_o_sm_17(4,2,2,10) = -2.13029d0 - c_mn_o_sm_17(6,0,2,10) = 2.25768d0 - c_mn_o_sm_17(4,0,4,10) = 1.97951d0 - c_mn_o_sm_17(2,2,4,10) = -2.0924160 - c_mn_o_sm_17(2,0,6,10) = 0.35493d0 - -END_PROVIDER - - BEGIN_PROVIDER [ double precision, b_I_sm_90,(2:10)] -&BEGIN_PROVIDER [ double precision, d_I_sm_90,(2:10)] - implicit none - BEGIN_DOC -! "b_I" and "d_I" parameters of Eqs. (4) and (5) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) - END_DOC - b_I_sm_90 = 1.d0 - d_I_sm_90 = 1.d0 - -END_PROVIDER - -subroutine get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - implicit none - double precision, intent(in) :: r1(3),r2(3),rI(3) - integer, intent(in) :: sm_j, i_charge - double precision, intent(out):: j_1e,j_2e,j_een,j_tot - BEGIN_DOC - ! Jastrow function as in Eq. (4) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the i_charge variable is the integer specifying the charge of the atom for the Jastrow - ! the sm_j integer variable represents the "quality" of the jastrow : sm_j = 7, 9, 17 - END_DOC - double precision :: r_inucl,r_jnucl,r_ij,b_I, d_I - b_I = b_I_sm_90(i_charge) - d_I = d_I_sm_90(i_charge) - call get_rescaled_variables_j_sm_90(r1,r2,rI,b_I,d_I,r_inucl,r_jnucl,r_ij) - call jastrow_func_sm_90(r_inucl,r_jnucl,r_ij,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) -end - -subroutine get_rescaled_variables_j_sm_90(r1,r2,rI,b_I,d_I,r_inucl,r_jnucl,r_ij) - implicit none - BEGIN_DOC - ! rescaled variables of Eq. (5) and (6) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! the "b_I" and "d_I" parameters are the same as in Eqs. (5) and (6) - END_DOC - double precision, intent(in) :: r1(3),r2(3),rI(3) - double precision, intent(in) :: b_I, d_I - double precision, intent(out):: r_inucl,r_jnucl,r_ij - double precision :: rin, rjn, rij - integer :: i - rin = 0.d0 - rjn = 0.d0 - rij = 0.d0 - do i = 1,3 - rin += (r1(i) - rI(i)) * (r1(i) - rI(i)) - rjn += (r2(i) - rI(i)) * (r2(i) - rI(i)) - rij += (r2(i) - r1(i)) * (r2(i) - r1(i)) - enddo - rin = dsqrt(rin) - rjn = dsqrt(rjn) - rij = dsqrt(rij) - r_inucl = b_I * rin/(1.d0 + b_I * rin) - r_jnucl = b_I * rjn/(1.d0 + b_I * rjn) - r_ij = d_I * rij/(1.d0 + b_I * rij) -end - -subroutine jastrow_func_sm_90(r_inucl,r_jnucl,r_ij,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - implicit none - BEGIN_DOC - ! Jastrow function as in Eq. (4) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) - ! Here the r_inucl, r_jnucl are the rescaled variables as defined in Eq. (5) with "b_I" - ! r_ij is the rescaled variable as defined in Eq. (6) with "d_I" - ! the i_charge variable is the integer specifying the charge of the atom for the Jastrow - ! the sm_j integer variable represents the "quality" of the jastrow : sm_j = 7, 9, 17 - ! - ! it returns the j_1e : sum of terms with "o" = "n" = 0, "m" /= 0, - ! j_2e : sum of terms with "m" = "n" = 0, "o" /= 0, - ! j_een : sum of terms with "m" /=0, "n" /= 0, "o" /= 0, - ! j_tot : the total sum - END_DOC - double precision, intent(in) :: r_inucl,r_jnucl,r_ij - integer, intent(in) :: sm_j,i_charge - double precision, intent(out):: j_1e,j_2e,j_een,j_tot - j_1e = 0.D0 - j_2e = 0.D0 - j_een = 0.D0 - double precision :: delta_mn,jastrow_sm_90_atomic - integer :: m,n,o -BEGIN_TEMPLATE - ! pure 2e part - n = 0 - m = 0 - if(sm_j == $X )then - do o = 1, o_max_sm_$X - if(dabs(c_mn_o_sm_$X(m,n,o,i_charge)).lt.1.d-10)cycle - j_2e += c_mn_o_sm_$X(m,n,o,i_charge) * jastrow_sm_90_atomic(m,n,o,i_charge,r_inucl,r_jnucl,r_ij) - enddo -! else -! print*,'sm_j = ',sm_j -! print*,'not implemented, stop' -! stop - endif - ! pure one-e part - o = 0 - if(sm_j == $X)then - do n = 2, n_max_sm_$X - do m = 2, m_max_sm_$X - j_1e += c_mn_o_sm_$X(m,n,o,i_charge) * jastrow_sm_90_atomic(m,n,o,i_charge,r_inucl,r_jnucl,r_ij) - enddo - enddo -! else -! print*,'sm_j = ',sm_j -! print*,'not implemented, stop' -! stop - endif - ! e-e-n part - if(sm_j == $X)then - do o = 1, o_max_sm_$X - do m = 2, m_max_sm_$X - do n = 2, n_max_sm_$X - j_een += c_mn_o_sm_$X(m,n,o,i_charge) * jastrow_sm_90_atomic(m,n,o,i_charge,r_inucl,r_jnucl,r_ij) - enddo - enddo - enddo - else -! print*,'sm_j = ',sm_j -! print*,'not implemented, stop' -! stop - endif - j_tot = j_1e + j_2e + j_een -SUBST [ X] - 7 ;; - 9 ;; - 17 ;; -END_TEMPLATE -end - -double precision function jastrow_sm_90_atomic(m,n,o,i_charge,r_inucl,r_jnucl,r_ij) - implicit none - BEGIN_DOC -! contribution to the function of Eq. (4) of Schmidt,Moskowitz, JCP, 93, 4172 (1990) -! for a given m,n,o and atom - END_DOC - double precision, intent(in) :: r_inucl,r_jnucl,r_ij - integer , intent(in) :: m,n,o,i_charge - double precision :: delta_mn - if(m==n)then - delta_mn = 0.5d0 - else - delta_mn = 1.D0 - endif - jastrow_sm_90_atomic = delta_mn * (r_inucl**m * r_jnucl**n + r_jnucl**m * r_inucl**n)*r_ij**o -end diff --git a/plugins/local/tc_scf/minimize_tc_angles.irp.f b/plugins/local/tc_scf/minimize_tc_angles.irp.f index c7752930c..e5f6cf870 100644 --- a/plugins/local/tc_scf/minimize_tc_angles.irp.f +++ b/plugins/local/tc_scf/minimize_tc_angles.irp.f @@ -20,7 +20,7 @@ program minimize_tc_angles ! TODO ! check if rotations of orbitals affect the TC energy ! and refuse the step - call minimize_tc_orb_angles + call minimize_tc_orb_angles() end diff --git a/plugins/local/tc_scf/plot_j_schMos.irp.f b/plugins/local/tc_scf/plot_j_schMos.irp.f deleted file mode 100644 index eda0dd250..000000000 --- a/plugins/local/tc_scf/plot_j_schMos.irp.f +++ /dev/null @@ -1,69 +0,0 @@ -program plot_j - implicit none - double precision :: r1(3),rI(3),r2(3) - double precision :: r12,dx,xmax, j_1e,j_2e,j_een,j_tot - double precision :: j_mu_F_x_j - integer :: i,nx,m,i_charge,sm_j - - character*(128) :: output - integer :: i_unit_output_He_sm_7,i_unit_output_Ne_sm_7 - integer :: i_unit_output_He_sm_17,i_unit_output_Ne_sm_17 - integer :: getUnitAndOpen - output='J_SM_7_He' - i_unit_output_He_sm_7 = getUnitAndOpen(output,'w') - output='J_SM_7_Ne' - i_unit_output_Ne_sm_7 = getUnitAndOpen(output,'w') - - output='J_SM_17_He' - i_unit_output_He_sm_17 = getUnitAndOpen(output,'w') - output='J_SM_17_Ne' - i_unit_output_Ne_sm_17 = getUnitAndOpen(output,'w') - - rI = 0.d0 - r1 = 0.d0 - r2 = 0.d0 - r1(1) = 1.5d0 - xmax = 20.d0 - r2(1) = -xmax*0.5d0 - nx = 1000 - dx = xmax/dble(nx) - do i = 1, nx - r12 = 0.d0 - do m = 1, 3 - r12 += (r1(m) - r2(m))*(r1(m) - r2(m)) - enddo - r12 = dsqrt(r12) - double precision :: jmu,env_nucl,jmu_env,jmu_scaled, jmu_scaled_env - double precision :: b_I,d_I,r_inucl,r_jnucl,r_ij - b_I = 1.D0 - d_I = 1.D0 - call get_rescaled_variables_j_sm_90(r1,r2,rI,b_I,d_I,r_inucl,r_jnucl,r_ij) - jmu=j_mu_F_x_j(r12) - jmu_scaled=j_mu_F_x_j(r_ij) - jmu_env = jmu * env_nucl(r1) * env_nucl(r2) -! jmu_scaled_env= jmu_scaled * (1.d0 - env_coef(1) * dexp(-env_expo(1)*r_inucl**2)) * (1.d0 - env_coef(1) * dexp(-env_expo(1)*r_jnucl**2)) - jmu_scaled_env= jmu_scaled * env_nucl(r1) * env_nucl(r2) - ! He - i_charge = 2 - ! SM 7 Jastrow - sm_j = 7 - call get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - write(i_unit_output_He_sm_7,'(100(F16.10,X))')r2(1),r12,j_mu_F_x_j(r12), j_1e,j_2e,j_een,j_tot,jmu_env,jmu_scaled,jmu_scaled_env - ! SM 17 Jastrow - sm_j = 17 - call get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - write(i_unit_output_He_sm_17,'(100(F16.10,X))')r2(1),r12,j_mu_F_x_j(r12), j_1e,j_2e,j_een,j_tot,jmu_env,jmu_scaled,jmu_scaled_env - ! Ne - i_charge = 10 - ! SM 7 Jastrow - sm_j = 7 - call get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - write(i_unit_output_Ne_sm_7,'(100(F16.10,X))')r2(1),r12,j_mu_F_x_j(r12), j_1e,j_2e,j_een,j_tot,jmu_env,jmu_scaled,jmu_scaled_env - ! SM 17 Jastrow - sm_j = 17 - call get_full_sm_90_jastrow(r1,r2,rI,sm_j,i_charge, j_1e,j_2e,j_een,j_tot) - write(i_unit_output_Ne_sm_17,'(100(F16.10,X))')r2(1),r12,j_mu_F_x_j(r12), j_1e,j_2e,j_een,j_tot,jmu_env,jmu_scaled,jmu_scaled_env - r2(1) += dx - enddo - -end diff --git a/plugins/local/tc_scf/print_fit_param.irp.f b/plugins/local/tc_scf/print_fit_param.irp.f deleted file mode 100644 index e62f0dde7..000000000 --- a/plugins/local/tc_scf/print_fit_param.irp.f +++ /dev/null @@ -1,59 +0,0 @@ -program print_fit_param - - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - !call create_guess - !call orthonormalize_mos - - call main() - -end - -! --- - -subroutine main() - - implicit none - integer :: i - - mu_erf = 1.d0 - touch mu_erf - - print *, ' fit for (1 - erf(x))^2' - do i = 1, n_max_fit_slat - print*, expo_gauss_1_erf_x_2(i), coef_gauss_1_erf_x_2(i) - enddo - - print *, '' - print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]' - do i = 1, n_max_fit_slat - print *, expo_gauss_j_mu_x(i), 2.d0 * coef_gauss_j_mu_x(i) - enddo - - print *, '' - print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]^2' - do i = 1, n_max_fit_slat - print *, expo_gauss_j_mu_x_2(i), 4.d0 * coef_gauss_j_mu_x_2(i) - enddo - - print *, '' - print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)] x [1 - erf(mu * r12)]' - do i = 1, n_max_fit_slat - print *, expo_gauss_j_mu_1_erf(i), 4.d0 * coef_gauss_j_mu_1_erf(i) - enddo - - return -end subroutine main - -! --- - diff --git a/plugins/local/tc_scf/print_tcscf_energy.irp.f b/plugins/local/tc_scf/print_tcscf_energy.irp.f deleted file mode 100644 index 6f9afd9a9..000000000 --- a/plugins/local/tc_scf/print_tcscf_energy.irp.f +++ /dev/null @@ -1,55 +0,0 @@ -program print_tcscf_energy - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, 'Hello world' - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - call main() - -end - -! --- - -subroutine main() - - implicit none - double precision :: etc_tot, etc_1e, etc_2e, etc_3e - - PROVIDE j2e_type mu_erf - PROVIDE j1e_type j1e_coef j1e_expo - PROVIDE env_type env_coef env_expo - - print*, ' j2e_type = ', j2e_type - print*, ' j1e_type = ', j1e_type - print*, ' env_type = ', env_type - - print*, ' mu_erf = ', mu_erf - - etc_tot = TC_HF_energy - etc_1e = TC_HF_one_e_energy - etc_2e = TC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - !etc_3e = diag_three_elem_hf - etc_3e = tcscf_energy_3e_naive - endif - - print *, " E_TC = ", etc_tot - print *, " E_1e = ", etc_1e - print *, " E_2e = ", etc_2e - print *, " E_3e = ", etc_3e - - return -end subroutine main - -! --- - diff --git a/plugins/local/tc_scf/rh_tcscf_diis.irp.f b/plugins/local/tc_scf/rh_tcscf_diis.irp.f index 431b6e085..1cade02a6 100644 --- a/plugins/local/tc_scf/rh_tcscf_diis.irp.f +++ b/plugins/local/tc_scf/rh_tcscf_diis.irp.f @@ -61,7 +61,7 @@ subroutine rh_tcscf_diis() etc_tot = TC_HF_energy etc_1e = TC_HF_one_e_energy etc_2e = TC_HF_two_e_energy - etc_3e = diag_three_elem_hf + etc_3e = TC_HF_three_e_energy !tc_grad = grad_non_hermit er_DIIS = maxval(abs(FQS_SQF_mo)) e_delta = dabs(etc_tot - e_save) @@ -189,7 +189,7 @@ subroutine rh_tcscf_diis() etc_tot = TC_HF_energy etc_1e = TC_HF_one_e_energy etc_2e = TC_HF_two_e_energy - etc_3e = diag_three_elem_hf + etc_3e = TC_HF_three_e_energy !tc_grad = grad_non_hermit er_DIIS = maxval(abs(FQS_SQF_mo)) e_delta = dabs(etc_tot - e_save) @@ -234,7 +234,7 @@ subroutine rh_tcscf_diis() call unlock_io if(er_delta .lt. 0.d0) then - call ezfio_set_tc_scf_bitc_energy(etc_tot) + call ezfio_set_tc_scf_tcscf_energy(etc_tot) call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) write(json_unit, json_true_fmt) 'saved' @@ -263,7 +263,7 @@ subroutine rh_tcscf_diis() deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, E_DIIS) - call ezfio_set_tc_scf_bitc_energy(TC_HF_energy) + call ezfio_set_tc_scf_tcscf_energy(TC_HF_energy) call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) diff --git a/plugins/local/tc_scf/rh_tcscf_simple.irp.f b/plugins/local/tc_scf/rh_tcscf_simple.irp.f deleted file mode 100644 index 0b79e8eab..000000000 --- a/plugins/local/tc_scf/rh_tcscf_simple.irp.f +++ /dev/null @@ -1,129 +0,0 @@ -! --- - -subroutine rh_tcscf_simple() - - implicit none - integer :: i, j, it, dim_DIIS - double precision :: t0, t1 - double precision :: e_save, e_delta, rho_delta - double precision :: etc_tot, etc_1e, etc_2e, etc_3e, tc_grad - double precision :: er_DIIS - double precision, allocatable :: rho_old(:,:), rho_new(:,:) - - allocate(rho_old(ao_num,ao_num), rho_new(ao_num,ao_num)) - - it = 0 - e_save = 0.d0 - dim_DIIS = 0 - - ! --- - - if(.not. bi_ortho) then - print *, ' grad_hermit = ', grad_hermit - call save_good_hermit_tc_eigvectors - TOUCH mo_coef - call save_mos - endif - - ! --- - - if(bi_ortho) then - - PROVIDE level_shift_tcscf - PROVIDE mo_l_coef mo_r_coef - - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - '====', '================', '================', '================', '================', '================' & - , '================', '================', '================', '====', '========' - - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - ' it ', ' SCF TC Energy ', ' E(1e) ', ' E(2e) ', ' E(3e) ', ' energy diff ' & - , ' gradient ', ' DIIS error ', ' level shift ', 'DIIS', ' WT (m)' - - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - '====', '================', '================', '================', '================', '================' & - , '================', '================', '================', '====', '========' - - - ! first iteration (HF orbitals) - call wall_time(t0) - - etc_tot = TC_HF_energy - etc_1e = TC_HF_one_e_energy - etc_2e = TC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - etc_3e = diag_three_elem_hf - endif - tc_grad = grad_non_hermit - er_DIIS = maxval(abs(FQS_SQF_mo)) - e_delta = dabs(etc_tot - e_save) - e_save = etc_tot - - call wall_time(t1) - write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & - it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 - - do while(tc_grad .gt. dsqrt(thresh_tcscf)) - call wall_time(t0) - - it += 1 - if(it > n_it_tcscf_max) then - print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max - stop - endif - - mo_l_coef = fock_tc_leigvec_ao - mo_r_coef = fock_tc_reigvec_ao - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - TOUCH mo_l_coef mo_r_coef - - etc_tot = TC_HF_energy - etc_1e = TC_HF_one_e_energy - etc_2e = TC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - etc_3e = diag_three_elem_hf - endif - tc_grad = grad_non_hermit - er_DIIS = maxval(abs(FQS_SQF_mo)) - e_delta = dabs(etc_tot - e_save) - e_save = etc_tot - - call ezfio_set_tc_scf_bitc_energy(etc_tot) - - call wall_time(t1) - write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & - it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 - enddo - - else - - do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. (it.lt.n_it_tcscf_max) ) - print*,'grad_hermit = ',grad_hermit - it += 1 - print *, 'iteration = ', it - print *, '***' - print *, 'TC HF total energy = ', TC_HF_energy - print *, 'TC HF 1 e energy = ', TC_HF_one_e_energy - print *, 'TC HF 2 e energy = ', TC_HF_two_e_energy - print *, 'TC HF 3 body = ', diag_three_elem_hf - print *, '***' - print *, '' - call save_good_hermit_tc_eigvectors - TOUCH mo_coef - call save_mos - enddo - - endif - - print *, ' TCSCF Simple converged !' - !call print_energy_and_mos(good_angles) - - deallocate(rho_old, rho_new) - -end - -! --- - diff --git a/plugins/local/tc_scf/rh_vartcscf_simple.irp.f b/plugins/local/tc_scf/rh_vartcscf_simple.irp.f deleted file mode 100644 index ecb0709e1..000000000 --- a/plugins/local/tc_scf/rh_vartcscf_simple.irp.f +++ /dev/null @@ -1,89 +0,0 @@ -! --- - -subroutine rh_vartcscf_simple() - - implicit none - integer :: i, j, it, dim_DIIS - double precision :: t0, t1 - double precision :: e_save, e_delta, rho_delta - double precision :: etc_tot, etc_1e, etc_2e, etc_3e - double precision :: er_DIIS - - - it = 0 - e_save = 0.d0 - dim_DIIS = 0 - - ! --- - - PROVIDE level_shift_tcscf - PROVIDE mo_r_coef - - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - '====', '================', '================', '================', '================', '================' & - , '================', '================', '====', '========' - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - ' it ', ' SCF TC Energy ', ' E(1e) ', ' E(2e) ', ' E(3e) ', ' energy diff ' & - , ' DIIS error ', ' level shift ', 'DIIS', ' WT (m)' - write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)') & - '====', '================', '================', '================', '================', '================' & - , '================', '================', '====', '========' - - - ! first iteration (HF orbitals) - call wall_time(t0) - - etc_tot = VARTC_HF_energy - etc_1e = VARTC_HF_one_e_energy - etc_2e = VARTC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - etc_3e = diag_three_elem_hf - endif - er_DIIS = maxval(abs(FQS_SQF_mo)) - e_delta = dabs(etc_tot - e_save) - e_save = etc_tot - - call wall_time(t1) - write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & - it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 - - do while(er_DIIS .gt. dsqrt(thresh_tcscf)) - call wall_time(t0) - - it += 1 - if(it > n_it_tcscf_max) then - print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max - stop - endif - - mo_r_coef = fock_vartc_eigvec_ao - mo_l_coef = mo_r_coef - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - TOUCH mo_l_coef mo_r_coef - - etc_tot = VARTC_HF_energy - etc_1e = VARTC_HF_one_e_energy - etc_2e = VARTC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - etc_3e = diag_three_elem_hf - endif - er_DIIS = maxval(abs(FQS_SQF_mo)) - e_delta = dabs(etc_tot - e_save) - e_save = etc_tot - - call ezfio_set_tc_scf_bitc_energy(etc_tot) - - call wall_time(t1) - write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & - it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 - enddo - - print *, ' VAR-TCSCF Simple converged !' - -end - -! --- - diff --git a/plugins/local/tc_scf/rotate_tcscf_orbitals.irp.f b/plugins/local/tc_scf/rotate_tcscf_orbitals.irp.f deleted file mode 100644 index 0f2663e55..000000000 --- a/plugins/local/tc_scf/rotate_tcscf_orbitals.irp.f +++ /dev/null @@ -1,369 +0,0 @@ - -! --- - -program rotate_tcscf_orbitals - - BEGIN_DOC - ! TODO : Rotate the bi-orthonormal orbitals in order to minimize left-right angles when degenerate - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - bi_ortho = .True. - touch bi_ortho - - call minimize_tc_orb_angles() - !call maximize_overlap() - -end - -! --- - -subroutine maximize_overlap() - - implicit none - integer :: i, m, n - double precision :: accu_d, accu_nd - double precision, allocatable :: C(:,:), R(:,:), L(:,:), W(:,:), e(:) - double precision, allocatable :: S(:,:) - - n = ao_num - m = mo_num - - allocate(L(n,m), R(n,m), C(n,m), W(n,n), e(m)) - L = mo_l_coef - R = mo_r_coef - C = mo_coef - W = ao_overlap - - print*, ' fock matrix diag elements' - do i = 1, m - e(i) = Fock_matrix_tc_mo_tot(i,i) - print*, e(i) - enddo - - ! --- - - print *, ' overlap before :' - print *, ' ' - - allocate(S(m,m)) - - call LTxSxR(n, m, L, W, R, S) - !print*, " L.T x R" - !do i = 1, m - ! write(*, '(100(F16.10,X))') S(i,i) - !enddo - call LTxSxR(n, m, L, W, C, S) - print*, " L.T x C" - do i = 1, m - write(*, '(100(F16.10,X))') S(i,:) - enddo - call LTxSxR(n, m, C, W, R, S) - print*, " C.T x R" - do i = 1, m - write(*, '(100(F16.10,X))') S(i,:) - enddo - - deallocate(S) - - ! --- - - call rotate_degen_eigvec_to_maximize_overlap(n, m, e, C, W, L, R) - - ! --- - - print *, ' overlap after :' - print *, ' ' - - allocate(S(m,m)) - - call LTxSxR(n, m, L, W, R, S) - !print*, " L.T x R" - !do i = 1, m - ! write(*, '(100(F16.10,X))') S(i,i) - !enddo - call LTxSxR(n, m, L, W, C, S) - print*, " L.T x C" - do i = 1, m - write(*, '(100(F16.10,X))') S(i,:) - enddo - call LTxSxR(n, m, C, W, R, S) - print*, " C.T x R" - do i = 1, m - write(*, '(100(F16.10,X))') S(i,:) - enddo - - deallocate(S) - - ! --- - - mo_l_coef = L - mo_r_coef = R - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - - ! --- - - deallocate(L, R, C, W, e) - -end subroutine maximize_overlap - -! --- - -subroutine rotate_degen_eigvec_to_maximize_overlap(n, m, e0, C0, W0, L0, R0) - - implicit none - - integer, intent(in) :: n, m - double precision, intent(in) :: e0(m), W0(n,n), C0(n,m) - double precision, intent(inout) :: L0(n,m), R0(n,m) - - - integer :: i, j, k, kk, mm, id1, tot_deg - double precision :: ei, ej, de, de_thr - integer, allocatable :: deg_num(:) - double precision, allocatable :: L(:,:), R(:,:), C(:,:), Lnew(:,:), Rnew(:,:), tmp(:,:) - !double precision, allocatable :: S(:,:), Snew(:,:), T(:,:), Ttmp(:,:), Stmp(:,:) - double precision, allocatable :: S(:,:), Snew(:,:), T(:,:), Ttmp(:,:), Stmp(:,:) - !real*8 :: S(m,m), Snew(m,m), T(m,m) - - id1 = 700 - allocate(S(id1,id1), Snew(id1,id1), T(id1,id1)) - - ! --- - - allocate( deg_num(m) ) - do i = 1, m - deg_num(i) = 1 - enddo - - de_thr = thr_degen_tc - - do i = 1, m-1 - ei = e0(i) - - ! already considered in degen vectors - if(deg_num(i).eq.0) cycle - - do j = i+1, m - ej = e0(j) - de = dabs(ei - ej) - - if(de .lt. de_thr) then - deg_num(i) = deg_num(i) + 1 - deg_num(j) = 0 - endif - - enddo - enddo - - tot_deg = 0 - do i = 1, m - if(deg_num(i).gt.1) then - print *, ' degen on', i, deg_num(i) - tot_deg = tot_deg + 1 - endif - enddo - - if(tot_deg .eq. 0) then - print *, ' no degen' - return - endif - - ! --- - - do i = 1, m - mm = deg_num(i) - - if(mm .gt. 1) then - - allocate(L(n,mm), R(n,mm), C(n,mm)) - do j = 1, mm - L(1:n,j) = L0(1:n,i+j-1) - R(1:n,j) = R0(1:n,i+j-1) - C(1:n,j) = C0(1:n,i+j-1) - enddo - - ! --- - - ! C.T x W0 x R - allocate(tmp(mm,n), Stmp(mm,mm)) - call dgemm( 'T', 'N', mm, n, n, 1.d0 & - , C, size(C, 1), W0, size(W0, 1) & - , 0.d0, tmp, size(tmp, 1) ) - call dgemm( 'N', 'N', mm, mm, n, 1.d0 & - , tmp, size(tmp, 1), R, size(R, 1) & - , 0.d0, Stmp, size(Stmp, 1) ) - deallocate(C, tmp) - - S = 0.d0 - do k = 1, mm - do kk = 1, mm - S(kk,k) = Stmp(kk,k) - enddo - enddo - deallocate(Stmp) - - !print*, " overlap bef" - !do k = 1, mm - ! write(*, '(100(F16.10,X))') (S(k,kk), kk=1, mm) - !enddo - - T = 0.d0 - Snew = 0.d0 - call maxovl(mm, mm, S, T, Snew) - - !print*, " overlap aft" - !do k = 1, mm - ! write(*, '(100(F16.10,X))') (Snew(k,kk), kk=1, mm) - !enddo - - allocate(Ttmp(mm,mm)) - Ttmp(1:mm,1:mm) = T(1:mm,1:mm) - - allocate(Lnew(n,mm), Rnew(n,mm)) - call dgemm( 'N', 'N', n, mm, mm, 1.d0 & - , R, size(R, 1), Ttmp(1,1), size(Ttmp, 1) & - , 0.d0, Rnew, size(Rnew, 1) ) - call dgemm( 'N', 'N', n, mm, mm, 1.d0 & - , L, size(L, 1), Ttmp(1,1), size(Ttmp, 1) & - , 0.d0, Lnew, size(Lnew, 1) ) - - deallocate(L, R) - deallocate(Ttmp) - - ! --- - - do j = 1, mm - L0(1:n,i+j-1) = Lnew(1:n,j) - R0(1:n,i+j-1) = Rnew(1:n,j) - enddo - deallocate(Lnew, Rnew) - - endif - enddo - - deallocate(S, Snew, T) - -end subroutine rotate_degen_eigvec_to_maximize_overlap - -! --- - -subroutine fix_right_to_one() - - implicit none - integer :: i, j, m, n, mm, tot_deg - double precision :: accu_d, accu_nd - double precision :: de_thr, ei, ej, de - integer, allocatable :: deg_num(:) - double precision, allocatable :: R0(:,:), L0(:,:), W(:,:), e0(:) - double precision, allocatable :: R(:,:), L(:,:), S(:,:), Stmp(:,:), tmp(:,:) - - n = ao_num - m = mo_num - - allocate(L0(n,m), R0(n,m), W(n,n), e0(m)) - L0 = mo_l_coef - R0 = mo_r_coef - W = ao_overlap - - print*, ' fock matrix diag elements' - do i = 1, m - e0(i) = Fock_matrix_tc_mo_tot(i,i) - print*, e0(i) - enddo - - ! --- - - allocate( deg_num(m) ) - do i = 1, m - deg_num(i) = 1 - enddo - - de_thr = 1d-6 - - do i = 1, m-1 - ei = e0(i) - - ! already considered in degen vectors - if(deg_num(i).eq.0) cycle - - do j = i+1, m - ej = e0(j) - de = dabs(ei - ej) - - if(de .lt. de_thr) then - deg_num(i) = deg_num(i) + 1 - deg_num(j) = 0 - endif - - enddo - enddo - - deallocate(e0) - - tot_deg = 0 - do i = 1, m - if(deg_num(i).gt.1) then - print *, ' degen on', i, deg_num(i) - tot_deg = tot_deg + 1 - endif - enddo - - if(tot_deg .eq. 0) then - print *, ' no degen' - return - endif - - ! --- - - do i = 1, m - mm = deg_num(i) - - if(mm .gt. 1) then - - allocate(L(n,mm), R(n,mm)) - do j = 1, mm - L(1:n,j) = L0(1:n,i+j-1) - R(1:n,j) = R0(1:n,i+j-1) - enddo - - ! --- - - call impose_weighted_orthog_svd(n, mm, W, R) - call impose_weighted_biorthog_qr(n, mm, thresh_biorthog_diag, thresh_biorthog_nondiag, R, W, L) - - ! --- - - do j = 1, mm - L0(1:n,i+j-1) = L(1:n,j) - R0(1:n,i+j-1) = R(1:n,j) - enddo - deallocate(L, R) - - endif - enddo - - call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thresh_biorthog_diag, thresh_biorthog_nondiag, .true.) - - deallocate(W, deg_num) - - mo_l_coef = L0 - mo_r_coef = R0 - deallocate(L0, R0) - - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) - call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) - print *, ' orbitals are rotated ' - - return -end subroutine fix_right_to_one - -! --- diff --git a/plugins/local/tc_scf/routines_rotates.irp.f b/plugins/local/tc_scf/routines_rotates.irp.f index fbfc9beba..64a81e8e4 100644 --- a/plugins/local/tc_scf/routines_rotates.irp.f +++ b/plugins/local/tc_scf/routines_rotates.irp.f @@ -40,9 +40,6 @@ subroutine LTxSxR(n, m, L, S, R, C) end subroutine LTxR -! --- - - ! --- subroutine minimize_tc_orb_angles() @@ -103,7 +100,10 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) double precision, allocatable :: stmp(:,:), T(:,:), Snew(:,:), smat2(:,:) double precision, allocatable :: mo_l_coef_tmp(:,:), mo_r_coef_tmp(:,:), mo_l_coef_new(:,:) - E_thr = 1d-04 + PROVIDE TC_HF_energy + PROVIDE mo_r_coef mo_l_coef + + E_thr = 1d-07 E_old = TC_HF_energy allocate(mo_l_coef_old(ao_num,mo_num), mo_r_coef_old(ao_num,mo_num)) mo_r_coef_old = mo_r_coef @@ -111,7 +111,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) good_angles = .False. - allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num)) + allocate(mo_l_coef_good(ao_num,mo_num), mo_r_coef_good(ao_num,mo_num)) print *, ' ***************************************' print *, ' ***************************************' @@ -123,7 +123,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) mo_r_coef_good = mo_r_coef mo_l_coef_good = mo_l_coef - allocate(mo_r_coef_new(ao_num, mo_num)) + allocate(mo_r_coef_new(ao_num,mo_num)) mo_r_coef_new = mo_r_coef do i = 1, mo_num norm = 1.d0/dsqrt(overlap_mo_r(i,i)) @@ -141,10 +141,11 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) call build_s_matrix(ao_num, mo_num, mo_r_coef_new, mo_r_coef_new, ao_overlap, s_mat) ! call give_degen(fock_diag,mo_num,thr_deg,list_degen,n_degen_list) if(n_core_orb.ne.0)then - call give_degen_full_listcore(fock_diag, mo_num, list_core, n_core_orb, thr_deg, list_degen, n_degen_list) + call give_degen_full_listcore(fock_diag, mo_num, list_core, n_core_orb, thr_deg, list_degen, n_degen_list) else - call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) endif + print *, ' fock_matrix_mo' do i = 1, mo_num print *, i, fock_diag(i), angle_left_right(i) @@ -156,50 +157,52 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! n_degen = ilast - ifirst +1 n_degen = list_degen(i,0) - if(n_degen .ge. 1000)n_degen = 1 ! convention for core orbitals + if(n_degen .ge. 1000) n_degen = 1 ! convention for core orbitals if(n_degen .eq. 1) cycle + print*, ' working on orbital', i + print*, ' multiplicity =', n_degen allocate(stmp(n_degen,n_degen), smat2(n_degen,n_degen)) allocate(mo_r_coef_tmp(ao_num,n_degen), mo_l_coef_tmp(ao_num,n_degen), mo_l_coef_new(ao_num,n_degen)) allocate(T(n_degen,n_degen), Snew(n_degen,n_degen)) print*,'Right orbitals before' - do j = 1, n_degen - write(*,'(100(F16.10,X))') mo_r_coef_new(1:ao_num,list_degen(i,j)) - enddo + do j = 1, n_degen + write(*,'(1000(F16.10,X))') mo_r_coef_new(1:ao_num,list_degen(i,j)) + enddo print*,'Left orbitals before' - do j = 1, n_degen - write(*,'(100(F16.10,X))')mo_l_coef(1:ao_num,list_degen(i,j)) - enddo + do j = 1, n_degen + write(*,'(1000(F16.10,X))') mo_l_coef(1:ao_num,list_degen(i,j)) + enddo if(angle_left_right(list_degen(i,1)).gt.80.d0.and.n_degen==2)then - integer :: i_list, j_list - i_list = list_degen(i,1) - j_list = list_degen(i,2) - print*,'Huge angle !!! == ',angle_left_right(list_degen(i,1)),angle_left_right(list_degen(i,2)) - print*,'i_list = ',i_list - print*,'i_list = ',j_list - print*,'Swapping left/right orbitals' - call print_strong_overlap(i_list, j_list) - mo_r_coef_tmp(1:ao_num,1) = mo_r_coef_new(1:ao_num,i_list) - mo_r_coef_tmp(1:ao_num,2) = mo_l_coef(1:ao_num,i_list) - mo_l_coef_tmp(1:ao_num,1) = mo_l_coef(1:ao_num,j_list) - mo_l_coef_tmp(1:ao_num,2) = mo_r_coef_new(1:ao_num,j_list) + integer :: i_list, j_list + i_list = list_degen(i,1) + j_list = list_degen(i,2) + print*,'Huge angle !!! == ',angle_left_right(list_degen(i,1)),angle_left_right(list_degen(i,2)) + print*,'i_list = ',i_list + print*,'i_list = ',j_list + print*,'Swapping left/right orbitals' + call print_strong_overlap(i_list, j_list) + mo_r_coef_tmp(1:ao_num,1) = mo_r_coef_new(1:ao_num,i_list) + mo_r_coef_tmp(1:ao_num,2) = mo_l_coef(1:ao_num,i_list) + mo_l_coef_tmp(1:ao_num,1) = mo_l_coef(1:ao_num,j_list) + mo_l_coef_tmp(1:ao_num,2) = mo_r_coef_new(1:ao_num,j_list) else - do j = 1, n_degen - print*,'i_list = ',list_degen(i,j) - mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j)) - mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j)) - enddo + do j = 1, n_degen + print*,'i_list = ',list_degen(i,j) + mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j)) + mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j)) + enddo endif print*,'Right orbitals ' - do j = 1, n_degen - write(*,'(100(F16.10,X))')mo_r_coef_tmp(1:ao_num,j) - enddo + do j = 1, n_degen + write(*,'(1000(F16.10,X))') mo_r_coef_tmp(1:ao_num,j) + enddo print*,'Left orbitals ' - do j = 1, n_degen - write(*,'(100(F16.10,X))')mo_l_coef_tmp(1:ao_num,j) - enddo + do j = 1, n_degen + write(*,'(100(F16.10,X))') mo_l_coef_tmp(1:ao_num,j) + enddo ! Orthogonalization of right functions print *, ' Orthogonalization of RIGHT functions' print *, ' ------------------------------------' @@ -301,6 +304,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! check if TC energy has changed E_new = TC_HF_energy + E_thr = thresh_de_tc_angles if(dabs(E_new - E_old) .gt. E_thr) then mo_r_coef = mo_r_coef_old mo_l_coef = mo_l_coef_old diff --git a/plugins/local/tc_scf/tc_petermann_factor.irp.f b/plugins/local/tc_scf/tc_petermann_factor.irp.f deleted file mode 100644 index 14fff8980..000000000 --- a/plugins/local/tc_scf/tc_petermann_factor.irp.f +++ /dev/null @@ -1,91 +0,0 @@ - -! --- - -program tc_petermann_factor - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - call main() - -end - -! --- - -subroutine main() - - implicit none - integer :: i, j - double precision :: Pf_diag_av - double precision, allocatable :: Sl(:,:), Sr(:,:), Pf(:,:) - - allocate(Sl(mo_num,mo_num), Sr(mo_num,mo_num), Pf(mo_num,mo_num)) - - - call LTxSxR(ao_num, mo_num, mo_l_coef, ao_overlap, mo_r_coef, Sl) - !call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & - ! , mo_l_coef, size(mo_l_coef, 1), mo_l_coef, size(mo_l_coef, 1) & - ! , 0.d0, Sl, size(Sl, 1) ) - - print *, '' - print *, ' left-right orthog matrix:' - do i = 1, mo_num - write(*,'(100(F8.4,X))') Sl(:,i) - enddo - - call LTxSxR(ao_num, mo_num, mo_l_coef, ao_overlap, mo_l_coef, Sl) - !call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & - ! , mo_l_coef, size(mo_l_coef, 1), mo_l_coef, size(mo_l_coef, 1) & - ! , 0.d0, Sl, size(Sl, 1) ) - - print *, '' - print *, ' left-orthog matrix:' - do i = 1, mo_num - write(*,'(100(F8.4,X))') Sl(:,i) - enddo - - call LTxSxR(ao_num, mo_num, mo_r_coef, ao_overlap, mo_r_coef, Sr) -! call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & -! , mo_r_coef, size(mo_r_coef, 1), mo_r_coef, size(mo_r_coef, 1) & -! , 0.d0, Sr, size(Sr, 1) ) - - print *, '' - print *, ' right-orthog matrix:' - do i = 1, mo_num - write(*,'(100(F8.4,X))') Sr(:,i) - enddo - - print *, '' - print *, ' Petermann matrix:' - do i = 1, mo_num - do j = 1, mo_num - Pf(j,i) = Sl(j,i) * Sr(j,i) - enddo - write(*,'(100(F8.4,X))') Pf(:,i) - enddo - - Pf_diag_av = 0.d0 - do i = 1, mo_num - Pf_diag_av = Pf_diag_av + Pf(i,i) - enddo - Pf_diag_av = Pf_diag_av / dble(mo_num) - - print *, '' - print *, ' mean of the diagonal Petermann factor = ', Pf_diag_av - - deallocate(Sl, Sr, Pf) - - return -end subroutine - -! --- - diff --git a/plugins/local/tc_scf/tc_scf.irp.f b/plugins/local/tc_scf/tc_scf.irp.f index 768069d67..83da03ec1 100644 --- a/plugins/local/tc_scf/tc_scf.irp.f +++ b/plugins/local/tc_scf/tc_scf.irp.f @@ -7,19 +7,6 @@ program tc_scf END_DOC implicit none - integer :: i - logical :: good_angles - - PROVIDE j1e_type - PROVIDE j2e_type - PROVIDE tcscf_algorithm - PROVIDE var_tc - - print *, ' TC-SCF with:' - print *, ' j1e_type = ', j1e_type - print *, ' j2e_type = ', j2e_type - - write(json_unit,json_array_open_fmt) 'tc-scf' my_grid_becke = .True. PROVIDE tc_grid1_a tc_grid1_r @@ -30,7 +17,6 @@ program tc_scf call write_int(6, my_n_pt_r_grid, 'radial external grid over') call write_int(6, my_n_pt_a_grid, 'angular external grid over') - if(tc_integ_type .eq. "numeric") then my_extra_grid_becke = .True. PROVIDE tc_grid2_a tc_grid2_r @@ -42,48 +28,38 @@ program tc_scf call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') endif - !call create_guess() - !call orthonormalize_mos() + call main() +end - if(var_tc) then +! --- - print *, ' VAR-TC' +subroutine main() - if(tcscf_algorithm == 'DIIS') then - print*, ' NOT implemented yet' - elseif(tcscf_algorithm == 'Simple') then - call rh_vartcscf_simple() - else - print *, ' not implemented yet', tcscf_algorithm - stop - endif + implicit none - else + integer :: i + logical :: good_angles - if(tcscf_algorithm == 'DIIS') then - call rh_tcscf_diis() - elseif(tcscf_algorithm == 'Simple') then - call rh_tcscf_simple() - else - print *, ' not implemented yet', tcscf_algorithm - stop - endif + print *, ' TC-SCF with:' + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + write(json_unit,json_array_open_fmt) 'tc-scf' - PROVIDE Fock_matrix_tc_diag_mo_tot - print*, ' Eigenvalues:' - do i = 1, mo_num - print*, i, Fock_matrix_tc_diag_mo_tot(i) - enddo + call rh_tcscf_diis() - ! TODO - ! rotate angles in separate code only if necessary - if(minimize_lr_angles)then - call minimize_tc_orb_angles() - endif - call print_energy_and_mos(good_angles) + PROVIDE Fock_matrix_tc_diag_mo_tot + print*, ' Eigenvalues:' + do i = 1, mo_num + print*, i, Fock_matrix_tc_diag_mo_tot(i) + enddo + if(minimize_lr_angles) then + call minimize_tc_orb_angles() endif + call print_energy_and_mos(good_angles) write(json_unit,json_array_close_fmtx) call json_close @@ -119,7 +95,7 @@ subroutine create_guess() SOFT_TOUCH mo_label endif -end subroutine create_guess +end ! --- diff --git a/plugins/local/tc_scf/tc_scf_dm.irp.f b/plugins/local/tc_scf/tc_scf_dm.irp.f index bf31a4a1a..5d25fce25 100644 --- a/plugins/local/tc_scf/tc_scf_dm.irp.f +++ b/plugins/local/tc_scf/tc_scf_dm.irp.f @@ -10,16 +10,8 @@ implicit none - if(bi_ortho) then - - PROVIDE mo_l_coef mo_r_coef - TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta - - else - - TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta - - endif + PROVIDE mo_l_coef mo_r_coef + TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta END_PROVIDER @@ -35,16 +27,8 @@ implicit none - if(bi_ortho) then - - PROVIDE mo_l_coef mo_r_coef - TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha - - else - - TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha - - endif + PROVIDE mo_l_coef mo_r_coef + TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha END_PROVIDER diff --git a/plugins/local/tc_scf/tc_scf_energy.irp.f b/plugins/local/tc_scf/tc_scf_energy.irp.f index 833b48aab..80ef2afbe 100644 --- a/plugins/local/tc_scf/tc_scf_energy.irp.f +++ b/plugins/local/tc_scf/tc_scf_energy.irp.f @@ -1,7 +1,8 @@ - BEGIN_PROVIDER [ double precision, TC_HF_energy ] -&BEGIN_PROVIDER [ double precision, TC_HF_one_e_energy] -&BEGIN_PROVIDER [ double precision, TC_HF_two_e_energy] + BEGIN_PROVIDER [double precision, TC_HF_energy ] +&BEGIN_PROVIDER [double precision, TC_HF_one_e_energy ] +&BEGIN_PROVIDER [double precision, TC_HF_two_e_energy ] +&BEGIN_PROVIDER [double precision, TC_HF_three_e_energy] BEGIN_DOC ! TC Hartree-Fock energy containing the nuclear repulsion, and its one- and two-body components. @@ -11,11 +12,8 @@ integer :: i, j double precision :: t0, t1 - !print*, ' Providing TC energy ...' - !call wall_time(t0) - PROVIDE mo_l_coef mo_r_coef - PROVIDE two_e_tc_non_hermit_integral_alpha two_e_tc_non_hermit_integral_beta + PROVIDE two_e_tc_integral_alpha two_e_tc_integral_beta TC_HF_energy = nuclear_repulsion TC_HF_one_e_energy = 0.d0 @@ -23,47 +21,20 @@ do j = 1, ao_num do i = 1, ao_num - TC_HF_two_e_energy += 0.5d0 * ( two_e_tc_non_hermit_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) & - + two_e_tc_non_hermit_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) ) + TC_HF_two_e_energy += 0.5d0 * ( two_e_tc_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) & + + two_e_tc_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) ) TC_HF_one_e_energy += ao_one_e_integrals_tc_tot(i,j) & * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) ) enddo enddo - TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy - TC_HF_energy += diag_three_elem_hf - - !call wall_time(t1) - !print*, ' Wall time for TC energy=', t1-t0 - -END_PROVIDER - -! --- - - BEGIN_PROVIDER [ double precision, VARTC_HF_energy] -&BEGIN_PROVIDER [ double precision, VARTC_HF_one_e_energy] -&BEGIN_PROVIDER [ double precision, VARTC_HF_two_e_energy] - - implicit none - integer :: i, j - - PROVIDE mo_r_coef - - VARTC_HF_energy = nuclear_repulsion - VARTC_HF_one_e_energy = 0.d0 - VARTC_HF_two_e_energy = 0.d0 - - do j = 1, ao_num - do i = 1, ao_num - VARTC_HF_two_e_energy += 0.5d0 * ( two_e_vartc_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) & - + two_e_vartc_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) ) - VARTC_HF_one_e_energy += ao_one_e_integrals_tc_tot(i,j) & - * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) ) - enddo - enddo + if((three_body_h_tc .eqv. .False.) .and. (.not. noL_standard)) then + TC_HF_three_e_energy = 0.d0 + else + TC_HF_three_e_energy = noL_0e + endif - VARTC_HF_energy += VARTC_HF_one_e_energy + VARTC_HF_two_e_energy - VARTC_HF_energy += diag_three_elem_hf + TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy + TC_HF_three_e_energy END_PROVIDER diff --git a/plugins/local/tc_scf/tcscf_energy_naive.irp.f b/plugins/local/tc_scf/tcscf_energy_naive.irp.f deleted file mode 100644 index 82bb87999..000000000 --- a/plugins/local/tc_scf/tcscf_energy_naive.irp.f +++ /dev/null @@ -1,80 +0,0 @@ - -! --- - -BEGIN_PROVIDER [double precision, tcscf_energy_3e_naive] - - implicit none - integer :: i, j, k - integer :: neu, ned, D(elec_num) - integer :: ii, jj, kk - integer :: si, sj, sk - double precision :: I_ijk, I_jki, I_kij, I_jik, I_ikj, I_kji - double precision :: I_tot - - PROVIDE mo_l_coef mo_r_coef - - neu = elec_alpha_num - ned = elec_beta_num - if (neu > 0) D(1:neu) = [(2*i-1, i = 1, neu)] - if (ned > 0) D(neu+1:neu+ned) = [(2*i, i = 1, ned)] - - !print*, "D = " - !do i = 1, elec_num - ! ii = (D(i) - 1) / 2 + 1 - ! si = mod(D(i), 2) - ! print*, i, D(i), ii, si - !enddo - - tcscf_energy_3e_naive = 0.d0 - - do i = 1, elec_num - 2 - ii = (D(i) - 1) / 2 + 1 - si = mod(D(i), 2) - - do j = i + 1, elec_num - 1 - jj = (D(j) - 1) / 2 + 1 - sj = mod(D(j), 2) - - do k = j + 1, elec_num - kk = (D(k) - 1) / 2 + 1 - sk = mod(D(k), 2) - - call give_integrals_3_body_bi_ort(ii, jj, kk, ii, jj, kk, I_ijk) - I_tot = I_ijk - - if(sj==si .and. sk==sj) then - call give_integrals_3_body_bi_ort(ii, jj, kk, jj, kk, ii, I_jki) - I_tot += I_jki - endif - - if(sk==si .and. si==sj) then - call give_integrals_3_body_bi_ort(ii, jj, kk, kk, ii, jj, I_kij) - I_tot += I_kij - endif - - if(sj==si) then - call give_integrals_3_body_bi_ort(ii, jj, kk, jj, ii, kk, I_jik) - I_tot -= I_jik - endif - - if(sk==sj) then - call give_integrals_3_body_bi_ort(ii, jj, kk, ii, kk, jj, I_ikj) - I_tot -= I_ikj - endif - - if(sk==si) then - call give_integrals_3_body_bi_ort(ii, jj, kk, kk, jj, ii, I_kji) - I_tot -= I_kji - endif - - tcscf_energy_3e_naive += I_tot - enddo - enddo - enddo - - tcscf_energy_3e_naive = -tcscf_energy_3e_naive - -END_PROVIDER - -! --- - diff --git a/plugins/local/tc_scf/test_int.irp.f b/plugins/local/tc_scf/test_int.irp.f deleted file mode 100644 index e135fcd85..000000000 --- a/plugins/local/tc_scf/test_int.irp.f +++ /dev/null @@ -1,970 +0,0 @@ -program test_ints - - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - - implicit none - - print *, ' starting test_ints ...' - - my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r - my_n_pt_r_grid = tc_grid1_r - my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - - my_extra_grid_becke = .True. - my_n_pt_r_extra_grid = 30 - my_n_pt_a_extra_grid = 50 ! small extra_grid for quick debug - touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid - -!! OK -! call routine_int2_u_grad1u_env2 -! OK -! call routine_v_ij_erf_rk_cst_mu_env -! OK -! call routine_x_v_ij_erf_rk_cst_mu_env -! OK -! call routine_int2_u2_env2 -! OK -! call routine_int2_u_grad1u_x_env2 -! OK -! call routine_int2_grad1u2_grad2u2_env2 -! call routine_int2_u_grad1u_env2 -! call test_int2_grad1_u12_ao_test -! call routine_v_ij_u_cst_mu_env_test -! call test_grid_points_ao - !call test_int_gauss - - !call test_fock_3e_uhf_ao() - !call test_fock_3e_uhf_mo() - - !call test_two_e_tc_non_hermit_integral() - -!!PROVIDE TC_HF_energy VARTC_HF_energy -!!print *, ' TC_HF_energy = ', TC_HF_energy -!!print *, ' VARTC_HF_energy = ', VARTC_HF_energy - - call test_fock_3e_uhf_mo_cs() - call test_fock_3e_uhf_mo_a() - call test_fock_3e_uhf_mo_b() - -end - -! --- - -subroutine routine_test_env - implicit none - integer :: i,icount,j - icount = 0 - do i = 1, List_env1s_square_size - if(dabs(List_env1s_square_coef(i)).gt.1.d-10)then - print*,'' - print*,List_env1s_square_expo(i),List_env1s_square_coef(i) - print*,List_env1s_square_cent(1:3,i) - print*,'' - icount += 1 - endif - - enddo - print*,'List_env1s_square_coef,icount = ',List_env1s_square_size,icount - do i = 1, ao_num - do j = 1, ao_num - do icount = 1, List_comb_thr_b3_size(j,i) - print*,'',j,i - print*,List_comb_thr_b3_expo(icount,j,i),List_comb_thr_b3_coef(icount,j,i) - print*,List_comb_thr_b3_cent(1:3,icount,j,i) - print*,'' - enddo -! enddo - enddo - enddo - print*,'max_List_comb_thr_b3_size = ',max_List_comb_thr_b3_size,List_env1s_square_size - -end - -subroutine routine_int2_u_grad1u_env2 - implicit none - integer :: i,j,ipoint,k,l - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += int2_u_grad1u_env2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += int2_u_grad1u_env2(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'routine_int2_u_grad1u_env2' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - - - -end - -subroutine routine_v_ij_erf_rk_cst_mu_env - implicit none - integer :: i,j,ipoint,k,l - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += v_ij_erf_rk_cst_mu_env_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += v_ij_erf_rk_cst_mu_env(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'routine_v_ij_erf_rk_cst_mu_env' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - - - -end - - -subroutine routine_x_v_ij_erf_rk_cst_mu_env - implicit none - integer :: i,j,ipoint,k,l,m - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - do m = 1, 3 - array(j,i,l,k) += x_v_ij_erf_rk_cst_mu_env_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += x_v_ij_erf_rk_cst_mu_env (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - - print*,'******' - print*,'******' - print*,'routine_x_v_ij_erf_rk_cst_mu_env' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - - - -end - - - -subroutine routine_v_ij_u_cst_mu_env_test - implicit none - integer :: i,j,ipoint,k,l - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += v_ij_u_cst_mu_env_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += v_ij_u_cst_mu_env_fit (j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'routine_v_ij_u_cst_mu_env_test' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - -end - -subroutine routine_int2_grad1u2_grad2u2_env2 - implicit none - integer :: i,j,ipoint,k,l - integer :: ii , jj - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - double precision, allocatable :: ints(:,:,:) - allocate(ints(ao_num, ao_num, n_points_final_grid)) -! do ipoint = 1, n_points_final_grid -! do i = 1, ao_num -! do j = 1, ao_num -! read(33,*)ints(j,i,ipoint) -! enddo -! enddo -! enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += int2_grad1u2_grad2u2_env2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight -! !array(j,i,l,k) += int2_grad1u2_grad2u2_env2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight -! array_ref(j,i,l,k) += int2_grad1u2_grad2u2_env2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight -! !array(j,i,l,k) += ints(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight -! array_ref(j,i,l,k) += int2_grad1u2_grad2u2_env2(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += ints(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight -! if(dabs(int2_grad1u2_grad2u2_env2_test(j,i,ipoint)).gt.1.d-6)then -! if(dabs(int2_grad1u2_grad2u2_env2_test(j,i,ipoint) - int2_grad1u2_grad2u2_env2_test(j,i,ipoint)).gt.1.d-6)then -! print*,j,i,ipoint -! print*,int2_grad1u2_grad2u2_env2_test(j,i,ipoint) , int2_grad1u2_grad2u2_env2_test(j,i,ipoint), dabs(int2_grad1u2_grad2u2_env2_test(j,i,ipoint) - int2_grad1u2_grad2u2_env2_test(j,i,ipoint)) -! print*,int2_grad1u2_grad2u2_env2_test(i,j,ipoint) , int2_grad1u2_grad2u2_env2_test(i,j,ipoint), dabs(int2_grad1u2_grad2u2_env2_test(i,j,ipoint) - int2_grad1u2_grad2u2_env2_test(i,j,ipoint)) -! stop -! endif -! endif - enddo - enddo - enddo - enddo - enddo - double precision :: e_ref, e_new - accu_relat = 0.d0 - accu_abs = 0.d0 - e_ref = 0.d0 - e_new = 0.d0 - do ii = 1, elec_alpha_num - do jj = ii, elec_alpha_num - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - e_ref += mo_coef(j,ii) * mo_coef(i,ii) * array_ref(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj) - e_new += mo_coef(j,ii) * mo_coef(i,ii) * array(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj) - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib -! if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then -! accu_relat += contrib/dabs(array_ref(j,i,l,k)) -! endif - enddo - enddo - enddo - enddo - - enddo - enddo - print*,'e_ref = ',e_ref - print*,'e_new = ',e_new -! print*,'accu_abs = ',accu_abs/dble(ao_num)**4 -! print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - - - -end - -subroutine routine_int2_u2_env2 - implicit none - integer :: i,j,ipoint,k,l - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += int2_u2_env2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += int2_u2_env2(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'routine_int2_u2_env2' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - - - -end - - -subroutine routine_int2_u_grad1u_x_env2 - implicit none - integer :: i,j,ipoint,k,l,m - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - do m = 1, 3 - array(j,i,l,k) += int2_u_grad1u_x_env2_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += int2_u_grad1u_x_env2 (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'routine_int2_u_grad1u_x_env2' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - - - -end - -subroutine routine_v_ij_u_cst_mu_env - implicit none - integer :: i,j,ipoint,k,l - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += v_ij_u_cst_mu_env_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += v_ij_u_cst_mu_env_fit (j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'routine_v_ij_u_cst_mu_env' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 - -end - -! --- - -subroutine test_fock_3e_uhf_ao() - - implicit none - integer :: i, j - double precision :: diff_tot, diff_ij, thr_ih, norm - double precision, allocatable :: fock_3e_uhf_ao_a_mo(:,:), fock_3e_uhf_ao_b_mo(:,:) - - thr_ih = 1d-7 - - PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth - PROVIDE fock_3e_uhf_ao_a fock_3e_uhf_ao_b - - ! --- - - allocate(fock_3e_uhf_ao_a_mo(mo_num,mo_num)) - call ao_to_mo_bi_ortho( fock_3e_uhf_ao_a , size(fock_3e_uhf_ao_a , 1) & - , fock_3e_uhf_ao_a_mo, size(fock_3e_uhf_ao_a_mo, 1) ) - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - - diff_ij = dabs(fock_3e_uhf_ao_a_mo(j,i) - fock_a_tot_3e_bi_orth(j,i)) - if(diff_ij .gt. thr_ih) then - print *, ' difference on ', j, i - print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i) - print *, ' UHF : ', fock_3e_uhf_ao_a_mo (j,i) - !stop - endif - - norm += dabs(fock_a_tot_3e_bi_orth(j,i)) - diff_tot += diff_ij - enddo - enddo - print *, ' diff on F_a = ', diff_tot / norm - print *, ' ' - - deallocate(fock_3e_uhf_ao_a_mo) - - ! --- - - allocate(fock_3e_uhf_ao_b_mo(mo_num,mo_num)) - call ao_to_mo_bi_ortho( fock_3e_uhf_ao_b , size(fock_3e_uhf_ao_b , 1) & - , fock_3e_uhf_ao_b_mo, size(fock_3e_uhf_ao_b_mo, 1) ) - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - - diff_ij = dabs(fock_3e_uhf_ao_b_mo(j,i) - fock_b_tot_3e_bi_orth(j,i)) - if(diff_ij .gt. thr_ih) then - print *, ' difference on ', j, i - print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i) - print *, ' UHF : ', fock_3e_uhf_ao_b_mo (j,i) - !stop - endif - - norm += dabs(fock_b_tot_3e_bi_orth(j,i)) - diff_tot += diff_ij - enddo - enddo - print *, ' diff on F_b = ', diff_tot/norm - print *, ' ' - - deallocate(fock_3e_uhf_ao_b_mo) - - ! --- - -end subroutine test_fock_3e_uhf_ao() - -! --- - -subroutine test_fock_3e_uhf_mo() - - implicit none - integer :: i, j - double precision :: diff_tot, diff_ij, thr_ih, norm - - thr_ih = 1d-12 - - PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth - PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_b - - ! --- - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - - diff_ij = dabs(fock_3e_uhf_mo_a(j,i) - fock_a_tot_3e_bi_orth(j,i)) - if(diff_ij .gt. thr_ih) then - print *, ' difference on ', j, i - print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i) - print *, ' UHF : ', fock_3e_uhf_mo_a (j,i) - !stop - endif - - norm += dabs(fock_a_tot_3e_bi_orth(j,i)) - diff_tot += diff_ij - enddo - enddo - print *, ' diff on F_a = ', diff_tot / norm - print *, ' norm_a = ', norm - print *, ' ' - - ! --- - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - - diff_ij = dabs(fock_3e_uhf_mo_b(j,i) - fock_b_tot_3e_bi_orth(j,i)) - if(diff_ij .gt. thr_ih) then - print *, ' difference on ', j, i - print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i) - print *, ' UHF : ', fock_3e_uhf_mo_b (j,i) - !stop - endif - - norm += dabs(fock_b_tot_3e_bi_orth(j,i)) - diff_tot += diff_ij - enddo - enddo - print *, ' diff on F_b = ', diff_tot/norm - print *, ' norm_b = ', norm - print *, ' ' - - ! --- - -end - -! --- - -subroutine test_grid_points_ao - implicit none - integer :: i,j,ipoint,icount,icount_good, icount_bad,icount_full - double precision :: thr - thr = 1.d-10 -! print*,'max_n_pts_grid_ao_prod = ',max_n_pts_grid_ao_prod -! print*,'n_pts_grid_ao_prod' - do i = 1, ao_num - do j = i, ao_num - icount = 0 - icount_good = 0 - icount_bad = 0 - icount_full = 0 - do ipoint = 1, n_points_final_grid -! if(dabs(int2_u_grad1u_x_env2_test(j,i,ipoint,1)) & -! + dabs(int2_u_grad1u_x_env2_test(j,i,ipoint,2)) & -! + dabs(int2_u_grad1u_x_env2_test(j,i,ipoint,3)) ) -! if(dabs(int2_u2_env2_test(j,i,ipoint)).gt.thr)then -! icount += 1 -! endif - if(dabs(v_ij_u_cst_mu_env_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then - icount_full += 1 - endif - if(dabs(v_ij_u_cst_mu_env_test(j,i,ipoint)).gt.thr)then - icount += 1 - if(dabs(v_ij_u_cst_mu_env_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then - icount_good += 1 - else - print*,j,i,ipoint - print*,dabs(v_ij_u_cst_mu_env_test(j,i,ipoint)), dabs(v_ij_u_cst_mu_env_ng_1_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_env_ng_1_test(j,i,ipoint))/dabs(v_ij_u_cst_mu_env_test(j,i,ipoint)) - icount_bad += 1 - endif - endif -! if(dabs(v_ij_u_cst_mu_env_ng_1_test(j,i,ipoint)).gt.thr)then -! endif - enddo - print*,'' - print*,j,i - print*,icount,icount_full, icount_bad!,n_pts_grid_ao_prod(j,i) - print*,dble(icount)/dble(n_points_final_grid),dble(icount_full)/dble(n_points_final_grid) -! dble(n_pts_grid_ao_prod(j,i))/dble(n_points_final_grid) -! if(icount.gt.n_pts_grid_ao_prod(j,i))then -! print*,'pb !!' -! endif - enddo - enddo -end - -subroutine test_int_gauss - implicit none - integer :: i,j - print*,'center' - do i = 1, ao_num - do j = i, ao_num - print*,j,i - print*,ao_prod_sigma(j,i),ao_overlap_abs_grid(j,i) - print*,ao_prod_center(1:3,j,i) - enddo - enddo - print*,'' - double precision :: weight, r(3),integral_1,pi,center(3),f_r,alpha,distance,integral_2 - center = 0.d0 - pi = dacos(-1.d0) - integral_1 = 0.d0 - integral_2 = 0.d0 - alpha = 0.75d0 - do i = 1, n_points_final_grid - ! you get x, y and z of the ith grid point - r(1) = final_grid_points(1,i) - r(2) = final_grid_points(2,i) - r(3) = final_grid_points(3,i) - weight = final_weight_at_r_vector(i) - distance = dsqrt( (r(1) - center(1))**2 + (r(2) - center(2))**2 + (r(3) - center(3))**2 ) - f_r = dexp(-alpha * distance*distance) - ! you add the contribution of the grid point to the integral - integral_1 += f_r * weight - integral_2 += f_r * distance * weight - enddo - print*,'integral_1 =',integral_1 - print*,'(pi/alpha)**1.5 =',(pi / alpha)**1.5 - print*,'integral_2 =',integral_2 - print*,'(pi/alpha)**1.5 =',2.d0*pi / (alpha)**2 - - -end - -! --- - -subroutine test_two_e_tc_non_hermit_integral() - - implicit none - integer :: i, j - double precision :: diff_tot, diff, thr_ih, norm - - thr_ih = 1d-10 - - PROVIDE two_e_tc_non_hermit_integral_beta two_e_tc_non_hermit_integral_alpha - PROVIDE two_e_tc_non_hermit_integral_seq_beta two_e_tc_non_hermit_integral_seq_alpha - - ! --- - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, ao_num - do j = 1, ao_num - - diff = dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i) - two_e_tc_non_hermit_integral_alpha(j,i)) - if(diff .gt. thr_ih) then - print *, ' difference on ', j, i - print *, ' seq : ', two_e_tc_non_hermit_integral_seq_alpha(j,i) - print *, ' // : ', two_e_tc_non_hermit_integral_alpha (j,i) - !stop - endif - - norm += dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i)) - diff_tot += diff - enddo - enddo - - print *, ' diff tot a = ', diff_tot / norm - print *, ' norm a = ', norm - print *, ' ' - - ! --- - - norm = 0.d0 - diff_tot = 0.d0 - do i = 1, ao_num - do j = 1, ao_num - - diff = dabs(two_e_tc_non_hermit_integral_seq_beta(j,i) - two_e_tc_non_hermit_integral_beta(j,i)) - if(diff .gt. thr_ih) then - print *, ' difference on ', j, i - print *, ' seq : ', two_e_tc_non_hermit_integral_seq_beta(j,i) - print *, ' // : ', two_e_tc_non_hermit_integral_beta (j,i) - !stop - endif - - norm += dabs(two_e_tc_non_hermit_integral_seq_beta(j,i)) - diff_tot += diff - enddo - enddo - - print *, ' diff tot b = ', diff_tot / norm - print *, ' norm b = ', norm - print *, ' ' - - ! --- - - return - -end - -! --- - -subroutine test_int2_grad1_u12_ao_test - implicit none - integer :: i,j,ipoint,m,k,l - double precision :: weight,accu_relat, accu_abs, contrib - double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) - allocate(array(ao_num, ao_num, ao_num, ao_num)) - array = 0.d0 - allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) - array_ref = 0.d0 - do m = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - array(j,i,l,k) += int2_grad1_u12_ao_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += int2_grad1_u12_ao(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight - enddo - enddo - enddo - enddo - enddo - enddo - - accu_relat = 0.d0 - accu_abs = 0.d0 - do k = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do j = 1, ao_num - contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) - accu_abs += contrib - if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then - accu_relat += contrib/dabs(array_ref(j,i,l,k)) - endif - enddo - enddo - enddo - enddo - print*,'******' - print*,'******' - print*,'test_int2_grad1_u12_ao_test' - print*,'accu_abs = ',accu_abs/dble(ao_num)**4 - print*,'accu_relat = ',accu_relat/dble(ao_num)**4 -end - -! --- - -subroutine test_fock_3e_uhf_mo_cs() - - implicit none - integer :: i, j - double precision :: I_old, I_new - double precision :: diff_tot, diff, thr_ih, norm - -! double precision :: t0, t1 -! print*, ' Providing fock_a_tot_3e_bi_orth ...' -! call wall_time(t0) -! PROVIDE fock_a_tot_3e_bi_orth -! call wall_time(t1) -! print*, ' Wall time for fock_a_tot_3e_bi_orth =', t1 - t0 - - PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_cs_old - - thr_ih = 1d-8 - norm = 0.d0 - diff_tot = 0.d0 - - do i = 1, mo_num - do j = 1, mo_num - - I_old = fock_3e_uhf_mo_cs_old(j,i) - I_new = fock_3e_uhf_mo_cs (j,i) - - diff = dabs(I_old - I_new) - if(diff .gt. thr_ih) then - print *, ' problem in fock_3e_uhf_mo_cs on ', j, i - print *, ' old value = ', I_old - print *, ' new value = ', I_new - !stop - endif - - norm += dabs(I_old) - diff_tot += diff - enddo - enddo - - print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm - - return -end - -! --- - -subroutine test_fock_3e_uhf_mo_a() - - implicit none - integer :: i, j - double precision :: I_old, I_new - double precision :: diff_tot, diff, thr_ih, norm - - PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_a_old - - thr_ih = 1d-8 - norm = 0.d0 - diff_tot = 0.d0 - - do i = 1, mo_num - do j = 1, mo_num - - I_old = fock_3e_uhf_mo_a_old(j,i) - I_new = fock_3e_uhf_mo_a (j,i) - - diff = dabs(I_old - I_new) - if(diff .gt. thr_ih) then - print *, ' problem in fock_3e_uhf_mo_a on ', j, i - print *, ' old value = ', I_old - print *, ' new value = ', I_new - !stop - endif - - norm += dabs(I_old) - diff_tot += diff - enddo - enddo - - print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm - - return -end - -! --- - -subroutine test_fock_3e_uhf_mo_b() - - implicit none - integer :: i, j - double precision :: I_old, I_new - double precision :: diff_tot, diff, thr_ih, norm - - PROVIDE fock_3e_uhf_mo_b fock_3e_uhf_mo_b_old - - thr_ih = 1d-8 - norm = 0.d0 - diff_tot = 0.d0 - - do i = 1, mo_num - do j = 1, mo_num - - I_old = fock_3e_uhf_mo_b_old(j,i) - I_new = fock_3e_uhf_mo_b (j,i) - - diff = dabs(I_old - I_new) - if(diff .gt. thr_ih) then - print *, ' problem in fock_3e_uhf_mo_b on ', j, i - print *, ' old value = ', I_old - print *, ' new value = ', I_new - !stop - endif - - norm += dabs(I_old) - diff_tot += diff - enddo - enddo - - print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm - - return -end - -! --- - diff --git a/plugins/local/tc_scf/three_e_energy_bi_ortho.irp.f b/plugins/local/tc_scf/three_e_energy_bi_ortho.irp.f deleted file mode 100644 index 0c9ebbd79..000000000 --- a/plugins/local/tc_scf/three_e_energy_bi_ortho.irp.f +++ /dev/null @@ -1,189 +0,0 @@ - -subroutine contrib_3e_diag_sss(i, j, k, integral) - - BEGIN_DOC - ! returns the pure same spin contribution to diagonal matrix element of 3e term - END_DOC - - implicit none - integer, intent(in) :: i, j, k - double precision, intent(out) :: integral - double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int - - call give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int )!!! < i k j | i k j > - call give_integrals_3_body_bi_ort(i, k, j, j, i, k, c_3_int) ! < i k j | j i k > - call give_integrals_3_body_bi_ort(i, k, j, k, j, i, c_minus_3_int)! < i k j | k j i > - integral = direct_int + c_3_int + c_minus_3_int - - ! negative terms :: exchange contrib - call give_integrals_3_body_bi_ort(i, k, j, j, k, i, exch_13_int)!!! < i k j | j k i > : E_13 - call give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)!!! < i k j | i j k > : E_23 - call give_integrals_3_body_bi_ort(i, k, j, k, i, j, exch_12_int)!!! < i k j | k i j > : E_12 - - integral += - exch_13_int - exch_23_int - exch_12_int - integral = -integral - -end - -! --- - -subroutine contrib_3e_diag_soo(i,j,k,integral) - implicit none - integer, intent(in) :: i,j,k - BEGIN_DOC - ! returns the pure same spin contribution to diagonal matrix element of 3e term - END_DOC - double precision, intent(out) :: integral - double precision :: direct_int, exch_23_int - call give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int) ! < i k j | i k j > - call give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)! < i k j | i j k > : E_23 - integral = direct_int - exch_23_int - integral = -integral -end - - -subroutine give_aaa_contrib_bis(integral_aaa) - implicit none - double precision, intent(out) :: integral_aaa - double precision :: integral - integer :: i,j,k - integral_aaa = 0.d0 - do i = 1, elec_alpha_num - do j = i+1, elec_alpha_num - do k = j+1, elec_alpha_num - call contrib_3e_diag_sss(i,j,k,integral) - integral_aaa += integral - enddo - enddo - enddo - -end - -! --- - -subroutine give_aaa_contrib(integral_aaa) - - implicit none - integer :: i, j, k - double precision :: integral - double precision, intent(out) :: integral_aaa - - integral_aaa = 0.d0 - do i = 1, elec_alpha_num - do j = 1, elec_alpha_num - do k = 1, elec_alpha_num - call contrib_3e_diag_sss(i, j, k, integral) - integral_aaa += integral - enddo - enddo - enddo - integral_aaa *= 1.d0/6.d0 - - return -end - -! --- - -subroutine give_aab_contrib(integral_aab) - implicit none - double precision, intent(out) :: integral_aab - double precision :: integral - integer :: i,j,k - integral_aab = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_alpha_num - do k = 1, elec_alpha_num - call contrib_3e_diag_soo(i,j,k,integral) - integral_aab += integral - enddo - enddo - enddo - integral_aab *= 0.5d0 -end - - -subroutine give_aab_contrib_bis(integral_aab) - implicit none - double precision, intent(out) :: integral_aab - double precision :: integral - integer :: i,j,k - integral_aab = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_alpha_num - do k = j+1, elec_alpha_num - call contrib_3e_diag_soo(i,j,k,integral) - integral_aab += integral - enddo - enddo - enddo -end - - -subroutine give_abb_contrib(integral_abb) - implicit none - double precision, intent(out) :: integral_abb - double precision :: integral - integer :: i,j,k - integral_abb = 0.d0 - do i = 1, elec_alpha_num - do j = 1, elec_beta_num - do k = 1, elec_beta_num - call contrib_3e_diag_soo(i,j,k,integral) - integral_abb += integral - enddo - enddo - enddo - integral_abb *= 0.5d0 -end - -subroutine give_abb_contrib_bis(integral_abb) - implicit none - double precision, intent(out) :: integral_abb - double precision :: integral - integer :: i,j,k - integral_abb = 0.d0 - do i = 1, elec_alpha_num - do j = 1, elec_beta_num - do k = j+1, elec_beta_num - call contrib_3e_diag_soo(i,j,k,integral) - integral_abb += integral - enddo - enddo - enddo -end - -subroutine give_bbb_contrib_bis(integral_bbb) - implicit none - double precision, intent(out) :: integral_bbb - double precision :: integral - integer :: i,j,k - integral_bbb = 0.d0 - do i = 1, elec_beta_num - do j = i+1, elec_beta_num - do k = j+1, elec_beta_num - call contrib_3e_diag_sss(i,j,k,integral) - integral_bbb += integral - enddo - enddo - enddo - -end - -subroutine give_bbb_contrib(integral_bbb) - implicit none - double precision, intent(out) :: integral_bbb - double precision :: integral - integer :: i,j,k - integral_bbb = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do k = 1, elec_beta_num - call contrib_3e_diag_sss(i,j,k,integral) - integral_bbb += integral - enddo - enddo - enddo - integral_bbb *= 1.d0/6.d0 -end - - diff --git a/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f b/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f new file mode 100644 index 000000000..ec5167d10 --- /dev/null +++ b/plugins/local/tc_scf/write_ao_2e_tc_integ.irp.f @@ -0,0 +1,56 @@ +! --- + +program write_ao_2e_tc_integ + + implicit none + + print *, ' j2e_type = ', j2e_type + print *, ' j1e_type = ', j1e_type + print *, ' env_type = ', env_type + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + if(tc_integ_type .eq. "numeric") then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + endif + + call main() + +end + +! --- + +subroutine main() + + implicit none + + PROVIDE io_tc_integ + + print*, 'io_tc_integ = ', io_tc_integ + + if(io_tc_integ .ne. "Write") then + print*, 'io_tc_integ != Write' + print*, io_tc_integ + stop + endif + + PROVIDE ao_two_e_tc_tot + +end + +! --- + diff --git a/plugins/local/tuto_plugins/tuto_I/test_cholesky.irp.f b/plugins/local/tuto_plugins/tuto_I/test_cholesky.irp.f new file mode 100644 index 000000000..d09d100a7 --- /dev/null +++ b/plugins/local/tuto_plugins/tuto_I/test_cholesky.irp.f @@ -0,0 +1,53 @@ +program my_program_to_print_stuffs + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + integer :: i,j,k,l,m + double precision :: integral, accu, accu_tot, integral_cholesky + double precision :: get_ao_two_e_integral, get_two_e_integral ! declaration of the functions + print*,'AO integrals, physicist notations : ' + accu_tot = 0.D0 + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + integral = get_ao_two_e_integral(i, j, k, l, ao_integrals_map) + integral_cholesky = 0.D0 + do m = 1, cholesky_ao_num + integral_cholesky += cholesky_ao_transp(m,i,k) * cholesky_ao_transp(m,j,l) + enddo + accu = dabs(integral_cholesky-integral) + accu_tot += accu + if(accu.gt.1.d-10)then + print*,i,j,k,l + print*,accu, integral, integral_cholesky + endif + enddo + enddo + enddo + enddo + print*,'accu_tot',accu_tot + + print*,'MO integrals, physicist notations : ' + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + integral = get_two_e_integral(i, j, k, l, mo_integrals_map) + accu = 0.D0 + integral_cholesky = 0.D0 + do m = 1, cholesky_mo_num + integral_cholesky += cholesky_mo_transp(m,i,k) * cholesky_mo_transp(m,j,l) + enddo + accu = dabs(integral_cholesky-integral) + accu_tot += accu + if(accu.gt.1.d-10)then + print*,i,j,k,l + print*,accu, integral, integral_cholesky + endif + enddo + enddo + enddo + enddo +end diff --git a/scripts/get_fci_tc_conv.sh b/scripts/get_fci_tc_conv.sh index 643f3ac06..f0c99baf3 100755 --- a/scripts/get_fci_tc_conv.sh +++ b/scripts/get_fci_tc_conv.sh @@ -1,2 +1,2 @@ file=$1 -grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" $file | cut -d "=" -f 2 > ${file}.conv_fci_tc +grep "Ndet,E,E+PT2,pt2_minus,pt2_plus,pt2_abs=" $file | cut -d "=" -f 2 > ${file}.conv_fci_tc diff --git a/src/ao_basis/aos_in_r.irp.f b/src/ao_basis/aos_in_r.irp.f index 1b1595a3d..053c86a21 100644 --- a/src/ao_basis/aos_in_r.irp.f +++ b/src/ao_basis/aos_in_r.irp.f @@ -1,67 +1,76 @@ -double precision function ao_value(i,r) - implicit none - BEGIN_DOC -! Returns the value of the i-th ao at point $\textbf{r}$ - END_DOC - double precision, intent(in) :: r(3) - integer, intent(in) :: i - - integer :: m,num_ao - double precision :: center_ao(3) - double precision :: beta - integer :: power_ao(3) - double precision :: accu,dx,dy,dz,r2 - num_ao = ao_nucl(i) - power_ao(1:3)= ao_power(i,1:3) - center_ao(1:3) = nucl_coord(num_ao,1:3) - dx = (r(1) - center_ao(1)) - dy = (r(2) - center_ao(2)) - dz = (r(3) - center_ao(3)) - r2 = dx*dx + dy*dy + dz*dz - dx = dx**power_ao(1) - dy = dy**power_ao(2) - dz = dz**power_ao(3) - - accu = 0.d0 - do m=1,ao_prim_num(i) - beta = ao_expo_ordered_transp(m,i) - accu += ao_coef_normalized_ordered_transp(m,i) * dexp(-beta*r2) - enddo - ao_value = accu * dx * dy * dz + +! --- + +double precision function ao_value(i, r) + + BEGIN_DOC + ! Returns the value of the i-th ao at point $\textbf{r}$ + END_DOC + + implicit none + integer, intent(in) :: i + double precision, intent(in) :: r(3) + + integer :: m, num_ao + integer :: power_ao(3) + double precision :: center_ao(3) + double precision :: beta + double precision :: accu, dx, dy, dz, r2 + + num_ao = ao_nucl(i) + power_ao(1:3) = ao_power(i,1:3) + center_ao(1:3) = nucl_coord(num_ao,1:3) + dx = r(1) - center_ao(1) + dy = r(2) - center_ao(2) + dz = r(3) - center_ao(3) + r2 = dx*dx + dy*dy + dz*dz + dx = dx**power_ao(1) + dy = dy**power_ao(2) + dz = dz**power_ao(3) + + accu = 0.d0 + do m = 1, ao_prim_num(i) + beta = ao_expo_ordered_transp(m,i) + accu += ao_coef_normalized_ordered_transp(m,i) * dexp(-beta*r2) + enddo + ao_value = accu * dx * dy * dz end -double precision function primitive_value(i,j,r) - implicit none - BEGIN_DOC -! Returns the value of the j-th primitive of the i-th |AO| at point $\textbf{r} -! **without the coefficient** - END_DOC - double precision, intent(in) :: r(3) - integer, intent(in) :: i,j - - integer :: m,num_ao - double precision :: center_ao(3) - double precision :: beta - integer :: power_ao(3) - double precision :: accu,dx,dy,dz,r2 - num_ao = ao_nucl(i) - power_ao(1:3)= ao_power(i,1:3) - center_ao(1:3) = nucl_coord(num_ao,1:3) - dx = (r(1) - center_ao(1)) - dy = (r(2) - center_ao(2)) - dz = (r(3) - center_ao(3)) - r2 = dx*dx + dy*dy + dz*dz - dx = dx**power_ao(1) - dy = dy**power_ao(2) - dz = dz**power_ao(3) - - accu = 0.d0 - m=j - beta = ao_expo_ordered_transp(m,i) - accu += dexp(-beta*r2) - primitive_value = accu * dx * dy * dz +double precision function primitive_value(i, j, r) + + BEGIN_DOC + ! Returns the value of the j-th primitive of the i-th |AO| at point $\textbf{r} + ! **without the coefficient** + END_DOC + + implicit none + integer, intent(in) :: i, j + double precision, intent(in) :: r(3) + + integer :: m, num_ao + integer :: power_ao(3) + double precision :: center_ao(3) + double precision :: beta + double precision :: accu, dx, dy, dz, r2 + + num_ao = ao_nucl(i) + power_ao(1:3)= ao_power(i,1:3) + center_ao(1:3) = nucl_coord(num_ao,1:3) + dx = r(1) - center_ao(1) + dy = r(2) - center_ao(2) + dz = r(3) - center_ao(3) + r2 = dx*dx + dy*dy + dz*dz + dx = dx**power_ao(1) + dy = dy**power_ao(2) + dz = dz**power_ao(3) + + accu = 0.d0 + m = j + beta = ao_expo_ordered_transp(m,i) + accu += dexp(-beta*r2) + primitive_value = accu * dx * dy * dz end @@ -104,9 +113,9 @@ subroutine give_all_aos_at_r(r, tmp_array) dz2 = dz**p_ao(3) tmp_array(k) = 0.d0 - do l = 1,ao_prim_num(k) + do l = 1, ao_prim_num(k) beta = ao_expo_ordered_transp_per_nucl(l,j,i) - if(dabs(beta*r2).gt.40.d0) cycle + if(beta*r2.gt.50.d0) cycle tmp_array(k) += ao_coef_normalized_ordered_transp_per_nucl(l,j,i) * dexp(-beta*r2) enddo @@ -120,207 +129,232 @@ subroutine give_all_aos_at_r(r, tmp_array) ! --- -subroutine give_all_aos_and_grad_at_r(r,aos_array,aos_grad_array) - implicit none - BEGIN_DOC -! input : r(1) ==> r(1) = x, r(2) = y, r(3) = z -! -! output : -! -! * aos_array(i) = ao(i) evaluated at ro -! * aos_grad_array(1,i) = gradient X of the ao(i) evaluated at $\textbf{r}$ -! - END_DOC - double precision, intent(in) :: r(3) - double precision, intent(out) :: aos_array(ao_num) - double precision, intent(out) :: aos_grad_array(3,ao_num) - - integer :: power_ao(3) - integer :: i,j,k,l,m - double precision :: dx,dy,dz,r2 - double precision :: dx2,dy2,dz2 - double precision :: dx1,dy1,dz1 - double precision :: center_ao(3) - double precision :: beta,accu_1,accu_2,contrib - do i = 1, nucl_num - center_ao(1:3) = nucl_coord(i,1:3) - dx = (r(1) - center_ao(1)) - dy = (r(2) - center_ao(2)) - dz = (r(3) - center_ao(3)) - r2 = dx*dx + dy*dy + dz*dz - do j = 1,Nucl_N_Aos(i) - k = Nucl_Aos_transposed(j,i) ! index of the ao in the ordered format - aos_array(k) = 0.d0 - aos_grad_array(1,k) = 0.d0 - aos_grad_array(2,k) = 0.d0 - aos_grad_array(3,k) = 0.d0 - power_ao(1:3)= ao_power_ordered_transp_per_nucl(1:3,j,i) - dx2 = dx**power_ao(1) - dy2 = dy**power_ao(2) - dz2 = dz**power_ao(3) - if(power_ao(1) .ne. 0)then - dx1 = dble(power_ao(1)) * dx**(power_ao(1)-1) - else - dx1 = 0.d0 - endif - if(power_ao(2) .ne. 0)then - dy1 = dble(power_ao(2)) * dy**(power_ao(2)-1) - else - dy1 = 0.d0 - endif - if(power_ao(3) .ne. 0)then - dz1 = dble(power_ao(3)) * dz**(power_ao(3)-1) - else - dz1 = 0.d0 - endif - accu_1 = 0.d0 - accu_2 = 0.d0 - do l = 1,ao_prim_num(k) - beta = ao_expo_ordered_transp_per_nucl(l,j,i) - contrib = 0.d0 - if(beta*r2.gt.50.d0)cycle - contrib = ao_coef_normalized_ordered_transp_per_nucl(l,j,i) * dexp(-beta*r2) - accu_1 += contrib - accu_2 += contrib * beta - enddo - aos_array(k) = accu_1 * dx2 * dy2 * dz2 - aos_grad_array(1,k) = accu_1 * dx1 * dy2 * dz2- 2.d0 * dx2 * dx * dy2 * dz2 * accu_2 - aos_grad_array(2,k) = accu_1 * dx2 * dy1 * dz2- 2.d0 * dx2 * dy2 * dy * dz2 * accu_2 - aos_grad_array(3,k) = accu_1 * dx2 * dy2 * dz1- 2.d0 * dx2 * dy2 * dz2 * dz * accu_2 +subroutine give_all_aos_and_grad_at_r(r, aos_array, aos_grad_array) + + BEGIN_DOC + ! + ! input : r(1) ==> r(1) = x, r(2) = y, r(3) = z + ! + ! output : + ! + ! * aos_array(i) = ao(i) evaluated at ro + ! * aos_grad_array(1,i) = gradient X of the ao(i) evaluated at $\textbf{r}$ + ! + END_DOC + + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out) :: aos_array(ao_num) + double precision, intent(out) :: aos_grad_array(3,ao_num) + + integer :: power_ao(3) + integer :: i, j, k, l, m + double precision :: dx, dy, dz, r2 + double precision :: dx1, dy1, dz1 + double precision :: dx2, dy2, dz2 + double precision :: center_ao(3) + double precision :: beta, accu_1, accu_2, contrib + + do i = 1, nucl_num + + center_ao(1:3) = nucl_coord(i,1:3) + + dx = r(1) - center_ao(1) + dy = r(2) - center_ao(2) + dz = r(3) - center_ao(3) + r2 = dx*dx + dy*dy + dz*dz + + do j = 1, Nucl_N_Aos(i) + + k = Nucl_Aos_transposed(j,i) ! index of the ao in the ordered format + + aos_array(k) = 0.d0 + aos_grad_array(1,k) = 0.d0 + aos_grad_array(2,k) = 0.d0 + aos_grad_array(3,k) = 0.d0 + + power_ao(1:3) = ao_power_ordered_transp_per_nucl(1:3,j,i) + dx2 = dx**power_ao(1) + dy2 = dy**power_ao(2) + dz2 = dz**power_ao(3) + + dx1 = 0.d0 + if(power_ao(1) .ne. 0) then + dx1 = dble(power_ao(1)) * dx**(power_ao(1)-1) + endif + + dy1 = 0.d0 + if(power_ao(2) .ne. 0) then + dy1 = dble(power_ao(2)) * dy**(power_ao(2)-1) + endif + + dz1 = 0.d0 + if(power_ao(3) .ne. 0) then + dz1 = dble(power_ao(3)) * dz**(power_ao(3)-1) + endif + + accu_1 = 0.d0 + accu_2 = 0.d0 + do l = 1, ao_prim_num(k) + beta = ao_expo_ordered_transp_per_nucl(l,j,i) + if(beta*r2.gt.50.d0) cycle + contrib = ao_coef_normalized_ordered_transp_per_nucl(l,j,i) * dexp(-beta*r2) + accu_1 += contrib + accu_2 += contrib * beta + enddo + + aos_array(k) = accu_1 * dx2 * dy2 * dz2 + aos_grad_array(1,k) = accu_1 * dx1 * dy2 * dz2 - 2.d0 * dx2 * dx * dy2 * dz2 * accu_2 + aos_grad_array(2,k) = accu_1 * dx2 * dy1 * dz2 - 2.d0 * dx2 * dy2 * dy * dz2 * accu_2 + aos_grad_array(3,k) = accu_1 * dx2 * dy2 * dz1 - 2.d0 * dx2 * dy2 * dz2 * dz * accu_2 + enddo enddo - enddo + end +! --- -subroutine give_all_aos_and_grad_and_lapl_at_r(r,aos_array,aos_grad_array,aos_lapl_array) - implicit none - BEGIN_DOC -! input : r(1) ==> r(1) = x, r(2) = y, r(3) = z -! -! output : -! -! * aos_array(i) = ao(i) evaluated at $\textbf{r}$ -! * aos_grad_array(1,i) = $\nabla_x$ of the ao(i) evaluated at $\textbf{r}$ - END_DOC - double precision, intent(in) :: r(3) - double precision, intent(out) :: aos_array(ao_num) - double precision, intent(out) :: aos_grad_array(3,ao_num) - double precision, intent(out) :: aos_lapl_array(3,ao_num) - - integer :: power_ao(3) - integer :: i,j,k,l,m - double precision :: dx,dy,dz,r2 - double precision :: dx2,dy2,dz2 - double precision :: dx1,dy1,dz1 - double precision :: dx3,dy3,dz3 - double precision :: dx4,dy4,dz4 - double precision :: dx5,dy5,dz5 - double precision :: center_ao(3) - double precision :: beta,accu_1,accu_2,accu_3,contrib - do i = 1, nucl_num - center_ao(1:3) = nucl_coord(i,1:3) - dx = (r(1) - center_ao(1)) - dy = (r(2) - center_ao(2)) - dz = (r(3) - center_ao(3)) - r2 = dx*dx + dy*dy + dz*dz - do j = 1,Nucl_N_Aos(i) - k = Nucl_Aos_transposed(j,i) ! index of the ao in the ordered format - aos_array(k) = 0.d0 - aos_grad_array(1,k) = 0.d0 - aos_grad_array(2,k) = 0.d0 - aos_grad_array(3,k) = 0.d0 - - aos_lapl_array(1,k) = 0.d0 - aos_lapl_array(2,k) = 0.d0 - aos_lapl_array(3,k) = 0.d0 - - power_ao(1:3)= ao_power_ordered_transp_per_nucl(1:3,j,i) - dx2 = dx**power_ao(1) - dy2 = dy**power_ao(2) - dz2 = dz**power_ao(3) - if(power_ao(1) .ne. 0)then - dx1 = dble(power_ao(1)) * dx**(power_ao(1)-1) - else - dx1 = 0.d0 - endif - ! For the Laplacian - if(power_ao(1) .ge. 2)then - dx3 = dble(power_ao(1)) * dble((power_ao(1)-1)) * dx**(power_ao(1)-2) - else - dx3 = 0.d0 - endif - if(power_ao(1) .ge. 1)then - dx4 = dble((2 * power_ao(1) + 1)) * dx**(power_ao(1)) - else - dx4 = dble((power_ao(1) + 1)) * dx**(power_ao(1)) - endif - - dx5 = dx**(power_ao(1)+2) - - if(power_ao(2) .ne. 0)then - dy1 = dble(power_ao(2)) * dy**(power_ao(2)-1) - else - dy1 = 0.d0 - endif - ! For the Laplacian - if(power_ao(2) .ge. 2)then - dy3 = dble(power_ao(2)) * dble((power_ao(2)-1)) * dy**(power_ao(2)-2) - else - dy3 = 0.d0 - endif - - if(power_ao(2) .ge. 1)then - dy4 = dble((2 * power_ao(2) + 1)) * dy**(power_ao(2)) - else - dy4 = dble((power_ao(2) + 1)) * dy**(power_ao(2)) - endif - - dy5 = dy**(power_ao(2)+2) - - - if(power_ao(3) .ne. 0)then - dz1 = dble(power_ao(3)) * dz**(power_ao(3)-1) - else - dz1 = 0.d0 - endif - ! For the Laplacian - if(power_ao(3) .ge. 2)then - dz3 = dble(power_ao(3)) * dble((power_ao(3)-1)) * dz**(power_ao(3)-2) - else - dz3 = 0.d0 - endif - - if(power_ao(3) .ge. 1)then - dz4 = dble((2 * power_ao(3) + 1)) * dz**(power_ao(3)) - else - dz4 = dble((power_ao(3) + 1)) * dz**(power_ao(3)) - endif - - dz5 = dz**(power_ao(3)+2) - - - accu_1 = 0.d0 - accu_2 = 0.d0 - accu_3 = 0.d0 - do l = 1,ao_prim_num(k) - beta = ao_expo_ordered_transp_per_nucl(l,j,i) - contrib = ao_coef_normalized_ordered_transp_per_nucl(l,j,i) * dexp(-beta*r2) - accu_1 += contrib - accu_2 += contrib * beta - accu_3 += contrib * beta**2 - enddo - aos_array(k) = accu_1 * dx2 * dy2 * dz2 - - aos_grad_array(1,k) = accu_1 * dx1 * dy2 * dz2- 2.d0 * dx2 * dx * dy2 * dz2 * accu_2 - aos_grad_array(2,k) = accu_1 * dx2 * dy1 * dz2- 2.d0 * dx2 * dy2 * dy * dz2 * accu_2 - aos_grad_array(3,k) = accu_1 * dx2 * dy2 * dz1- 2.d0 * dx2 * dy2 * dz2 * dz * accu_2 - - aos_lapl_array(1,k) = accu_1 * dx3 * dy2 * dz2- 2.d0 * dx4 * dy2 * dz2* accu_2 +4.d0 * dx5 *dy2 * dz2* accu_3 - aos_lapl_array(2,k) = accu_1 * dx2 * dy3 * dz2- 2.d0 * dx2 * dy4 * dz2* accu_2 +4.d0 * dx2 *dy5 * dz2* accu_3 - aos_lapl_array(3,k) = accu_1 * dx2 * dy2 * dz3- 2.d0 * dx2 * dy2 * dz4* accu_2 +4.d0 * dx2 *dy2 * dz5* accu_3 +subroutine give_all_aos_and_grad_and_lapl_at_r(r, aos_array, aos_grad_array, aos_lapl_array) + BEGIN_DOC + ! + ! input : r(1) ==> r(1) = x, r(2) = y, r(3) = z + ! + ! output : + ! + ! * aos_array(i) = ao(i) evaluated at $\textbf{r}$ + ! * aos_grad_array(1,i) = $\nabla_x$ of the ao(i) evaluated at $\textbf{r}$ + ! + END_DOC + + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out) :: aos_array(ao_num) + double precision, intent(out) :: aos_grad_array(3,ao_num) + double precision, intent(out) :: aos_lapl_array(3,ao_num) + + integer :: power_ao(3) + integer :: i, j, k, l, m + double precision :: dx, dy, dz, r2 + double precision :: dx1, dy1, dz1 + double precision :: dx2, dy2, dz2 + double precision :: dx3, dy3, dz3 + double precision :: dx4, dy4, dz4 + double precision :: dx5, dy5, dz5 + double precision :: center_ao(3) + double precision :: beta, accu_1, accu_2, accu_3, contrib + + do i = 1, nucl_num + + center_ao(1:3) = nucl_coord(i,1:3) + + dx = r(1) - center_ao(1) + dy = r(2) - center_ao(2) + dz = r(3) - center_ao(3) + r2 = dx*dx + dy*dy + dz*dz + + do j = 1, Nucl_N_Aos(i) + + k = Nucl_Aos_transposed(j,i) ! index of the ao in the ordered format + + aos_array(k) = 0.d0 + aos_grad_array(1,k) = 0.d0 + aos_grad_array(2,k) = 0.d0 + aos_grad_array(3,k) = 0.d0 + aos_lapl_array(1,k) = 0.d0 + aos_lapl_array(2,k) = 0.d0 + aos_lapl_array(3,k) = 0.d0 + + power_ao(1:3)= ao_power_ordered_transp_per_nucl(1:3,j,i) + dx2 = dx**power_ao(1) + dy2 = dy**power_ao(2) + dz2 = dz**power_ao(3) + + ! --- + + dx1 = 0.d0 + if(power_ao(1) .ne. 0) then + dx1 = dble(power_ao(1)) * dx**(power_ao(1)-1) + endif + + dx3 = 0.d0 + if(power_ao(1) .ge. 2) then + dx3 = dble(power_ao(1)) * dble((power_ao(1)-1)) * dx**(power_ao(1)-2) + endif + + if(power_ao(1) .ge. 1) then + dx4 = dble((2 * power_ao(1) + 1)) * dx**(power_ao(1)) + else + dx4 = dble((power_ao(1) + 1)) * dx**(power_ao(1)) + endif + + dx5 = dx**(power_ao(1)+2) + + ! --- + + dy1 = 0.d0 + if(power_ao(2) .ne. 0) then + dy1 = dble(power_ao(2)) * dy**(power_ao(2)-1) + endif + + dy3 = 0.d0 + if(power_ao(2) .ge. 2) then + dy3 = dble(power_ao(2)) * dble((power_ao(2)-1)) * dy**(power_ao(2)-2) + endif + + if(power_ao(2) .ge. 1) then + dy4 = dble((2 * power_ao(2) + 1)) * dy**(power_ao(2)) + else + dy4 = dble((power_ao(2) + 1)) * dy**(power_ao(2)) + endif + + dy5 = dy**(power_ao(2)+2) + + ! --- + + dz1 = 0.d0 + if(power_ao(3) .ne. 0) then + dz1 = dble(power_ao(3)) * dz**(power_ao(3)-1) + endif + + dz3 = 0.d0 + if(power_ao(3) .ge. 2) then + dz3 = dble(power_ao(3)) * dble((power_ao(3)-1)) * dz**(power_ao(3)-2) + endif + + if(power_ao(3) .ge. 1) then + dz4 = dble((2 * power_ao(3) + 1)) * dz**(power_ao(3)) + else + dz4 = dble((power_ao(3) + 1)) * dz**(power_ao(3)) + endif + + dz5 = dz**(power_ao(3)+2) + + ! --- + + accu_1 = 0.d0 + accu_2 = 0.d0 + accu_3 = 0.d0 + do l = 1,ao_prim_num(k) + beta = ao_expo_ordered_transp_per_nucl(l,j,i) + if(beta*r2.gt.50.d0) cycle + contrib = ao_coef_normalized_ordered_transp_per_nucl(l,j,i) * dexp(-beta*r2) + accu_1 += contrib + accu_2 += contrib * beta + accu_3 += contrib * beta**2 + enddo + + aos_array(k) = accu_1 * dx2 * dy2 * dz2 + aos_grad_array(1,k) = accu_1 * dx1 * dy2 * dz2 - 2.d0 * dx2 * dx * dy2 * dz2 * accu_2 + aos_grad_array(2,k) = accu_1 * dx2 * dy1 * dz2 - 2.d0 * dx2 * dy2 * dy * dz2 * accu_2 + aos_grad_array(3,k) = accu_1 * dx2 * dy2 * dz1 - 2.d0 * dx2 * dy2 * dz2 * dz * accu_2 + aos_lapl_array(1,k) = accu_1 * dx3 * dy2 * dz2 - 2.d0 * dx4 * dy2 * dz2 * accu_2 + 4.d0 * dx5 * dy2 * dz2 * accu_3 + aos_lapl_array(2,k) = accu_1 * dx2 * dy3 * dz2 - 2.d0 * dx2 * dy4 * dz2 * accu_2 + 4.d0 * dx2 * dy5 * dz2 * accu_3 + aos_lapl_array(3,k) = accu_1 * dx2 * dy2 * dz3 - 2.d0 * dx2 * dy2 * dz4 * accu_2 + 4.d0 * dx2 * dy2 * dz5 * accu_3 + enddo enddo - enddo + end +! --- diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index ff932b0c1..a985149e8 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -6,7 +6,7 @@ default: None [io_ao_cholesky] type: Disk_access -doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] +doc: Read/Write |AO| Cholesky integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None @@ -25,16 +25,16 @@ default: 1.e-12 [do_direct_integrals] type: logical -doc: Compute integrals on the fly (very slow, only for debugging) +doc: Compute integrals on the fly (Useful only for Cholesky decomposition) interface: ezfio,provider,ocaml -default: False +default: True ezfio_name: direct [do_ao_cholesky] type: logical doc: Perform Cholesky decomposition of AO integrals interface: ezfio,provider,ocaml -default: False +default: True [io_ao_two_e_integrals_erf] type: Disk_access diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 333040262..319e3c6e5 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -1,3 +1,15 @@ +double precision function get_ao_integ_chol(i,j,k,l) + implicit none + BEGIN_DOC + ! CHOLESKY representation of the integral of the AO basis or (ij|kl) + ! i(r1) j(r1) 1/r12 k(r2) l(r2) + END_DOC + integer, intent(in) :: i,j,k,l + double precision, external :: ddot + get_ao_integ_chol = ddot(cholesky_ao_num, cholesky_ao_transp(1,i,j), 1, cholesky_ao_transp(1,k,l), 1) + +end + BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] implicit none BEGIN_DOC @@ -6,7 +18,7 @@ integer :: i,j,k do j=1,ao_num do i=1,ao_num - do k=1,ao_num + do k=1,cholesky_ao_num cholesky_ao_transp(k,i,j) = cholesky_ao(i,j,k) enddo enddo @@ -16,27 +28,35 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] &BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, 1) ] + use mmap_module implicit none BEGIN_DOC ! Cholesky vectors in AO basis: (ik|a): ! = (ik|jl) = sum_a (ik|a).(a|jl) ! ! Last dimension of cholesky_ao is cholesky_ao_num + ! + ! https://mogp-emulator.readthedocs.io/en/latest/methods/proc/ProcPivotedCholesky.html + ! + ! https://doi.org/10.1016/j.apnum.2011.10.001 : Page 4, Algorithm 1 + ! + ! https://www.diva-portal.org/smash/get/diva2:396223/FULLTEXT01.pdf END_DOC - integer :: rank, ndim - double precision :: tau - double precision, pointer :: L(:,:), L_old(:,:) - + integer*8 :: ndim8 + integer :: rank + double precision :: tau, tau2 + double precision, pointer :: L(:,:) double precision :: s - double precision, parameter :: dscale = 1.d0 - double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer, allocatable :: Lset(:), Dset(:), addr(:,:) + double precision, allocatable :: D(:), Ltmp_p(:,:), Ltmp_q(:,:), D_sorted(:), Delta_col(:), Delta(:,:) + integer, allocatable :: addr1(:), addr2(:) + integer*8, allocatable :: Lset(:), Dset(:) logical, allocatable :: computed(:) - integer :: i,j,k,m,p,q, qj, dj, p2, q2 + integer :: i,j,k,m,p,q, dj, p2, q2, ii, jj + integer*8 :: i8, j8, p8, qj8, rank_max, np8 integer :: N, np, nq double precision :: Dmax, Dmin, Qmax, f @@ -44,19 +64,32 @@ logical, external :: ao_two_e_integral_zero double precision, external :: ao_two_e_integral - integer :: block_size, iblock, ierr + integer :: block_size, iblock - double precision :: mem + double precision :: mem, mem0 double precision, external :: memory_of_double, memory_of_int + double precision, external :: memory_of_double8, memory_of_int8 integer, external :: getUnitAndOpen - integer :: iunit + integer :: iunit, ierr - ndim = ao_num*ao_num + ndim8 = ao_num*ao_num*1_8+1 + double precision :: wall0,wall1 + + type(c_ptr) :: c_pointer(2) + integer :: fd(2) + + PROVIDE nproc ao_cholesky_threshold do_direct_integrals qp_max_mem + PROVIDE nucl_coord ao_two_e_integral_schwartz + call set_multiple_levels_omp(.False.) + + call wall_time(wall0) + + ! Will be reallocated at the end deallocate(cholesky_ao) if (read_ao_cholesky) then - print *, 'Reading Cholesky vectors from disk...' + print *, 'Reading Cholesky AO vectors from disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') read(iunit) rank allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) @@ -66,7 +99,6 @@ else - PROVIDE nucl_coord ao_two_e_integral_schwartz call set_multiple_levels_omp(.False.) if (do_direct_integrals) then @@ -79,66 +111,84 @@ endif tau = ao_cholesky_threshold + tau2 = tau*tau - mem = 6.d0 * memory_of_double(ndim) + 6.d0 * memory_of_int(ndim) - call check_mem(mem, irp_here) + rank = 0 - call print_memory_usage() + allocate( D(ndim8), Lset(ndim8), Dset(ndim8), D_sorted(ndim8)) + allocate( addr1(ndim8), addr2(ndim8), Delta_col(ndim8), computed(ndim8) ) - allocate(L(ndim,1)) + call resident_memory(mem0) + + call print_memory_usage() print *, '' print *, 'Cholesky decomposition of AO integrals' print *, '======================================' print *, '' print *, '============ =============' - print *, ' Rank Threshold' + print *, ' Rank Threshold' print *, '============ =============' - rank = 0 - - allocate( D(ndim), Lset(ndim), Dset(ndim) ) - allocate( addr(3,ndim) ) - ! 1. - k=0 + i8=0 do j=1,ao_num do i=1,ao_num - k = k+1 - addr(1,k) = i - addr(2,k) = j - addr(3,k) = (i-1)*ao_num + j + i8 = i8+1 + addr1(i8) = i + addr2(i8) = j enddo enddo if (do_direct_integrals) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) - do i=1,ndim - D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & - addr(1,i), addr(2,i)) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,21) + do i8=ndim8-1,1,-1 + D(i8) = ao_two_e_integral(addr1(i8), addr2(i8), & + addr1(i8), addr2(i8)) enddo !$OMP END PARALLEL DO else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) - do i=1,ndim - D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & - addr(2,i), addr(2,i), & - ao_integrals_map) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i8) SCHEDULE(dynamic,21) + do i8=ndim8-1,1,-1 + D(i8) = get_ao_two_e_integral(addr1(i8), addr1(i8), & + addr2(i8), addr2(i8), ao_integrals_map) enddo !$OMP END PARALLEL DO endif + ! Just to guarentee termination + D(ndim8) = 0.d0 - Dmax = maxval(D) + D_sorted(:) = -D(:) + call dsort_noidx_big(D_sorted,ndim8) + D_sorted(:) = -D_sorted(:) + Dmax = D_sorted(1) ! 2. - np=0 - do p=1,ndim - if ( dscale*dscale*Dmax*D(p) > tau*tau ) then - np = np+1 - Lset(np) = p + np8=0_8 + do p8=1,ndim8 + if ( Dmax*D(p8) >= tau2 ) then + np8 = np8+1_8 + Lset(np8) = p8 endif enddo + if (np8 > ndim8) stop 'np>ndim8' + np = int(np8,4) + if (np <= 0) stop 'np<=0' + + rank_max = np + ! Avoid too large arrays when there are many electrons + if (elec_num > 10) then + rank_max = min(np,20*elec_num*elec_num) + endif + call mmap(trim(ezfio_work_dir)//'cholesky_ao_tmp', (/ ndim8, rank_max /), 8, fd(1), .False., .True., c_pointer(1)) + call c_f_pointer(c_pointer(1), L, (/ ndim8, rank_max /)) + + ! Deleting the file while it is open makes the file invisible on the filesystem, + ! and automatically deleted, even if the program crashes + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao_tmp', 'R') + close(iunit,status='delete') + ! 3. N = 0 @@ -146,77 +196,68 @@ ! 4. i = 0 + mem = memory_of_double(np) & ! Delta(np,nq) + + (np+1)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + +! call check_mem(mem) + ! 5. - do while ( (Dmax > tau).and.(rank < ndim) ) + do while ( (Dmax > tau).and.(np > 0) ) ! a. i = i+1 + - s = 0.01d0 - ! Inrease s until the arrays fit in memory - do while (.True.) + block_size = max(N,24) - ! b. - Dmin = max(s*Dmax,tau) + ! Determine nq so that Delta fits in memory - ! c. - nq=0 - do p=1,np - if ( D(Lset(p)) > Dmin ) then - nq = nq+1 - Dset(nq) = Lset(p) - endif - enddo + s = 0.1d0 + Dmin = max(s*Dmax,tau) + do nq=2,np-1 + if (D_sorted(nq) < Dmin) exit + enddo + + do while (.True.) - call total_memory(mem) - mem = mem & - + np*memory_of_double(nq) &! Delta(np,nq) - + (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq) - + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + mem = mem0 & + + np*memory_of_double(nq) & ! Delta(np,nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - if (mem > qp_max_mem) then - s = s*2.d0 + if (mem > qp_max_mem*0.5d0) then + Dmin = D_sorted(nq/2) + do ii=nq/2,np-1 + if (D_sorted(ii) < Dmin) then + nq = ii + exit + endif + enddo else exit endif - if ((s > 1.d0).or.(nq == 0)) then - call print_memory_usage() - print *, 'Not enough memory. Reduce cholesky threshold' - stop -1 - endif - enddo +!call print_memory_usage +!print *, 'np, nq, Predicted memory: ', np, nq, mem - ! d., e. - block_size = max(N,24) - - L_old => L - allocate(L(ndim,rank+nq), stat=ierr) - if (ierr /= 0) then - call print_memory_usage() - print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' - stop -1 + if (nq <= 0) then + print *, nq + stop 'bug in cholesky: nq <= 0' endif - !$OMP PARALLEL DO PRIVATE(k,j) - do k=1,rank - do j=1,ndim - L(j,k) = L_old(j,k) - enddo + Dmin = D_sorted(nq) + nq=0 + do p=1,np + if ( D(Lset(p)) >= Dmin ) then + nq = nq+1 + Dset(nq) = Lset(p) + endif enddo - !$OMP END PARALLEL DO - deallocate(L_old) - - allocate(Delta(np,nq), stat=ierr) - if (ierr /= 0) then - call print_memory_usage() - print *, irp_here, ': allocation failed : (Delta(np,nq))' - stop -1 - endif + allocate(Delta(np,nq)) allocate(Ltmp_p(np,block_size), stat=ierr) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' @@ -224,6 +265,7 @@ endif allocate(Ltmp_q(nq,block_size), stat=ierr) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' @@ -231,36 +273,39 @@ endif - allocate(computed(nq)) + computed(1:nq) = .False. - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) - - !$OMP DO - do q=1,nq - do j=1,np - Delta(j,q) = 0.d0 - enddo - computed(q) = .False. - enddo - !$OMP ENDDO NOWAIT - !$OMP DO + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,p,q) do k=1,N + !$OMP DO do p=1,np - Ltmp_p(p,k) = L(Lset(p),k) + Ltmp_p(p,k) = L(Lset(p),k) enddo + !$OMP END DO NOWAIT + + !$OMP DO do q=1,nq Ltmp_q(q,k) = L(Dset(q),k) enddo + !$OMP END DO NOWAIT enddo - !$OMP END DO NOWAIT - !$OMP BARRIER !$OMP END PARALLEL if (N>0) then - call dgemm('N','T', np, nq, N, -1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + + call dgemm('N', 'T', np, nq, N, -1.d0, & + Ltmp_p(1,1), np, Ltmp_q(1,1), nq, 0.d0, Delta, np) + + else + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,j) + do q=1,nq + Delta(:,q) = 0.d0 + enddo + !$OMP END PARALLEL DO + endif ! f. @@ -272,54 +317,85 @@ ! g. iblock = 0 + do j=1,nq - if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit + if ( (Qmax < Dmin).or.(N+j*1_8 > ndim8) ) exit + ! i. rank = N+j + if (rank == rank_max) then + print *, 'cholesky: rank_max reached' + exit + endif if (iblock == block_size) then - call dgemm('N','T',np,nq,block_size,-1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) - iblock = 0 + + call dgemm('N','T',np,nq,block_size,-1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + + iblock = 0 + endif ! ii. do dj=1,nq - qj = Dset(dj) - if (D(qj) == Qmax) then + qj8 = Dset(dj) + if (D(qj8) == Qmax) then exit endif enddo - L(1:ndim, rank) = 0.d0 + do i8=1,ndim8 + L(i8, rank) = 0.d0 + enddo + + iblock = iblock+1 + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + !$OMP END PARALLEL DO if (.not.computed(dj)) then m = dj - !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided) - do k=np,1,-1 - if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),& - addr(2,Lset(k)), addr(2,Dset(m)) ) ) then - if (do_direct_integrals) then - Delta(k,m) = Delta(k,m) + & - ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),& - addr(1,Dset(m)), addr(2,Dset(m))) - else - Delta(k,m) = Delta(k,m) + & - get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),& - addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) - endif - endif + if (do_direct_integrals) then + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,21) + do k=1,np + Delta_col(k) = 0.d0 + if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)) ) ) then + Delta_col(k) = & + ao_two_e_integral(addr1(Lset(k)), addr2(Lset(k)),& + addr1(Dset(m)), addr2(Dset(m))) + endif + enddo + !$OMP END PARALLEL DO + else + PROVIDE ao_integrals_map + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(dynamic,21) + do k=1,np + Delta_col(k) = 0.d0 + if (.not.ao_two_e_integral_zero( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)) ) ) then + Delta_col(k) = & + get_ao_two_e_integral( addr1(Lset(k)), addr1(Dset(m)),& + addr2(Lset(k)), addr2(Dset(m)), ao_integrals_map) + endif + enddo + !$OMP END PARALLEL DO + endif + + !$OMP PARALLEL DO PRIVATE(p) + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) + Delta_col(p) + Delta(p,dj) = Ltmp_p(p,iblock) enddo !$OMP END PARALLEL DO + computed(dj) = .True. endif - iblock = iblock+1 - do p=1,np - Ltmp_p(p,iblock) = Delta(p,dj) - enddo - ! iv. if (iblock > 1) then call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0,& @@ -329,7 +405,7 @@ ! iii. f = 1.d0/dsqrt(Qmax) - !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) + !$OMP PARALLEL PRIVATE(p,q) DEFAULT(shared) !$OMP DO do p=1,np Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f @@ -343,7 +419,6 @@ Ltmp_q(q,iblock) = L(Dset(q), rank) enddo !$OMP END DO - !$OMP END PARALLEL Qmax = D(Dset(1)) @@ -355,49 +430,62 @@ print '(I10, 4X, ES12.3)', rank, Qmax - deallocate(computed) - deallocate(Delta) deallocate(Ltmp_p) deallocate(Ltmp_q) + deallocate(Delta) ! i. N = rank ! j. - Dmax = D(Lset(1)) - do p=1,np - Dmax = max(Dmax, D(Lset(p))) - enddo + D_sorted(:) = -D(:) + call dsort_noidx_big(D_sorted,ndim8) + D_sorted(:) = -D_sorted(:) + + Dmax = D_sorted(1) - np=0 - do p=1,ndim - if ( dscale*dscale*Dmax*D(p) > tau*tau ) then - np = np+1 - Lset(np) = p + np8=0_8 + do p8=1,ndim8 + if ( Dmax*D(p8) >= tau2 ) then + np8 = np8+1_8 + Lset(np8) = p8 endif enddo + np = int(np8,4) enddo + + print *, '============ =============' + print *, '' + + deallocate( D, Lset, Dset, D_sorted ) + deallocate( addr1, addr2, Delta_col, computed ) + + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) + if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': Allocation failed' stop -1 endif - !$OMP PARALLEL DO PRIVATE(k) + + + !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank - call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1) + do j=1,ao_num + cholesky_ao(1:ao_num,j,k) = L((j-1_8)*ao_num+1_8:1_8*j*ao_num,k) + enddo enddo !$OMP END PARALLEL DO - deallocate(L) - cholesky_ao_num = rank - print *, '============ =============' - print *, '' + call munmap( (/ ndim8, rank_max /), 8, fd(1), c_pointer(1) ) + + cholesky_ao_num = rank if (write_ao_cholesky) then - print *, 'Writing Cholesky vectors to disk...' + print *, 'Writing Cholesky AO vectors to disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'W') write(iunit) rank write(iunit) cholesky_ao @@ -409,6 +497,9 @@ print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' print *, '' + call wall_time(wall1) + print*,'Time to provide AO cholesky vectors = ',(wall1-wall0)/60.d0, ' min' + END_PROVIDER diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index b55b5f0d6..d12f3d459 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -460,8 +460,8 @@ subroutine compute_ao_two_e_integrals(j,k,l,sze,buffer_value) !$OMP PARALLEL DO PRIVATE(i,k) & !$OMP DEFAULT(NONE) & !$OMP SHARED (ao_num,ao_two_e_integral_schwartz) & - !$OMP SCHEDULE(guided) - do i=1,ao_num + !$OMP SCHEDULE(dynamic) + do i=ao_num,1,-1 do k=1,i ao_two_e_integral_schwartz(i,k) = dsqrt(ao_two_e_integral(i,i,k,k)) ao_two_e_integral_schwartz(k,i) = ao_two_e_integral_schwartz(i,k) diff --git a/src/becke_numerical_grid/extra_grid_vector.irp.f b/src/becke_numerical_grid/extra_grid_vector.irp.f index ae167282d..e054e22c3 100644 --- a/src/becke_numerical_grid/extra_grid_vector.irp.f +++ b/src/becke_numerical_grid/extra_grid_vector.irp.f @@ -47,8 +47,12 @@ END_DOC implicit none - integer :: i,j,k,l,i_count - double precision :: r(3) + integer :: i, j, k, l, i_count + double precision :: r(3) + double precision :: wall0, wall1 + + call wall_time(wall0) + print *, ' Providing extra_final_grid_points ...' i_count = 0 do j = 1, nucl_num @@ -70,6 +74,10 @@ enddo enddo + call wall_time(wall1) + print *, ' wall time for extra_final_grid_points,', wall1 - wall0 + call print_memory_usage() + END_PROVIDER diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index 473096d02..9da8a0991 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -67,13 +67,6 @@ index_final_points(2,i_count) = i index_final_points(3,i_count) = j index_final_points_reverse(k,i,j) = i_count - - if(final_weight_at_r_vector(i_count) .lt. 0.d0) then - print *, ' !!! WARNING !!!' - print *, ' negative weight !!!!' - print *, i_count, final_weight_at_r_vector(i_count) - stop - endif enddo enddo enddo diff --git a/src/casscf_cipsi/EZFIO.cfg b/src/casscf_cipsi/EZFIO.cfg index 18e0b6b10..5b72d9064 100644 --- a/src/casscf_cipsi/EZFIO.cfg +++ b/src/casscf_cipsi/EZFIO.cfg @@ -79,3 +79,9 @@ type: logical doc: If |true|, the pt2_max value in the CIPSI is set to 10-10 and will not change interface: ezfio,provider,ocaml default: False + +[act_mos_opt] +type: logical +doc: If |true|, the active orbitals are also optimized variationally +interface: ezfio,provider,ocaml +default: False diff --git a/src/casscf_cipsi/NEED b/src/casscf_cipsi/NEED index dd91c7bd8..32f5ae908 100644 --- a/src/casscf_cipsi/NEED +++ b/src/casscf_cipsi/NEED @@ -3,3 +3,4 @@ selectors_full generators_cas two_body_rdm dav_general_mat +mo_optimization_utils diff --git a/src/casscf_cipsi/bielec.irp.f b/src/casscf_cipsi/bielec.irp.f index 0a44f994e..a49019852 100644 --- a/src/casscf_cipsi/bielec.irp.f +++ b/src/casscf_cipsi/bielec.irp.f @@ -1,18 +1,25 @@ -BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] +BEGIN_PROVIDER [real*8, bielec_PQxx_array, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] BEGIN_DOC - ! bielec_PQxx : integral (pq|xx) with p,q arbitrary, x core or active + ! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!! + ! + ! Replaced by the Cholesky-based function bielec_PQxx + ! + ! bielec_PQxx_array : integral (pq|xx) with p,q arbitrary, x core or active ! indices are unshifted orbital numbers END_DOC implicit none integer :: i,j,ii,jj,p,q,i3,j3,t3,v3 real*8 :: mo_two_e_integral + print*,'' + print*,'Providing bielec_PQxx_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!' + print*,'' - bielec_PQxx(:,:,:,:) = 0.d0 + bielec_PQxx_array(:,:,:,:) = 0.d0 PROVIDE mo_two_e_integrals_in_map !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,ii,j,jj,i3,j3) & - !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PQxx, & + !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PQxx_array, & !$OMP n_act_orb,mo_integrals_map,list_act) !$OMP DO @@ -20,14 +27,14 @@ ii=list_core_inact(i) do j=i,n_core_inact_orb jj=list_core_inact(j) - call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j),mo_integrals_map) - bielec_PQxx(:,:,j,i)=bielec_PQxx(:,:,i,j) + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx_array(1,1,i,j),mo_integrals_map) + bielec_PQxx_array(:,:,j,i)=bielec_PQxx_array(:,:,i,j) end do do j=1,n_act_orb jj=list_act(j) j3=j+n_core_inact_orb - call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j3),mo_integrals_map) - bielec_PQxx(:,:,j3,i)=bielec_PQxx(:,:,i,j3) + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx_array(1,1,i,j3),mo_integrals_map) + bielec_PQxx_array(:,:,j3,i)=bielec_PQxx_array(:,:,i,j3) end do end do !$OMP END DO @@ -40,8 +47,8 @@ do j=i,n_act_orb jj=list_act(j) j3=j+n_core_inact_orb - call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i3,j3),mo_integrals_map) - bielec_PQxx(:,:,j3,i3)=bielec_PQxx(:,:,i3,j3) + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx_array(1,1,i3,j3),mo_integrals_map) + bielec_PQxx_array(:,:,j3,i3)=bielec_PQxx_array(:,:,i3,j3) end do end do !$OMP END DO @@ -52,9 +59,13 @@ -BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] +BEGIN_PROVIDER [real*8, bielec_PxxQ_array, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] BEGIN_DOC - ! bielec_PxxQ : integral (px|xq) with p,q arbitrary, x core or active + ! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!! + ! + ! Replaced by the Cholesky-based function bielec_PxxQ + ! + ! bielec_PxxQ_array : integral (px|xq) with p,q arbitrary, x core or active ! indices are unshifted orbital numbers END_DOC implicit none @@ -62,12 +73,15 @@ double precision, allocatable :: integrals_array(:,:) real*8 :: mo_two_e_integral + print*,'' + print*,'Providing bielec_PxxQ_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!' + print*,'' PROVIDE mo_two_e_integrals_in_map - bielec_PxxQ = 0.d0 + bielec_PxxQ_array = 0.d0 !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,ii,j,jj,i3,j3,integrals_array) & - !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PxxQ, & + !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PxxQ_array, & !$OMP n_act_orb,mo_integrals_map,list_act) allocate(integrals_array(mo_num,mo_num)) @@ -80,8 +94,8 @@ call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) do q=1,mo_num do p=1,mo_num - bielec_PxxQ(p,i,j,q)=integrals_array(p,q) - bielec_PxxQ(p,j,i,q)=integrals_array(q,p) + bielec_PxxQ_array(p,i,j,q)=integrals_array(p,q) + bielec_PxxQ_array(p,j,i,q)=integrals_array(q,p) end do end do end do @@ -91,8 +105,8 @@ call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) do q=1,mo_num do p=1,mo_num - bielec_PxxQ(p,i,j3,q)=integrals_array(p,q) - bielec_PxxQ(p,j3,i,q)=integrals_array(q,p) + bielec_PxxQ_array(p,i,j3,q)=integrals_array(p,q) + bielec_PxxQ_array(p,j3,i,q)=integrals_array(q,p) end do end do end do @@ -111,8 +125,8 @@ call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) do q=1,mo_num do p=1,mo_num - bielec_PxxQ(p,i3,j3,q)=integrals_array(p,q) - bielec_PxxQ(p,j3,i3,q)=integrals_array(q,p) + bielec_PxxQ_array(p,i3,j3,q)=integrals_array(p,q) + bielec_PxxQ_array(p,j3,i3,q)=integrals_array(q,p) end do end do end do @@ -129,10 +143,15 @@ BEGIN_DOC ! bielecCI : integrals (tu|vp) with p arbitrary, tuv active ! index p runs over the whole basis, t,u,v only over the active orbitals + ! + ! This array can be stored anyway. Ex: 50 active orbitals, 1500 MOs ==> 8x50^3x1500 = 1.5 Gb END_DOC implicit none integer :: i,j,k,p,t,u,v double precision, external :: mo_two_e_integral + double precision :: wall0, wall1 + call wall_time(wall0) + print*,'Providing bielecCI' PROVIDE mo_two_e_integrals_in_map !$OMP PARALLEL DO DEFAULT(NONE) & @@ -151,5 +170,7 @@ end do end do !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide bielecCI = ',wall1 - wall0 END_PROVIDER diff --git a/src/casscf_cipsi/bielec_natorb.irp.f b/src/casscf_cipsi/bielec_natorb.irp.f index 9968530c5..99734a0b7 100644 --- a/src/casscf_cipsi/bielec_natorb.irp.f +++ b/src/casscf_cipsi/bielec_natorb.irp.f @@ -1,30 +1,38 @@ - BEGIN_PROVIDER [real*8, bielec_PQxx_no, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] + BEGIN_PROVIDER [real*8, bielec_PQxx_no_array, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] BEGIN_DOC + ! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!! + ! + ! Replaced by the Cholesky-based function bielec_PQxx_no + ! ! integral (pq|xx) in the basis of natural MOs ! indices are unshifted orbital numbers + ! END_DOC implicit none integer :: i,j,k,l,t,u,p,q double precision, allocatable :: f(:,:,:), d(:,:,:) + print*,'' + print*,'Providing bielec_PQxx_no_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!' + print*,'' !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(j,k,l,p,d,f) & !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & - !$OMP bielec_PQxx_no,bielec_PQxx,list_act,natorbsCI) + !$OMP bielec_PQxx_no_array,bielec_PQxx_array,list_act,natorbsCI) allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), & d(n_act_orb,mo_num,n_core_inact_act_orb)) !$OMP DO do l=1,n_core_inact_act_orb - bielec_PQxx_no(:,:,:,l) = bielec_PQxx(:,:,:,l) + bielec_PQxx_no_array(:,:,:,l) = bielec_PQxx_array(:,:,:,l) do k=1,n_core_inact_act_orb do j=1,mo_num do p=1,n_act_orb - f(p,j,k)=bielec_PQxx_no(list_act(p),j,k,l) + f(p,j,k)=bielec_PQxx_no_array(list_act(p),j,k,l) end do end do end do @@ -36,13 +44,13 @@ do k=1,n_core_inact_act_orb do j=1,mo_num do p=1,n_act_orb - bielec_PQxx_no(list_act(p),j,k,l)=d(p,j,k) + bielec_PQxx_no_array(list_act(p),j,k,l)=d(p,j,k) end do end do do j=1,mo_num do p=1,n_act_orb - f(p,j,k)=bielec_PQxx_no(j,list_act(p),k,l) + f(p,j,k)=bielec_PQxx_no_array(j,list_act(p),k,l) end do end do end do @@ -54,7 +62,7 @@ do k=1,n_core_inact_act_orb do p=1,n_act_orb do j=1,mo_num - bielec_PQxx_no(j,list_act(p),k,l)=d(p,j,k) + bielec_PQxx_no_array(j,list_act(p),k,l)=d(p,j,k) end do end do end do @@ -71,7 +79,7 @@ do p=1,n_act_orb do k=1,mo_num do j=1,mo_num - f(j,k,p) = bielec_PQxx_no(j,k,n_core_inact_orb+p,l) + f(j,k,p) = bielec_PQxx_no_array(j,k,n_core_inact_orb+p,l) end do end do end do @@ -83,7 +91,7 @@ do p=1,n_act_orb do k=1,mo_num do j=1,mo_num - bielec_PQxx_no(j,k,n_core_inact_orb+p,l)=d(j,k,p) + bielec_PQxx_no_array(j,k,n_core_inact_orb+p,l)=d(j,k,p) end do end do end do @@ -97,7 +105,7 @@ do p=1,n_act_orb do k=1,mo_num do j=1,mo_num - f(j,k,p) = bielec_PQxx_no(j,k,l,n_core_inact_orb+p) + f(j,k,p) = bielec_PQxx_no_array(j,k,l,n_core_inact_orb+p) end do end do end do @@ -109,7 +117,7 @@ do p=1,n_act_orb do k=1,mo_num do j=1,mo_num - bielec_PQxx_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) + bielec_PQxx_no_array(j,k,l,n_core_inact_orb+p)=d(j,k,p) end do end do end do @@ -123,8 +131,12 @@ -BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] +BEGIN_PROVIDER [real*8, bielec_PxxQ_no_array, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] BEGIN_DOC + ! WARNING !!! Old version !!! NOT USED ANYMORE IN THE PROGRAM !!! TOO BIG TO BE STORED ON LARGE SYSTEMS !!! + ! + ! Replaced by the Cholesky-based function bielec_PxxQ_no + ! ! integral (px|xq) in the basis of natural MOs ! indices are unshifted orbital numbers END_DOC @@ -132,10 +144,14 @@ integer :: i,j,k,l,t,u,p,q double precision, allocatable :: f(:,:,:), d(:,:,:) + print*,'' + print*,'Providing bielec_PxxQ_no_array, WARNING IT CAN BE A VERY BIG ARRAY WHEN MO_NUM IS LARGE !!!' + print*,'' + !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(j,k,l,p,d,f) & !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & - !$OMP bielec_PxxQ_no,bielec_PxxQ,list_act,natorbsCI) + !$OMP bielec_PxxQ_no_array,bielec_PxxQ_array,list_act,natorbsCI) allocate (f(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb), & @@ -143,11 +159,11 @@ !$OMP DO do j=1,mo_num - bielec_PxxQ_no(:,:,:,j) = bielec_PxxQ(:,:,:,j) + bielec_PxxQ_no_array(:,:,:,j) = bielec_PxxQ_array(:,:,:,j) do l=1,n_core_inact_act_orb do k=1,n_core_inact_act_orb do p=1,n_act_orb - f(p,k,l) = bielec_PxxQ_no(list_act(p),k,l,j) + f(p,k,l) = bielec_PxxQ_no_array(list_act(p),k,l,j) end do end do end do @@ -159,7 +175,7 @@ do l=1,n_core_inact_act_orb do k=1,n_core_inact_act_orb do p=1,n_act_orb - bielec_PxxQ_no(list_act(p),k,l,j)=d(p,k,l) + bielec_PxxQ_no_array(list_act(p),k,l,j)=d(p,k,l) end do end do end do @@ -176,7 +192,7 @@ do l=1,n_core_inact_act_orb do j=1,mo_num do p=1,n_act_orb - f(p,j,l) = bielec_PxxQ_no(j,n_core_inact_orb+p,l,k) + f(p,j,l) = bielec_PxxQ_no_array(j,n_core_inact_orb+p,l,k) end do end do end do @@ -188,7 +204,7 @@ do l=1,n_core_inact_act_orb do j=1,mo_num do p=1,n_act_orb - bielec_PxxQ_no(j,n_core_inact_orb+p,l,k)=d(p,j,l) + bielec_PxxQ_no_array(j,n_core_inact_orb+p,l,k)=d(p,j,l) end do end do end do @@ -205,7 +221,7 @@ do p=1,n_act_orb do l=1,n_core_inact_act_orb do j=1,mo_num - f(j,l,p) = bielec_PxxQ_no(j,l,n_core_inact_orb+p,k) + f(j,l,p) = bielec_PxxQ_no_array(j,l,n_core_inact_orb+p,k) end do end do end do @@ -217,7 +233,7 @@ do p=1,n_act_orb do l=1,n_core_inact_act_orb do j=1,mo_num - bielec_PxxQ_no(j,l,n_core_inact_orb+p,k)=d(j,l,p) + bielec_PxxQ_no_array(j,l,n_core_inact_orb+p,k)=d(j,l,p) end do end do end do @@ -231,7 +247,7 @@ do p=1,n_act_orb do k=1,n_core_inact_act_orb do j=1,mo_num - f(j,k,p) = bielec_PxxQ_no(j,k,l,n_core_inact_orb+p) + f(j,k,p) = bielec_PxxQ_no_array(j,k,l,n_core_inact_orb+p) end do end do end do @@ -243,7 +259,7 @@ do p=1,n_act_orb do k=1,n_core_inact_act_orb do j=1,mo_num - bielec_PxxQ_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) + bielec_PxxQ_no_array(j,k,l,n_core_inact_orb+p)=d(j,k,p) end do end do end do @@ -259,10 +275,16 @@ BEGIN_DOC ! integrals (tu|vp) in the basis of natural MOs ! index p runs over the whole basis, t,u,v only over the active orbitals + ! + ! This array can be stored anyway. Ex: 50 active orbitals, 1500 MOs ==> 8x50^3x1500 = 1.5 Gb END_DOC implicit none integer :: i,j,k,l,t,u,p,q double precision, allocatable :: f(:,:,:), d(:,:,:) + + double precision :: wall0, wall1 + call wall_time(wall0) + print*,'Providing bielecCI_no' !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(j,k,l,p,d,f) & @@ -363,6 +385,8 @@ deallocate(d,f) !$OMP END PARALLEL + call wall_time(wall1) + print*,'Time to provide bielecCI_no = ',wall1-wall0 END_PROVIDER diff --git a/src/casscf_cipsi/casscf.irp.f b/src/casscf_cipsi/casscf.irp.f index d0a26d36a..b64a9d8f4 100644 --- a/src/casscf_cipsi/casscf.irp.f +++ b/src/casscf_cipsi/casscf.irp.f @@ -11,7 +11,7 @@ program casscf if(small_active_space)then pt2_relative_error = 0.00001 else - thresh_scf = 1.d-4 + thresh_scf = max(1.d-4,thresh_scf) pt2_relative_error = 0.04 endif touch pt2_relative_error @@ -46,94 +46,101 @@ subroutine run do while (.not.converged) print*,'pt2_max = ',pt2_max call run_stochastic_cipsi(Ev,PT2) - print*,'Ev,PT2',Ev(1),PT2(1) - E_PT2(1:N_states) = Ev(1:N_states) + PT2(1:N_states) - energy_old = energy - energy = eone+etwo+ecore - pt2_max_before = pt2_max - - call write_time(6) - call write_int(6,iteration,'CAS-SCF iteration = ') - call write_double(6,energy,'State-average CAS-SCF energy = ') -! if(n_states == 1)then -! call ezfio_get_casscf_cipsi_energy_pt2(E_PT2) -! call ezfio_get_casscf_cipsi_energy(PT2) - double precision :: delta_E_istate, e_av - e_av = 0.d0 - do istate=1,N_states - e_av += state_average_weight(istate) * Ev(istate) - if(istate.gt.1)then - delta_E_istate = E_PT2(istate) - E_PT2(1) - write(*,'(A6,I2,A18,F16.10)')'state ',istate,' Delta E+PT2 = ',delta_E_istate - endif - write(*,'(A6,I2,A18,F16.10)')'state ',istate,' E + PT2 energy = ',E_PT2(istate) - write(*,'(A6,I2,A18,F16.10)')'state ',istate,' PT2 energy = ',PT2(istate) -! call write_double(6,E_PT2(istate),'E + PT2 energy = ') -! call write_double(6,PT2(istate),' PT2 = ') - enddo - call write_double(6,e_av,'State-average CAS-SCF energy bis = ') - call write_double(6,pt2_max,' PT2_MAX = ') +! if(act_mos_opt)then DOES NOT WORK +! call run_orb_opt_trust_v2 +! call run_stochastic_cipsi(Ev,PT2) ! endif - - print*,'' - call write_double(6,norm_grad_vec2,'Norm of gradients = ') - call write_double(6,norm_grad_vec2_tab(1), ' Core-active gradients = ') - call write_double(6,norm_grad_vec2_tab(2), ' Core-virtual gradients = ') - call write_double(6,norm_grad_vec2_tab(3), ' Active-virtual gradients = ') - print*,'' - call write_double(6,energy_improvement, 'Predicted energy improvement = ') - - if(criterion_casscf == "energy")then - converged = dabs(energy_improvement) < thresh_scf - else if (criterion_casscf == "gradients")then - converged = norm_grad_vec2 < thresh_scf - else if (criterion_casscf == "e_pt2")then - delta_E = 0.d0 - do istate = 1, N_states - delta_E += dabs(E_PT2(istate) - ept2_before(istate)) - enddo - converged = dabs(delta_E) < thresh_casscf - endif - ept2_before = E_PT2 - if(.not.small_active_space)then - if(adaptive_pt2_max)then - pt2_max = dabs(energy_improvement / (pt2_relative_error)) - pt2_max = min(pt2_max, pt2_max_before) - if(n_act_orb.ge.n_big_act_orb)then - pt2_max = max(pt2_max,pt2_min_casscf) - endif + if(.True.)then + print*,'Ev,PT2',Ev(1),PT2(1) + E_PT2(1:N_states) = Ev(1:N_states) + PT2(1:N_states) + energy_old = energy + energy = eone+etwo+ecore + pt2_max_before = pt2_max + + call write_time(6) + call write_int(6,iteration,'CAS-SCF iteration = ') + call write_double(6,energy,'State-average CAS-SCF energy = ') +!! if(n_states == 1)then +!! call ezfio_get_casscf_cipsi_energy_pt2(E_PT2) +!! call ezfio_get_casscf_cipsi_energy(PT2) + double precision :: delta_E_istate, e_av + e_av = 0.d0 + do istate=1,N_states + e_av += state_average_weight(istate) * Ev(istate) + if(istate.gt.1)then + delta_E_istate = E_PT2(istate) - E_PT2(1) + write(*,'(A6,I2,A18,F16.10)')'state ',istate,' Delta E+PT2 = ',delta_E_istate + endif + write(*,'(A6,I2,A18,F16.10)')'state ',istate,' E + PT2 energy = ',E_PT2(istate) + write(*,'(A6,I2,A18,F16.10)')'state ',istate,' PT2 energy = ',PT2(istate) +!! call write_double(6,E_PT2(istate),'E + PT2 energy = ') +!! call write_double(6,PT2(istate),' PT2 = ') + enddo + call write_double(6,e_av,'State-average CAS-SCF energy bis = ') + call write_double(6,pt2_max,' PT2_MAX = ') +!! endif + + print*,'' + call write_double(6,norm_grad_vec2,'Norm of gradients = ') + call write_double(6,norm_grad_vec2_tab(1), ' Core-active gradients = ') + call write_double(6,norm_grad_vec2_tab(2), ' Core-virtual gradients = ') + call write_double(6,norm_grad_vec2_tab(3), ' Active-virtual gradients = ') + print*,'' + call write_double(6,energy_improvement, 'Predicted energy improvement = ') + + if(criterion_casscf == "energy")then + converged = dabs(energy_improvement) < thresh_scf + else if (criterion_casscf == "gradients")then + converged = norm_grad_vec2 < thresh_scf + else if (criterion_casscf == "e_pt2")then + delta_E = 0.d0 + do istate = 1, N_states + delta_E += dabs(E_PT2(istate) - ept2_before(istate)) + enddo + converged = dabs(delta_E) < thresh_casscf endif - endif - print*,'' - call write_double(6,pt2_max, 'PT2_MAX for next iteration = ') - - mo_coef = NewOrbs - mo_occ = occnum - if(.not.converged)then - call save_mos - iteration += 1 - if(norm_grad_vec2.gt.0.01d0)then - N_det = N_states - else - N_det = max(N_det/8 ,N_states) - endif - psi_det = psi_det_sorted - psi_coef = psi_coef_sorted - read_wf = .True. - call clear_mo_map - SOFT_TOUCH mo_coef N_det psi_det psi_coef + ept2_before = E_PT2 if(.not.small_active_space)then if(adaptive_pt2_max)then - SOFT_TOUCH pt2_max + pt2_max = dabs(energy_improvement / (pt2_relative_error)) + pt2_max = min(pt2_max, pt2_max_before) + if(n_act_orb.ge.n_big_act_orb)then + pt2_max = max(pt2_max,pt2_min_casscf) + endif endif endif - if(iteration .gt. 3)then - state_following_casscf = state_following_casscf_cipsi_save - soft_touch state_following_casscf + print*,'' + call write_double(6,pt2_max, 'PT2_MAX for next iteration = ') + + mo_coef = NewOrbs + mo_occ = occnum + if(.not.converged)then + call save_mos + iteration += 1 + if(norm_grad_vec2.gt.0.01d0)then + N_det = N_states + else + N_det = max(N_det/8 ,N_states) + endif + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + read_wf = .True. + call clear_mo_map + SOFT_TOUCH mo_coef N_det psi_det psi_coef + if(.not.small_active_space)then + if(adaptive_pt2_max)then + SOFT_TOUCH pt2_max + endif + endif + if(iteration .gt. 3)then + state_following_casscf = state_following_casscf_cipsi_save + soft_touch state_following_casscf + endif endif endif - + enddo + if(.True.)then integer :: i print*,'Converged CASSCF ' print*,'--------------------------' @@ -153,6 +160,7 @@ subroutine run ! write(*,*)mcscf_fock_alpha_mo(i,i) enddo + endif end diff --git a/src/casscf_cipsi/chol_bielec.irp.f b/src/casscf_cipsi/chol_bielec.irp.f new file mode 100644 index 000000000..f69832c11 --- /dev/null +++ b/src/casscf_cipsi/chol_bielec.irp.f @@ -0,0 +1,248 @@ + +BEGIN_PROVIDER [double precision, cholesky_no_1_idx_transp, (cholesky_mo_num, n_act_orb, mo_num)] + BEGIN_DOC + ! Cholesky vectors with ONE orbital on the active natural orbital basis + END_DOC + implicit none + integer :: i_chol,i_act,i_mo,jj_act + double precision, allocatable :: chol_tmp(:,:) + double precision :: wall0,wall1 + call wall_time(wall0) + print*,'Providing cholesky_no_1_idx_transp' + allocate(chol_tmp(cholesky_mo_num,n_act_orb)) + cholesky_no_1_idx_transp = 0.D0 + do i_mo = 1, mo_num + ! Get all the integrals corresponding to the "i_mo" + do i_act = 1, n_act_orb + jj_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + chol_tmp(i_chol, i_act) = cholesky_mo_transp(i_chol, jj_act, i_mo) + enddo + enddo + call dgemm('N','N',cholesky_mo_num,n_act_orb,n_act_orb,1.d0, & + chol_tmp, size(chol_tmp,1), & + natorbsCI, size(natorbsCI,1), & + 0.d0, & + cholesky_no_1_idx_transp(1,1,i_mo), size(cholesky_no_1_idx_transp,1)) + enddo + call wall_time(wall1) + print*,'Time to provide cholesky_no_1_idx_transp = ', wall1 - wall0 + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cholesky_no_2_idx_transp, (cholesky_mo_num, n_act_orb, n_act_orb)] + BEGIN_DOC + ! Cholesky vectors with TWO orbital on the active natural orbital basis + END_DOC + implicit none + integer :: i_chol,i_act,j_act,jj_act + double precision, allocatable :: chol_tmp(:,:),chol_tmp_bis(:,:) + allocate(chol_tmp(cholesky_mo_num,n_act_orb),chol_tmp_bis(cholesky_mo_num,n_act_orb)) + double precision :: wall0,wall1 + call wall_time(wall0) + print*,'Providing cholesky_no_2_idx_transp' + cholesky_no_2_idx_transp = 0.D0 + do i_act = 1, n_act_orb + ! Get all the integrals corresponding to the "j_act" + do j_act = 1, n_act_orb + jj_act = list_act(j_act) + do i_chol = 1, cholesky_mo_num + chol_tmp(i_chol, j_act) = cholesky_no_1_idx_transp(i_chol, i_act, jj_act) + enddo + enddo + call dgemm('N','N',cholesky_mo_num,n_act_orb,n_act_orb,1.d0, & + chol_tmp, size(chol_tmp,1), & + natorbsCI, size(natorbsCI,1), & + 0.d0, & + cholesky_no_2_idx_transp(1,1,i_act), size(cholesky_no_2_idx_transp,1)) + enddo + call wall_time(wall1) + print*,'Time to provide cholesky_no_2_idx_transp = ', wall1 - wall0 + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_no_total_transp, (cholesky_mo_num, mo_num, mo_num)] + implicit none + BEGIN_DOC + ! Cholesky vectors defined on all basis including the NO basis + END_DOC + integer :: i_chol, i_act, ii_act, j_act, jj_act, i_core_inact, j_core_inact, ii_core_inact, jj_core_inact + integer :: i_virt, ii_virt, j_virt, jj_virt + double precision :: wall0,wall1 + call wall_time(wall0) + print*,'Providing cholesky_no_total_transp ' + ! Block when two orbitals belong to the core/inact + do j_core_inact = 1, n_core_inact_orb + jj_core_inact = list_core_inact(j_core_inact) + do i_core_inact = 1, n_core_inact_orb + ii_core_inact = list_core_inact(i_core_inact) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol, ii_core_inact, jj_core_inact) = cholesky_mo_transp(i_chol,ii_core_inact,jj_core_inact) + enddo + enddo + enddo + + ! Block when one orbitals belongs to the core/inact and one belongs to the active + do j_core_inact = 1, n_core_inact_orb + jj_core_inact = list_core_inact(j_core_inact) + do i_act = 1, n_act_orb + ii_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol,ii_act,j_core_inact) = cholesky_no_1_idx_transp(i_chol,i_act,jj_core_inact) + enddo + enddo + enddo + do j_core_inact = 1, n_core_inact_orb + jj_core_inact = list_core_inact(j_core_inact) + do i_act = 1, n_act_orb + ii_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol,j_core_inact,ii_act) = cholesky_no_1_idx_transp(i_chol,i_act,jj_core_inact) + enddo + enddo + enddo + + ! Block when two orbitals belong to the active + do j_act = 1, n_act_orb + jj_act = list_act(j_act) + do i_act = 1, n_act_orb + ii_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol,ii_act,jj_act) = cholesky_no_2_idx_transp(i_chol,i_act,j_act) + enddo + enddo + enddo + + ! Block when two orbitals belong to the virtuals + do i_virt = 1, n_virt_orb + ii_virt = list_virt(i_virt) + do j_virt = 1, n_virt_orb + jj_virt = list_virt(j_virt) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol,jj_virt,ii_virt) = cholesky_mo_transp(i_chol,jj_virt,ii_virt) + enddo + enddo + enddo + + ! Block when one orbital is in active and the other in the virtuals + do i_virt = 1, n_virt_orb + ii_virt = list_virt(i_virt) + do i_act = 1, n_act_orb + ii_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol,ii_act,ii_virt) = cholesky_no_1_idx_transp(i_chol, i_act,ii_virt) + enddo + enddo + enddo + do i_virt = 1, n_virt_orb + ii_virt = list_virt(i_virt) + do i_act = 1, n_act_orb + ii_act = list_act(i_act) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol,ii_virt,ii_act) = cholesky_no_1_idx_transp(i_chol, i_act,ii_virt) + enddo + enddo + enddo + ! Block when one orbital is in the virtual and one in the core-inact + do i_virt = 1, n_virt_orb + ii_virt = list_virt(i_virt) + do i_core_inact = 1, n_core_inact_orb + ii_core_inact = list_core_inact(i_core_inact) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol, ii_core_inact, ii_virt) = cholesky_mo_transp(i_chol, ii_core_inact, ii_virt) + enddo + enddo + enddo + do i_core_inact = 1, n_core_inact_orb + ii_core_inact = list_core_inact(i_core_inact) + do i_virt = 1, n_virt_orb + ii_virt = list_virt(i_virt) + do i_chol = 1, cholesky_mo_num + cholesky_no_total_transp(i_chol, ii_virt, ii_core_inact) = cholesky_mo_transp(i_chol, ii_virt, ii_core_inact) + enddo + enddo + enddo + + call wall_time(wall1) + print*,'Time to provide cholesky_no_total_transp = ', wall1 - wall0 +END_PROVIDER + + +double precision function bielec_no_basis(i_1,j_1,i_2,j_2) + implicit none + integer, intent(in) :: i_1,j_1,i_2,j_2 + BEGIN_DOC + ! integral (i_1 j_1|i_2 j_2) in the mixed basis of both MOs and natural MOs + ! + END_DOC + integer :: i + bielec_no_basis = 0.d0 + do i = 1, cholesky_mo_num + bielec_no_basis += cholesky_no_total_transp(i,i_1, j_1) * cholesky_no_total_transp(i,i_2,j_2) + enddo +end + +double precision function bielec_PQxx_no(i_mo, j_mo, i_ca, j_ca) + implicit none + BEGIN_DOC + ! function that computes (i_mo j_mo| i_ca j_ca) with Cholesky decomposition on the NO basis for active orbitals + ! + ! where i_ca, j_ca are in [1:n_core_inact_act_orb] + END_DOC + integer, intent(in) :: i_ca, j_ca, i_mo, j_mo + integer :: ii_ca, jj_ca + double precision :: bielec_no_basis + ii_ca = list_core_inact_act(i_ca) + jj_ca = list_core_inact_act(j_ca) + bielec_PQxx_no = bielec_no_basis(i_mo,j_mo,ii_ca,jj_ca) +end + +double precision function bielec_PxxQ_no(i_mo, j_ca, i_ca, j_mo) + implicit none + BEGIN_DOC + ! function that computes (i_mo j_ca |i_ca j_mo) with Cholesky decomposition on the NO basis for active orbitals + ! + ! where i_ca, j_ca are in [1:n_core_inact_act_orb] + END_DOC + integer, intent(in) :: i_ca, j_ca, i_mo, j_mo + integer :: ii_ca, jj_ca + double precision :: bielec_no_basis + ii_ca = list_core_inact_act(i_ca) + jj_ca = list_core_inact_act(j_ca) + bielec_PxxQ_no = bielec_no_basis(i_mo, jj_ca, ii_ca, j_mo) + +end + + +double precision function bielec_PQxx(i_mo, j_mo, i_ca, j_ca) + BEGIN_DOC + ! function that computes (i_mo j_mo |i_ca j_ca) with Cholesky decomposition + ! + ! indices are unshifted orbital numbers + ! + ! where i_ca, j_ca are in [1:n_core_inact_act_orb] + END_DOC + implicit none + integer, intent(in) :: i_ca, j_ca, j_mo, i_mo + double precision :: mo_two_e_integral + integer :: ii_ca, jj_ca + ii_ca = list_core_inact_act(i_ca) + jj_ca = list_core_inact_act(j_ca) + bielec_PQxx = mo_two_e_integral(i_mo,ii_ca,j_mo,jj_ca) +end + +double precision function bielec_PxxQ(i_mo, i_ca, j_ca, j_mo) + BEGIN_DOC + ! function that computes (i_mo j_mo |i_ca j_ca) with Cholesky decomposition + ! + ! where i_ca, j_ca are in [1:n_core_inact_act_orb] + END_DOC + implicit none + integer, intent(in) :: i_ca, j_ca, j_mo, i_mo + double precision :: mo_two_e_integral + integer :: ii_ca, jj_ca + ii_ca = list_core_inact_act(i_ca) + jj_ca = list_core_inact_act(j_ca) + bielec_PxxQ = mo_two_e_integral(i_mo,jj_ca,ii_ca,j_mo) +end + diff --git a/src/casscf_cipsi/chol_garb.irp.f b/src/casscf_cipsi/chol_garb.irp.f new file mode 100644 index 000000000..c4a8fa591 --- /dev/null +++ b/src/casscf_cipsi/chol_garb.irp.f @@ -0,0 +1,34 @@ + +!!!!! FUNCTIONS THAT WORK BUT WHICH ARE USELESS AS THE ARRAYS CAN ALWAYS BE STORED +!double precision function bielecCI_chol(i_a, j_a, k_a, i_mo) +! BEGIN_DOC +! ! function that computes (i_a j_a |k_a j_mo) with Cholesky decomposition +! ! +! ! where i_a, j_a, k_a are in [1:n_act_orb] !!! ONLY ON ACTIVE +! END_DOC +! implicit none +! integer, intent(in) :: i_a, j_a, k_a, i_mo +! integer :: ii_a, jj_a, kk_a +! double precision :: mo_two_e_integral +! ii_a = list_act(i_a) +! jj_a = list_act(j_a) +! kk_a = list_act(k_a) +! bielecCI_chol = mo_two_e_integral(ii_a,kk_a,jj_a,i_mo) +!end + +!double precision function bielecCI_no_chol(i_ca, j_ca, k_ca, i_mo) +! BEGIN_DOC +! ! function that computes (i_ca j_ca |k_ca j_mo) with Cholesky decomposition on the NO basis for active orbitals +! ! +! ! where i_ca, j_ca, k_ca are in [1:n_core_inact_act_orb] +! END_DOC +! implicit none +! integer, intent(in) :: i_ca, j_ca, k_ca, i_mo +! integer :: ii_ca, jj_ca, kk_ca +! double precision :: bielec_no_basis_chol +! ii_ca = list_act(i_ca) +! jj_ca = list_act(j_ca) +! kk_ca = list_act(k_ca) +! bielecCI_no_chol = bielec_no_basis_chol(ii_ca, jj_ca, kk_ca, i_mo) +! +!end diff --git a/src/casscf_cipsi/gradient.irp.f b/src/casscf_cipsi/gradient.irp.f index a1c5e947d..961d260d9 100644 --- a/src/casscf_cipsi/gradient.irp.f +++ b/src/casscf_cipsi/gradient.irp.f @@ -157,6 +157,7 @@ real*8 function gradvec_it(i,t) integer :: ii,tt,v,vv,x,y integer :: x3,y3 + double precision :: bielec_PQxx_no ii=list_core_inact(i) tt=list_act(t) diff --git a/src/casscf_cipsi/hessian.irp.f b/src/casscf_cipsi/hessian.irp.f index 458c6aa69..9a7a90313 100644 --- a/src/casscf_cipsi/hessian.irp.f +++ b/src/casscf_cipsi/hessian.irp.f @@ -10,6 +10,7 @@ real*8 function hessmat_itju(i,t,j,u) implicit none integer :: i,t,j,u,ii,tt,uu,v,vv,x,xx,y,jj real*8 :: term,t2 + double precision :: bielec_pqxx_no,bielec_pxxq_no ii=list_core_inact(i) tt=list_act(t) @@ -95,6 +96,7 @@ real*8 function hessmat_itja(i,t,j,a) implicit none integer :: i,t,j,a,ii,tt,jj,aa,v,vv,x,y real*8 :: term + double precision :: bielec_pqxx_no,bielec_pxxq_no ! it/ja ii=list_core_inact(i) @@ -128,6 +130,7 @@ real*8 function hessmat_itua(i,t,u,a) implicit none integer :: i,t,u,a,ii,tt,uu,aa,v,vv,x,xx,u3,t3,v3 real*8 :: term + double precision :: bielec_pqxx_no,bielec_pxxq_no ii=list_core_inact(i) tt=list_act(t) @@ -169,6 +172,7 @@ real*8 function hessmat_iajb(i,a,j,b) implicit none integer :: i,a,j,b,ii,aa,jj,bb real*8 :: term + double precision :: bielec_pqxx_no,bielec_pxxq_no ii=list_core_inact(i) aa=list_virt(a) @@ -205,6 +209,7 @@ real*8 function hessmat_iatb(i,a,t,b) implicit none integer :: i,a,t,b,ii,aa,tt,bb,v,vv,x,y,v3,t3 real*8 :: term + double precision :: bielec_pqxx_no,bielec_pxxq_no ii=list_core_inact(i) aa=list_virt(a) @@ -237,6 +242,7 @@ real*8 function hessmat_taub(t,a,u,b) integer :: t,a,u,b,tt,aa,uu,bb,v,vv,x,xx,y integer :: v3,x3 real*8 :: term,t1,t2,t3 + double precision :: bielec_pqxx_no,bielec_pxxq_no tt=list_act(t) aa=list_virt(a) diff --git a/src/casscf_cipsi/mcscf_fock.irp.f b/src/casscf_cipsi/mcscf_fock.irp.f index 0f4b7a99b..82b710a76 100644 --- a/src/casscf_cipsi/mcscf_fock.irp.f +++ b/src/casscf_cipsi/mcscf_fock.irp.f @@ -4,6 +4,7 @@ END_DOC implicit none integer :: p,q,k,kk,t,tt,u,uu + double precision :: bielec_pxxq_no, bielec_pqxx_no do q=1,mo_num do p=1,mo_num @@ -44,6 +45,7 @@ END_DOC implicit none integer :: p,q,k,kk,t,tt,u,uu + double precision :: bielec_pxxq_no, bielec_pqxx_no Fapq = 0.d0 diff --git a/src/casscf_cipsi/test_chol.irp.f b/src/casscf_cipsi/test_chol.irp.f new file mode 100644 index 000000000..bcce7cf73 --- /dev/null +++ b/src/casscf_cipsi/test_chol.irp.f @@ -0,0 +1,116 @@ +program test_chol + implicit none + read_wf= .True. + touch read_wf +! call routine_bielec_PxxQ_no +! call routine_bielecCI_no +! call test_bielec_PxxQ_chol +! call test_bielecCI + +end + +subroutine routine_bielec_PQxx_no + implicit none + integer :: i_chol, i_act, ii_act, j_act, jj_act, i_core_inact, j_core_inact, ii_core_inact, jj_core_inact + integer :: i_virt, ii_virt, j_virt, jj_virt, i_mo, j_mo + double precision :: exact, new, error, accu, bielec_no_basis_chol + double precision :: bielec_PQxx_no + + accu = 0.d0 + do i_core_inact = 1, n_core_inact_act_orb + ii_core_inact = list_core_inact_act(i_core_inact) + do j_core_inact = 1, n_core_inact_act_orb + jj_core_inact = list_core_inact_act(j_core_inact) + do i_mo = 1, mo_num + do j_mo = 1, mo_num + exact = bielec_PQxx_no_array(j_mo,i_mo, j_core_inact, i_core_inact) + new = bielec_PQxx_no(j_mo,i_mo, j_core_inact, i_core_inact) + error = dabs(exact-new) + if(dabs(exact).gt.1.d-10)then + print*,exact,new,error + endif + accu += error + enddo + enddo + enddo + enddo + print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2)) +end + +subroutine routine_bielec_PxxQ_no_array + implicit none + integer :: i_chol, i_act, ii_act, j_act, jj_act, i_core_inact, j_core_inact, ii_core_inact, jj_core_inact + integer :: i_virt, ii_virt, j_virt, jj_virt, i_mo, j_mo + double precision :: exact, new, error, accu, bielec_no_basis_chol + double precision :: bielec_PxxQ_no + + accu = 0.d0 + do i_mo = 1, mo_num + do i_core_inact = 1, n_core_inact_act_orb + ii_core_inact = list_core_inact_act(i_core_inact) + do j_core_inact = 1, n_core_inact_act_orb + jj_core_inact = list_core_inact_act(j_core_inact) + do j_mo = 1, mo_num + exact = bielec_PxxQ_no_array(j_mo, j_core_inact, i_core_inact,i_mo) +! new = bielec_no_basis_chol(j_mo,i_mo, jj_core_inact, ii_core_inact) + new = bielec_PxxQ_no(j_mo, j_core_inact, i_core_inact,i_mo) + error = dabs(exact-new) + accu += error + if(dabs(exact).gt.1.d-10)then + print*,exact,new,error + endif + enddo + enddo + enddo + enddo + print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2)) +end + +subroutine test_bielec_PQxx(i_mo, j_mo, i_ca, j_ca) + implicit none + integer :: i_mo, j_mo, i_ca, j_ca + double precision :: exact, new, error, accu + double precision :: bielec_PQxx + + accu = 0.d0 + do j_ca = 1, n_core_inact_act_orb + do i_ca = 1, n_core_inact_act_orb + do j_mo = 1, mo_num + do i_mo = 1, mo_num + exact = bielec_PQxx_array(i_mo, j_mo, i_ca, j_ca) + new = bielec_PQxx(i_mo, j_mo, i_ca, j_ca) + error = dabs(exact-new) + accu += error + if(dabs(exact).gt.1.d-10)then + print*,exact,new,error + endif + enddo + enddo + enddo + enddo + print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2)) +end + +subroutine test_bielec_PxxQ_chol(i_mo, i_ca, j_ca, j_mo) + implicit none + integer :: i_mo, i_ca, j_ca, j_mo + double precision :: exact, new, error, accu + double precision :: bielec_PxxQ + accu = 0.d0 + do j_mo = 1, mo_num + do j_ca = 1, n_core_inact_act_orb + do i_ca =1, n_core_inact_act_orb + do i_mo = 1, mo_num + exact = bielec_PxxQ_array(i_mo, i_ca, j_ca, j_mo) + new = bielec_PxxQ(i_mo, i_ca, j_ca, j_mo) + error = dabs(exact-new) + accu += error + if(dabs(exact).gt.1.d-10)then + print*,exact,new,error + endif + enddo + enddo + enddo + enddo + print*,'accu = ',accu/(dble(mo_num*mo_num*n_core_inact_act_orb**2)) +end diff --git a/src/casscf_cipsi/tot_en.irp.f b/src/casscf_cipsi/tot_en.irp.f index 1d70e0879..37ceac05e 100644 --- a/src/casscf_cipsi/tot_en.irp.f +++ b/src/casscf_cipsi/tot_en.irp.f @@ -8,6 +8,7 @@ implicit none integer :: t,u,v,x,i,ii,tt,uu,vv,xx,j,jj,t3,u3,v3,x3 real*8 :: e_one_all,e_two_all + double precision :: bielec_PQxx,bielec_PxxQ e_one_all=0.D0 e_two_all=0.D0 do i=1,n_core_inact_orb diff --git a/src/ccsd/NEED b/src/ccsd/NEED index e6e6bc596..8298f28ec 100644 --- a/src/ccsd/NEED +++ b/src/ccsd/NEED @@ -1,2 +1,3 @@ +gpu hartree_fock utils_cc diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 555a25527..d8131a9cf 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -1,4 +1,5 @@ subroutine run_ccsd_space_orb + use gpu implicit none @@ -9,9 +10,19 @@ subroutine run_ccsd_space_orb double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb logical :: not_converged - double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:) - double precision, allocatable :: t1(:,:), r1(:,:) - double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) + type(gpu_double4) :: t2, r2, tau, tau_x + type(gpu_double2) :: t1, r1 + type(gpu_double2) :: H_oo, H_vv, H_vo + + type(gpu_double2) :: d_cc_space_f_oo, d_cc_space_f_vo + type(gpu_double2) :: d_cc_space_f_ov, d_cc_space_f_vv + + type(gpu_double3) :: d_cc_space_v_oo_chol, d_cc_space_v_vo_chol + type(gpu_double3) :: d_cc_space_v_ov_chol, d_cc_space_v_vv_chol + + type(gpu_double4) :: d_cc_space_v_oovv, d_cc_space_v_voov, d_cc_space_v_ovov + type(gpu_double4) :: d_cc_space_v_oovo, d_cc_space_v_vooo, d_cc_space_v_oooo + type(gpu_double4) :: d_cc_space_v_vvoo, d_cc_space_v_ovvo, d_cc_space_v_ovoo double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) @@ -20,7 +31,7 @@ subroutine run_ccsd_space_orb call set_multiple_levels_omp(.False.) - if (do_ao_cholesky) then + if (do_mo_cholesky) then PROVIDE cholesky_mo_transp FREE cholesky_ao else @@ -51,11 +62,77 @@ subroutine run_ccsd_space_orb !print*,'occ',list_occ !print*,'vir',list_vir - allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) - allocate(tau(nO,nO,nV,nV)) - allocate(tau_x(nO,nO,nV,nV)) - allocate(t1(nO,nV), r1(nO,nV)) - allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) + ! GPU arrays + call gpu_allocate(d_cc_space_f_oo, nO, nO) + call gpu_allocate(d_cc_space_f_vo, nV, nO) + call gpu_allocate(d_cc_space_f_ov, nO, nV) + call gpu_allocate(d_cc_space_f_vv, nV, nV) + + call gpu_upload(cc_space_f_oo, d_cc_space_f_oo) + call gpu_upload(cc_space_f_vo, d_cc_space_f_vo) + call gpu_upload(cc_space_f_ov, d_cc_space_f_ov) + call gpu_upload(cc_space_f_vv, d_cc_space_f_vv) + +! FREE cc_space_f_oo +! FREE cc_space_f_vo +! FREE cc_space_f_vv + + if (do_mo_cholesky) then + call gpu_allocate(d_cc_space_v_oo_chol, cholesky_mo_num, nO, nO) + call gpu_allocate(d_cc_space_v_ov_chol, cholesky_mo_num, nO, nV) + call gpu_allocate(d_cc_space_v_vo_chol, cholesky_mo_num, nV, nO) + call gpu_allocate(d_cc_space_v_vv_chol, cholesky_mo_num, nV, nV) + + call gpu_upload(cc_space_v_oo_chol, d_cc_space_v_oo_chol) + call gpu_upload(cc_space_v_ov_chol, d_cc_space_v_ov_chol) + call gpu_upload(cc_space_v_vo_chol, d_cc_space_v_vo_chol) + call gpu_upload(cc_space_v_vv_chol, d_cc_space_v_vv_chol) + +! FREE cc_space_v_oo_chol +! FREE cc_space_v_ov_chol +! FREE cc_space_v_vo_chol +! FREE cc_space_v_vv_chol + endif + + call gpu_allocate(d_cc_space_v_oovv, nO, nO, nV, nV) + call gpu_allocate(d_cc_space_v_voov, nV, nO, nO, nV) + call gpu_allocate(d_cc_space_v_ovov, nO, nV, nO, nV) + call gpu_allocate(d_cc_space_v_oovo, nO, nO, nV, nO) + call gpu_allocate(d_cc_space_v_ovvo, nO, nV, nV, nO) + call gpu_allocate(d_cc_space_v_vooo, nV, nO, nO, nO) + call gpu_allocate(d_cc_space_v_oooo, nO, nO, nO, nO) + call gpu_allocate(d_cc_space_v_vvoo, nV, nV, nO, nO) + call gpu_allocate(d_cc_space_v_ovoo, nO, nV, nO, nO) + + call gpu_upload(cc_space_v_oovv, d_cc_space_v_oovv) + call gpu_upload(cc_space_v_voov, d_cc_space_v_voov) + call gpu_upload(cc_space_v_ovov, d_cc_space_v_ovov) + call gpu_upload(cc_space_v_oovo, d_cc_space_v_oovo) + call gpu_upload(cc_space_v_ovvo, d_cc_space_v_ovvo) + call gpu_upload(cc_space_v_vooo, d_cc_space_v_vooo) + call gpu_upload(cc_space_v_oooo, d_cc_space_v_oooo) + call gpu_upload(cc_space_v_vvoo, d_cc_space_v_vvoo) + call gpu_upload(cc_space_v_ovoo, d_cc_space_v_ovoo) + +! FREE cc_space_v_voov +! FREE cc_space_v_ovov +! FREE cc_space_v_oovo +! FREE cc_space_v_oovv +! FREE cc_space_v_vooo +! FREE cc_space_v_oooo +! FREE cc_space_v_vvoo +! FREE cc_space_v_ovvo +! FREE cc_space_v_ovoo + + call gpu_allocate(t2, nO,nO,nV,nV) + call gpu_allocate(r2, nO,nO,nV,nV) + call gpu_allocate(tau, nO,nO,nV,nV) + call gpu_allocate(tau_x, nO,nO,nV,nV) + call gpu_allocate(t1, nO,nV) + call gpu_allocate(r1, nO,nV) + call gpu_allocate(H_oo, nO, nO) + call gpu_allocate(H_vo, nV, nO) + call gpu_allocate(H_vv, nV, nV) if (cc_update_method == 'diis') then double precision :: rss, diis_mem, extra_mem @@ -97,14 +174,22 @@ subroutine run_ccsd_space_orb endif ! Init - call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1) - call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2) - call update_tau_space(nO,nV,t1,t2,tau) + double precision, allocatable :: h_t1(:,:), h_t2(:,:,:,:) + allocate(h_t1(nO,nV), h_t2(nO,nO,nV,nV)) + + call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,h_t1) + call gpu_upload(h_t1, t1) + + call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,h_t2) + call gpu_upload(h_t2, t2) + + + call update_tau_space(nO,nV,h_t1,t1,t2,tau) call update_tau_x_space(nO,nV,tau,tau_x) - !print*,'hf_energy', hf_energy call det_energy(det,uncorr_energy) print*,'Det energy', uncorr_energy - call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) + + call ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy) print*,'Guess energy', uncorr_energy+energy, energy nb_iter = 0 @@ -120,43 +205,45 @@ subroutine run_ccsd_space_orb do while (not_converged) ! Residue - if (do_ao_cholesky) then -! if (.False.) then - call compute_H_oo_chol(nO,nV,tau_x,H_oo) - call compute_H_vv_chol(nO,nV,tau_x,H_vv) - call compute_H_vo_chol(nO,nV,t1,H_vo) - - call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) - call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + if (do_mo_cholesky) then + call compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, d_cc_space_v_ov_chol,d_cc_space_v_vo_chol,H_oo) + call compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, d_cc_space_v_ov_chol,H_vv) + call compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, d_cc_space_v_ov_chol,d_cc_space_v_vo_chol, H_vo) + + call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1,d_cc_space_f_ov,d_cc_space_f_vo, & + d_cc_space_v_voov, d_cc_space_v_ovov, d_cc_space_v_oovo, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol) + call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & + d_cc_space_v_oovv, d_cc_space_v_vooo, d_cc_space_v_oooo, d_cc_space_v_oovo, d_cc_space_v_ovvo, d_cc_space_v_ovoo, & + d_cc_space_v_ovov, d_cc_space_v_vvoo, d_cc_space_v_oo_chol, d_cc_space_v_ov_chol, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol, & + d_cc_space_f_vo, & + r2, max_r2) else - call compute_H_oo(nO,nV,t1,t2,tau,H_oo) - call compute_H_vv(nO,nV,t1,t2,tau,H_vv) - call compute_H_vo(nO,nV,t1,t2,H_vo) + call compute_H_oo(nO,nV,t1%f,t2%f,tau%f,H_oo%f) + call compute_H_vv(nO,nV,t1%f,t2%f,tau%f,H_vv%f) + call compute_H_vo(nO,nV,t1%f,t2%f,H_vo%f) - call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) - call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + call compute_r1_space(nO,nV,t1%f,t2%f,tau%f,H_oo%f,H_vv%f,H_vo%f,r1%f,max_r1) + call compute_r2_space(nO,nV,t1%f,t2%f,tau%f,H_oo%f,H_vv%f,H_vo%f,r2%f,max_r2) endif max_r = max(max_r1,max_r2) ! Update if (cc_update_method == 'diis') then - !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) - !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) - call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1,r2,t1,t2,all_err,all_t) + call update_t_ccsd_diis_v3(nO,nV,nb_iter,cc_space_f_o,cc_space_f_v,r1%f,r2%f,t1%f,t2%f,all_err,all_t) ! Standard update as T = T - Delta elseif (cc_update_method == 'none') then - call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) - call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) + call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1%f,t1%f) + call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2%f,t2%f) else print*,'Unkown cc_method_method: '//cc_update_method endif - call update_tau_space(nO,nV,t1,t2,tau) + call update_tau_space(nO,nV,t1%f,t1,t2,tau) call update_tau_x_space(nO,nV,tau,tau_x) ! Energy - call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) + call ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy) write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' nb_iter = nb_iter + 1 @@ -181,8 +268,8 @@ subroutine run_ccsd_space_orb print*,'' if (write_amplitudes) then - call write_t1(nO,nV,t1) - call write_t2(nO,nV,t2) + call write_t1(nO,nV,t1%f) + call write_t2(nO,nV,t2%f) call ezfio_set_utils_cc_io_amplitudes('Read') endif @@ -191,210 +278,198 @@ subroutine run_ccsd_space_orb deallocate(all_err,all_t) endif - deallocate(H_vv,H_oo,H_vo,r1,r2,tau) + call gpu_deallocate(H_oo) + call gpu_deallocate(H_vv) + call gpu_deallocate(H_vo) + + call gpu_deallocate(r1) + call gpu_deallocate(r2) + call gpu_deallocate(tau) + call gpu_deallocate(tau_x) ! CCSD(T) - double precision :: e_t + double precision :: e_t, e_t_err e_t = 0.d0 if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then - ! Dumb way - !call wall_time(ta) - !call ccsd_par_t_space(nO,nV,t1,t2,e_t) - !call wall_time(tb) - !print*,'Time: ',tb-ta, ' s' - - !print*,'' - !write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' - !write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' - !write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' - !print*,'' - ! New + e_t = uncorr_energy + energy ! For print in (T) call + e_t_err = 0.d0 + print*,'Computing (T) correction...' call wall_time(ta) -! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & -! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) - e_t = uncorr_energy + energy ! For print in next call - call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & - ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call ccsd_par_t_space_stoch(nO,nV,t1%f,t2%f,cc_space_f_o,cc_space_f_v & + ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t, e_t_err) call wall_time(tb) print*,'Time: ',tb-ta, ' s' print*,'' - write(*,'(A15,F18.12,A3)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha' - write(*,'(A15,F18.12,A3)') ' E(T) = ', e_t, ' Ha' - write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' + write(*,'(A15,F18.12,A7,F18.12)') ' E(CCSD(T)) = ', uncorr_energy + energy + e_t, ' Ha +/- ', e_t_err + write(*,'(A15,F18.12,A7,F18.12)') ' E(T) = ', e_t, ' Ha +/- ', e_t_err + write(*,'(A15,F18.12,A7,F18.12)') ' Correlation = ', energy + e_t, ' Ha +/- ', e_t_err print*,'' endif call save_energy(uncorr_energy + energy, e_t) - deallocate(t1,t2) + deallocate(h_t1, h_t2) + if (do_mo_cholesky) then + call gpu_deallocate(d_cc_space_v_oo_chol) + call gpu_deallocate(d_cc_space_v_ov_chol) + call gpu_deallocate(d_cc_space_v_vo_chol) + call gpu_deallocate(d_cc_space_v_vv_chol) + endif + + call gpu_deallocate(d_cc_space_v_oovv) + call gpu_deallocate(d_cc_space_v_voov) + call gpu_deallocate(d_cc_space_v_ovov) + call gpu_deallocate(d_cc_space_v_oovo) + call gpu_deallocate(d_cc_space_v_ovvo) + call gpu_deallocate(d_cc_space_v_vooo) + call gpu_deallocate(d_cc_space_v_oooo) + call gpu_deallocate(d_cc_space_v_vvoo) + call gpu_deallocate(d_cc_space_v_ovoo) + + call gpu_deallocate(d_cc_space_f_oo) + call gpu_deallocate(d_cc_space_f_vo) + call gpu_deallocate(d_cc_space_f_ov) + call gpu_deallocate(d_cc_space_f_vv) + + call gpu_deallocate(t1) + call gpu_deallocate(t2) end ! Energy -subroutine ccsd_energy_space(nO,nV,tau,t1,energy) - +subroutine ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy) + use gpu implicit none - integer, intent(in) :: nO, nV - double precision, intent(in) :: tau(nO,nO,nV,nV) - double precision, intent(in) :: t1(nO,nV) - double precision, intent(out) :: energy + integer, intent(in) :: nO, nV + type(gpu_double4), intent(in) :: tau_x, d_cc_space_v_oovv + type(gpu_double2), intent(in) :: t1, d_cc_space_f_vo + double precision, intent(out) :: energy ! internal integer :: i,j,a,b double precision :: e - energy = 0d0 - !$omp parallel & - !$omp shared(nO,nV,energy,tau,t1,& - !$omp cc_space_f_vo,cc_space_w_oovv) & - !$omp private(i,j,a,b,e) & - !$omp default(none) - e = 0d0 - !$omp do - do a = 1, nV - do i = 1, nO - e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) - enddo - enddo - !$omp end do nowait - !$omp do - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp critical - energy = energy + e - !$omp end critical - !$omp end parallel - -end + type(gpu_stream) :: s1, s2 + call gpu_stream_create(s1) + call gpu_stream_create(s2) -subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy) + call gpu_set_stream(blas_handle,s1) + call gpu_ddot(blas_handle, nO*nV, d_cc_space_f_vo%f(1,1), 1, t1%f(1,1), 1, e) - implicit none + call gpu_set_stream(blas_handle,s2) + call gpu_ddot_64(blas_handle, nO*nO*nV*nV*1_8, tau_x%f(1,1,1,1), 1_8, d_cc_space_v_oovv%f(1,1,1,1), 1_8, energy) + call gpu_set_stream(blas_handle,gpu_default_stream) - integer, intent(in) :: nO, nV - double precision, intent(in) :: tau_x(nO,nO,nV,nV) - double precision, intent(in) :: t1(nO,nV) - double precision, intent(out) :: energy - - ! internal - integer :: i,j,a,b - double precision :: e + call gpu_synchronize() + call gpu_stream_destroy(s1) + call gpu_stream_destroy(s2) - energy = 0d0 - !$omp parallel & - !$omp shared(nO,nV,energy,tau_x,t1,& - !$omp cc_space_f_vo,cc_space_v_oovv) & - !$omp private(i,j,a,b,e) & - !$omp default(none) - e = 0d0 - !$omp do - do a = 1, nV - do i = 1, nO - e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) - enddo - enddo - !$omp end do nowait - !$omp do - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - e = e + tau_x(i,j,a,b) * cc_space_v_oovv(i,j,a,b) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp critical - energy = energy + e - !$omp end critical - !$omp end parallel + energy = energy + 2.d0*e end ! Tau -subroutine update_tau_space(nO,nV,t1,t2,tau) - +subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau) + use gpu implicit none ! in integer, intent(in) :: nO, nV - double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + double precision, intent(in) :: h_t1(nO,nV) + type(gpu_double2), intent(in) :: t1 + type(gpu_double4), intent(in) :: t2 ! out - double precision, intent(out) :: tau(nO,nO,nV,nV) + type(gpu_double4) :: tau ! internal integer :: i,j,a,b + type(gpu_stream) :: stream(nV) + !$OMP PARALLEL & - !$OMP SHARED(nO,nV,tau,t2,t1) & + !$OMP SHARED(nO,nV,tau,t2,t1,h_t1,stream,blas_handle) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) !$OMP DO - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) - enddo - enddo + do b=1,nV + call gpu_stream_create(stream(b)) + call gpu_set_stream(blas_handle,stream(b)) + do j=1,nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, & + 1.d0, t2%f(1,j,1,b), nO*nO, & + h_t1(j,b), t1%f(1,1), nO, & + tau%f(1,j,1,b), nO*nO) enddo enddo !$OMP END DO !$OMP END PARALLEL + call gpu_synchronize() + + do b=1,nV + call gpu_stream_destroy(stream(b)) + enddo + call gpu_set_stream(blas_handle,gpu_default_stream) + + end subroutine update_tau_x_space(nO,nV,tau,tau_x) - + use gpu implicit none ! in - integer, intent(in) :: nO, nV - double precision, intent(in) :: tau(nO,nO,nV,nV) + integer, intent(in) :: nO, nV + type(gpu_double4), intent(in) :: tau ! out - double precision, intent(out) :: tau_x(nO,nO,nV,nV) + type(gpu_double4) :: tau_x ! internal integer :: i,j,a,b + type(gpu_stream) :: stream(nV) + + do a=1,nV + call gpu_stream_create(stream(a)) + enddo + !$OMP PARALLEL & - !$OMP SHARED(nO,nV,tau,tau_x) & - !$OMP PRIVATE(i,j,a,b) & + !$OMP SHARED(nO,nV,tau,tau_x,stream,blas_handle) & + !$OMP PRIVATE(a,b) & !$OMP DEFAULT(NONE) !$OMP DO - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - tau_x(i,j,a,b) = 2.d0*tau(i,j,a,b) - tau(i,j,b,a) - enddo - enddo + do b=1,nV + do a=1,nV + call gpu_set_stream(blas_handle,stream(a)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, & + 2.d0, tau%f(1,1,a,b), nO, & + -1.d0, tau%f(1,1,b,a), nO, & + tau_x%f(1,1,a,b), nO) enddo enddo !$OMP END DO !$OMP END PARALLEL + call gpu_set_stream(blas_handle,gpu_default_stream) + call gpu_synchronize() + + do b=1,nV + call gpu_stream_destroy(stream(b)) + enddo + + end ! R1 diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index b59dc0bb6..6f65ea798 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -1,989 +1,789 @@ -subroutine ccsd_energy_space_chol(nO,nV,tau,t1,energy) +! H_oo +subroutine compute_H_oo_chol(nO,nV,tau_x,d_cc_space_f_oo, & + d_cc_space_v_ov_chol,d_cc_space_v_vo_chol,H_oo) + use gpu implicit none - integer, intent(in) :: nO, nV - double precision, intent(in) :: tau(nO,nO,nV,nV) - double precision, intent(in) :: t1(nO,nV) - double precision, intent(out) :: energy + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: d_cc_space_f_oo + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_vo_chol + type(gpu_double4), intent(in) :: tau_x + type(gpu_double2), intent(out) :: H_oo - ! internal - integer :: i,j,a,b - double precision :: e + integer :: a,b,i,j,u,k - energy = 0d0 - !$omp parallel & - !$omp shared(nO,nV,energy,tau,t1,& - !$omp cc_space_f_vo,cc_space_w_oovv) & - !$omp private(i,j,a,b,e) & - !$omp default(none) - e = 0d0 - !$omp do - do a = 1, nV - do i = 1, nO - e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) - enddo - enddo - !$omp end do nowait - !$omp do - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp critical - energy = energy + e - !$omp end critical - !$omp end parallel + type(gpu_double3) :: tau_kau, tmp_vov, tmp_ovv -end + call gpu_allocate(tau_kau, cholesky_mo_num, nV, nO) -! Tau + type(gpu_blas) :: blas -subroutine update_tau_space_chol(nO,nV,t1,t2,tau) - implicit none + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(blas,u,b,tmp_vov,tmp_ovv) - ! in - integer, intent(in) :: nO, nV - double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + !$OMP SINGLE + !$OMP TASK + call gpu_copy(d_cc_space_f_oo, H_oo) + !$OMP END TASK + !$OMP END SINGLE - ! out - double precision, intent(out) :: tau(nO,nO,nV,nV) + call gpu_allocate(tmp_ovv, nO, nV, nV) + call gpu_allocate(tmp_vov, nV, nO, nV) - ! internal - integer :: i,j,a,b + call gpu_blas_create(blas) - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,tau,t2,t1) & - !$OMP PRIVATE(i,j,a,b) & - !$OMP DEFAULT(NONE) !$OMP DO - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) - enddo - enddo + do u=1,nO + call gpu_dgeam(blas, 'N', 'N', 1, nO*nV*nV, 1.d0, & + tau_x%f(u,1,1,1), nO, 0.d0, tau_x%f(1,1,1,1), nO, tmp_ovv%f(1,1,1), 1) + do b=1,nV + call gpu_dgeam(blas, 'T', 'T', nV, nO, 1.d0, & + tmp_ovv%f(1,1,b), nO, 0.d0, & + tmp_ovv%f(1,1,b), nO, tmp_vov%f(1,1,b), nV) enddo + call gpu_dgemm(blas, 'N','T',cholesky_mo_num,nV,nO*nV,1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, tmp_vov%f(1,1,1), nV, & + 0.d0, tau_kau%f(1,1,u), cholesky_mo_num) enddo !$OMP END DO + + call gpu_blas_destroy(blas) + + call gpu_deallocate(tmp_vov) + call gpu_deallocate(tmp_ovv) + + !$OMP TASKWAIT !$OMP END PARALLEL -end + call gpu_dgemm(blas_handle, 'T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & + tau_kau%f(1,1,1), cholesky_mo_num*nV, d_cc_space_v_vo_chol%f(1,1,1), cholesky_mo_num*nV, & + 1.d0, H_oo%f(1,1), nO) -! R1 + call gpu_synchronize() + call gpu_deallocate(tau_kau) +end -subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) +! H_vv +subroutine compute_H_vv_chol(nO,nV,tau_x,d_cc_space_f_vv, & + d_cc_space_v_ov_chol,H_vv) + use gpu implicit none - ! in - integer, intent(in) :: nO, nV - double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) - double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) - - ! out - double precision, intent(out) :: r1(nO,nV), max_r1 + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: d_cc_space_f_vv + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol + type(gpu_double4), intent(in) :: tau_x + type(gpu_double2), intent(out) :: H_vv - ! internal - integer :: u,i,j,beta,a,b + integer :: a,b,i,j,u,k, beta - !$omp parallel & - !$omp shared(nO,nV,r1,cc_space_f_ov) & - !$omp private(u,beta) & - !$omp default(none) - !$omp do - do beta = 1, nV - do u = 1, nO - r1(u,beta) = cc_space_f_ov(u,beta) - enddo - enddo - !$omp end do - !$omp end parallel + type(gpu_double3) :: tau_kia, tmp_oov - double precision, allocatable :: X_oo(:,:) - allocate(X_oo(nO,nO)) - call dgemm('N','N', nO, nO, nV, & - -2d0, t1 , size(t1,1), & - cc_space_f_vo, size(cc_space_f_vo,1), & - 0d0, X_oo , size(X_oo,1)) + call gpu_allocate(tau_kia, cholesky_mo_num, nO, nV) - call dgemm('T','N', nO, nV, nO, & - 1d0, X_oo, size(X_oo,2), & - t1 , size(t1,1), & - 1d0, r1 , size(r1,1)) - deallocate(X_oo) + type(gpu_blas) :: blas - call dgemm('N','N', nO, nV, nV, & - 1d0, t1 , size(t1,1), & - H_vv, size(H_vv,1), & - 1d0, r1 , size(r1,1)) + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(a,b,tmp_oov,blas) - call dgemm('N','N', nO, nV, nO, & - -1d0, H_oo, size(H_oo,1), & - t1 , size(t1,1), & - 1d0, r1, size(r1,1)) + !$OMP SINGLE + !$OMP TASK + call gpu_copy(d_cc_space_f_vv, H_vv) + !$OMP END TASK + !$OMP END SINGLE - double precision, allocatable :: X_voov(:,:,:,:) - allocate(X_voov(nV, nO, nO, nV)) + call gpu_blas_create(blas) + call gpu_allocate(tmp_oov, nO, nO, nV) - !$omp parallel & - !$omp shared(nO,nV,X_voov,t2,t1) & - !$omp private(u,beta,i,a) & - !$omp default(none) - !$omp do - do beta = 1, nV - do u = 1, nO - do i = 1, nO - do a = 1, nV - X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta) - enddo - enddo + !$OMP DO + do a = 1, nV + do b=1,nV + call gpu_dgeam(blas, 'N', 'N', nO, nO, 1.d0, & + tau_x%f(1,1,a,b), nO, 0.d0, & + tau_x%f(1,1,a,b), nO, tmp_oov%f(1,1,b), nO) enddo + call gpu_dgemm(blas, 'N', 'T', cholesky_mo_num, nO, nO*nV, 1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, tmp_oov%f(1,1,1), nO, & + 0.d0, tau_kia%f(1,1,a), cholesky_mo_num) enddo - !$omp end do - !$omp end parallel + !$OMP END DO - call dgemv('T', nV*nO, nO*nV, & - 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & - H_vo , 1, & - 1d0, r1 , 1) + call gpu_blas_destroy(blas) - deallocate(X_voov) + call gpu_deallocate(tmp_oov) + !$OMP TASKWAIT + !$OMP END PARALLEL - double precision, allocatable :: X_ovov(:,:,:,:) - allocate(X_ovov(nO, nV, nO, nV)) + call gpu_dgemm(blas_handle, 'T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & + tau_kia%f(1,1,1), cholesky_mo_num*nO, d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, & + 1.d0, H_vv%f(1,1), nV) - !$omp parallel & - !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & - !$omp private(u,beta,i,a) & - !$omp default(none) - !$omp do - do beta = 1, nV - do u = 1, nO - do a = 1, nv - do i = 1, nO - X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + call gpu_synchronize() + call gpu_deallocate(tau_kia) +end - call dgemv('T', nO*nV, nO*nV, & - 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & - t1 , 1, & - 1d0, r1 , 1) +! H_vo +subroutine compute_H_vo_chol(nO,nV,t1,d_cc_space_f_vo, & + d_cc_space_v_ov_chol,d_cc_space_v_vo_chol, H_vo) + use gpu + implicit none - deallocate(X_ovov) + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: t1, d_cc_space_f_vo + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_vo_chol + type(gpu_double2), intent(out) :: H_vo - integer :: iblock, block_size, nVmax - double precision, allocatable :: W_vvov(:,:,:,:), W_vvov_tmp(:,:,:,:), T_vvoo(:,:,:,:) - block_size = 16 - allocate(W_vvov(nV,nV,nO,block_size), W_vvov_tmp(nV,nO,nV,block_size), T_vvoo(nV,nV,nO,nO)) + integer :: a,b,i,j,u,k - !$omp parallel & - !$omp private(u,i,b,a) & - !$omp default(shared) - !$omp do - do u = 1, nO - do i = 1, nO - do b = 1, nV - do a = 1, nV - T_vvoo(a,b,i,u) = tau(i,u,a,b) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + type(gpu_double1) :: tmp_k + type(gpu_double3) :: tmp, tmp2 - do iblock = 1, nV, block_size - nVmax = min(block_size,nV-iblock+1) + call gpu_copy(d_cc_space_f_vo, H_vo) - call dgemm('T','N', nV*nO, nV*nVmax, cholesky_mo_num, 1.d0, & - cc_space_v_vo_chol , cholesky_mo_num, & - cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & - 0.d0, W_vvov_tmp, nV*nO) + call gpu_allocate(tmp_k, cholesky_mo_num) - !$omp parallel & - !$omp private(b,i,a,beta) & - !$omp default(shared) - do beta = 1, nVmax - do i = 1, nO - !$omp do - do b = 1, nV - do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * W_vvov_tmp(a,i,b,beta) - W_vvov_tmp(b,i,a,beta) - enddo - enddo - !$omp end do nowait - enddo - enddo - !$omp barrier - !$omp end parallel + call gpu_dgemm(blas_handle, 'N', 'N', cholesky_mo_num, 1, nO*nV, 2.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, & + t1%f(1,1), nO*nV, 0.d0, tmp_k%f(1), cholesky_mo_num) - call dgemm('T','N',nO,nVmax,nO*nV*nV, & - 1d0, T_vvoo, nV*nV*nO, & - W_vvov, nO*nV*nV, & - 1d0, r1(1,iblock), nO) - enddo + call gpu_dgemm(blas_handle, 'T', 'N', nV*nO, 1, cholesky_mo_num, 1.d0, & + d_cc_space_v_vo_chol%f(1,1,1), cholesky_mo_num, tmp_k%f(1), cholesky_mo_num, 1.d0, & + H_vo%f(1,1), nV*nO) - deallocate(W_vvov,T_vvoo) + call gpu_deallocate(tmp_k) - double precision, allocatable :: W_oovo(:,:,:,:) - allocate(W_oovo(nO,nO,nV,nO)) + call gpu_allocate(tmp, cholesky_mo_num, nO, nO) - !$omp parallel & - !$omp shared(nO,nV,cc_space_v_oovo,W_oovo) & - !$omp private(u,a,i,j) & - !$omp default(none) - do u = 1, nO - !$omp do - do a = 1, nV - do j = 1, nO - do i = 1, nO -! W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) - W_oovo(i,j,a,u) = 2d0 * cc_space_v_oovo(i,j,a,u) - cc_space_v_oovo(j,i,a,u) - enddo - enddo - enddo - !$omp end do nowait - enddo - !$omp end parallel + call gpu_dgemm(blas_handle, 'N', 'T', cholesky_mo_num*nO, nO, nV, 1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, t1%f(1,1), nO, 0.d0, tmp%f(1,1,1), cholesky_mo_num*nO) - call dgemm('T','N', nO, nV, nO*nO*nV, & - -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & - tau , size(tau,1) * size(tau,2) * size(tau,3), & - 1d0, r1 , size(r1,1)) + call gpu_allocate(tmp2, cholesky_mo_num, nO, nO) - deallocate(W_oovo) + type(gpu_stream) :: stream(nO) + do i=1,nO + call gpu_stream_create(stream(i)) + enddo - max_r1 = 0d0 - do a = 1, nV - do i = 1, nO - max_r1 = max(dabs(r1(i,a)), max_r1) + !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,j) + do i=1,nO + do j=1,nO + call gpu_set_stream(blas_handle,stream(j)) + call gpu_dgeam(blas_handle, 'N', 'N', cholesky_mo_num, 1, 1.d0, & + tmp%f(1,i,j), cholesky_mo_num, 0.d0, & + tmp%f(1,i,j), cholesky_mo_num, tmp2%f(1,j,i), cholesky_mo_num) enddo enddo + !$OMP END PARALLEL DO - ! Change the sign for consistency with the code in spin orbitals - !$omp parallel & - !$omp shared(nO,nV,r1) & - !$omp private(a,i) & - !$omp default(none) - !$omp do - do a = 1, nV - do i = 1, nO - r1(i,a) = -r1(i,a) - enddo + call gpu_set_stream(blas_handle,gpu_default_stream) + call gpu_synchronize() + + do i=1,nO + call gpu_stream_destroy(stream(i)) enddo - !$omp end do - !$omp end parallel + call gpu_deallocate(tmp) -end + call gpu_dgemm(blas_handle, 'T','N', nV, nO, cholesky_mo_num*nO, -1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, tmp2%f(1,1,1), cholesky_mo_num*nO, & + 1.d0, H_vo%f(1,1), nV) -! H_oo + call gpu_synchronize() + call gpu_deallocate(tmp2) +end -subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) +! R1 +subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1,d_cc_space_f_ov,d_cc_space_f_vo, & + d_cc_space_v_voov, d_cc_space_v_ovov, d_cc_space_v_oovo, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol) + use gpu implicit none - integer, intent(in) :: nO,nV - double precision, intent(in) :: tau_x(nO, nO, nV, nV) - double precision, intent(out) :: H_oo(nO, nO) + ! in + integer, intent(in) :: nO, nV + type(gpu_double2), intent(in) :: t1, H_oo, H_vo, H_vv, d_cc_space_f_ov,d_cc_space_f_vo + type(gpu_double3), intent(in) :: d_cc_space_v_vo_chol, d_cc_space_v_vv_chol + type(gpu_double4), intent(in) :: t2, tau, d_cc_space_v_voov, d_cc_space_v_ovov, d_cc_space_v_oovo - integer :: a,b,i,j,u,k + ! out + type(gpu_double2), intent(out) :: r1 + double precision, intent(out) :: max_r1 - double precision, allocatable :: tau_kau(:,:,:), tmp_vov(:,:,:) + ! internal + integer :: u,i,j,beta,a,b - allocate(tau_kau(cholesky_mo_num,nV,nO)) - !$omp parallel & - !$omp default(shared) & - !$omp private(i,u,j,k,a,b,tmp_vov) - allocate(tmp_vov(nV,nO,nV) ) - !$omp do - do u = 1, nO - do b=1,nV - do j=1,nO - do a=1,nV - tmp_vov(a,j,b) = tau_x(u,j,a,b) - enddo - enddo - enddo - call dgemm('N','T',cholesky_mo_num,nV,nO*nV,1.d0, & - cc_space_v_ov_chol, cholesky_mo_num, tmp_vov, nV, & - 0.d0, tau_kau(1,1,u), cholesky_mo_num) - enddo - !$omp end do nowait - deallocate(tmp_vov) - !$omp do - do i = 1, nO - do u = 1, nO - H_oo(u,i) = cc_space_f_oo(u,i) - enddo + type(gpu_stream) :: stream(nV) + + do a=1,nV + call gpu_stream_create(stream(a)) enddo - !$omp end do nowait - !$omp barrier - !$omp end parallel - call dgemm('T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & - tau_kau, cholesky_mo_num*nV, cc_space_v_vo_chol, cholesky_mo_num*nV, & - 1.d0, H_oo, nO) -end + type(gpu_double2) :: X_oo + call gpu_allocate(X_oo,nO,nO) -! H_vv + call gpu_copy(d_cc_space_f_ov, r1) -subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) + call gpu_set_stream(blas_handle, stream(1)) + call gpu_dgemm(blas_handle, 'N','N', nO, nV, nV, & + 1d0, t1%f(1,1) , size(t1%f,1), & + H_vv%f(1,1), size(H_vv%f,1), & + 1d0, r1%f(1,1) , size(r1%f,1)) - implicit none + call gpu_dgemm(blas_handle, 'N','N', nO, nV, nO, & + -1d0, H_oo%f(1,1), size(H_oo%f,1), & + t1%f(1,1) , size(t1%f,1), & + 1d0, r1%f(1,1), size(r1%f,1)) - integer, intent(in) :: nO,nV - double precision, intent(in) :: tau_x(nO, nO, nV, nV) - double precision, intent(out) :: H_vv(nV, nV) + call gpu_set_stream(blas_handle, stream(nV)) + call gpu_dgemm(blas_handle, 'N','N', nO, nO, nV, & + -2d0, t1%f(1,1), size(t1%f,1), & + d_cc_space_f_vo%f(1,1), size(d_cc_space_f_vo%f,1), & + 0d0, X_oo%f(1,1), size(X_oo%f,1)) - integer :: a,b,i,j,u,k, beta + call gpu_synchronize() + call gpu_set_stream(blas_handle, gpu_default_stream) - double precision, allocatable :: tau_kia(:,:,:), tmp_oov(:,:,:) + call gpu_dgemm(blas_handle, 'T','N', nO, nV, nO, & + 1d0, X_oo%f(1,1), size(X_oo%f,2), & + t1%f(1,1) , size(t1%f,1), & + 1d0, r1%f(1,1) , size(r1%f,1)) - allocate(tau_kia(cholesky_mo_num,nO,nV)) - !$omp parallel & - !$omp default(shared) & - !$omp private(i,beta,j,k,a,b,tmp_oov) - allocate(tmp_oov(nO,nO,nV) ) - !$omp do - do a = 1, nV - do b=1,nV - do j=1,nO - do i=1,nO - tmp_oov(i,j,b) = tau_x(i,j,a,b) - enddo - enddo + + + type(gpu_double4) :: X_voov + call gpu_allocate(X_voov, nV, nO, nO, nV) + + do i=1,nO + do beta=1,nV + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'T', 'T', nV, nO, -1.d0, t2%f(1,i,1,beta), & + nO*nO, t1%f(i,beta), t1%f(1,1), nO, X_voov%f(1,i,1,beta), nV*nO) enddo - call dgemm('N','T',cholesky_mo_num,nO,nO*nV,1.d0, & - cc_space_v_ov_chol, cholesky_mo_num, tmp_oov, nO, & - 0.d0, tau_kia(1,1,a), cholesky_mo_num) enddo - !$omp end do nowait - deallocate(tmp_oov) - !$omp do + do beta=1,nV + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'T', nV, nO*nO, 1.d0, X_voov%f(1,1,1,beta), & + nV, 2.d0, t2%f(1,1,1,beta), nO*nO, X_voov%f(1,1,1,beta), nV) + enddo + + call gpu_synchronize() + call gpu_deallocate(X_oo) + + call gpu_set_stream(blas_handle, gpu_default_stream) + + call gpu_dgemv(blas_handle, 'T', nV*nO, nO*nV, & + 1d0, X_voov%f(1,1,1,1), size(X_voov%f,1) * size(X_voov%f,2), & + H_vo%f(1,1) , 1, & + 1d0, r1%f(1,1) , 1) + + type(gpu_double4) :: X_ovov + call gpu_allocate(X_ovov, nO, nV, nO, nV) + do beta = 1, nV - do a = 1, nV - H_vv(a,beta) = cc_space_f_vv(a,beta) + call gpu_set_stream(blas_handle, stream(beta)) + do u=1,nO + call gpu_dgeam(blas_handle, 'N', 'T', nO, nV, -1.d0, d_cc_space_v_ovov%f(1,1,u,beta), & + nO, 2.d0, d_cc_space_v_voov%f(1,u,1,beta), nV*nO, X_ovov%f(1,1,u,beta), nO) enddo enddo - !$omp end do nowait - !$omp barrier - !$omp end parallel - call dgemm('T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & - tau_kia, cholesky_mo_num*nO, cc_space_v_ov_chol, cholesky_mo_num*nO, & - 1.d0, H_vv, nV) -end + call gpu_set_stream(blas_handle, gpu_default_stream) + call gpu_synchronize() + call gpu_deallocate(X_voov) -! H_vo -subroutine compute_H_vo_chol(nO,nV,t1,H_vo) + call gpu_dgemv(blas_handle, 'T', nO*nV, nO*nV, & + 1d0, X_ovov%f(1,1,1,1), size(X_ovov%f,1) * size(X_ovov%f,2), & + t1%f(1,1), 1, & + 1d0, r1%f(1,1), 1) - implicit none - integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(out) :: H_vo(nV, nO) + integer :: iblock, block_size, nVmax + type(gpu_double4) :: W_vvov, W_vvov_tmp, T_vvoo - integer :: a,b,i,j,u,k + block_size = 16 + call gpu_allocate(T_vvoo, nV,nV,nO,nO) - double precision, allocatable :: tmp_k(:), tmp(:,:,:), tmp2(:,:,:) - do i=1,nO - do a=1,nV - H_vo(a,i) = cc_space_f_vo(a,i) + call gpu_dgeam(blas_handle, 'T', 'N', nV*nV, nO*nO, 1.d0, tau%f(1,1,1,1), & + nO*nO, 0.d0, T_vvoo%f(1,1,1,1), nV*nV, T_vvoo%f(1,1,1,1), nV*nV) + + call gpu_allocate(W_vvov,nV, nV,nO,block_size) + call gpu_allocate(W_vvov_tmp, nV,nO,nV,block_size) + + do iblock = 1, nV, block_size + nVmax = min(block_size,nV-iblock+1) + + call gpu_dgemm(blas_handle, 'T','N', nV*nO, nV*nVmax, cholesky_mo_num, 1.d0, & + d_cc_space_v_vo_chol%f(1,1,1) , cholesky_mo_num, & + d_cc_space_v_vv_chol%f(1,1,iblock), cholesky_mo_num, & + 0.d0, W_vvov_tmp%f(1,1,1,1), nV*nO) + + call gpu_synchronize() + do b=1,nV + call gpu_set_stream(blas_handle, stream(b)) + do i=1,nO + call gpu_dgeam(blas_handle, 'N', 'N', nV, nVmax, 2.d0, W_vvov_tmp%f(1,i,b,1), & + nV*nO*nV, 0.d0, W_vvov_tmp%f(1,i,b,1), nV*nO*nV, W_vvov%f(1,b,i,1), nV*nV*nO) + enddo + enddo + + call gpu_synchronize() + + do beta = 1, nVmax + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'T', nV, nV*nO, 1.d0, W_vvov%f(1,1,1,beta), & + nV, -1.d0, W_vvov_tmp%f(1,1,1,beta), nV*nO, W_vvov%f(1,1,1,beta), nV) + enddo + call gpu_synchronize() + + call gpu_dgemm(blas_handle, 'T','N',nO,nVmax,nO*nV*nV, & + 1d0, T_vvoo%f(1,1,1,1), nV*nV*nO, & + W_vvov%f(1,1,1,1), nO*nV*nV, & + 1d0, r1%f(1,iblock), nO) + enddo + + call gpu_deallocate(X_ovov) + + type(gpu_double4) :: W_oovo + call gpu_allocate(W_oovo, nO,nO,nV,nO) + + do u = 1, nO + do a = 1, nV + call gpu_set_stream(blas_handle, stream(a)) + call gpu_dgeam(blas_handle, 'N', 'T', nO, nO, 2.d0, d_cc_space_v_oovo%f(1,1,a,u), & + nO, -1.d0, d_cc_space_v_oovo%f(1,1,a,u), nO, W_oovo%f(1,1,a,u), nO) enddo enddo - allocate(tmp_k(cholesky_mo_num)) - call dgemm('N', 'N', cholesky_mo_num, 1, nO*nV, 2.d0, & - cc_space_v_ov_chol, cholesky_mo_num, & - t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) + call gpu_set_stream(blas_handle, gpu_default_stream) + call gpu_synchronize() - call dgemm('T','N',nV*nO,1,cholesky_mo_num,1.d0, & - cc_space_v_vo_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & - H_vo, nV*nO) - deallocate(tmp_k) + call gpu_deallocate(W_vvov) + call gpu_deallocate(T_vvoo) - allocate(tmp(cholesky_mo_num,nO,nO)) - allocate(tmp2(cholesky_mo_num,nO,nO)) + ! Change the sign for consistency with the code in spin orbitals + call gpu_dgemm(blas_handle, 'T','N', nO, nV, nO*nO*nV, & + 1d0, W_oovo%f(1,1,1,1), size(W_oovo%f,1) * size(W_oovo%f,2) * size(W_oovo%f,3), & + tau%f(1,1,1,1), size(tau%f,1) * size(tau%f,2) * size(tau%f,3), & + -1d0, r1%f(1,1), size(r1%f,1)) - call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & - cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, tmp, cholesky_mo_num*nO) + call gpu_synchronize() + call gpu_deallocate(W_oovo) - do i=1,nO - do j=1,nO - do k=1,cholesky_mo_num - tmp2(k,j,i) = tmp(k,i,j) - enddo + max_r1 = 0d0 + do a = 1, nV + do i = 1, nO + max_r1 = max(dabs(r1%f(i,a)), max_r1) enddo enddo - deallocate(tmp) - call dgemm('T','N', nV, nO, cholesky_mo_num*nO, -1.d0, & - cc_space_v_ov_chol, cholesky_mo_num*nO, tmp2, cholesky_mo_num*nO, & - 1.d0, H_vo, nV) + do a=1,nV + call gpu_stream_destroy(stream(a)) + enddo end ! R2 -subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) - +subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv, & + d_cc_space_v_oovv, d_cc_space_v_vooo, d_cc_space_v_oooo, d_cc_space_v_oovo, d_cc_space_v_ovvo, d_cc_space_v_ovoo, & + d_cc_space_v_ovov, d_cc_space_v_vvoo, d_cc_space_v_oo_chol, d_cc_space_v_ov_chol, d_cc_space_v_vo_chol, d_cc_space_v_vv_chol, & + d_cc_space_f_vo, & + r2,max_r2) + use gpu implicit none ! in - integer, intent(in) :: nO, nV - double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) - double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + integer, intent(in) :: nO, nV + type(gpu_double2), intent(in) :: t1, H_oo, H_vv, d_cc_space_f_vo + type(gpu_double4), intent(in) :: t2, tau, d_cc_space_v_oovv + type(gpu_double4), intent(in) :: d_cc_space_v_vooo, d_cc_space_v_oooo + type(gpu_double4), intent(in) :: d_cc_space_v_vvoo, d_cc_space_v_oovo + type(gpu_double4), intent(in) :: d_cc_space_v_ovvo, d_cc_space_v_ovoo + type(gpu_double4), intent(in) :: d_cc_space_v_ovov + type(gpu_double3), intent(in) :: d_cc_space_v_oo_chol, d_cc_space_v_ov_chol + type(gpu_double3), intent(in) :: d_cc_space_v_vo_chol, d_cc_space_v_vv_chol ! out - double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 + double precision, intent(out) :: max_r2 + type(gpu_double4), intent(out) :: r2 ! internal integer :: u,v,i,j,beta,gam,a,b double precision :: max_r2_local + type(gpu_stream) :: stream(nV) + call set_multiple_levels_omp(.False.) - !$omp parallel & - !$omp shared(nO,nV,r2,cc_space_v_oovv) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + call gpu_copy(d_cc_space_v_oovv, r2) + + type(gpu_double4) :: A1 + call gpu_allocate(A1,nO,nO,nO,nO) + call compute_A1_chol(nO,nV,t1,t2,tau,d_cc_space_v_vooo, & + d_cc_space_v_oooo, d_cc_space_v_vvoo, A1) - double precision, allocatable :: A1(:,:,:,:) - allocate(A1(nO,nO,nO,nO)) - call compute_A1_chol(nO,nV,t1,t2,tau,A1) - call dgemm('N','N',nO*nO,nV*nV,nO*nO, & - 1d0, A1, size(A1,1) * size(A1,2), & - tau, size(tau,1) * size(tau,2), & - 1d0, r2, size(r2,1) * size(r2,2)) + call gpu_dgemm(blas_handle, 'N','N',nO*nO,nV*nV,nO*nO, & + 1d0, A1%f(1,1,1,1), size(A1%f,1) * size(A1%f,2), & + tau%f(1,1,1,1), size(tau%f,1) * size(tau%f,2), & + 1d0, r2%f(1,1,1,1), size(r2%f,1) * size(r2%f,2)) + + call gpu_deallocate(A1) - deallocate(A1) integer :: block_size, iblock, k block_size = 16 - double precision, dimension(:,:,:), allocatable :: B1, tmp_cc, tmpB1 - double precision, dimension(:,:), allocatable :: tmp_cc2 + type(gpu_double3) :: tmp_cc, B1, tmpB1 + type(gpu_double2) :: tmp_cc2 - allocate(tmp_cc(cholesky_mo_num,nV,nV)) - call dgemm('N','N', cholesky_mo_num*nV, nV, nO, 1.d0, & - cc_space_v_vo_chol, cholesky_mo_num*nV, t1, nO, 0.d0, tmp_cc, cholesky_mo_num*nV) + call gpu_allocate(tmp_cc,cholesky_mo_num,nV,nV) + call gpu_dgemm(blas_handle, 'N','N', cholesky_mo_num*nV, nV, nO, 1.d0, & + d_cc_space_v_vo_chol%f(1,1,1), cholesky_mo_num*nV, t1%f(1,1), nO, 0.d0, tmp_cc%f(1,1,1), cholesky_mo_num*nV) call set_multiple_levels_omp(.False.) + call gpu_synchronize() + + type(gpu_blas) :: blas + + !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, tmp_cc2, beta, b, a, blas) + call gpu_allocate(B1,nV,nV,block_size) + call gpu_allocate(tmpB1,nV,block_size,nV) + call gpu_allocate(tmp_cc2,cholesky_mo_num,nV) + + call gpu_blas_create(blas) - !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, tmp_cc2, beta, b, a) - allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV), tmp_cc2(cholesky_mo_num,nV)) !$OMP DO do gam = 1, nV - do a=1,nV - do k=1,cholesky_mo_num - tmp_cc2(k,a) = cc_space_v_vv_chol(k,a,gam) - tmp_cc(k,a,gam) - enddo - enddo + call gpu_dgeam(blas, 'N', 'N', cholesky_mo_num, nV, 1.d0, d_cc_space_v_vv_chol%f(1,1,gam), & + cholesky_mo_num, -1.d0, tmp_cc%f(1,1,gam), cholesky_mo_num, tmp_cc2%f(1,1), cholesky_mo_num) do iblock = 1, nV, block_size - call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & - -1.d0, tmp_cc(1,1,iblock), cholesky_mo_num, & - cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, & - 0.d0, tmpB1, nV*block_size) + call gpu_dgemm(blas, 'T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & + -1.d0, tmp_cc%f(1,1,iblock), cholesky_mo_num, & + d_cc_space_v_vv_chol%f(1,1,gam), cholesky_mo_num, & + 0.d0, tmpB1%f(1,1,1), nV*block_size) - call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & - 1.d0, cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & - tmp_cc2, cholesky_mo_num, & - 1.d0, tmpB1, nV*block_size) + call gpu_dgemm(blas, 'T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & + 1.d0, d_cc_space_v_vv_chol%f(1,1,iblock), cholesky_mo_num, & + tmp_cc2%f(1,1), cholesky_mo_num, & + 1.d0, tmpB1%f(1,1,1), nV*block_size) do beta = iblock, min(nV, iblock+block_size-1) - do b = 1, nV - do a = 1, nV - B1(a,b,beta-iblock+1) = tmpB1(a,beta-iblock+1,b) - enddo - enddo + call gpu_dgeam(blas, 'N', 'N', nV, nV, 1.d0, tmpB1%f(1,beta-iblock+1,1), & + nV*block_size, 0.d0, B1%f(1,1,beta-iblock+1), nV, B1%f(1,1,beta-iblock+1), nV) enddo - call dgemm('N','N',nO*nO,min(block_size, nV-iblock+1),nV*nV, & - 1d0, tau, size(tau,1) * size(tau,2), & - B1 , size(B1 ,1) * size(B1 ,2), & - 1d0, r2(1,1,iblock,gam), size(r2 ,1) * size(r2 ,2)) + call gpu_dgemm(blas, 'N','N',nO*nO,min(block_size, nV-iblock+1),nV*nV, & + 1d0, tau%f(1,1,1,1), size(tau%f,1) * size(tau%f,2), & + B1%f(1,1,1) , size(B1%f ,1) * size(B1%f ,2), & + 1d0, r2%f(1,1,iblock,gam), size(r2%f ,1) * size(r2%f ,2)) enddo enddo !$OMP ENDDO - deallocate(B1, tmpB1, tmp_cc2) + call gpu_blas_destroy(blas) + + call gpu_deallocate(B1) + call gpu_deallocate(tmpB1) + call gpu_deallocate(tmp_cc2) !$OMP END PARALLEL - deallocate(tmp_cc) + call gpu_deallocate(tmp_cc) + type(gpu_double4) :: X_oovv + call gpu_allocate(X_oovv,nO,nO,nV,nV) + call gpu_copy(t2,X_oovv) - double precision, allocatable :: X_oovv(:,:,:,:) - allocate(X_oovv(nO,nO,nV,nV)) - !$omp parallel & - !$omp shared(nO,nV,t2,X_oovv) & - !$omp private(u,v,gam,a) & - !$omp default(none) - !$omp do - do a = 1, nV - do gam = 1, nV - do v = 1, nO - do u = 1, nO - X_oovv(u,v,gam,a) = t2(u,v,gam,a) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + type(gpu_double2) :: g_occ, g_vir + call gpu_allocate(g_vir,nV,nV) + call gpu_allocate(g_occ,nO,nO) + call compute_g_vir_chol(nO,nV,t1,t2,H_vv,d_cc_space_f_vo, & + d_cc_space_v_ov_chol, d_cc_space_v_vv_chol, g_vir) + call compute_g_occ_chol(nO,nV,t1,t2,H_oo, & + d_cc_space_f_vo, d_cc_space_v_ov_chol, d_cc_space_v_oo_chol, d_cc_space_v_ovoo, g_occ) - double precision, allocatable :: g_vir(:,:) - allocate(g_vir(nV,nV)) - call compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) + type(gpu_double4) :: Y_oovv + call gpu_allocate(Y_oovv,nO,nO,nV,nV) - double precision, allocatable :: Y_oovv(:,:,:,:) - allocate(Y_oovv(nO,nO,nV,nV)) + call gpu_dgemm(blas_handle, 'N','N',nO*nO*nV,nV,nV, & + 1d0, X_oovv%f(1,1,1,1), size(X_oovv%f,1) * size(X_oovv%f,2) * size(X_oovv%f,3), & + g_vir%f(1,1), size(g_vir%f,1), & + 0d0, Y_oovv%f(1,1,1,1), size(Y_oovv%f,1) * size(Y_oovv%f,2) * size(Y_oovv%f,3)) - call dgemm('N','N',nO*nO*nV,nV,nV, & - 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & - g_vir, size(g_vir,1), & - 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) - deallocate(g_vir) - deallocate(X_oovv) + call gpu_dgemm(blas_handle, 'N','N',nO,nO*nV*nV,nO, & + -1d0, g_occ%f(1,1), size(g_occ%f,1), & + t2%f(1,1,1,1) , size(t2%f,1), & + 1d0, Y_oovv%f(1,1,1,1), size(Y_oovv%f,1)) - !$omp parallel & - !$omp shared(nO,nV,r2,Y_oovv) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel - deallocate(Y_oovv) + call gpu_dgemm(blas_handle, 'N','N',nO*nO*nV,nV,nO, & + -1d0, d_cc_space_v_oovo%f(1,1,1,1), size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & + t1%f(1,1) , size(t1%f,1), & + 1d0, Y_oovv%f(1,1,1,1), size(Y_oovv%f,1) * size(Y_oovv%f,2) * size(Y_oovv%f,3)) - double precision, allocatable :: g_occ(:,:) - allocate(g_occ(nO,nO)) - call compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) - allocate(X_oovv(nO,nO,nV,nV)) - call dgemm('N','N',nO,nO*nV*nV,nO, & - 1d0, g_occ , size(g_occ,1), & - t2 , size(t2,1), & - 0d0, X_oovv, size(X_oovv,1)) - deallocate(g_occ) + call gpu_dgeam(blas_handle, 'N', 'N', nO*nO, nV*nV, 1.d0, Y_oovv%f(1,1,1,1), & + nO*nO, 1.d0, r2%f(1,1,1,1), nO*nO, r2%f(1,1,1,1), nO*nO) + + call gpu_synchronize() + call gpu_deallocate(X_oovv) + + call gpu_deallocate(g_vir) + call gpu_deallocate(g_occ) + + type(gpu_double4) :: X_vovo, Y_oovo + call gpu_allocate(X_vovo,nV,nO,nV,nO) + + do a=1,nV + call gpu_stream_create(stream(a)) + enddo - !$omp parallel & - !$omp shared(nO,nV,r2,X_oovv) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do do gam = 1, nV + call gpu_set_stream(blas_handle, stream(gam)) do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) - enddo - enddo + call gpu_dgeam(blas_handle, 'N', 'T', nO, nO, 1.d0, r2%f(1,1,beta,gam), & + nO, 1.d0, Y_oovv%f(1,1,gam,beta), nO, r2%f(1,1,beta,gam), nO) + enddo + enddo + + do i = 1, nO + do gam = 1, nV + call gpu_set_stream(blas_handle, stream(gam)) + call gpu_dgeam(blas_handle, 'T', 'N', nV, nO, 1.d0, d_cc_space_v_ovvo%f(1,1,gam,i), & + nO, 0.d0, X_vovo%f(1,1,gam,i), nV, X_vovo%f(1,1,gam,i), nV) enddo enddo - !$omp end do - !$omp end parallel - deallocate(X_oovv) + do a=1,nV + call gpu_stream_destroy(stream(a)) + enddo + call gpu_set_stream(blas_handle, gpu_default_stream) + - double precision, allocatable :: X_vovv(:,:,:,:) - allocate(X_vovv(nV,nO,nV,block_size)) - allocate(Y_oovv(nO,nO,nV,nV)) + call gpu_allocate(Y_oovo,nO,nO,nV,nO) + !$OMP PARALLEL PRIVATE(blas, iblock, gam, X_vovv) + call gpu_blas_create(blas) + type(gpu_double4) :: X_vovv + call gpu_allocate(X_vovv,nV,nO,nV,block_size) + !$OMP DO do iblock = 1, nV, block_size do gam = iblock, min(nV, iblock+block_size-1) - call dgemm('T','N',nV, nO*nV, cholesky_mo_num, 1.d0, & - cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, cc_space_v_ov_chol, & - cholesky_mo_num, 0.d0, X_vovv(1,1,1,gam-iblock+1), nV) + call gpu_dgemm(blas, 'T','N',nV, nO*nV, cholesky_mo_num, 1.d0, & + d_cc_space_v_vv_chol%f(1,1,gam), cholesky_mo_num, d_cc_space_v_ov_chol%f(1,1,1), & + cholesky_mo_num, 0.d0, X_vovv%f(1,1,1,gam-iblock+1), nV) enddo - call dgemm('N','N',nO,nO*nV*min(block_size, nV-iblock+1),nV, & - 1d0, t1 , size(t1,1), & - X_vovv, size(X_vovv,1), & - 0d0, Y_oovv(1,1,1,iblock), size(Y_oovv,1)) - - enddo - deallocate(X_vovv) - !$omp parallel & - !$omp shared(nO,nV,r2,Y_oovv) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta) - enddo - enddo - enddo + call gpu_dgemm(blas, 'N','N', nO, & + nO*nV*min(block_size, nV-iblock+1),nV, & + 1.d0, t1%f(1,1) , size(t1%f,1), & + X_vovv%f(1,1,1,1), size(X_vovv%f,1), & + 0d0, Y_oovv%f(1,1,1,iblock), size(Y_oovv%f,1)) enddo - !$omp end do - !$omp end parallel - deallocate(Y_oovv) + !$OMP END DO - double precision, allocatable :: X_ovvo(:,:,:,:) - double precision, allocatable :: tcc(:,:,:), tcc2(:,:,:) - allocate(tcc2(cholesky_mo_num,nV,nO), X_ovvo(nO,nV,nV,nO)) - allocate(tcc(cholesky_mo_num,nO,nV)) + call gpu_blas_destroy(blas) + call gpu_deallocate(X_vovv) + !$OMP END PARALLEL - call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & - cc_space_v_vv_chol, cholesky_mo_num*nV, t1, nO, & - 0.d0, tcc2, cholesky_mo_num*nV) + call gpu_dgemm(blas_handle, 'N','N',nO,nO*nV*nO,nV, & + 1d0, t1%f(1,1), size(t1%f,1), & + X_vovo%f(1,1,1,1), size(X_vovo%f,1), & + 0d0, Y_oovo%f(1,1,1,1), size(Y_oovo%f,1)) - call dgemm('N','N', cholesky_mo_num*nO, nV, nO, 1.d0, & - cc_space_v_oo_chol, cholesky_mo_num*nO, t1, nO, & - 0.d0, tcc, cholesky_mo_num*nO) + call gpu_dgemm(blas_handle, 'N','N',nO*nO*nV, nV, nO, & + -1d0, Y_oovo%f(1,1,1,1), size(Y_oovo%f,1) * size(Y_oovo%f,2) * size(Y_oovo%f,3), & + t1%f(1,1) , size(t1%f,1), & + 1d0, Y_oovv%f(1,1,1,1), size(Y_oovv%f,1) * size(Y_oovv%f,2) * size(Y_oovv%f,3)) - call dgemm('T','N', nO*nV, nV*nO, cholesky_mo_num, 1.d0, & - tcc, cholesky_mo_num, tcc2, cholesky_mo_num, 0.d0, & - X_ovvo, nO*nV) + call gpu_synchronize() + call gpu_deallocate(X_vovo) + call gpu_deallocate(Y_oovo) - deallocate(tcc, tcc2) + do a=1,nV + call gpu_stream_create(stream(a)) + enddo - !$omp parallel & - !$omp shared(nO,nV,r2,X_ovvo) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do do gam = 1, nV + call gpu_set_stream(blas_handle, stream(gam)) do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_ovvo(u,beta,gam,v) - enddo - enddo + call gpu_dgeam(blas_handle, 'T', 'N', nO, nO, 1.d0, Y_oovv%f(1,1,beta,gam), & + nO, 1.d0, r2%f(1,1,beta,gam), nO, r2%f(1,1,beta,gam), nO) enddo - enddo - !$omp end do - !$omp do - do beta = 1, nV - do gam = 1, nV - do v = 1, nO - do u = 1, nO - r2(v,u,gam,beta) = r2(v,u,gam,beta) - X_ovvo(u,beta,gam,v) - enddo - enddo + do j=1,nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, r2%f(1,j,1,gam), & + nO*nO, 1.d0, Y_oovv%f(1,j,gam,1), nO*nO*nV, r2%f(1,j,1,gam), nO*nO) enddo enddo - !$omp end do - !$omp end parallel - deallocate(X_ovvo) - !----- + call gpu_set_stream(blas_handle, gpu_default_stream) - allocate(X_oovv(nO,nO,nV,nV)) - call dgemm('N','N',nO*nO*nV,nV,nO, & - 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & - t1 , size(t1,1), & - 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + call gpu_synchronize() + call gpu_deallocate(Y_oovv) - !$omp parallel & - !$omp shared(nO,nV,r2,X_oovv) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel - deallocate(X_oovv) - - double precision, allocatable :: X_vovo(:,:,:,:), Y_oovo(:,:,:,:) - allocate(X_vovo(nV,nO,nV,nO)) - - !$omp parallel & - !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & - !$omp private(a,v,gam,i) & - !$omp default(none) - do i = 1, nO - !$omp do - do gam = 1, nV - do v = 1, nO - do a = 1, nV - X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i) - enddo - enddo - enddo - !$omp end do nowait - enddo - !$omp end parallel + type(gpu_double4) :: X_ovvo + type(gpu_double3) :: tcc, tcc2 + call gpu_allocate(tcc2,cholesky_mo_num,nV,nO) + call gpu_allocate(X_ovvo,nO,nV,nV,nO) + call gpu_allocate(tcc,cholesky_mo_num,nO,nV) - allocate(Y_oovo(nO,nO,nV,nO)) - call dgemm('N','N',nO,nO*nV*nO,nV, & - 1d0, t1, size(t1,1), & - X_vovo, size(X_vovo,1), & - 0d0, Y_oovo, size(Y_oovo,1)) + call gpu_dgemm(blas_handle, 'N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & + d_cc_space_v_vv_chol%f(1,1,1), cholesky_mo_num*nV, t1%f(1,1), nO, & + 0.d0, tcc2%f(1,1,1), cholesky_mo_num*nV) + + call gpu_dgemm(blas_handle, 'N','N', cholesky_mo_num*nO, nV, nO, 1.d0, & + d_cc_space_v_oo_chol%f(1,1,1), cholesky_mo_num*nO, t1%f(1,1), nO, & + 0.d0, tcc%f(1,1,1), cholesky_mo_num*nO) + + call gpu_dgemm(blas_handle, 'T','N', nO*nV, nV*nO, cholesky_mo_num, 1.d0, & + tcc%f(1,1,1), cholesky_mo_num, tcc2%f(1,1,1), cholesky_mo_num, 0.d0, & + X_ovvo%f(1,1,1,1), nO*nV) + + call gpu_synchronize() - deallocate(X_vovo) - allocate(X_oovv(nO,nO,nV,nV)) - call dgemm('N','N',nO*nO*nV, nV, nO, & - 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & - t1 , size(t1,1), & - 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) - deallocate(Y_oovo) - !$omp parallel & - !$omp shared(nO,nV,r2,X_oovv) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do do gam = 1, nV + call gpu_set_stream(blas_handle, stream(gam)) + do j=1,nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, -1.d0, X_ovvo%f(1,1,gam,j), & + nO, 1.d0, r2%f(1,j,1,gam), nO*nO, r2%f(1,j,1,gam), nO*nO) + enddo do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam) - enddo - enddo + call gpu_dgeam(blas_handle, 'T', 'N', nO, nO, -1.d0, X_ovvo%f(1,gam,beta,1), & + nO*nV*nV, 1.d0, r2%f(1,1,beta,gam), nO, r2%f(1,1,beta,gam), nO) enddo enddo - !$omp end do - !$omp end parallel - deallocate(X_oovv) + call gpu_set_stream(blas_handle, gpu_default_stream) - double precision, allocatable :: J1(:,:,:,:) - allocate(J1(nO,nV,nV,nO)) - call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & - cc_space_v_vvoo,J1) + call gpu_synchronize + call gpu_deallocate(tcc) + call gpu_deallocate(tcc2) + call gpu_deallocate(X_ovvo) - double precision, allocatable :: K1(:,:,:,:) - allocate(K1(nO,nV,nO,nV)) - call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & - cc_space_v_ovov,K1) - allocate(X_ovvo(nO,nV,nV,nO)) - !$omp parallel & - !$omp private(u,v,gam,beta,i,a) & - !$omp default(shared) - do i = 1, nO - !$omp do - do a = 1, nV - do beta = 1, nV - do u = 1, nO - X_ovvo(u,beta,a,i) = (J1(u,a,beta,i) - 0.5d0 * K1(u,a,i,beta)) - enddo - enddo - enddo - !$omp end do nowait - enddo - !$omp end parallel - deallocate(J1) + type(gpu_double4) :: J1, K1 + type(gpu_double4) :: Y_voov, Z_ovov - double precision, allocatable :: Y_voov(:,:,:,:) - allocate(Y_voov(nV,nO,nO,nV)) - !$omp parallel & - !$omp private(u,v,gam,beta,i,a) & - !$omp default(shared) - !$omp do - do gam = 1, nV - do v = 1, nO - do i = 1, nO - do a = 1, nV - Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + call gpu_allocate(J1,nO,nV,nV,nO) + call compute_J1_chol(nO,nV,t1,t2,d_cc_space_v_ovvo,d_cc_space_v_ovoo, & + d_cc_space_v_vvoo,d_cc_space_v_vo_chol,d_cc_space_v_vv_chol,J1) - double precision, allocatable :: Z_ovov(:,:,:,:) - allocate(Z_ovov(nO,nV,nO,nV)) + call gpu_allocate(K1,nO,nV,nO,nV) + call compute_K1_chol(nO,nV,t1,t2,d_cc_space_v_ovoo,d_cc_space_v_vvoo, & + d_cc_space_v_ovov,d_cc_space_v_ov_chol,d_cc_space_v_vv_chol,K1) - call dgemm('N','N', nO*nV,nO*nV,nV*nO, & - 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & - Y_voov, size(Y_voov,1) * size(Y_voov,2), & - 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) - deallocate(X_ovvo,Y_voov) + call gpu_allocate(X_ovvo,nO,nV,nV,nO) + call gpu_allocate(Y_voov,nV,nO,nO,nV) - !$omp parallel & - !$omp shared(nO,nV,r2,Z_ovov) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) - enddo - enddo + do a=1, nV + call gpu_set_stream(blas_handle, stream(a)) + do i=1, nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, J1%f(1,a,1,i), & + nO*nV, -0.5d0, K1%f(1,a,i,1), nO*nV*nO, X_ovvo%f(1,1,a,i), nO) + call gpu_dgeam(blas_handle, 'T', 'T', nV, nO, 2.d0, t2%f(1,i,1,a), & + nO*nO, -1.d0, t2%f(1,i,a,1), nO*nO*nV, Y_voov%f(1,1,i,a), nV) enddo enddo - !$omp end do - !$omp end parallel - deallocate(Z_ovov) + call gpu_allocate(Z_ovov,nO,nV,nO,nV) - double precision, allocatable :: Y_ovov(:,:,:,:), X_ovov(:,:,:,:) - allocate(X_ovov(nO,nV,nO,nV)) - allocate(Y_ovov(nO,nV,nO,nV)) + call gpu_synchronize() + call gpu_deallocate(J1) + call gpu_set_stream(blas_handle, gpu_default_stream) - !$omp parallel & - !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & - !$omp private(u,a,i,beta,gam) & - !$omp default(none) - !$omp do - do beta = 1, nV - do u = 1, nO - do a = 1, nV - do i = 1, nO - X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp do - do gam = 1, nV - do v = 1, nO - do a = 1, nV - do i = 1, nO - Y_ovov(i,a,v,gam) = t2(i,v,gam,a) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + call gpu_dgemm(blas_handle, 'N','N', nO*nV,nO*nV,nV*nO, & + 1d0, X_ovvo%f(1,1,1,1), size(X_ovvo%f,1) * size(X_ovvo%f,2), & + Y_voov%f(1,1,1,1), size(Y_voov%f,1) * size(Y_voov%f,2), & + 0d0, Z_ovov%f(1,1,1,1), size(Z_ovov%f,1) * size(Z_ovov%f,2)) - allocate(Z_ovov(nO,nV,nO,nV)) - call dgemm('T','N',nO*nV,nO*nV,nO*nV, & - 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & - Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & - 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) - deallocate(X_ovov, Y_ovov) + call gpu_synchronize() + call gpu_deallocate(Y_voov) + call gpu_deallocate(X_ovvo) - !$omp parallel & - !$omp shared(nO,nV,r2,Z_ovov) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel - deallocate(Z_ovov) + type(gpu_double4) :: Y_ovov, X_ovov + call gpu_allocate(X_ovov,nO,nV,nO,nV) + call gpu_allocate(Y_ovov,nO,nV,nO,nV) - allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) - !$omp parallel & - !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & - !$omp private(u,v,gam,beta,i,a) & - !$omp default(none) - !$omp do - do a = 1, nV - do i = 1, nO - do gam = 1, nV - do u = 1, nO - X_ovov(u,gam,i,a) = K1(u,a,i,gam) - enddo - enddo + do a=1, nV + call gpu_set_stream(blas_handle, stream(a)) + do j=1,nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, t2%f(1,j,1,a), & + nO*nO, 0.d0, t2%f(1,j,1,a), nO*nO, Y_ovov%f(1,a,j,1), nO*nV*nO) + enddo + do beta=1, nV + call gpu_dgeam(blas_handle, 'T', 'T', nO, nO, 0.5d0, K1%f(1,a,1,beta), & + nO*nV, 0.d0, K1%f(1,a,1,beta), nO*nV, X_ovov%f(1,a,1,beta), nO*nV) enddo enddo - !$omp end do nowait + call gpu_set_stream(blas_handle, gpu_default_stream) - !$omp do - do beta = 1, nV - do v = 1, nO - do a = 1, nV - do i = 1, nO - Y_ovov(i,a,v,beta) = t2(i,v,beta,a) - enddo - enddo + call gpu_synchronize() + + call gpu_dgemm(blas_handle, 'T','N',nO*nV,nO*nV,nO*nV, & + -1d0, X_ovov%f(1,1,1,1), size(X_ovov%f,1) * size(X_ovov%f,2), & + Y_ovov%f(1,1,1,1), size(Y_ovov%f,1) * size(Y_ovov%f,2), & + 1d0, Z_ovov%f(1,1,1,1), size(Z_ovov%f,1) * size(Z_ovov%f,2)) + + call gpu_synchronize() + + do gam=1, nV + call gpu_set_stream(blas_handle, stream(gam)) + do j=1,nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, r2%f(1,j,1,gam), & + nO*nO, 1.d0, Z_ovov%f(1,1,j,gam), nO, r2%f(1,j,1,gam), nO*nO) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, K1%f(1,1,j,gam), & + nO, 0.d0, K1%f(1,1,j,gam), nO, X_ovov%f(1,gam,j,1), nO*nV*nO) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nO, 1.d0, t2%f(1,j,1,gam), & + nO*nO, 0.d0, t2%f(1,j,1,gam), nO*nO, Y_ovov%f(1,gam,j,1), nO*nV*nO) + enddo + do beta=1, nV + call gpu_dgeam(blas_handle, 'N', 'T', nO, nO, 1.d0, r2%f(1,1,beta,gam), & + nO, 1.d0, Z_ovov%f(1,gam,1,beta), nO*nV, r2%f(1,1,beta,gam), nO) enddo enddo - !$omp end do - !$omp end parallel - deallocate(K1) + call gpu_set_stream(blas_handle, gpu_default_stream) - allocate(Z_ovov(nO,nV,nO,nV)) - call dgemm('N','N',nO*nV,nO*nV,nO*nV, & - 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & - Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & - 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + call gpu_deallocate(K1) - deallocate(X_ovov,Y_ovov) + call gpu_dgemm(blas_handle, 'N','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov%f(1,1,1,1), size(X_ovov%f,1) * size(X_ovov%f,2), & + Y_ovov%f(1,1,1,1), size(Y_ovov%f,1) * size(Y_ovov%f,2), & + 0d0, Z_ovov%f(1,1,1,1), size(Z_ovov%f,1) * size(Z_ovov%f,2)) - !$omp parallel & - !$omp shared(nO,nV,r2,Z_ovov) & - !$omp private(u,v,gam,beta) & - !$omp default(none) - !$omp do + call gpu_synchronize() + + call gpu_deallocate(X_ovov) + call gpu_deallocate(Y_ovov) + + ! Change the sign for consistency with the code in spin orbitals do gam = 1, nV + call gpu_set_stream(blas_handle, stream(gam)) + do j=1,nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, r2%f(1,j,1,gam), & + nO*nO, -1.d0, Z_ovov%f(1,gam,j,1), nO*nV*nO, r2%f(1,j,1,gam), nO*nO) + enddo do beta = 1, nV - do v = 1, nO - do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam) - enddo - enddo + call gpu_dgeam(blas_handle, 'N', 'T', nO, nO, -1.d0, r2%f(1,1,beta,gam), & + nO, 1.d0, Z_ovov%f(1,beta,1,gam), nO*nV, r2%f(1,1,beta,gam), nO) enddo enddo - !$omp end do - !$omp end parallel - - deallocate(Z_ovov) - ! Change the sign for consistency with the code in spin orbitals + call gpu_deallocate(Z_ovov) max_r2 = 0d0 !$omp parallel & @@ -996,8 +796,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do a = 1, nV do j = 1, nO do i = 1, nO - r2(i,j,a,b) = -r2(i,j,a,b) - max_r2_local = max(r2(i,j,a,b), max_r2_local) + max_r2_local = max(r2%f(i,j,a,b), max_r2_local) enddo enddo enddo @@ -1012,447 +811,458 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! A1 -subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) - +subroutine compute_A1_chol(nO,nV,t1,t2,tau,d_cc_space_v_vooo, & + d_cc_space_v_oooo, d_cc_space_v_vvoo, A1) + use gpu implicit none - integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(in) :: tau(nO, nO, nV, nV) - double precision, intent(out) :: A1(nO, nO, nO, nO) + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: t1 + type(gpu_double4), intent(in) :: t2, tau + type(gpu_double4), intent(in) :: d_cc_space_v_vooo, d_cc_space_v_oooo, d_cc_space_v_vvoo + type(gpu_double4), intent(out) :: A1 integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta - double precision, allocatable :: Y_oooo(:,:,:,:) - allocate(Y_oooo(nO,nO,nO,nO)) + type(gpu_double4) :: Y_oooo + call gpu_allocate(Y_oooo,nO,nO,nO,nO) ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & - call dgemm('N','N', nO, nO*nO*nO, nV, & - 1d0, t1 , size(t1,1), & - cc_space_v_vooo, size(cc_space_v_vooo,1), & - 0d0, Y_oooo, size(Y_oooo,1)) + call gpu_dgemm(blas_handle, 'N','N', nO, nO*nO*nO, nV, & + 1d0, t1%f(1,1) , size(t1%f,1), & + d_cc_space_v_vooo%f(1,1,1,1), size(d_cc_space_v_vooo%f,1), & + 0d0, Y_oooo%f(1,1,1,1), size(Y_oooo%f,1)) + + type(gpu_stream) :: stream(nO) + + do i=1, nO + call gpu_stream_create(stream(i)) + enddo + + call gpu_synchronize() - !$omp parallel & - !$omp private(u,v,i,j) & - !$omp default(shared) - !$omp do collapse(2) do j = 1, nO + call gpu_set_stream(blas_handle, stream(j)) do i = 1, nO - do v = 1, nO - do u = 1, nO - A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j) - enddo - enddo + call gpu_dgeam(blas_handle, 'N', 'T', nO, nO, 1.d0, d_cc_space_v_oooo%f(1,1,i,j), & + nO, 1.d0, Y_oooo%f(1,1,j,i), nO, A1%f(1,1,i,j), nO) enddo + call gpu_dgeam(blas_handle, 'N', 'N', nO, nO*nO, 1.d0, A1%f(1,1,1,j), & + nO, 1.d0, Y_oooo%f(1,1,1,j), nO, A1%f(1,1,1,j), nO) + enddo + + call gpu_set_stream(blas_handle, gpu_default_stream) + do i=1, nO + call gpu_stream_destroy(stream(i)) enddo - !$omp end do - !$omp end parallel - deallocate(Y_oooo) + call gpu_deallocate(Y_oooo) ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) - call dgemm('N','N', nO*nO, nO*nO, nV*nV, & - 1d0, tau , size(tau,1) * size(tau,2), & - cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & - 1d0, A1 , size(A1,1) * size(A1,2)) + call gpu_dgemm(blas_handle, 'N','N', nO*nO, nO*nO, nV*nV, & + 1d0, tau%f(1,1,1,1), size(tau%f,1) * size(tau%f,2), & + d_cc_space_v_vvoo%f(1,1,1,1), size(d_cc_space_v_vvoo%f,1) * size(d_cc_space_v_vvoo%f,2), & + 1d0, A1%f(1,1,1,1), size(A1%f,1) * size(A1%f,2)) + call gpu_synchronize() end ! g_occ -subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) +subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo, & + d_cc_space_f_vo, d_cc_space_v_ov_chol, d_cc_space_v_oo_chol, d_cc_space_v_ovoo, g_occ) + use gpu implicit none integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV), H_oo(nO, nO) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(out) :: g_occ(nO, nO) + type(gpu_double2), intent(in) :: t1, H_oo, d_cc_space_f_vo + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_oo_chol + type(gpu_double4), intent(in) :: t2, d_cc_space_v_ovoo + type(gpu_double2), intent(out) :: g_occ + + call gpu_copy(H_oo, g_occ) - g_occ = H_oo + call gpu_dgemm(blas_handle, 'N','N',nO,nO,nV, & + 1d0, t1%f(1,1), size(t1%f,1), & + d_cc_space_f_vo%f(1,1), size(d_cc_space_f_vo%f,1), & + 1d0, g_occ%f(1,1), size(g_occ%f,1)) - call dgemm('N','N',nO,nO,nV, & - 1d0, t1, size(t1,1), & - cc_space_f_vo, size(cc_space_f_vo,1), & - 1d0, g_occ, size(g_occ,1)) + type(gpu_double1) :: X + call gpu_allocate(X,cholesky_mo_num) - double precision, allocatable :: X(:) - allocate(X(cholesky_mo_num)) - call dgemv('N',cholesky_mo_num,nO*nV,2.d0, & - cc_space_v_ov_chol, cholesky_mo_num, & - t1, 1, 0.d0, X, 1) + call gpu_dgemv(blas_handle, 'N',cholesky_mo_num,nO*nV,2.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, & + t1%f(1,1), 1, 0.d0, X%f(1), 1) - call dgemv('T',cholesky_mo_num,nO*nO,1.d0, & - cc_space_v_oo_chol, cholesky_mo_num, & - X, 1, 1.d0, g_occ, 1) - deallocate(X) + call gpu_dgemv(blas_handle, 'T',cholesky_mo_num,nO*nO,1.d0, & + d_cc_space_v_oo_chol%f(1,1,1), cholesky_mo_num, & + X%f(1), 1, 1.d0, g_occ%f(1,1), 1) - call dgemv('T',nO*nV,nO*nO,-1.d0, & - cc_space_v_ovoo, nO*nV, & - t1, 1, 1.d0, g_occ, 1) + call gpu_dgemv(blas_handle, 'T',nO*nV,nO*nO,-1.d0, & + d_cc_space_v_ovoo%f(1,1,1,1), nO*nV, & + t1%f(1,1), 1, 1.d0, g_occ%f(1,1), 1) + + call gpu_synchronize() + call gpu_deallocate(X) end ! g_vir -subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) +subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,d_cc_space_f_vo, & + d_cc_space_v_ov_chol, d_cc_space_v_vv_chol, g_vir) + use gpu implicit none integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV), H_vv(nV, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(out) :: g_vir(nV, nV) + type(gpu_double2), intent(in) :: t1, H_vv, d_cc_space_f_vo + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_vv_chol + type(gpu_double4), intent(in) :: t2 + type(gpu_double2), intent(out) :: g_vir integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam - call dgemm('N','N',nV,nV,nO, & - -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & - t1 , size(t1,1), & - 0d0, g_vir, size(g_vir,1)) + type(gpu_stream) :: stream(max(nO,4)) + + do i=1,max(nO,4) + call gpu_stream_create(stream(i)) + enddo - double precision, allocatable :: tmp_k(:), tmp_vo(:,:,:), tmp_vo2(:,:,:) - allocate(tmp_k(cholesky_mo_num)) - call dgemm('N','N', cholesky_mo_num, 1, nO*nV, 1.d0, & - cc_space_v_ov_chol, cholesky_mo_num, t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) + call gpu_set_stream(blas_handle, stream(1)) + call gpu_dgemm(blas_handle, 'N','N',nV,nV,nO, & + -1d0, d_cc_space_f_vo%f(1,1) , size(d_cc_space_f_vo%f,1), & + t1%f(1,1) , size(t1%f,1), & + 0d0, g_vir%f(1,1), size(g_vir%f,1)) - call dgemm('T','N', nV*nV, 1, cholesky_mo_num, 2.d0, & - cc_space_v_vv_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & - g_vir, nV*nV) - deallocate(tmp_k) + type(gpu_double1) :: tmp_k + type(gpu_double3) :: tmp_vo, tmp_vo2 - allocate(tmp_vo(cholesky_mo_num,nV,nO)) - call dgemm('N','T',cholesky_mo_num*nV, nO, nV, 1.d0, & - cc_space_v_vv_chol, cholesky_mo_num*nV, t1, nO, 0.d0, tmp_vo, cholesky_mo_num*nV) + call gpu_allocate(tmp_k,cholesky_mo_num) - allocate(tmp_vo2(cholesky_mo_num,nO,nV)) - do beta=1,nV - do i=1,nO - do k=1,cholesky_mo_num - tmp_vo2(k,i,beta) = -tmp_vo(k,beta,i) - enddo - enddo + call gpu_set_stream(blas_handle, stream(2)) + call gpu_dgemm(blas_handle, 'N','N', cholesky_mo_num, 1, nO*nV, 1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num, t1%f(1,1), nO*nV, 0.d0, tmp_k%f(1), cholesky_mo_num) + + call gpu_dgemm(blas_handle, 'T','N', nV*nV, 1, cholesky_mo_num, 2.d0, & + d_cc_space_v_vv_chol%f(1,1,1), cholesky_mo_num, tmp_k%f(1), cholesky_mo_num, 1.d0, & + g_vir%f(1,1), nV*nV) + + call gpu_set_stream(blas_handle, stream(3)) + call gpu_allocate(tmp_vo,cholesky_mo_num,nV,nO) + + call gpu_dgemm(blas_handle, 'N','T',cholesky_mo_num*nV, nO, nV, 1.d0, & + d_cc_space_v_vv_chol%f(1,1,1), cholesky_mo_num*nV, t1%f(1,1), nO, 0.d0, tmp_vo%f(1,1,1), cholesky_mo_num*nV) + + call gpu_allocate(tmp_vo2,cholesky_mo_num,nO,nV) + + call gpu_synchronize() + call gpu_deallocate(tmp_k) + + do i=1,nO + call gpu_set_stream(blas_handle, stream(i)) + call gpu_dgeam(blas_handle, 'N', 'N', cholesky_mo_num, nV, -1.d0, tmp_vo%f(1,1,i), & + cholesky_mo_num, 0.d0, tmp_vo%f(1,1,i), cholesky_mo_num, tmp_vo2%f(1,i,1), cholesky_mo_num*nO) enddo - deallocate(tmp_vo) - do beta = 1, nV - do a = 1, nV - g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) - enddo + call gpu_set_stream(blas_handle, gpu_default_stream) + + do i=1,max(nO,4) + call gpu_stream_destroy(stream(i)) enddo + call gpu_deallocate(tmp_vo) + + call gpu_dgeam(blas_handle, 'N', 'N', nV, nV, 1.d0, g_vir%f(1,1), & + nV, 1.d0, H_vv%f(1,1), nV, g_vir%f(1,1), nV) - call dgemm('T','N', nV, nV, nO*cholesky_mo_num, 1.d0, & - cc_space_v_ov_chol, cholesky_mo_num*nO, & - tmp_vo2, cholesky_mo_num*nO, 1.d0, g_vir, nV) + call gpu_dgemm(blas_handle, 'T','N', nV, nV, nO*cholesky_mo_num, 1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, & + tmp_vo2%f(1,1,1), cholesky_mo_num*nO, 1.d0, g_vir%f(1,1), nV) + + call gpu_synchronize() + call gpu_deallocate(tmp_vo2) end ! J1 -subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,J1) +subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,d_cc_space_v_vo_chol,d_cc_space_v_vv_chol,J1) + use gpu implicit none - integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) - double precision, intent(in) :: v_vvoo(nV,nV,nO,nO) - double precision, intent(out) :: J1(nO, nV, nV, nO) + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: t1 + type(gpu_double4), intent(in) :: t2, v_ovvo, v_ovoo, v_vvoo + type(gpu_double3), intent(in) :: d_cc_space_v_vo_chol,d_cc_space_v_vv_chol + type(gpu_double4), intent(out) :: J1 integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam - double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) - allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) + type(gpu_double4) :: X_ovoo, Y_ovov - !$omp parallel & - !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & - !$omp private(i,j,a,u,beta) & - !$omp default(none) - do i = 1, nO - !$omp do - do beta = 1, nV - do a = 1, nV - do u = 1, nO - J1(u,a,beta,i) = v_ovvo(u,a,beta,i) - enddo - enddo - enddo - !$omp end do nowait + call gpu_allocate(X_ovoo,nO,nV,nO,nO) + + type(gpu_stream) :: stream(nV) + + + do i=1,nO + call gpu_stream_create(stream(i)) enddo - !$omp do collapse(2) do j = 1, nO + call gpu_set_stream(blas_handle, stream(j)) do i = 1, nO - do a = 1, nV - do u = 1, nO - X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i) - enddo - enddo + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, v_ovoo%f(1,1,j,i), & + nO, 0.d0, X_ovoo%f(1,1,i,j), nO, X_ovoo%f(1,1,i,j), nO) enddo enddo - !$omp end do - !$omp end parallel - call dgemm('N','N',nO*nV*nO,nV,nO, & - -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & - t1 , size(t1,1), & - 0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3)) + call gpu_set_stream(blas_handle, gpu_default_stream) + + do i=1,nO + call gpu_stream_destroy(stream(i)) + enddo + + call gpu_allocate(Y_ovov,nO,nV,nO,nV) + + call gpu_dgemm(blas_handle, 'N','N',nO*nV*nO,nV,nO, & + -1d0, X_ovoo%f(1,1,1,1), size(X_ovoo%f,1) * size(X_ovoo%f,2) * size(X_ovoo%f,3), & + t1%f(1,1) , size(t1%f,1), & + 0d0, Y_ovov%f(1,1,1,1), size(Y_ovov%f,1) * size(Y_ovov%f,2) * size(Y_ovov%f,3)) + + + call gpu_copy(v_ovvo, J1) + + call gpu_synchronize() + + do a=1,nV + call gpu_stream_create(stream(a)) + enddo - !$omp parallel & - !$omp shared(nO,nV,J1,Y_ovov) & - !$omp private(i,beta,a,u) & - !$omp default(none) do i = 1, nO - !$omp do do beta = 1, nV - do a = 1, nV - do u = 1, nO - J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta) - enddo - enddo + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, J1%f(1,1,beta,i), & + nO, 1.d0, Y_ovov%f(1,1,i,beta), nO, J1%f(1,1,beta,i), nO) enddo - !$omp end do nowait enddo - !$omp end parallel - deallocate(X_ovoo) - double precision, allocatable :: tmp_cc(:,:,:), J1_tmp(:,:,:,:) - allocate(tmp_cc(cholesky_mo_num,nV,nO), J1_tmp(nV,nO,nV,nO)) + call gpu_allocate(tmp_cc,cholesky_mo_num,nV,nO) + call gpu_allocate(J1_tmp,nV,nO,nV,nO) - call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & - cc_space_v_vv_chol, cholesky_mo_num*nV, & - t1, nO, & - 0.d0, tmp_cc, cholesky_mo_num*nV) + call gpu_set_stream(blas_handle, gpu_default_stream) - call dgemm('T','N', nV*nO, nV*nO, cholesky_mo_num, 1.d0, & - tmp_cc, cholesky_mo_num, cc_space_v_vo_chol, cholesky_mo_num, & - 0.d0, J1_tmp, nV*nO) + type(gpu_double4) :: J1_tmp + type(gpu_double3) :: tmp_cc - deallocate(tmp_cc) + call gpu_dgemm(blas_handle, 'N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & + d_cc_space_v_vv_chol%f(1,1,1), cholesky_mo_num*nV, & + t1%f(1,1), nO, & + 0.d0, tmp_cc%f(1,1,1), cholesky_mo_num*nV) - do i=1,nO - do b=1,nV - do a=1,nV - do u=1,nO - J1(u,a,b,i) = J1(u,a,b,i) + J1_tmp(b,u,a,i) - enddo - enddo + call gpu_dgemm(blas_handle, 'T','N', nV*nO, nV*nO, cholesky_mo_num, 1.d0, & + tmp_cc%f(1,1,1), cholesky_mo_num, d_cc_space_v_vo_chol%f(1,1,1), cholesky_mo_num, & + 0.d0, J1_tmp%f(1,1,1,1), nV*nO) + + + call gpu_deallocate(X_ovoo) + + call gpu_synchronize() + call gpu_deallocate(tmp_cc) + + do i = 1, nO + do a = 1, nV + call gpu_set_stream(blas_handle, stream(a)) + call gpu_dgeam(blas_handle, 'N', 'T', nO, nV, 1.d0, J1%f(1,a,1,i), & + nO*nV, 1.d0, J1_tmp%f(1,1,a,i), nV, J1%f(1,a,1,i), nO*nV) enddo enddo - deallocate(J1_tmp) + type(gpu_double4) :: X_voov, Z_ovvo - !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & - double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) - allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO)) - !$omp parallel & - !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & - !$omp private(i,beta,a,u,b,j) & - !$omp default(none) - !$omp do - do b = 1, nV - do j = 1, nO - do beta = 1, nV - do u = 1, nO - Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) - enddo - enddo + call gpu_allocate(X_voov,nV,nO,nO,nV) + call gpu_allocate(Z_ovvo,nO,nV,nV,nO) + + do j = 1, nO + do beta = 1, nV + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 0.5d0, t2%f(1,j,1,beta), & + nO*nO, t1%f(j,beta), t1%f(1,1), nO, Y_ovov%f(1,beta,j,1), nO*nV*nO) enddo enddo - !$omp end do nowait - !$omp do do b = 1, nV - do j = 1, nO - do i = 1, nO - do a = 1, nV - X_voov(a,i,j,b) = v_vvoo(a,b,i,j) - enddo - enddo - enddo + call gpu_set_stream(blas_handle, stream(b)) + call gpu_dgeam(blas_handle, 'N', 'N', nV, nO*nO, 1.d0, v_vvoo%f(1,b,1,1), & + nV*nV, 0.d0, X_voov%f(1,1,1,b), nV, X_voov%f(1,1,1,b), nV) enddo - !$omp end do - !$omp end parallel - call dgemm('N','T',nO*nV,nV*nO,nO*nV, & - -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & - X_voov, size(X_voov,1) * size(X_voov,2), & - 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) - deallocate(X_voov) + call gpu_set_stream(blas_handle, gpu_default_stream) + + call gpu_synchronize() + call gpu_deallocate(J1_tmp) + + call gpu_dgemm(blas_handle, 'N','T',nO*nV,nV*nO,nO*nV, & + -1d0, Y_ovov%f(1,1,1,1), size(Y_ovov%f,1) * size(Y_ovov%f,2), & + X_voov%f(1,1,1,1), size(X_voov%f,1) * size(X_voov%f,2), & + 0d0, Z_ovvo%f(1,1,1,1), size(Z_ovvo%f,1) * size(Z_ovvo%f,2)) + + call gpu_synchronize() - double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:) - allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO)) - !$omp parallel & - !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & - !$omp private(i,beta,a,u,j,b) & - !$omp default(none) do i = 1, nO - !$omp do - do beta = 1, nV - do a = 1, nV - do u = 1, nO - J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) - enddo - enddo + do a = 1, nV + call gpu_set_stream(blas_handle, stream(a)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, J1%f(1,a,1,i), & + nO*nV, 1.d0, Z_ovvo%f(1,1,a,i), nO, J1%f(1,a,1,i), nO*nV) enddo - !$omp end do nowait enddo - !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + type(gpu_double4) :: X_ovvo, Y_vovo + call gpu_allocate(Y_vovo,nV,nO,nV,nO) + do j = 1, nO - !$omp do - do b = 1, nV - do i = 1, nO - do a = 1, nV - Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) - enddo - enddo + do i = 1, nO + call gpu_set_stream(blas_handle, stream(i)) + call gpu_dgeam(blas_handle, 'N', 'T', nV, nV, 1.d0, v_vvoo%f(1,1,i,j), & + nV, -0.5d0, v_vvoo%f(1,1,i,j), nV, Y_vovo%f(1,i,1,j), nO*nV) enddo - !$omp end do nowait enddo + call gpu_allocate(X_ovvo,nO,nV,nV,nO) + do j = 1, nO - !$omp do do b = 1, nV - do beta = 1, nV - do u = 1, nO - X_ovvo(u,beta,b,j) = t2(u,j,beta,b) - enddo - enddo + call gpu_set_stream(blas_handle, stream(b)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, t2%f(1,j,1,b), & + nO*nO, 0.d0, t2%f(1,j,1,b), nO*nO, X_ovvo%f(1,1,b,j), nO) enddo - !$omp end do nowait enddo - !$omp end parallel - call dgemm('N','T',nO*nV,nV*nO,nV*nO, & - 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & - Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & - 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + call gpu_set_stream(blas_handle, gpu_default_stream) + call gpu_synchronize() + call gpu_deallocate(X_voov) + + call gpu_dgemm(blas_handle, 'N','T',nO*nV,nV*nO,nV*nO, & + 1d0, X_ovvo%f(1,1,1,1), size(X_ovvo%f,1) * size(X_ovvo%f,2), & + Y_vovo%f(1,1,1,1), size(Y_vovo%f,1) * size(Y_vovo%f,2), & + 0d0, Z_ovvo%f(1,1,1,1), size(Z_ovvo%f,1) * size(Z_ovvo%f,2)) + + call gpu_synchronize() - !$omp parallel & - !$omp shared(nO,nV,J1,Z_ovvo) & - !$omp private(i,beta,a,u) & - !$omp default(none) do i = 1, nO - !$omp do do beta = 1, nV - do a = 1, nV - do u = 1, nO - J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) - enddo - enddo + call gpu_set_stream(blas_handle, stream(beta)) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, J1%f(1,1,beta,i), & + nO, 1.d0, Z_ovvo%f(1,beta,1,i), nO*nV, J1%f(1,1,beta,i), nO) enddo - !$omp end do nowait enddo - !$omp end parallel - deallocate(X_ovvo,Z_ovvo,Y_ovov) + call gpu_set_stream(blas_handle, gpu_default_stream) + call gpu_deallocate(Y_ovov) + call gpu_deallocate(X_ovvo) + + do a = 1, nV + call gpu_stream_destroy(stream(a)) + enddo + + call gpu_deallocate(Z_ovvo) end ! K1 -subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,K1) +subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov, & + d_cc_space_v_ov_chol,d_cc_space_v_vv_chol,K1) + use gpu implicit none - integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) - double precision, intent(in) :: v_ovoo(nO,nV,nO,nO) - double precision, intent(out) :: K1(nO, nV, nO, nV) + integer, intent(in) :: nO,nV + type(gpu_double2), intent(in) :: t1 + type(gpu_double4), intent(in) :: t2, v_vvoo, v_ovov, v_ovoo + type(gpu_double3), intent(in) :: d_cc_space_v_ov_chol, d_cc_space_v_vv_chol + type(gpu_double4), intent(out) :: K1 - double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) + type(gpu_double4) :: X, Y, Z integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam - allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) - !$omp parallel & - !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & - !$omp private(i,beta,a,u,j,b) & - !$omp default(none) - !$omp do - do beta = 1, nV - do i = 1, nO - do a = 1, nV - do u = 1, nO - K1(u,a,i,beta) = v_ovov(u,a,i,beta) - enddo - enddo - enddo - enddo - !$omp end do nowait + call gpu_copy(v_ovov, K1) - do i = 1, nO - !$omp do - do a = 1, nV - do j = 1, nO - do b = 1, nV - X(b,j,a,i) = - v_vvoo(b,a,i,j) - enddo - enddo - enddo - !$omp end do nowait + type(gpu_stream) :: stream(nV) + do a = 1, nV + call gpu_stream_create(stream(a)) enddo - do j = 1, nO - !$omp do - do b = 1, nV - do beta = 1, nV - do u = 1, nO - Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) - enddo - enddo + call gpu_allocate(X,nV,nO,nV,nO) + call gpu_allocate(Y,nO,nV,nV,nO) + + do a = 1, nV + call gpu_set_stream(blas_handle, stream(a)) + do i = 1, nO + call gpu_dgeam(blas_handle, 'N', 'N', nV, nO, -1.d0, v_vvoo%f(1,a,i,1), & + nV*nV*nO, 0.d0, v_vvoo%f(1,a,i,1), nV*nV*nO, X%f(1,1,a,i), nV) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 0.5d0, t2%f(1,i,1,a), & + nO*nO, t1%f(i,a), t1%f(1,1), nO, Y%f(1,a,1,i), nO*nV) enddo - !$omp end do enddo - !$omp end parallel - call dgemm('N','N',nO*nV*nO,nV,nO, & - -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), & - t1 , size(t1,1), & - 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) + call gpu_set_stream(blas_handle, gpu_default_stream) + + call gpu_dgemm(blas_handle, 'N','N',nO*nV*nO,nV,nO, & + -1d0, v_ovoo%f(1,1,1,1), size(v_ovoo%f,1) * size(v_ovoo%f,2) * size(v_ovoo%f,3), & + t1%f(1,1) , size(t1%f,1), & + 1d0, K1%f(1,1,1,1) , size(K1%f,1) * size(K1%f,2) * size(K1%f,3)) - double precision, allocatable :: K1tmp(:,:,:,:), t1v(:,:,:) - allocate(K1tmp(nO,nO,nV,nV), t1v(cholesky_mo_num,nO,nO)) + type(gpu_double4) :: K1tmp + type(gpu_double3) :: t1v - call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & - cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, & - t1v, cholesky_mo_num*nO) + call gpu_allocate(t1v,cholesky_mo_num,nO,nO) - call dgemm('T','N', nO*nO, nV*nV, cholesky_mo_num, 1.d0, & - t1v, cholesky_mo_num, cc_space_v_vv_chol, cholesky_mo_num, 0.d0, & - K1tmp, nO*nO) + call gpu_dgemm(blas_handle, 'N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & + d_cc_space_v_ov_chol%f(1,1,1), cholesky_mo_num*nO, t1%f(1,1), nO, 0.d0, & + t1v%f(1,1,1), cholesky_mo_num*nO) + + call gpu_allocate(K1tmp,nO,nO,nV,nV) + + call gpu_dgemm(blas_handle, 'T','N', nO*nO, nV*nV, cholesky_mo_num, 1.d0, & + t1v%f(1,1,1), cholesky_mo_num, d_cc_space_v_vv_chol%f(1,1,1), cholesky_mo_num, 0.d0, & + K1tmp%f(1,1,1,1), nO*nO) + + call gpu_allocate(Z,nO,nV,nV,nO) + call gpu_synchronize() - deallocate(t1v) ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) - call dgemm('N','N',nV*nO,nO*nV,nV*nO, & - 1d0, Y, size(Y,1) * size(Y,2), & - X, size(X,1) * size(X,2), & - 0d0, Z, size(Z,1) * size(Z,2)) + call gpu_dgemm(blas_handle, 'N','N',nV*nO,nO*nV,nV*nO, & + 1d0, Y%f(1,1,1,1), size(Y%f,1) * size(Y%f,2), & + X%f(1,1,1,1), size(X%f,1) * size(X%f,2), & + 0d0, Z%f(1,1,1,1), size(Z%f,1) * size(Z%f,2)) - !$omp parallel & - !$omp shared(nO,nV,K1,Z,K1tmp) & - !$omp private(i,beta,a,u) & - !$omp default(none) - !$omp do - do beta = 1, nV - do i = 1, nO - do a = 1, nV - do u = 1, nO - K1(u,a,i,beta) = K1(u,a,i,beta) + K1tmp(u,i,a,beta) + Z(u,beta,a,i) - enddo - enddo + call gpu_synchronize() + call gpu_deallocate(t1v) + + do beta = 1, nV + call gpu_set_stream(blas_handle, stream(beta)) + do i = 1, nO + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, K1%f(1,1,i,beta), & + nO, 1.d0, K1tmp%f(1,i,1,beta), nO*nO, K1%f(1,1,i,beta), nO) + call gpu_dgeam(blas_handle, 'N', 'N', nO, nV, 1.d0, K1%f(1,1,i,beta), & + nO, 1.d0, Z%f(1,beta,1,i), nO*nV, K1%f(1,1,i,beta), nO) enddo enddo - !$omp end do - !$omp end parallel - deallocate(K1tmp,X,Y,Z) + call gpu_deallocate(X) + call gpu_deallocate(Y) + + do a = 1, nV + call gpu_stream_destroy(stream(a)) + enddo + + call gpu_deallocate(K1tmp) + call gpu_deallocate(Z) end diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 1093c59db..851b6a9f5 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -1,5 +1,5 @@ ! Main -subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) +subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy,t_error) implicit none @@ -7,7 +7,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) double precision, intent(in) :: t2(nO,nO,nV,nV) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) - double precision, intent(inout) :: energy + double precision, intent(inout) :: energy, t_error double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) @@ -220,8 +220,10 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ call set_multiple_levels_omp(.False.) call wall_time(t00) imin = 1_8 - !$OMP PARALLEL & - !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & + t_error = huge(1.d0) + + !$OMP PARALLEL & + !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & !$OMP DEFAULT(SHARED) NUM_THREADS(nthreads_pt2) do kiter=1,Nabc @@ -328,15 +330,23 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ if (norm > 0.d0) then energy_stoch = ET / norm variance = ET2 / norm - energy_stoch*energy_stoch + if (norm > 1.d0) then + t_error = dsqrt(variance/(norm-1.d0)) + else + t_error = dsqrt(variance) + endif endif energy = energy_det + energy_stoch - print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) + print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, t_error, 100.*real(Ncomputed)/real(Nabc) + endif !$OMP END MASTER + if (t_error < cc_par_t_stop) exit if (imin > Nabc) exit enddo + !$OMP TASKWAIT !$OMP END PARALLEL print '(A)', ' ======================= ============== ========== ' diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 0281a1d4a..517220a87 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -178,7 +178,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :) logical, allocatable :: banned(:,:,:), bannedOrb(:,:) double precision, allocatable :: coef_fullminilist_rev(:,:) - double precision, allocatable :: mat(:,:,:) + double precision, allocatable :: mat(:,:,:), hij_cache(:,:,:) PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique @@ -205,7 +205,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc ! Removed to avoid introducing determinants already presents in the wf !double precision, parameter :: norm_thr = 1.d-16 - allocate (indices(N_det), & + allocate (indices(N_det), hij_cache(mo_num,mo_num,2), & exc_degree(max(N_det_alpha_unique,N_det_beta_unique))) ! Pre-compute excitation degrees wrt alpha determinants @@ -511,11 +511,15 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc maskInd = maskInd + 1 if(mod(maskInd, csubset) == (subset-1)) then + call get_mo_two_e_integrals_ij(h2,h1,mo_num,hij_cache(1,1,1),mo_integrals_map) + if (sp /= 3) then ! AA or BB + call get_mo_two_e_integrals_ij(h1,h2,mo_num,hij_cache(1,1,2),mo_integrals_map) + endif call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle - call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting) + call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting, hij_cache) call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) end if @@ -531,7 +535,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask, particle_mask, foc enddo enddo deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) - deallocate(banned, bannedOrb,mat) + deallocate(banned, bannedOrb, mat, hij_cache) end subroutine BEGIN_TEMPLATE @@ -556,7 +560,7 @@ subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_ double precision, external :: diag_H_mat_elem_fock double precision :: E_shift double precision :: s_weight(N_states,N_states) - PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs + PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs thresh_sym excitation_ref hf_bitmask elec_alpha_num do jstate=1,N_states do istate=1,N_states s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate)) @@ -742,7 +746,7 @@ subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_ do istate=1,N_states delta_E = E0(istate) - Hii + E_shift alpha_h_psi = mat(istate, p1, p2) - if (alpha_h_psi == 0.d0) cycle + if (dabs(alpha_h_psi) < mo_integrals_threshold) cycle val = alpha_h_psi + alpha_h_psi tmp = dsqrt(delta_E * delta_E + val * val) @@ -914,7 +918,7 @@ subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_ END_TEMPLATE -subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) +subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting, hij_cache) use bitmasks implicit none BEGIN_DOC @@ -926,6 +930,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere integer, intent(in) :: sp, i_gen, N_sel integer, intent(in) :: interesting(0:N_sel) integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) logical, intent(inout) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num, 2) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) @@ -995,18 +1000,36 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere if(nt == 4) then call get_d2(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) else if(nt == 3) then - call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)), hij_cache) else - call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i))) + call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)), hij_cache) end if else if(nt == 4) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) - call past_d2(banned, p, sp) + if(sp == 3) then + do j=1,p(0,2) + do ii=1,p(0,1) + banned(p(ii,1), p(j,2),1) = .true. + end do + end do + else + do ii=1,p(0, sp) + do j=1,ii-1 + banned(p(j,sp), p(ii,sp),1) = .true. + banned(p(ii,sp), p(j,sp),1) = .true. + end do + end do + end if else if(nt == 3) then call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int) call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int) - call past_d1(bannedOrb, p) + do ii = 1, p(0, 1) + bannedOrb(p(ii, 1), 1) = .true. + end do + do ii = 1, p(0, 2) + bannedOrb(p(ii, 2), 2) = .true. + end do end if end do @@ -1037,6 +1060,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer :: bant bant = 1 + PROVIDE mo_integrals_threshold tip = p(0,1) * p(0,2) ma = sp @@ -1062,7 +1086,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(i2, ma) hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2, p1, h1, h2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) @@ -1092,7 +1116,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(turn2(i), 1) hij = mo_two_e_integral(p1, p2, h1, h2) - if (hij /= 0.d0) then + if (dabs(hij) > mo_integrals_threshold) then hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states @@ -1120,7 +1144,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(i1, ma) p2 = p(i2, ma) hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1142,7 +1166,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(i, ma) hij = mo_two_e_integral(p1, p2, h1, h2) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int) if (puti < putj) then @@ -1179,7 +1203,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end -subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) +subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, hij_cache) use bitmasks implicit none @@ -1190,6 +1214,8 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) + double precision, external :: get_phase_bi, mo_two_e_integral logical :: ok @@ -1201,12 +1227,12 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) integer :: bant - double precision, allocatable :: hij_cache(:,:) + double precision, allocatable :: hij_cache1(:,:) double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) PROVIDE mo_integrals_map N_int allocate (lbanned(mo_num, 2)) - allocate (hij_cache(mo_num,2)) + allocate (hij_cache1(mo_num,2)) lbanned = bannedOrb do i=1, p(0,1) @@ -1230,13 +1256,11 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(1,ma) p2 = p(2,ma) if(.not. bannedOrb(puti, mi)) then - call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map) tmp_row = 0d0 do putj=1, hfix-1 if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache(putj,1) - hij_cache(putj,2) + hij = hij_cache(hfix,putj,1) - hij_cache(putj,hfix,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1248,7 +1272,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=hfix+1, mo_num if(lbanned(putj, ma)) cycle if(banned(putj, puti,bant)) cycle - hij = hij_cache(putj,2) - hij_cache(putj,1) + hij = hij_cache(putj,hfix,1) - hij_cache(hfix,putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1274,15 +1298,15 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) pfix = p(1,mi) tmp_row = 0d0 tmp_row2 = 0d0 - call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache1(1,2),mo_integrals_map) putj = p1 do puti=1,mo_num !HOT if(lbanned(puti,mi)) cycle !p1 fixed putj = p1 if(.not. banned(putj,puti,bant)) then - hij = hij_cache(puti,2) + hij = hij_cache1(puti,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1296,7 +1320,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p2 ! do puti=1,mo_num !HOT if(.not. banned(putj,puti,bant)) then - hij = hij_cache(puti,1) + hij = hij_cache1(puti,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states @@ -1327,13 +1351,13 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) puti = p(i, ma) p1 = p(turn3(1,i), ma) p2 = p(turn3(2,i), ma) - call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache1(1,2),mo_integrals_map) tmp_row = 0d0 do putj=1,hfix-1 if(banned(putj,puti,1)) cycle if(lbanned(putj,ma)) cycle - hij = hij_cache(putj,1) - hij_cache(putj,2) + hij = hij_cache1(putj,1) - hij_cache1(putj,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) @@ -1342,7 +1366,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) do putj=hfix+1,mo_num if(banned(putj,puti,1)) cycle if(lbanned(putj,ma)) cycle - hij = hij_cache(putj,2) - hij_cache(putj,1) + hij = hij_cache1(putj,2) - hij_cache1(putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) @@ -1364,14 +1388,14 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(2,ma) tmp_row = 0d0 tmp_row2 = 0d0 - call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map) - call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache1(1,1),mo_integrals_map) + call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache1(1,2),mo_integrals_map) putj = p2 do puti=1,mo_num if(lbanned(puti,ma)) cycle putj = p2 if(.not. banned(puti,putj,1)) then - hij = hij_cache(puti,1) + hij = hij_cache1(puti,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) @@ -1383,7 +1407,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) putj = p1 if(.not. banned(puti,putj,1)) then - hij = hij_cache(puti,2) + hij = hij_cache1(puti,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states @@ -1408,7 +1432,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) enddo end if end if - deallocate(lbanned,hij_cache) + deallocate(lbanned,hij_cache1) !! MONO if(sp == 3) then @@ -1439,7 +1463,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) -subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) +subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs, hij_cache) use bitmasks implicit none @@ -1450,6 +1474,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) double precision, intent(in) :: coefs(N_states) double precision, intent(inout) :: mat(N_states, mo_num, mo_num) integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision, intent(in) :: hij_cache(mo_num, mo_num, 2) integer :: i, j, k, s, h1, h2, p1, p2, puti, putj double precision :: hij, phase @@ -1457,16 +1482,14 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) logical :: ok integer, parameter :: bant=1 - double precision, allocatable :: hij_cache1(:), hij_cache2(:) - allocate (hij_cache1(mo_num),hij_cache2(mo_num)) + PROVIDE mo_integrals_threshold if(sp == 3) then ! AB h1 = p(1,1) h2 = p(1,2) do p1=1, mo_num if(bannedOrb(p1, 1)) cycle - call get_mo_two_e_integrals(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map) do p2=1, mo_num if(bannedOrb(p2,2)) cycle if(banned(p1, p2, bant)) cycle ! rentable? @@ -1475,9 +1498,9 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) else phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hij = hij_cache1(p2) * phase + hij = hij_cache(p2,p1,1) * phase end if - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij ! HOTSPOT @@ -1490,18 +1513,16 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p2 = p(2,sp) do puti=1, mo_num if (bannedOrb(puti, sp)) cycle - call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map) - call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map) do putj=puti+1, mo_num if(bannedOrb(putj, sp)) cycle if(banned(puti, putj, bant)) cycle ! rentable? if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) call i_h_j(gen, det, N_int, hij) - if (hij == 0.d0) cycle + if (dabs(hij) < mo_integrals_threshold) cycle else - hij = hij_cache1(putj) - hij_cache2(putj) - if (hij == 0.d0) cycle + hij = hij_cache(putj,puti,1) - hij_cache(putj,puti,2) + if (dabs(hij) < mo_integrals_threshold) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if !DIR$ LOOP COUNT AVG(4) @@ -1512,50 +1533,9 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) end do end if - deallocate(hij_cache1,hij_cache2) end -subroutine past_d1(bannedOrb, p) - use bitmasks - implicit none - - logical, intent(inout) :: bannedOrb(mo_num, 2) - integer, intent(in) :: p(0:4, 2) - integer :: i,s - - do s = 1, 2 - do i = 1, p(0, s) - bannedOrb(p(i, s), s) = .true. - end do - end do -end - - -subroutine past_d2(banned, p, sp) - use bitmasks - implicit none - - logical, intent(inout) :: banned(mo_num, mo_num) - integer, intent(in) :: p(0:4, 2), sp - integer :: i,j - - if(sp == 3) then - do j=1,p(0,2) - do i=1,p(0,1) - banned(p(i,1), p(j,2)) = .true. - end do - end do - else - do i=1,p(0, sp) - do j=1,i-1 - banned(p(j,sp), p(i,sp)) = .true. - banned(p(i,sp), p(j,sp)) = .true. - end do - end do - end if -end - subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) use bitmasks implicit none diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f index 0dc939cbe..f57b7f924 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f @@ -1,6 +1,5 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_diag_in,converged,hcalc) - use mmap_module implicit none BEGIN_DOC ! Generic Davidson diagonalization with ONE DIAGONAL DRESSING OPERATOR diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f index 1a8269f4b..c88489985 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f @@ -3,8 +3,6 @@ subroutine davidson_general_diag_dressed_ext_rout_nonsym_b1space(u_in, H_jj, Dress_jj,energies, sze, N_st, N_st_diag_in, converged, hcalc) - use mmap_module - BEGIN_DOC ! Generic modified-Davidson diagonalization ! diff --git a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f index 24f4fa10d..1ff6632c8 100644 --- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f +++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f @@ -1,5 +1,4 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies,sze,N_st,N_st_diag,converged,hcalc) - use mmap_module BEGIN_DOC ! Generic Davidson diagonalization with TWO DRESSING VECTORS ! diff --git a/src/dav_general_mat/dav_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_dressed_ext_rout.irp.f index cedaaf0a9..ca59a6882 100644 --- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f @@ -1,5 +1,4 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_diag,dressing_state,dressing_vec,idress,converged,hcalc) - use mmap_module implicit none BEGIN_DOC ! Davidson diagonalization. diff --git a/src/dav_general_mat/dav_ext_rout.irp.f b/src/dav_general_mat/dav_ext_rout.irp.f index deb7e3a95..ad60b2a84 100644 --- a/src/dav_general_mat/dav_ext_rout.irp.f +++ b/src/dav_general_mat/dav_ext_rout.irp.f @@ -1,6 +1,5 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,converged,hcalc) - use mmap_module implicit none BEGIN_DOC ! Generic Davidson diagonalization diff --git a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f index d89aaadb2..ca0a835ee 100644 --- a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f +++ b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f @@ -3,8 +3,6 @@ subroutine davidson_general_ext_rout_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_diag_in, converged, hcalc) - use mmap_module - BEGIN_DOC ! Generic modified-Davidson diagonalization ! diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f index 9940bf1ea..a277d9efe 100644 --- a/src/dav_general_mat/dav_general.irp.f +++ b/src/dav_general_mat/dav_general.irp.f @@ -1,6 +1,6 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,converged,h_mat) - use mmap_module +! use mmap_module implicit none BEGIN_DOC ! Davidson diagonalization with specific diagonal elements of the H matrix @@ -160,9 +160,9 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv ! type(c_ptr) :: ptr_w, ptr_s ! integer :: fd_s, fd_w ! call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& -! 8, fd_w, .False., ptr_w) +! 8, fd_w, .False., .True., ptr_w) ! call mmap(trim(ezfio_work_dir)//'davidson_s', (/int(sze,8),int(N_st_diag*itermax,8)/),& -! 4, fd_s, .False., ptr_s) +! 4, fd_s, .False., .True., ptr_s) ! call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) ! call c_f_pointer(ptr_s, s, (/sze,N_st_diag*itermax/)) ! else diff --git a/src/davidson/diagonalization_h_dressed.irp.f b/src/davidson/diagonalization_h_dressed.irp.f index b7179c18f..15bf256df 100644 --- a/src/davidson/diagonalization_h_dressed.irp.f +++ b/src/davidson/diagonalization_h_dressed.irp.f @@ -228,7 +228,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag*itermax)) diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index fa8aff807..656dd1d97 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -229,7 +229,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax)) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index fd967ecc1..191e00211 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -270,9 +270,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call mmap(trim(ezfio_work_dir)//'davidson_s', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 4, fd_s, .False., ptr_s) + 4, fd_s, .False., .True., ptr_s) call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) call c_f_pointer(ptr_s, s, (/sze,N_st_diag*itermax/)) else @@ -594,6 +594,13 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ state(l) = idx enddo + ! Check if all states are attributed. If not, exit and N_st_diag will be increased. + do l=1,N_st + if (state(l) == 0) then + return + endif + enddo + ! tmp array before setting state_ok ok = .False. do l = 1, N_st @@ -627,47 +634,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ ! Swapped eigenvectors prev_y = y -! if (state_following) then -! -! overlap = -1.d0 -! do k=1,shift2 -! do i=1,shift2 -! overlap(k,i) = dabs(y(k,i)) -! enddo -! enddo -! do k=1,N_st -! cmax = -1.d0 -! do i=1,N_st -! if (overlap(i,k) > cmax) then -! cmax = overlap(i,k) -! order(k) = i -! endif -! enddo -! do i=1,N_st_diag -! overlap(order(k),i) = -1.d0 -! enddo -! enddo -! overlap = y -! do k=1,N_st -! l = order(k) -! if (k /= l) then -! y(1:shift2,k) = overlap(1:shift2,l) -! endif -! enddo -! do k=1,N_st -! overlap(k,1) = lambda(k) -! overlap(k,2) = s2(k) -! enddo -! do k=1,N_st -! l = order(k) -! if (k /= l) then -! lambda(k) = overlap(l,1) -! s2(k) = overlap(l,2) -! endif -! enddo -! -! endif - ! Express eigenvectors of h in the determinant basis ! -------------------------------------------------- @@ -703,7 +669,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ if ((itertot>1).and.(iter == 1)) then - !don't print + ! Don't print continue else write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:3,1:N_st) diff --git a/src/davidson/diagonalization_nonsym_h_dressed.irp.f b/src/davidson/diagonalization_nonsym_h_dressed.irp.f index 96ca84ab7..86df3a196 100644 --- a/src/davidson/diagonalization_nonsym_h_dressed.irp.f +++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f @@ -251,7 +251,7 @@ subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, type(c_ptr) :: ptr_w, ptr_s integer :: fd_s, fd_w call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& - 8, fd_w, .False., ptr_w) + 8, fd_w, .False., .True., ptr_w) call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) else allocate(W(sze,N_st_diag*itermax)) diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f index 59c8313a0..6b8529050 100644 --- a/src/davidson/diagonalize_ci.irp.f +++ b/src/davidson/diagonalize_ci.irp.f @@ -282,9 +282,8 @@ print*,' Within the ',N_det,'determinants selected' print*,' and the ',N_states_diag,'states requested' print*,' We did not find only states with S^2 values close to ',expected_s2 - print*,' We will then set the first N_states eigenvectors of the H matrix' - print*,' as the CI_eigenvectors' - print*,' You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space' + print*,' You should consider more states, or change s2_eig, or just enlarge the CI space' + print*,'!!!!!!!! WARNING !!!!!!!!!' print*,'' do j=1,min(N_states_diag,N_det) diff --git a/src/determinants/generate_cas_space.irp.f b/src/determinants/generate_cas_space.irp.f index 47a2ca30d..05201c74b 100644 --- a/src/determinants/generate_cas_space.irp.f +++ b/src/determinants/generate_cas_space.irp.f @@ -33,7 +33,7 @@ subroutine generate_cas_space print *, 'CAS(', n_alpha_act+n_beta_act, ', ', n_act_orb, ')' print *, '' - n_det_alpha_unique = binom_int(n_act_orb, n_alpha_act) + n_det_alpha_unique = int(binom_int(n_act_orb, n_alpha_act),4) TOUCH n_det_alpha_unique n = n_alpha_act @@ -56,7 +56,7 @@ subroutine generate_cas_space u = ior(t1,t2) enddo - n_det_beta_unique = binom_int(n_act_orb, n_beta_act) + n_det_beta_unique = int(binom_int(n_act_orb, n_beta_act),4) TOUCH n_det_beta_unique n = n_beta_act diff --git a/src/determinants/slater_rules_general.irp.f b/src/determinants/slater_rules_general.irp.f new file mode 100644 index 000000000..e987c8460 --- /dev/null +++ b/src/determinants/slater_rules_general.irp.f @@ -0,0 +1,192 @@ +subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) + use bitmasks + BEGIN_DOC +! returns the array, for each spin, of holes/particles between key_i and key_j +! +! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> + END_DOC + include 'utils/constants.include.F' + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision, intent(out) :: phase + integer :: ispin,k,i,pos + integer(bit_kind) :: key_hole, key_particle + integer(bit_kind) :: xorvec(N_int_max,2) + holes_array = -1 + particles_array = -1 + degree_array = 0 + do i = 1, N_int + xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) + xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) + degree_array(1) += popcnt(xorvec(i,1)) + degree_array(2) += popcnt(xorvec(i,2)) + enddo + degree_array(1) = shiftr(degree_array(1),1) + degree_array(2) = shiftr(degree_array(2),1) + + do ispin = 1, 2 + k = 1 + !!! GETTING THE HOLES + do i = 1, N_int + key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + do ispin = 1, 2 + k = 1 + !!! GETTING THE PARTICLES + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general ' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + integer :: h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree_array(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine get_holes_general(key_i, key_j,Nint, holes_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of holes between key_i and key_j +! +! with the following convention: a_{hole}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2) + integer(bit_kind) :: key_hole + integer :: ispin,k,i,pos + holes_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_particles_general(key_i, key_j,Nint,particles_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of particles between key_i and key_j +! +! with the following convention: a^dagger_{particle}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: particles_array(100,2) + integer(bit_kind) :: key_particle + integer :: ispin,k,i,pos + particles_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'Those are the two determinants' + call debug_det(key_i, N_int) + call debug_det(key_j, N_int) + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) + implicit none + integer, intent(in) :: degree(2), Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + integer, intent(in) :: holes_array(100,2),particles_array(100,2) + double precision, intent(out) :: phase + integer :: i,ispin,h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end diff --git a/src/dft_utils_func/on_top_from_ueg.irp.f b/src/dft_utils_func/on_top_from_ueg.irp.f index 4e28ad89b..711ffc395 100644 --- a/src/dft_utils_func/on_top_from_ueg.irp.f +++ b/src/dft_utils_func/on_top_from_ueg.irp.f @@ -32,7 +32,6 @@ double precision function g0_UEG_mu_inf(rho_a,rho_b) C = 0.08193d0 D = -0.01277d0 E = 0.001859d0 - x = -d2*rs if (dabs(rho) > 1.d-20) then rs = (3d0 / (4d0*pi*rho))**(1d0/3d0) ! JT: serious bug fixed 20/03/19 x = -d2*rs diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f index 16414f390..e9c003d45 100644 --- a/src/dft_utils_in_r/ao_in_r.irp.f +++ b/src/dft_utils_in_r/ao_in_r.irp.f @@ -52,35 +52,39 @@ BEGIN_PROVIDER[double precision, aos_grad_in_r_array, (ao_num,n_points_final_grid,3)] - BEGIN_DOC - ! aos_grad_in_r_array(i,j,k) = value of the kth component of the gradient of ith ao on the jth grid point - ! - ! k = 1 : x, k= 2, y, k 3, z - END_DOC + BEGIN_DOC + ! + ! aos_grad_in_r_array(i,j,k) = value of the kth component of the gradient of ith ao on the jth grid point + ! + ! k = 1 : x, k= 2, y, k 3, z + ! + END_DOC - implicit none - integer :: i,j,m - double precision :: aos_array(ao_num), r(3) - double precision :: aos_grad_array(3,ao_num) - !$OMP PARALLEL DO & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,r,aos_array,aos_grad_array,j,m) & - !$OMP SHARED(aos_grad_in_r_array,n_points_final_grid,ao_num,final_grid_points) - do i = 1, n_points_final_grid - r(1) = final_grid_points(1,i) - r(2) = final_grid_points(2,i) - r(3) = final_grid_points(3,i) - call give_all_aos_and_grad_at_r(r,aos_array,aos_grad_array) - do m = 1, 3 - do j = 1, ao_num - aos_grad_in_r_array(j,i,m) = aos_grad_array(m,j) - enddo + implicit none + integer :: i, j, m + double precision :: aos_array(ao_num), r(3) + double precision :: aos_grad_array(3,ao_num) + + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,m,r,aos_array,aos_grad_array) & + !$OMP SHARED(aos_grad_in_r_array,n_points_final_grid,ao_num,final_grid_points) + do i = 1, n_points_final_grid + r(1) = final_grid_points(1,i) + r(2) = final_grid_points(2,i) + r(3) = final_grid_points(3,i) + call give_all_aos_and_grad_at_r(r,aos_array,aos_grad_array) + do m = 1, 3 + do j = 1, ao_num + aos_grad_in_r_array(j,i,m) = aos_grad_array(m,j) + enddo + enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO +END_PROVIDER - END_PROVIDER +! --- BEGIN_PROVIDER[double precision, aos_grad_in_r_array_transp, (3,ao_num,n_points_final_grid)] @@ -205,18 +209,53 @@ END_PROVIDER - BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)] - implicit none - BEGIN_DOC - ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point - END_DOC - integer :: i,j - double precision :: aos_array(ao_num), r(3) - do i = 1, n_points_extra_final_grid - do j = 1, ao_num - aos_in_r_array_extra_transp(i,j) = aos_in_r_array_extra(j,i) +! --- + +BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)] + + BEGIN_DOC + ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point + END_DOC + + implicit none + integer :: i, j + double precision :: aos_array(ao_num), r(3) + + do i = 1, n_points_extra_final_grid + do j = 1, ao_num + aos_in_r_array_extra_transp(i,j) = aos_in_r_array_extra(j,i) + enddo enddo - enddo - END_PROVIDER +END_PROVIDER + +! --- + +BEGIN_PROVIDER[double precision, aos_grad_in_r_array_extra, (ao_num,n_points_extra_final_grid,3)] + + implicit none + integer :: i, j, m + double precision :: aos_array(ao_num), r(3) + double precision :: aos_grad_array(3,ao_num) + + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,m,r,aos_array,aos_grad_array) & + !$OMP SHARED(aos_grad_in_r_array_extra,n_points_extra_final_grid,ao_num,final_grid_points_extra) + do i = 1, n_points_extra_final_grid + r(1) = final_grid_points_extra(1,i) + r(2) = final_grid_points_extra(2,i) + r(3) = final_grid_points_extra(3,i) + call give_all_aos_and_grad_at_r(r, aos_array, aos_grad_array) + do m = 1, 3 + do j = 1, ao_num + aos_grad_in_r_array_extra(j,i,m) = aos_grad_array(m,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- diff --git a/src/dft_utils_in_r/mo_in_r.irp.f b/src/dft_utils_in_r/mo_in_r.irp.f index 192cb25a5..ad931402f 100644 --- a/src/dft_utils_in_r/mo_in_r.irp.f +++ b/src/dft_utils_in_r/mo_in_r.irp.f @@ -48,7 +48,7 @@ integer :: i,j do i = 1, n_points_final_grid do j = 1, mo_num - mos_in_r_array_transp(i,j) = mos_in_r_array(j,i) + mos_in_r_array_transp(i,j) = mos_in_r_array_omp(j,i) enddo enddo END_PROVIDER diff --git a/src/ezfio_files/01.convert.bats b/src/ezfio_files/convert_bats_old similarity index 100% rename from src/ezfio_files/01.convert.bats rename to src/ezfio_files/convert_bats_old diff --git a/src/ezfio_files/get_unit_and_open.irp.f b/src/ezfio_files/get_unit_and_open.irp.f index 6440579f9..d6a7efac9 100644 --- a/src/ezfio_files/get_unit_and_open.irp.f +++ b/src/ezfio_files/get_unit_and_open.irp.f @@ -47,11 +47,13 @@ integer function getUnitAndOpen(f,mode) endif open(unit=getUnitAndOpen,file=f,status='OLD',action='READ',form='UNFORMATTED') else if (mode.eq.'W') then - open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='WRITE',form='UNFORMATTED') + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',form='UNFORMATTED') + else if (mode.eq.'A') then + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',position='APPEND',form='UNFORMATTED') else if (mode.eq.'w') then - open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='WRITE',form='FORMATTED') + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',form='FORMATTED') else if (mode.eq.'a') then - open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='WRITE',position='APPEND',form='FORMATTED') + open(unit=getUnitAndOpen,file=new_f,status='UNKNOWN',action='READWRITE',position='APPEND',form='FORMATTED') else if (mode.eq.'x') then open(unit=getUnitAndOpen,file=new_f,form='FORMATTED') endif diff --git a/src/gpu/NEED b/src/gpu/NEED new file mode 100644 index 000000000..c2af78d26 --- /dev/null +++ b/src/gpu/NEED @@ -0,0 +1 @@ +gpu_arch diff --git a/src/gpu/README.rst b/src/gpu/README.rst new file mode 100644 index 000000000..17ee28a08 --- /dev/null +++ b/src/gpu/README.rst @@ -0,0 +1,6 @@ +=== +gpu +=== + +Bindings for GPU routines (architecture independent). +Architecture-dependent files are in gpu_arch. diff --git a/src/gpu/gpu.h b/src/gpu/gpu.h new file mode 100644 index 000000000..ac70e21a7 --- /dev/null +++ b/src/gpu/gpu.h @@ -0,0 +1,41 @@ +#include + +int gpu_ndevices(); +void gpu_set_device(int32_t i); + +void gpu_allocate(void** ptr, const int64_t n); +void gpu_free(void** ptr); + +void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n); +void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n); +void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n); + +void gpu_stream_create(void** ptr); +void gpu_stream_destroy(void** ptr); +void gpu_set_stream(void* handle, void* stream); +void gpu_synchronize(); + +void gpu_blas_create(void** handle); +void gpu_blas_destroy(void** handle); + +void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result); + +void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result); + +void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy); + +void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const float* alpha, + const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy); + +void gpu_dgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha, + const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc); + +void gpu_sgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha, + const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc); + +void gpu_dgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double* alpha, + const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc); + +void gpu_sgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float* alpha, + const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc); diff --git a/src/gpu/gpu.irp.f b/src/gpu/gpu.irp.f new file mode 100644 index 000000000..3b2feeb66 --- /dev/null +++ b/src/gpu/gpu.irp.f @@ -0,0 +1,26 @@ +use gpu + +BEGIN_PROVIDER [ type(gpu_blas), blas_handle ] + implicit none + BEGIN_DOC + ! Handle for cuBLAS or RocBLAS + END_DOC + call gpu_blas_create(blas_handle) +END_PROVIDER + +BEGIN_PROVIDER [ type(gpu_stream), gpu_default_stream ] + implicit none + BEGIN_DOC + ! Default stream + END_DOC + gpu_default_stream%c = C_NULL_PTR +END_PROVIDER + +BEGIN_PROVIDER [ integer, gpu_num ] + implicit none + BEGIN_DOC + ! Number of usable GPUs + END_DOC + gpu_num = gpu_ndevices() +END_PROVIDER + diff --git a/src/gpu/gpu_module.F90 b/src/gpu/gpu_module.F90 new file mode 100644 index 000000000..6050075fe --- /dev/null +++ b/src/gpu/gpu_module.F90 @@ -0,0 +1,707 @@ +module gpu + use, intrinsic :: iso_c_binding + implicit none + +! Data types +! ---------- + + type gpu_double1 + type(c_ptr) :: c + double precision, pointer :: f(:) + end type + + type gpu_double2 + type(c_ptr) :: c + double precision, pointer :: f(:,:) + end type + + type gpu_double3 + type(c_ptr) :: c + double precision, pointer :: f(:,:,:) + end type + + type gpu_double4 + type(c_ptr) :: c + double precision, pointer :: f(:,:,:,:) + end type + + type gpu_double5 + type(c_ptr) :: c + double precision, pointer :: f(:,:,:,:,:) + end type + + type gpu_double6 + type(c_ptr) :: c + double precision, pointer :: f(:,:,:,:,:,:) + end type + + + type gpu_blas + type(c_ptr) :: c + end type + + type gpu_stream + type(c_ptr) :: c + end type + + +! C interfaces +! ------------ + + interface + logical(c_bool) function no_gpu() bind(C) + import + end function + + integer function gpu_ndevices() bind(C) + import + end function + + subroutine gpu_set_device(id) bind(C) + import + integer(c_int32_t), value :: id + end subroutine + + subroutine gpu_allocate_c(ptr, n) bind(C, name='gpu_allocate') + import + type(c_ptr) :: ptr + integer(c_int64_t), value :: n + end subroutine + + subroutine gpu_deallocate_c(ptr) bind(C, name='gpu_deallocate') + import + type(c_ptr) :: ptr + end subroutine + + subroutine gpu_upload_c(cpu_ptr, gpu_ptr, n) bind(C, name='gpu_upload') + import + type(c_ptr), value :: cpu_ptr + type(c_ptr), value :: gpu_ptr + integer(c_int64_t), value :: n + end subroutine + + subroutine gpu_download_c(gpu_ptr, cpu_ptr, n) bind(C, name='gpu_download') + import + type(c_ptr), value :: gpu_ptr + type(c_ptr), value :: cpu_ptr + integer(c_int64_t), value :: n + end subroutine + + subroutine gpu_copy_c(gpu_ptr_src, gpu_ptr_dest, n) bind(C, name='gpu_copy') + import + type(c_ptr), value :: gpu_ptr_src + type(c_ptr), value :: gpu_ptr_dest + integer(c_int64_t), value :: n + end subroutine + + subroutine gpu_stream_create_c(stream) bind(C, name='gpu_stream_create') + import + type(c_ptr) :: stream + end subroutine + + subroutine gpu_stream_destroy_c(stream) bind(C, name='gpu_stream_destroy') + import + type(c_ptr) :: stream + end subroutine + + subroutine gpu_set_stream_c(handle, stream) bind(C, name='gpu_set_stream') + import + type(c_ptr), value :: handle, stream + end subroutine + + subroutine gpu_synchronize() bind(C) + import + end subroutine + + subroutine gpu_blas_create_c(handle) bind(C, name='gpu_blas_create') + import + type(c_ptr) :: handle + end subroutine + + subroutine gpu_blas_destroy_c(handle) bind(C, name='gpu_blas_destroy') + import + type(c_ptr) :: handle + end subroutine + + subroutine gpu_ddot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_ddot') + import + type(c_ptr), value, intent(in) :: handle + integer(c_int64_t), value :: n, incx, incy + type(c_ptr), value :: dx, dy + real(c_double), intent(out) :: res + end subroutine + + subroutine gpu_sdot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_sdot') + import + type(c_ptr), value, intent(in) :: handle + integer(c_int64_t), value :: n, incx, incy + type(c_ptr), intent(in), value :: dx, dy + real(c_float), intent(out) :: res + end subroutine + + subroutine gpu_dgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, & + b, ldb, c, ldc) bind(C, name='gpu_dgeam') + import + type(c_ptr), value, intent(in) :: handle + character(c_char), intent(in), value :: transa, transb + integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc + real(c_double), intent(in) :: alpha, beta + type(c_ptr), value :: a, b, c + end subroutine + + subroutine gpu_sgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, & + b, ldb, c, ldc) bind(C, name='gpu_sgeam') + import + type(c_ptr), value, intent(in) :: handle + character(c_char), intent(in), value :: transa, transb + integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc + real(c_float), intent(in) :: alpha, beta + real(c_float) :: a, b, c + end subroutine + + subroutine gpu_dgemv_c(handle, transa, m, n, alpha, a, lda, & + x, incx, beta, y, incy) bind(C, name='gpu_dgemv') + import + type(c_ptr), value, intent(in) :: handle + character(c_char), intent(in) :: transa + integer(c_int64_t), intent(in), value :: m, n, lda, incx, incy + real(c_double), intent(in) :: alpha, beta + real(c_double) :: a, x, y + end subroutine + + subroutine gpu_sgemv_c(handle, transa, m, n, alpha, a, lda, & + x, incx, beta, y, incy) bind(C, name='gpu_sgemv') + import + type(c_ptr), value, intent(in) :: handle + character(c_char), intent(in) :: transa + integer(c_int64_t), intent(in), value :: m, n, lda, incx, incy + real(c_float), intent(in) :: alpha, beta + real(c_float) :: a, x, y + end subroutine + + + subroutine gpu_dgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, & + b, ldb, beta, c, ldc) bind(C, name='gpu_dgemm') + import + type(c_ptr), value, intent(in) :: handle + character(c_char), intent(in) :: transa, transb + integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc + real(c_double), intent(in) :: alpha, beta + real(c_double) :: a, b, c + end subroutine + + subroutine gpu_sgemm_c(handle, transa, transb, m, n, k, alpha, a, lda, & + b, ldb, beta, c, ldc) bind(C, name='gpu_sgemm') + import + type(c_ptr), value, intent(in) :: handle + character(c_char), intent(in), value :: transa, transb + integer(c_int64_t), intent(in), value :: m, n, k, lda, ldb, ldc + real(c_float), intent(in) :: alpha, beta + real(c_float) :: a, b, c + end subroutine + + end interface + + +! Polymorphic interfaces +! ---------------------- + + interface gpu_allocate + procedure gpu_allocate_double1 & + ,gpu_allocate_double2 & + ,gpu_allocate_double3 & + ,gpu_allocate_double4 & + ,gpu_allocate_double5 & + ,gpu_allocate_double6 & + ,gpu_allocate_double1_64 & + ,gpu_allocate_double2_64 & + ,gpu_allocate_double3_64 & + ,gpu_allocate_double4_64 & + ,gpu_allocate_double5_64 & + ,gpu_allocate_double6_64 + end interface gpu_allocate + + interface gpu_deallocate + procedure gpu_deallocate_double1 & + ,gpu_deallocate_double2 & + ,gpu_deallocate_double3 & + ,gpu_deallocate_double4 & + ,gpu_deallocate_double5 & + ,gpu_deallocate_double6 + end interface gpu_deallocate + + interface gpu_upload + procedure gpu_upload_double1 & + ,gpu_upload_double2 & + ,gpu_upload_double3 & + ,gpu_upload_double4 & + ,gpu_upload_double5 & + ,gpu_upload_double6 + end interface gpu_upload + + interface gpu_download + procedure gpu_download_double1 & + ,gpu_download_double2 & + ,gpu_download_double3 & + ,gpu_download_double4 & + ,gpu_download_double5 & + ,gpu_download_double6 + end interface gpu_download + + interface gpu_copy + procedure gpu_copy_double1 & + ,gpu_copy_double2 & + ,gpu_copy_double3 & + ,gpu_copy_double4 & + ,gpu_copy_double5 & + ,gpu_copy_double6 + end interface gpu_copy + + + contains + + +! gpu_allocate +! ------------ + + subroutine gpu_allocate_double1(ptr, s) + implicit none + type(gpu_double1), intent(inout) :: ptr + integer, intent(in) :: s + + call gpu_allocate_c(ptr%c, s*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s /)) + end subroutine + + subroutine gpu_allocate_double2(ptr, s1, s2) + implicit none + type(gpu_double2), intent(inout) :: ptr + integer, intent(in) :: s1, s2 + + call gpu_allocate_c(ptr%c, s1*s2*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /)) + end subroutine + + subroutine gpu_allocate_double3(ptr, s1, s2, s3) + implicit none + type(gpu_double3), intent(inout) :: ptr + integer, intent(in) :: s1, s2, s3 + + call gpu_allocate_c(ptr%c, s1*s2*s3*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /)) + end subroutine + + subroutine gpu_allocate_double4(ptr, s1, s2, s3, s4) + implicit none + type(gpu_double4), intent(inout) :: ptr + integer, intent(in) :: s1, s2, s3, s4 + + call gpu_allocate_c(ptr%c, s1*s2*s3*s4*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /)) + end subroutine + + subroutine gpu_allocate_double5(ptr, s1, s2, s3, s4, s5) + implicit none + type(gpu_double5), intent(inout) :: ptr + integer, intent(in) :: s1, s2, s3, s4, s5 + + call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /)) + end subroutine + + subroutine gpu_allocate_double6(ptr, s1, s2, s3, s4, s5, s6) + implicit none + type(gpu_double6), intent(inout) :: ptr + integer, intent(in) :: s1, s2, s3, s4, s5, s6 + + call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*s6*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /)) + end subroutine + + + subroutine gpu_allocate_double1_64(ptr, s) + implicit none + type(gpu_double1), intent(inout) :: ptr + integer*8, intent(in) :: s + + call gpu_allocate_c(ptr%c, s) + call c_f_pointer(ptr%c, ptr%f, (/ s /)) + end subroutine + + subroutine gpu_allocate_double2_64(ptr, s1, s2) + implicit none + type(gpu_double2), intent(inout) :: ptr + integer*8, intent(in) :: s1, s2 + + call gpu_allocate_c(ptr%c, s1*s2*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2 /)) + end subroutine + + subroutine gpu_allocate_double3_64(ptr, s1, s2, s3) + implicit none + type(gpu_double3), intent(inout) :: ptr + integer*8, intent(in) :: s1, s2, s3 + + call gpu_allocate_c(ptr%c, s1*s2*s3*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3 /)) + end subroutine + + subroutine gpu_allocate_double4_64(ptr, s1, s2, s3, s4) + implicit none + type(gpu_double4), intent(inout) :: ptr + integer*8, intent(in) :: s1, s2, s3, s4 + + call gpu_allocate_c(ptr%c, s1*s2*s3*s4*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4 /)) + end subroutine + + subroutine gpu_allocate_double5_64(ptr, s1, s2, s3, s4, s5) + implicit none + type(gpu_double5), intent(inout) :: ptr + integer*8, intent(in) :: s1, s2, s3, s4, s5 + + call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5 /)) + end subroutine + + subroutine gpu_allocate_double6_64(ptr, s1, s2, s3, s4, s5, s6) + implicit none + type(gpu_double6), intent(inout) :: ptr + integer*8, intent(in) :: s1, s2, s3, s4, s5, s6 + + call gpu_allocate_c(ptr%c, s1*s2*s3*s4*s5*s6*8_8) + call c_f_pointer(ptr%c, ptr%f, (/ s1, s2, s3, s4, s5, s6 /)) + end subroutine + + +! gpu_deallocate +! -------------- + + subroutine gpu_deallocate_double1(ptr) + implicit none + type(gpu_double1), intent(inout) :: ptr + call gpu_deallocate_c(ptr%c) + NULLIFY(ptr%f) + end subroutine + + subroutine gpu_deallocate_double2(ptr) + implicit none + type(gpu_double2), intent(inout) :: ptr + call gpu_deallocate_c(ptr%c) + NULLIFY(ptr%f) + end subroutine + + subroutine gpu_deallocate_double3(ptr) + implicit none + type(gpu_double3), intent(inout) :: ptr + call gpu_deallocate_c(ptr%c) + NULLIFY(ptr%f) + end subroutine + + subroutine gpu_deallocate_double4(ptr) + implicit none + type(gpu_double4), intent(inout) :: ptr + call gpu_deallocate_c(ptr%c) + NULLIFY(ptr%f) + end subroutine + + subroutine gpu_deallocate_double5(ptr) + implicit none + type(gpu_double5), intent(inout) :: ptr + call gpu_deallocate_c(ptr%c) + NULLIFY(ptr%f) + end subroutine + + subroutine gpu_deallocate_double6(ptr) + implicit none + type(gpu_double6), intent(inout) :: ptr + call gpu_deallocate_c(ptr%c) + NULLIFY(ptr%f) + end subroutine + + +! gpu_upload +! ---------- + + subroutine gpu_upload_double1(cpu_ptr, gpu_ptr) + implicit none + double precision, target, intent(in) :: cpu_ptr(*) + type(gpu_double1), intent(in) :: gpu_ptr + call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, 8_8*size(gpu_ptr%f)) + end subroutine + + subroutine gpu_upload_double2(cpu_ptr, gpu_ptr) + implicit none + double precision, target, intent(in) :: cpu_ptr(:,:) + type(gpu_double2), intent(in) :: gpu_ptr + call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) + end subroutine + + subroutine gpu_upload_double3(cpu_ptr, gpu_ptr) + implicit none + double precision, target, intent(in) :: cpu_ptr(:,:,:) + type(gpu_double3), intent(in) :: gpu_ptr + call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) + end subroutine + + subroutine gpu_upload_double4(cpu_ptr, gpu_ptr) + implicit none + double precision, target, intent(in) :: cpu_ptr(:,:,:,:) + type(gpu_double4), intent(in) :: gpu_ptr + call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) + end subroutine + + subroutine gpu_upload_double5(cpu_ptr, gpu_ptr) + implicit none + double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:) + type(gpu_double5), intent(in) :: gpu_ptr + call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) + end subroutine + + subroutine gpu_upload_double6(cpu_ptr, gpu_ptr) + implicit none + double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:,:) + type(gpu_double6), intent(in) :: gpu_ptr + call gpu_upload_c(c_loc(cpu_ptr), gpu_ptr%c, product(shape(gpu_ptr%f)*1_8)*8_8) + end subroutine + + +! gpu_download +! ------------ + + subroutine gpu_download_double1(gpu_ptr, cpu_ptr) + implicit none + type(gpu_double1), intent(in) :: gpu_ptr + double precision, target, intent(in) :: cpu_ptr(:) + call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*size(gpu_ptr%f)) + end subroutine + + subroutine gpu_download_double2(gpu_ptr, cpu_ptr) + implicit none + type(gpu_double2), intent(in) :: gpu_ptr + double precision, target, intent(in) :: cpu_ptr(:,:) + call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) + end subroutine + + subroutine gpu_download_double3(gpu_ptr, cpu_ptr) + implicit none + type(gpu_double3), intent(in) :: gpu_ptr + double precision, target, intent(in) :: cpu_ptr(:,:,:) + call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) + end subroutine + + subroutine gpu_download_double4(gpu_ptr, cpu_ptr) + implicit none + type(gpu_double4), intent(in) :: gpu_ptr + double precision, target, intent(in) :: cpu_ptr(:,:,:,:) + call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) + end subroutine + + subroutine gpu_download_double5(gpu_ptr, cpu_ptr) + implicit none + type(gpu_double5), intent(in) :: gpu_ptr + double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:) + call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) + end subroutine + + subroutine gpu_download_double6(gpu_ptr, cpu_ptr) + implicit none + type(gpu_double6), intent(in) :: gpu_ptr + double precision, target, intent(in) :: cpu_ptr(:,:,:,:,:,:) + call gpu_download_c(gpu_ptr%c, c_loc(cpu_ptr), 8_8*product(shape(gpu_ptr%f)*1_8)) + end subroutine + +! gpu_copy +! -------- + + subroutine gpu_copy_double1(gpu_ptr_src, gpu_ptr_dest) + implicit none + type(gpu_double1), intent(in) :: gpu_ptr_src + type(gpu_double1), intent(in) :: gpu_ptr_dest + call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*size(gpu_ptr_dest%f)) + end subroutine + + subroutine gpu_copy_double2(gpu_ptr_src, gpu_ptr_dest) + implicit none + type(gpu_double2), intent(in) :: gpu_ptr_src + type(gpu_double2), intent(in) :: gpu_ptr_dest + call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8)) + end subroutine + + subroutine gpu_copy_double3(gpu_ptr_src, gpu_ptr_dest) + implicit none + type(gpu_double3), intent(in) :: gpu_ptr_src + type(gpu_double3), intent(in) :: gpu_ptr_dest + call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8)) + end subroutine + + subroutine gpu_copy_double4(gpu_ptr_src, gpu_ptr_dest) + implicit none + type(gpu_double4), intent(in) :: gpu_ptr_src + type(gpu_double4), intent(in) :: gpu_ptr_dest + call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8)) + end subroutine + + subroutine gpu_copy_double5(gpu_ptr_src, gpu_ptr_dest) + implicit none + type(gpu_double5), intent(in) :: gpu_ptr_src + type(gpu_double5), intent(in) :: gpu_ptr_dest + call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8)) + end subroutine + + subroutine gpu_copy_double6(gpu_ptr_src, gpu_ptr_dest) + implicit none + type(gpu_double6), intent(in) :: gpu_ptr_src + type(gpu_double6), intent(in) :: gpu_ptr_dest + call gpu_copy_c(gpu_ptr_src%c, gpu_ptr_dest%c, 8_8*product(shape(gpu_ptr_dest%f)*1_8)) + end subroutine + + +! gpu_stream +! ---------- + + subroutine gpu_stream_create(stream) + type(gpu_stream) :: stream + call gpu_stream_create_c(stream%c) + end subroutine + + subroutine gpu_stream_destroy(stream) + type(gpu_stream) :: stream + call gpu_stream_destroy_c(stream%c) + end subroutine + + subroutine gpu_set_stream(handle, stream) + type(gpu_blas) :: handle + type(gpu_stream) :: stream + call gpu_set_stream_c(handle%c, stream%c) + end subroutine + + +! gpu_blas +! -------- + + subroutine gpu_blas_create(handle) + type(gpu_blas) :: handle + call gpu_blas_create_c(handle%c) + end subroutine + + subroutine gpu_blas_destroy(handle) + type(gpu_blas) :: handle + call gpu_blas_destroy_c(handle%c) + end subroutine + + + + + +! dot +! --- + +subroutine gpu_ddot(handle, n, dx, incx, dy, incy, res) +! use gpu + type(gpu_blas), intent(in) :: handle + integer*4 :: n, incx, incy + double precision, target :: dx, dy + double precision, intent(out) :: res + call gpu_ddot_c(handle%c, int(n,c_int64_t), c_loc(dx), int(incx,c_int64_t), c_loc(dy), int(incy,c_int64_t), res) +end subroutine + + +subroutine gpu_ddot_64(handle, n, dx, incx, dy, incy, res) +! use gpu + type(gpu_blas), intent(in) :: handle + integer*8 :: n, incx, incy + double precision, target :: dx, dy + double precision, intent(out) :: res + call gpu_ddot_c(handle%c, n, c_loc(dx), incx, c_loc(dy), incy, res) +end subroutine + + +! geam +! ---- + +subroutine gpu_dgeam(handle, transa, transb, m, n, alpha, a, lda, beta, & + b, ldb, c, ldc) +! use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa, transb + integer*4, intent(in) :: m, n, lda, ldb, ldc + double precision, intent(in) :: alpha, beta + double precision, target :: a, b, c + call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, c_loc(a), int(lda,c_int64_t), beta, & + c_loc(b), int(ldb,c_int64_t), c_loc(c), int(ldc,c_int64_t)) +end subroutine + + +subroutine gpu_dgeam_64(handle, transa, transb, m, n, alpha, a, lda, beta, & + b, ldb, c, ldc) +! use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa, transb + integer*8, intent(in) :: m, n, lda, ldb, ldc + double precision, intent(in) :: alpha, beta + double precision, target :: a, b, c + call gpu_dgeam_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), alpha, c_loc(a), int(lda,c_int64_t), beta, & + c_loc(b), int(ldb,c_int64_t), c_loc(c), int(ldc,c_int64_t)) +end subroutine + + +! gemv +! ---- + +subroutine gpu_dgemv(handle, transa, m, n, alpha, a, lda, & + x, incx, beta, y, incy) +! use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa + integer*4, intent(in) :: m, n, lda, incx, incy + double precision, intent(in) :: alpha, beta + double precision :: a, x, y + call gpu_dgemv_c(handle%c, transa, int(m,c_int64_t), int(n,c_int64_t), & + alpha, a, int(lda,c_int64_t), & + x, int(incx,c_int64_t), beta, y, int(incy,c_int64_t)) +end subroutine + +subroutine gpu_dgemv_64(handle, transa, m, n, alpha, a, lda, & + x, incx, beta, y, incy) +! use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa + integer*8, intent(in) :: m, n, lda, incx, incy + double precision, intent(in) :: alpha, beta + double precision :: a, x, y + call gpu_dgemv_c(handle%c, transa, int(m,c_int64_t), int(n,c_int64_t), & + alpha, a, int(lda,c_int64_t), & + x, int(incx,c_int64_t), beta, y, int(incy,c_int64_t)) +end subroutine + + +! gemm +! ---- + +subroutine gpu_dgemm(handle, transa, transb, m, n, k, alpha, a, lda, & + b, ldb, beta, c, ldc) +! use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa, transb + integer*4, intent(in) :: m, n, k, lda, ldb, ldc + double precision, intent(in) :: alpha, beta + double precision :: a, b, c + call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), & + alpha, a, int(lda,c_int64_t), & + b, int(ldb,c_int64_t), beta, c, int(ldc,c_int64_t)) +end subroutine + +subroutine gpu_dgemm_64(handle, transa, transb, m, n, k, alpha, a, lda, & + b, ldb, beta, c, ldc) +! use gpu + type(gpu_blas), intent(in) :: handle + character, intent(in) :: transa, transb + integer*8, intent(in) :: m, n, k, lda, ldb, ldc + double precision, intent(in) :: alpha, beta + double precision :: a, b, c + call gpu_dgemm_c(handle%c, transa, transb, int(m,c_int64_t), int(n,c_int64_t), int(k,c_int64_t), & + alpha, a, int(lda,c_int64_t), b, int(ldb,c_int64_t), beta, c, int(ldc,c_int64_t)) +end subroutine + +end module diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats index b496a0890..214dfa868 100644 --- a/src/hartree_fock/10.hf.bats +++ b/src/hartree_fock/10.hf.bats @@ -115,9 +115,6 @@ rm -rf $EZFIO run hco.ezfio -113.1841002944744 } -@test "HBO" { # 0.805600 1.4543s - run hbo.ezfio -100.018582259096 -} @test "H2S" { # 1.655600 4.21402s run h2s.ezfio -398.6944130421982 @@ -127,9 +124,6 @@ rm -rf $EZFIO run h3coh.ezfio -114.9865030596373 } -@test "H2O" { # 1.811100 1.84387s - run h2o.ezfio -0.760270218692179E+02 -} @test "H2O2" { # 2.217000 8.50267s run h2o2.ezfio -150.7806608469964 @@ -187,13 +181,6 @@ rm -rf $EZFIO run oh.ezfio -75.42025413469165 } -@test "[Cu(NH3)4]2+" { # 59.610100 4.18766m - [[ -n $TRAVIS ]] && skip - qp set_file cu_nh3_4_2plus.ezfio - qp set scf_utils thresh_scf 1.e-10 - run cu_nh3_4_2plus.ezfio -1862.97590358903 -} - @test "SO2" { # 71.894900 3.22567m [[ -n $TRAVIS ]] && skip run so2.ezfio -41.55800401346361 diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index 65b3d63cf..6d917322c 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -194,17 +194,28 @@ endif - double precision :: rss + double precision :: rss, mem0, mem double precision :: memory_of_double integer :: iblock - integer, parameter :: block_size = 32 + integer :: block_size + + call resident_memory(mem0) + + block_size = 1024 + + rss = memory_of_double(2.d0*ao_num*ao_num) + do + mem = mem0 + block_size*rss + if ( (block_size < 2).or.(mem < qp_max_mem) ) exit + block_size = block_size/2 + enddo + + call check_mem(block_size*rss, irp_here) - rss = memory_of_double(ao_num*ao_num) - call check_mem(2.d0*block_size*rss, irp_here) allocate(X2(ao_num,ao_num,block_size,2)) allocate(X3(ao_num,block_size,ao_num,2)) - + ! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j) do iblock=1,cholesky_ao_num,block_size diff --git a/src/mo_optimization/NEED b/src/mo_optimization/NEED index 91f41ee3d..33f770c3d 100644 --- a/src/mo_optimization/NEED +++ b/src/mo_optimization/NEED @@ -1,7 +1,3 @@ -two_body_rdm -hartree_fock -cipsi -davidson_undressed +mo_optimization_utils selectors_full generators_full -utils_trust_region diff --git a/src/mo_optimization/cipsi_orb_opt.irp.f b/src/mo_optimization/cipsi_orb_opt.irp.f index 7e3a79eb9..19b3e9dbd 100644 --- a/src/mo_optimization/cipsi_orb_opt.irp.f +++ b/src/mo_optimization/cipsi_orb_opt.irp.f @@ -2,87 +2,7 @@ program optimization read_wf = .true. ! must be True for the orbital optimization !!! TOUCH read_wf - call run_optimization + call run_optimization_mos_CIPSI end -subroutine run_optimization - - implicit none - - double precision :: e_cipsi, e_opt, delta_e - double precision, allocatable :: Ev(:),PT2(:) - integer :: nb_iter,i - logical :: not_converged - character (len=100) :: filename - - PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals - allocate(Ev(N_states),PT2(N_states)) - - not_converged = .True. - nb_iter = 0 - - ! To start from the wf - N_det_max = max(n_det,5) - TOUCH N_det_max - - open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') - write(10,*) " Ndet E_cipsi E_opt Delta_e" - call state_average_energy(e_cipsi) - write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 - close(10) - - do while (not_converged) - print*,'' - print*,'======================' - print*,' Cipsi step:', nb_iter - print*,'======================' - print*,'' - print*,'********** cipsi step **********' - ! cispi calculation - call run_stochastic_cipsi(Ev,PT2) - - ! State average energy after the cipsi step - call state_average_energy(e_cipsi) - - print*,'' - print*,'********** optimization step **********' - ! orbital optimization - call run_orb_opt_trust_v2 - - ! State average energy after the orbital optimization - call state_average_energy(e_opt) - - print*,'' - print*,'********** diff step **********' - ! Gain in energy - delta_e = e_opt - e_cipsi - print*, 'Gain in energy during the orbital optimization:', delta_e - - open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') - write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e - close(10) - - ! Exit - if (delta_e > 1d-12) then - print*, 'WARNING, something wrong happened' - print*, 'The gain (delta_e) in energy during the optimization process' - print*, 'is > 0, but it must be < 0' - print*, 'The program will exit' - exit - endif - - if (n_det > n_det_max_opt) then - print*, 'The number of determinants in the wf > n_det_max_opt' - print*, 'The program will exit' - exit - endif - - ! To double the number of determinants in the wf - N_det_max = int(dble(n_det * 2)*0.9) - TOUCH N_det_max - - nb_iter = nb_iter + 1 - enddo - -end diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization_utils/EZFIO.cfg similarity index 100% rename from src/mo_optimization/EZFIO.cfg rename to src/mo_optimization_utils/EZFIO.cfg diff --git a/src/mo_optimization_utils/NEED b/src/mo_optimization_utils/NEED new file mode 100644 index 000000000..1a78a17f6 --- /dev/null +++ b/src/mo_optimization_utils/NEED @@ -0,0 +1,5 @@ +two_body_rdm +hartree_fock +cipsi +davidson_undressed +utils_trust_region diff --git a/src/mo_optimization_utils/README.md b/src/mo_optimization_utils/README.md new file mode 100644 index 000000000..94f29aee0 --- /dev/null +++ b/src/mo_optimization_utils/README.md @@ -0,0 +1,74 @@ +# Orbital optimization + +## Methods +Different methods are available: +- full hessian +``` +qp set orbital_optimization optimization_method full +``` +- diagonal hessian +``` +qp set orbital_optimization optimization_method diag +``` +- identity matrix +``` +qp set orbital_optimization optimization_method none +``` + +After the optimization the ezfio contains the optimized orbitals + +## For a fixed number of determinants +To optimize the MOs for the actual determinants: +``` +qp run orb_opt +``` + +## For a complete optimization, i.e, with a larger and larger wave function +To optimize the MOs with a larger and larger wave function: +``` +qp run optimization +``` + +The results are stored in the EZFIO in "mo_optimization/result_opt", +with the following format: +(1) (2) (3) (4) +1: Number of determinants in the wf, +2: Cispi energy before the optimization, +3: Cipsi energy after the optimization, +4: Energy difference between (2) and (3). + +The optimization process if the following: +- we do a first cipsi step to obtain a small number of determinants in the wf +- we run an orbital optimization for this wf +- we do a new cipsi step to double the number of determinants in the wf +- we run an orbital optimization for this wf +- ... +- we do that until the energy difference between (2) and (3) is + smaller than the targeted accuracy for the cispi (targeted_accuracy_cipsi in qp edit) + or the wf is larger than a given size (n_det_max_opt in qp_edit) +- after that you can reset your determinants (qp reset -d) and run a clean Cispi calculation + +### End of the optimization +You can choos the number of determinants after what the +optimization will stop: +``` +qp set orbital_optimization n_det_max_opt 1e5 # or any number +``` +## Weight of the states +You can change the weights of the differents states directly in qp edit. +It will affect ths weights used in the orbital optimization. + +# Tests +To run the tests: +``` +qp test +``` + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/mo_optimization/constants.h b/src/mo_optimization_utils/constants.h similarity index 100% rename from src/mo_optimization/constants.h rename to src/mo_optimization_utils/constants.h diff --git a/src/mo_optimization/diagonal_hessian_list_opt.irp.f b/src/mo_optimization_utils/diagonal_hessian_list_opt.irp.f similarity index 100% rename from src/mo_optimization/diagonal_hessian_list_opt.irp.f rename to src/mo_optimization_utils/diagonal_hessian_list_opt.irp.f diff --git a/src/mo_optimization/diagonal_hessian_opt.irp.f b/src/mo_optimization_utils/diagonal_hessian_opt.irp.f similarity index 100% rename from src/mo_optimization/diagonal_hessian_opt.irp.f rename to src/mo_optimization_utils/diagonal_hessian_opt.irp.f diff --git a/src/mo_optimization/diagonalization_hessian.irp.f b/src/mo_optimization_utils/diagonalization_hessian.irp.f similarity index 100% rename from src/mo_optimization/diagonalization_hessian.irp.f rename to src/mo_optimization_utils/diagonalization_hessian.irp.f diff --git a/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f b/src/mo_optimization_utils/first_diagonal_hessian_list_opt.irp.f similarity index 100% rename from src/mo_optimization/first_diagonal_hessian_list_opt.irp.f rename to src/mo_optimization_utils/first_diagonal_hessian_list_opt.irp.f diff --git a/src/mo_optimization/first_diagonal_hessian_opt.irp.f b/src/mo_optimization_utils/first_diagonal_hessian_opt.irp.f similarity index 100% rename from src/mo_optimization/first_diagonal_hessian_opt.irp.f rename to src/mo_optimization_utils/first_diagonal_hessian_opt.irp.f diff --git a/src/mo_optimization/first_gradient_list_opt.irp.f b/src/mo_optimization_utils/first_gradient_list_opt.irp.f similarity index 100% rename from src/mo_optimization/first_gradient_list_opt.irp.f rename to src/mo_optimization_utils/first_gradient_list_opt.irp.f diff --git a/src/mo_optimization/first_gradient_opt.irp.f b/src/mo_optimization_utils/first_gradient_opt.irp.f similarity index 100% rename from src/mo_optimization/first_gradient_opt.irp.f rename to src/mo_optimization_utils/first_gradient_opt.irp.f diff --git a/src/mo_optimization/first_hessian_list_opt.irp.f b/src/mo_optimization_utils/first_hessian_list_opt.irp.f similarity index 100% rename from src/mo_optimization/first_hessian_list_opt.irp.f rename to src/mo_optimization_utils/first_hessian_list_opt.irp.f diff --git a/src/mo_optimization/first_hessian_opt.irp.f b/src/mo_optimization_utils/first_hessian_opt.irp.f similarity index 100% rename from src/mo_optimization/first_hessian_opt.irp.f rename to src/mo_optimization_utils/first_hessian_opt.irp.f diff --git a/src/mo_optimization/gradient_list_opt.irp.f b/src/mo_optimization_utils/gradient_list_opt.irp.f similarity index 99% rename from src/mo_optimization/gradient_list_opt.irp.f rename to src/mo_optimization_utils/gradient_list_opt.irp.f index 9b7228c7a..9331c80f3 100644 --- a/src/mo_optimization/gradient_list_opt.irp.f +++ b/src/mo_optimization_utils/gradient_list_opt.irp.f @@ -319,7 +319,7 @@ subroutine gradient_list_opt(n,m,list,v_grad,max_elem,norm) ! \end{equation} ! We need a vector to use the gradient. Here the gradient is a -! antisymetric matrix so we can transform it in a vector of length +! antisymmetric matrix so we can transform it in a vector of length ! mo_num*(mo_num-1)/2. ! Here we do these two things at the same time. diff --git a/src/mo_optimization/gradient_opt.irp.f b/src/mo_optimization_utils/gradient_opt.irp.f similarity index 99% rename from src/mo_optimization/gradient_opt.irp.f rename to src/mo_optimization_utils/gradient_opt.irp.f index 25be6b5a5..10d42b354 100644 --- a/src/mo_optimization/gradient_opt.irp.f +++ b/src/mo_optimization_utils/gradient_opt.irp.f @@ -284,7 +284,7 @@ subroutine gradient_opt(n,v_grad,max_elem) ! \end{equation} ! We need a vector to use the gradient. Here the gradient is a -! antisymetric matrix so we can transform it in a vector of length +! antisymmetric matrix so we can transform it in a vector of length ! mo_num*(mo_num-1)/2. ! Here we do these two things at the same time. diff --git a/src/mo_optimization/hessian_list_opt.irp.f b/src/mo_optimization_utils/hessian_list_opt.irp.f similarity index 100% rename from src/mo_optimization/hessian_list_opt.irp.f rename to src/mo_optimization_utils/hessian_list_opt.irp.f diff --git a/src/mo_optimization/hessian_opt.irp.f b/src/mo_optimization_utils/hessian_opt.irp.f similarity index 100% rename from src/mo_optimization/hessian_opt.irp.f rename to src/mo_optimization_utils/hessian_opt.irp.f diff --git a/src/mo_optimization/org/TODO.org b/src/mo_optimization_utils/org/TODO.org similarity index 100% rename from src/mo_optimization/org/TODO.org rename to src/mo_optimization_utils/org/TODO.org diff --git a/src/mo_optimization/org/debug_gradient_list_opt.org b/src/mo_optimization_utils/org/debug_gradient_list_opt.org similarity index 100% rename from src/mo_optimization/org/debug_gradient_list_opt.org rename to src/mo_optimization_utils/org/debug_gradient_list_opt.org diff --git a/src/mo_optimization/org/debug_gradient_opt.org b/src/mo_optimization_utils/org/debug_gradient_opt.org similarity index 100% rename from src/mo_optimization/org/debug_gradient_opt.org rename to src/mo_optimization_utils/org/debug_gradient_opt.org diff --git a/src/mo_optimization/org/debug_hessian_list_opt.org b/src/mo_optimization_utils/org/debug_hessian_list_opt.org similarity index 100% rename from src/mo_optimization/org/debug_hessian_list_opt.org rename to src/mo_optimization_utils/org/debug_hessian_list_opt.org diff --git a/src/mo_optimization/org/debug_hessian_opt.org b/src/mo_optimization_utils/org/debug_hessian_opt.org similarity index 100% rename from src/mo_optimization/org/debug_hessian_opt.org rename to src/mo_optimization_utils/org/debug_hessian_opt.org diff --git a/src/mo_optimization/org/diagonal_hessian_list_opt.org b/src/mo_optimization_utils/org/diagonal_hessian_list_opt.org similarity index 100% rename from src/mo_optimization/org/diagonal_hessian_list_opt.org rename to src/mo_optimization_utils/org/diagonal_hessian_list_opt.org diff --git a/src/mo_optimization/org/diagonal_hessian_opt.org b/src/mo_optimization_utils/org/diagonal_hessian_opt.org similarity index 100% rename from src/mo_optimization/org/diagonal_hessian_opt.org rename to src/mo_optimization_utils/org/diagonal_hessian_opt.org diff --git a/src/mo_optimization/org/diagonalization_hessian.org b/src/mo_optimization_utils/org/diagonalization_hessian.org similarity index 100% rename from src/mo_optimization/org/diagonalization_hessian.org rename to src/mo_optimization_utils/org/diagonalization_hessian.org diff --git a/src/mo_optimization/org/first_diagonal_hessian_list_opt.org b/src/mo_optimization_utils/org/first_diagonal_hessian_list_opt.org similarity index 100% rename from src/mo_optimization/org/first_diagonal_hessian_list_opt.org rename to src/mo_optimization_utils/org/first_diagonal_hessian_list_opt.org diff --git a/src/mo_optimization/org/first_diagonal_hessian_opt.org b/src/mo_optimization_utils/org/first_diagonal_hessian_opt.org similarity index 100% rename from src/mo_optimization/org/first_diagonal_hessian_opt.org rename to src/mo_optimization_utils/org/first_diagonal_hessian_opt.org diff --git a/src/mo_optimization/org/first_gradient_list_opt.org b/src/mo_optimization_utils/org/first_gradient_list_opt.org similarity index 100% rename from src/mo_optimization/org/first_gradient_list_opt.org rename to src/mo_optimization_utils/org/first_gradient_list_opt.org diff --git a/src/mo_optimization/org/first_gradient_opt.org b/src/mo_optimization_utils/org/first_gradient_opt.org similarity index 100% rename from src/mo_optimization/org/first_gradient_opt.org rename to src/mo_optimization_utils/org/first_gradient_opt.org diff --git a/src/mo_optimization/org/first_hessian_list_opt.org b/src/mo_optimization_utils/org/first_hessian_list_opt.org similarity index 100% rename from src/mo_optimization/org/first_hessian_list_opt.org rename to src/mo_optimization_utils/org/first_hessian_list_opt.org diff --git a/src/mo_optimization/org/first_hessian_opt.org b/src/mo_optimization_utils/org/first_hessian_opt.org similarity index 100% rename from src/mo_optimization/org/first_hessian_opt.org rename to src/mo_optimization_utils/org/first_hessian_opt.org diff --git a/src/mo_optimization/org/gradient_list_opt.org b/src/mo_optimization_utils/org/gradient_list_opt.org similarity index 100% rename from src/mo_optimization/org/gradient_list_opt.org rename to src/mo_optimization_utils/org/gradient_list_opt.org diff --git a/src/mo_optimization/org/gradient_opt.org b/src/mo_optimization_utils/org/gradient_opt.org similarity index 100% rename from src/mo_optimization/org/gradient_opt.org rename to src/mo_optimization_utils/org/gradient_opt.org diff --git a/src/mo_optimization/org/hessian_list_opt.org b/src/mo_optimization_utils/org/hessian_list_opt.org similarity index 100% rename from src/mo_optimization/org/hessian_list_opt.org rename to src/mo_optimization_utils/org/hessian_list_opt.org diff --git a/src/mo_optimization/org/hessian_opt.org b/src/mo_optimization_utils/org/hessian_opt.org similarity index 100% rename from src/mo_optimization/org/hessian_opt.org rename to src/mo_optimization_utils/org/hessian_opt.org diff --git a/src/mo_optimization/org/my_providers.org b/src/mo_optimization_utils/org/my_providers.org similarity index 100% rename from src/mo_optimization/org/my_providers.org rename to src/mo_optimization_utils/org/my_providers.org diff --git a/src/mo_optimization/org/optimization.org b/src/mo_optimization_utils/org/optimization.org similarity index 100% rename from src/mo_optimization/org/optimization.org rename to src/mo_optimization_utils/org/optimization.org diff --git a/src/mo_optimization/org/orb_opt_trust_v2.org b/src/mo_optimization_utils/org/orb_opt_trust_v2.org similarity index 100% rename from src/mo_optimization/org/orb_opt_trust_v2.org rename to src/mo_optimization_utils/org/orb_opt_trust_v2.org diff --git a/src/mo_optimization/org/state_average_energy.org b/src/mo_optimization_utils/org/state_average_energy.org similarity index 100% rename from src/mo_optimization/org/state_average_energy.org rename to src/mo_optimization_utils/org/state_average_energy.org diff --git a/src/mo_optimization/org/state_weight_normalization.org b/src/mo_optimization_utils/org/state_weight_normalization.org similarity index 100% rename from src/mo_optimization/org/state_weight_normalization.org rename to src/mo_optimization_utils/org/state_weight_normalization.org diff --git a/src/mo_optimization/org/update_parameters.org b/src/mo_optimization_utils/org/update_parameters.org similarity index 100% rename from src/mo_optimization/org/update_parameters.org rename to src/mo_optimization_utils/org/update_parameters.org diff --git a/src/mo_optimization/org/update_st_av_ci_energy.org b/src/mo_optimization_utils/org/update_st_av_ci_energy.org similarity index 100% rename from src/mo_optimization/org/update_st_av_ci_energy.org rename to src/mo_optimization_utils/org/update_st_av_ci_energy.org diff --git a/src/mo_optimization_utils/routine_opt_mos.irp.f b/src/mo_optimization_utils/routine_opt_mos.irp.f new file mode 100644 index 000000000..fceba2c5e --- /dev/null +++ b/src/mo_optimization_utils/routine_opt_mos.irp.f @@ -0,0 +1,81 @@ + +subroutine run_optimization_mos_CIPSI + + implicit none + + double precision :: e_cipsi, e_opt, delta_e + double precision, allocatable :: Ev(:),PT2(:) + integer :: nb_iter,i + logical :: not_converged + character (len=100) :: filename + + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals + allocate(Ev(N_states),PT2(N_states)) + + not_converged = .True. + nb_iter = 0 + + ! To start from the wf + N_det_max = max(n_det,5) + TOUCH N_det_max + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') + write(10,*) " Ndet E_cipsi E_opt Delta_e" + call state_average_energy(e_cipsi) + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 + close(10) + + do while (not_converged) + print*,'' + print*,'======================' + print*,' Cipsi step:', nb_iter + print*,'======================' + print*,'' + print*,'********** cipsi step **********' + ! cispi calculation + call run_stochastic_cipsi(Ev,PT2) + + ! State average energy after the cipsi step + call state_average_energy(e_cipsi) + + print*,'' + print*,'********** optimization step **********' + ! orbital optimization + call run_orb_opt_trust_v2 + + ! State average energy after the orbital optimization + call state_average_energy(e_opt) + + print*,'' + print*,'********** diff step **********' + ! Gain in energy + delta_e = e_opt - e_cipsi + print*, 'Gain in energy during the orbital optimization:', delta_e + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e + close(10) + + ! Exit + if (delta_e > 1d-12) then + print*, 'WARNING, something wrong happened' + print*, 'The gain (delta_e) in energy during the optimization process' + print*, 'is > 0, but it must be < 0' + print*, 'The program will exit' + exit + endif + + if (n_det > n_det_max_opt) then + print*, 'The number of determinants in the wf > n_det_max_opt' + print*, 'The program will exit' + exit + endif + + ! To double the number of determinants in the wf + N_det_max = int(dble(n_det * 2)*0.9) + TOUCH N_det_max + + nb_iter = nb_iter + 1 + enddo + +end diff --git a/src/mo_optimization/run_orb_opt_trust_v2.irp.f b/src/mo_optimization_utils/run_orb_opt_trust_v2.irp.f similarity index 100% rename from src/mo_optimization/run_orb_opt_trust_v2.irp.f rename to src/mo_optimization_utils/run_orb_opt_trust_v2.irp.f diff --git a/src/mo_optimization/save_energy.irp.f b/src/mo_optimization_utils/save_energy.irp.f similarity index 100% rename from src/mo_optimization/save_energy.irp.f rename to src/mo_optimization_utils/save_energy.irp.f diff --git a/src/mo_optimization/state_average_energy.irp.f b/src/mo_optimization_utils/state_average_energy.irp.f similarity index 100% rename from src/mo_optimization/state_average_energy.irp.f rename to src/mo_optimization_utils/state_average_energy.irp.f diff --git a/src/mo_optimization/state_weight_normalization.irp.f b/src/mo_optimization_utils/state_weight_normalization.irp.f similarity index 100% rename from src/mo_optimization/state_weight_normalization.irp.f rename to src/mo_optimization_utils/state_weight_normalization.irp.f diff --git a/src/mo_optimization/update_parameters.irp.f b/src/mo_optimization_utils/update_parameters.irp.f similarity index 100% rename from src/mo_optimization/update_parameters.irp.f rename to src/mo_optimization_utils/update_parameters.irp.f diff --git a/src/mo_optimization/update_st_av_ci_energy.irp.f b/src/mo_optimization_utils/update_st_av_ci_energy.irp.f similarity index 100% rename from src/mo_optimization/update_st_av_ci_energy.irp.f rename to src/mo_optimization_utils/update_st_av_ci_energy.irp.f diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg index 088a2416a..da9d8fc92 100644 --- a/src/mo_two_e_ints/EZFIO.cfg +++ b/src/mo_two_e_ints/EZFIO.cfg @@ -1,9 +1,21 @@ +[io_mo_cholesky] +type: Disk_access +doc: Read/Write |MO| Cholesky integrals from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + [io_mo_two_e_integrals] type: Disk_access doc: Read/Write |MO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None +[mo_integrals_cache_shift] +type: integer +doc: Adjusts the size of the MO integrals cache. 2: 2KB, 3: 32KB, 4: 512KB, 5: 8MB, 6: 128MB, 7: 2GB, 8: 32GB, 9: 512GB +interface: ezfio, provider, ocaml +default: 7 + [mo_integrals_threshold] type: Threshold doc: If | | < `mo_integrals_threshold` then is zero @@ -11,12 +23,6 @@ interface: ezfio,provider,ocaml default: 1.e-15 ezfio_name: threshold_mo -[no_vvvv_integrals] -type: logical -doc: If `True`, computes all integrals except for the integrals having 3 or 4 virtual indices -interface: ezfio,provider,ocaml -default: false - [io_mo_two_e_integrals_erf] type: Disk_access doc: Read/Write MO integrals with the long range interaction from/to disk [ Write | Read | None ] diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 349f13b9d..1fed949d9 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -1,9 +1,26 @@ +BEGIN_PROVIDER [ logical, do_mo_cholesky ] + implicit none + BEGIN_DOC + ! If True, use Cholesky vectors for MO integrals + END_DOC + do_mo_cholesky = do_ao_cholesky +! do_mo_cholesky = .False. +END_PROVIDER + BEGIN_PROVIDER [ integer, cholesky_mo_num ] implicit none BEGIN_DOC ! Number of Cholesky vectors in MO basis END_DOC - cholesky_mo_num = cholesky_ao_num + integer, external :: getUnitAndOpen + integer :: iunit + if (read_mo_cholesky) then + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'R') + read(iunit) cholesky_mo_num + close(iunit) + else + cholesky_mo_num = cholesky_ao_num + endif END_PROVIDER BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] @@ -28,24 +45,90 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis. Warning: it is transposed wrt cholesky_ao: + ! + ! - cholesky_ao is (ao_num^2 x cholesky_ao_num) + ! + ! - cholesky_mo_transp is (cholesky_mo_num x mo_num^2) + END_DOC + + double precision, allocatable :: X(:,:,:) + double precision :: wall0, wall1 + integer, external :: getUnitAndOpen + integer :: iunit, ierr, rank + + if (read_mo_cholesky) then + print *, 'Reading Cholesky MO vectors from disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'R') + read(iunit) rank + if (cholesky_mo_num /= rank) then + stop 'inconsistent rank' + endif + read(iunit) cholesky_mo_transp + close(iunit) + else + print *, '' + print *, 'AO->MO Transformation of Cholesky vectors' + print *, '-----------------------------------------' + print *, '' + + call wall_time(wall0) + + allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + endif + call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, & + cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num) + call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & + X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) + deallocate(X) + call wall_time(wall1) + print*,'Time to provide MO cholesky vectors = ',(wall1-wall0)/60.d0, ' min' + + + if (write_mo_cholesky) then + print *, 'Writing Cholesky MO vectors to disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_mo_transp', 'W') + write(iunit) cholesky_mo_num + write(iunit) cholesky_mo_transp + close(iunit) + call ezfio_set_mo_two_e_ints_io_mo_cholesky('Read') + endif + endif + +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, cholesky_semi_mo_transp_simple, (cholesky_mo_num, ao_num, mo_num) ] implicit none BEGIN_DOC ! Cholesky vectors in MO basis END_DOC double precision, allocatable :: X(:,:,:) + double precision :: wall0, wall1 integer :: ierr - print *, 'AO->MO Transformation of Cholesky vectors' + print *, 'Semi AO->MO Transformation of Cholesky vectors' + call wall_time(wall0) allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) if (ierr /= 0) then print *, irp_here, ': Allocation failed' endif - call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, & - cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num) - call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & - X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) - deallocate(X) + integer :: i_chol, i_mo, j_mo, i_ao + cholesky_semi_mo_transp_simple = 0.d0 + do i_mo = 1, mo_num + do i_ao = 1, ao_num + do j_mo = 1, mo_num + do i_chol = 1, cholesky_mo_num + cholesky_semi_mo_transp_simple(i_chol, i_ao,i_mo) += cholesky_mo_transp(i_chol,j_mo,i_mo) * mo_coef_transp(j_mo,i_ao) + enddo + enddo + enddo + enddo END_PROVIDER diff --git a/src/mo_two_e_ints/four_idx_novvvv.irp.f b/src/mo_two_e_ints/four_idx_novvvv.irp.f deleted file mode 100644 index 2be09689c..000000000 --- a/src/mo_two_e_ints/four_idx_novvvv.irp.f +++ /dev/null @@ -1,189 +0,0 @@ -!BEGIN_PROVIDER [ logical, no_vvvv_integrals ] -! implicit none -! BEGIN_DOC -! If `True`, computes all integrals except for the integrals having 3 or 4 virtual indices -! END_DOC -! -! no_vvvv_integrals = .False. -!END_PROVIDER - -BEGIN_PROVIDER [ double precision, mo_coef_novirt, (ao_num,n_core_inact_act_orb) ] - implicit none - BEGIN_DOC - ! MO coefficients without virtual MOs - END_DOC - integer :: j,jj - - do j=1,n_core_inact_act_orb - jj = list_core_inact_act(j) - mo_coef_novirt(:,j) = mo_coef(:,jj) - enddo - -END_PROVIDER - -subroutine ao_to_mo_novirt(A_ao,LDA_ao,A_mo,LDA_mo) - implicit none - BEGIN_DOC - ! Transform A from the |AO| basis to the |MO| basis excluding virtuals - ! - ! $C^\dagger.A_{ao}.C$ - END_DOC - integer, intent(in) :: LDA_ao,LDA_mo - double precision, intent(in) :: A_ao(LDA_ao,ao_num) - double precision, intent(out) :: A_mo(LDA_mo,n_core_inact_act_orb) - double precision, allocatable :: T(:,:) - - allocate ( T(ao_num,n_core_inact_act_orb) ) - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T - - call dgemm('N','N', ao_num, n_core_inact_act_orb, ao_num, & - 1.d0, A_ao,LDA_ao, & - mo_coef_novirt, size(mo_coef_novirt,1), & - 0.d0, T, size(T,1)) - - call dgemm('T','N', n_core_inact_act_orb, n_core_inact_act_orb, ao_num,& - 1.d0, mo_coef_novirt,size(mo_coef_novirt,1), & - T, ao_num, & - 0.d0, A_mo, size(A_mo,1)) - - deallocate(T) -end - - -subroutine four_idx_novvvv - print*,'********' - print*,'********' - print*,'********' - print*,'WARNING :: Using four_idx_novvvv, and we are not sure that this routine is not bugged ...' - print*,'********' - print*,'********' - print*,'********' - use map_module - implicit none - BEGIN_DOC - ! Retransform MO integrals for next CAS-SCF step - END_DOC - print*,'Using partial transformation' - print*,'It will not transform all integrals with at least 3 indices within the virtuals' - integer :: i,j,k,l,n_integrals - double precision, allocatable :: f(:,:,:), f2(:,:,:), d(:,:), T(:,:,:,:), T2(:,:,:,:) - double precision, external :: get_ao_two_e_integral - integer(key_kind), allocatable :: idx(:) - real(integral_kind), allocatable :: values(:) - - integer :: p,q,r,s - double precision :: c - allocate( T(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) , & - T2(n_core_inact_act_orb,n_core_inact_act_orb,ao_num,ao_num) ) - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP SHARED(mo_num,ao_num,T,n_core_inact_act_orb, mo_coef_transp, & - !$OMP mo_integrals_threshold,mo_coef,mo_integrals_map, & - !$OMP list_core_inact_act,T2,ao_integrals_map) & - !$OMP PRIVATE(i,j,k,l,p,q,r,s,idx,values,n_integrals, & - !$OMP f,f2,d,c) - allocate(f(ao_num,ao_num,ao_num), f2(ao_num,ao_num,ao_num), d(mo_num,mo_num), & - idx(mo_num*mo_num), values(mo_num*mo_num) ) - - ! - !$OMP DO - do s=1,ao_num - do r=1,ao_num - do q=1,ao_num - do p=1,r - f (p,q,r) = get_ao_two_e_integral(p,q,r,s,ao_integrals_map) - f (r,q,p) = f(p,q,r) - enddo - enddo - enddo - do r=1,ao_num - do q=1,ao_num - do p=1,ao_num - f2(p,q,r) = f(p,r,q) - enddo - enddo - enddo - ! f (p,q,r) = - ! f2(p,q,r) = - - do r=1,ao_num - call ao_to_mo_novirt(f (1,1,r),size(f ,1),T (1,1,r,s),size(T,1)) - call ao_to_mo_novirt(f2(1,1,r),size(f2,1),T2(1,1,r,s),size(T,1)) - enddo - ! T (i,j,p,q) = - ! T2(i,j,p,q) = - - enddo - !$OMP END DO - - !$OMP DO - do j=1,n_core_inact_act_orb - do i=1,n_core_inact_act_orb - do s=1,ao_num - do r=1,ao_num - f (r,s,1) = T (i,j,r,s) - f2(r,s,1) = T2(i,j,r,s) - enddo - enddo - call ao_to_mo(f ,size(f ,1),d,size(d,1)) - n_integrals = 0 - do l=1,mo_num - do k=1,mo_num - n_integrals+=1 - call two_e_integrals_index(list_core_inact_act(i),list_core_inact_act(j),k,l,idx(n_integrals)) - values(n_integrals) = d(k,l) - enddo - enddo - call map_append(mo_integrals_map, idx, values, n_integrals) - - call ao_to_mo(f2,size(f2,1),d,size(d,1)) - n_integrals = 0 - do l=1,mo_num - do k=1,mo_num - n_integrals+=1 - call two_e_integrals_index(list_core_inact_act(i),k,list_core_inact_act(j),l,idx(n_integrals)) - values(n_integrals) = d(k,l) - enddo - enddo - call map_append(mo_integrals_map, idx, values, n_integrals) - enddo - enddo - !$OMP END DO - deallocate(f,f2,d,idx,values) - - !$OMP END PARALLEL - - deallocate(T,T2) - - - call map_sort(mo_integrals_map) - call map_unique(mo_integrals_map) - call map_shrink(mo_integrals_map,real(mo_integrals_threshold,integral_kind)) - -end - -subroutine four_idx_novvvv2 - use bitmasks - implicit none - integer :: i - integer(bit_kind) :: mask_ijkl(N_int,4) - - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = full_ijkl_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = full_ijkl_bitmask_4(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - -end diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index eb05da84d..c0dab506c 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -10,7 +10,7 @@ double precision :: get_two_e_integral double precision :: integral - if (do_ao_cholesky) then + if (do_mo_cholesky) then double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) allocate(buffer_jj(cholesky_mo_num,mo_num), buffer(mo_num,mo_num,mo_num)) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index ada256a2a..eeb4279f2 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -32,60 +32,96 @@ subroutine insert_into_mo_integrals_map(n_integrals, & call map_update(mo_integrals_map, buffer_i, buffer_values, n_integrals, thr) end - BEGIN_PROVIDER [ integer*4, mo_integrals_cache_min ] -&BEGIN_PROVIDER [ integer*4, mo_integrals_cache_max ] -&BEGIN_PROVIDER [ integer*8, mo_integrals_cache_min_8 ] -&BEGIN_PROVIDER [ integer*8, mo_integrals_cache_max_8 ] + BEGIN_PROVIDER [ integer, mo_integrals_cache_min ] +&BEGIN_PROVIDER [ integer, mo_integrals_cache_max ] +&BEGIN_PROVIDER [ integer, mo_integrals_cache_size ] implicit none BEGIN_DOC ! Min and max values of the MOs for which the integrals are in the cache END_DOC - mo_integrals_cache_min_8 = max(1_8,elec_alpha_num - 63_8) - mo_integrals_cache_max_8 = min(int(mo_num,8),mo_integrals_cache_min_8+127_8) - mo_integrals_cache_min = max(1,elec_alpha_num - 63) - mo_integrals_cache_max = min(mo_num,mo_integrals_cache_min+127) + + mo_integrals_cache_size = 2**mo_integrals_cache_shift + + mo_integrals_cache_min = max(1,elec_alpha_num - (mo_integrals_cache_size/2 - 1) ) + mo_integrals_cache_max = min(mo_num, mo_integrals_cache_min + mo_integrals_cache_size - 1) + print *, 'MO integrals cache: (', mo_integrals_cache_min, ', ', mo_integrals_cache_max, ')' END_PROVIDER -BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:128_8*128_8*128_8*128_8) ] +BEGIN_PROVIDER [ double precision, mo_integrals_cache, (0_8:(1_8*mo_integrals_cache_size)**4) ] implicit none BEGIN_DOC ! Cache of MO integrals for fast access END_DOC PROVIDE mo_two_e_integrals_in_map - integer*8 :: i,j,k,l - integer*4 :: i4,j4,k4,l4 + integer :: i,j,k,l integer*8 :: ii integer(key_kind) :: idx real(integral_kind) :: integral FREE ao_integrals_cache - !$OMP PARALLEL DO PRIVATE (i,j,k,l,i4,j4,k4,l4,idx,ii,integral) - do l=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - l4 = int(l,4) - do k=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - k4 = int(k,4) - do j=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - j4 = int(j,4) - do i=mo_integrals_cache_min_8,mo_integrals_cache_max_8 - i4 = int(i,4) - !DIR$ FORCEINLINE - call two_e_integrals_index(i4,j4,k4,l4,idx) - !DIR$ FORCEINLINE - call map_get(mo_integrals_map,idx,integral) - ii = l-mo_integrals_cache_min_8 - ii = ior( shiftl(ii,7), k-mo_integrals_cache_min_8) - ii = ior( shiftl(ii,7), j-mo_integrals_cache_min_8) - ii = ior( shiftl(ii,7), i-mo_integrals_cache_min_8) - mo_integrals_cache(ii) = integral + if (do_mo_cholesky) then + + call set_multiple_levels_omp(.False.) + !$OMP PARALLEL DO PRIVATE (k,l,ii) + do l=mo_integrals_cache_min,mo_integrals_cache_max + do k=mo_integrals_cache_min,mo_integrals_cache_max + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = shiftl(ii,mo_integrals_cache_shift) + ii = shiftl(ii,mo_integrals_cache_shift) + call dgemm('T','N', mo_integrals_cache_max-mo_integrals_cache_min+1, & + mo_integrals_cache_max-mo_integrals_cache_min+1, & + cholesky_mo_num, 1.d0, & + cholesky_mo_transp(1,mo_integrals_cache_min,k), cholesky_mo_num, & + cholesky_mo_transp(1,mo_integrals_cache_min,l), cholesky_mo_num, 0.d0, & + mo_integrals_cache(ii), mo_integrals_cache_size) + enddo + enddo + !$OMP END PARALLEL DO + + else + !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral) + do l=mo_integrals_cache_min,mo_integrals_cache_max + do k=mo_integrals_cache_min,mo_integrals_cache_max + do j=mo_integrals_cache_min,mo_integrals_cache_max + do i=mo_integrals_cache_min,mo_integrals_cache_max + !DIR$ FORCEINLINE + call two_e_integrals_index(i,j,k,l,idx) + !DIR$ FORCEINLINE + call map_get(mo_integrals_map,idx,integral) + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(j-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(i-mo_integrals_cache_min,8)) + mo_integrals_cache(ii) = integral + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + endif END_PROVIDER +double precision function get_two_e_integral_cache(i,j,k,l) + use map_module + implicit none + BEGIN_DOC + ! Returns one integral in the MO basis taken from the cache + END_DOC + integer, intent(in) :: i,j,k,l + integer*8 :: ii + + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(j-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii,mo_integrals_cache_shift), int(i-mo_integrals_cache_min,8)) + get_two_e_integral_cache = mo_integrals_cache(ii) + +end + + double precision function get_two_e_integral(i,j,k,l,map) use map_module implicit none @@ -95,10 +131,11 @@ double precision function get_two_e_integral(i,j,k,l,map) integer, intent(in) :: i,j,k,l integer(key_kind) :: idx integer :: ii - integer*8 :: ii_8 type(map_type), intent(inout) :: map real(integral_kind) :: tmp - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache do_mo_cholesky + if (use_banned_excitation) then if (banned_excitation(i,k)) then get_two_e_integral = 0.d0 @@ -109,39 +146,42 @@ double precision function get_two_e_integral(i,j,k,l,map) return endif endif + + ii = l-mo_integrals_cache_min ii = ior(ii, k-mo_integrals_cache_min) ii = ior(ii, j-mo_integrals_cache_min) ii = ior(ii, i-mo_integrals_cache_min) - if (iand(ii, -128) /= 0) then - !DIR$ FORCEINLINE - call two_e_integrals_index(i,j,k,l,idx) - !DIR$ FORCEINLINE - call map_get(map,idx,tmp) - get_two_e_integral = dble(tmp) + + if (iand(ii, -mo_integrals_cache_size) == 0) then + + double precision, external :: get_two_e_integral_cache + get_two_e_integral = get_two_e_integral_cache(i,j,k,l) + else - ii_8 = int(l,8)-mo_integrals_cache_min_8 - ii_8 = ior( shiftl(ii_8,7), int(k,8)-mo_integrals_cache_min_8) - ii_8 = ior( shiftl(ii_8,7), int(j,8)-mo_integrals_cache_min_8) - ii_8 = ior( shiftl(ii_8,7), int(i,8)-mo_integrals_cache_min_8) - get_two_e_integral = mo_integrals_cache(ii_8) - endif -end + ! Integral is not in the cache -double precision function mo_two_e_integral(i,j,k,l) - implicit none - BEGIN_DOC - ! Returns one integral in the MO basis - END_DOC - integer, intent(in) :: i,j,k,l - double precision :: get_two_e_integral - PROVIDE mo_two_e_integrals_in_map mo_integrals_cache - !DIR$ FORCEINLINE - mo_two_e_integral = get_two_e_integral(i,j,k,l,mo_integrals_map) - return + if (do_mo_cholesky) then + + double precision, external :: ddot + get_two_e_integral = ddot(cholesky_mo_num, cholesky_mo_transp(1,i,k), 1, cholesky_mo_transp(1,j,l), 1) +! double precision, external :: get_from_mo_cholesky_cache +! get_two_e_integral = get_from_mo_cholesky_cache(i,j,k,l,.False.) + + else + + !DIR$ FORCEINLINE + call two_e_integrals_index(i,j,k,l,idx) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + get_two_e_integral = dble(tmp) + endif + + endif end + subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) use map_module implicit none @@ -153,57 +193,166 @@ subroutine get_mo_two_e_integrals(j,k,l,sze,out_val,map) double precision, intent(out) :: out_val(sze) type(map_type), intent(inout) :: map integer :: i - double precision, external :: get_two_e_integral - integer :: ii, ii0 - integer*8 :: ii_8, ii0_8 + integer :: ii real(integral_kind) :: tmp integer(key_kind) :: i1, idx integer(key_kind) :: p,q,r,s,i2 PROVIDE mo_two_e_integrals_in_map mo_integrals_cache -!DEBUG -! do i=1,sze -! out_val(i) = get_two_e_integral(i,j,k,l,map) -! enddo -! return -!DEBUG - - out_val(1:sze) = 0.d0 if (banned_excitation(j,l)) then - return + out_val(1:sze) = 0.d0 + return endif +! + ii = l-mo_integrals_cache_min + ii = ior(ii, k-mo_integrals_cache_min) + ii = ior(ii, j-mo_integrals_cache_min) + + if (iand(ii, -mo_integrals_cache_size) == 0) then + ! Some integrals are in the cache + + if (mo_integrals_cache_min > 1) then + + if (do_mo_cholesky) then + + !TODO: here + call dgemv('T', cholesky_mo_num, mo_integrals_cache_min-1, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) + + else + + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=1,mo_integrals_cache_min-1 + if (banned_excitation(i,k)) then + out_val(i) = 0.d0 + cycle + endif + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif - ii0 = l-mo_integrals_cache_min - ii0 = ior(ii0, k-mo_integrals_cache_min) - ii0 = ior(ii0, j-mo_integrals_cache_min) + endif + + call get_mo_two_e_integrals_cache(j,k,l,sze,out_val) + + if (mo_integrals_cache_max < mo_num) then + + if (do_mo_cholesky) then + + !TODO: here + call dgemv('T', cholesky_mo_num, mo_num-mo_integrals_cache_max, 1.d0, & + cholesky_mo_transp(1,mo_integrals_cache_max+1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val(mo_integrals_cache_max+1), 1) + + else + + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=mo_integrals_cache_max+1,mo_num + if (banned_excitation(i,k)) then + out_val(i) = 0.d0 + cycle + endif + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + + endif + + endif + + else - ii0_8 = int(l,8)-mo_integrals_cache_min_8 - ii0_8 = ior( shiftl(ii0_8,7), int(k,8)-mo_integrals_cache_min_8) - ii0_8 = ior( shiftl(ii0_8,7), int(j,8)-mo_integrals_cache_min_8) + if (do_mo_cholesky) then - q = min(j,l) - s = max(j,l) - q = q+shiftr(s*s-s,1) + !TODO: here + call dgemv('T', cholesky_mo_num, mo_num, 1.d0, & + cholesky_mo_transp(1,1,k), cholesky_mo_num, & + cholesky_mo_transp(1,j,l), 1, 0.d0, & + out_val, 1) - do i=1,sze - if (banned_excitation(i,k)) cycle - ii = ior(ii0, i-mo_integrals_cache_min) - if (iand(ii, -128) == 0) then - ii_8 = ior( shiftl(ii0_8,7), int(i,8)-mo_integrals_cache_min_8) - out_val(i) = mo_integrals_cache(ii_8) else - p = min(i,k) - r = max(i,k) - p = p+shiftr(r*r-r,1) - i1 = min(p,q) - i2 = max(p,q) - idx = i1+shiftr(i2*i2-i2,1) - !DIR$ FORCEINLINE - call map_get(map,idx,tmp) - out_val(i) = dble(tmp) + + q = min(j,l) + s = max(j,l) + q = q+shiftr(s*s-s,1) + + do i=1,sze + if (banned_excitation(i,k)) cycle + p = min(i,k) + r = max(i,k) + p = p+shiftr(r*r-r,1) + i1 = min(p,q) + i2 = max(p,q) + idx = i1+shiftr(i2*i2-i2,1) + !DIR$ FORCEINLINE + call map_get(map,idx,tmp) + out_val(i) = dble(tmp) + enddo + endif - enddo + + endif + +end + +double precision function mo_two_e_integral(i,j,k,l) + implicit none + BEGIN_DOC + ! Returns one integral in the MO basis + END_DOC + integer, intent(in) :: i,j,k,l + double precision :: get_two_e_integral + PROVIDE mo_two_e_integrals_in_map mo_integrals_cache + !DIR$ FORCEINLINE + mo_two_e_integral = get_two_e_integral(i,j,k,l,mo_integrals_map) + return +end + + +subroutine get_mo_two_e_integrals_cache(j,k,l,sze,out_val) + use map_module + implicit none + BEGIN_DOC + ! Returns multiple integrals in the MO basis, all + ! i for j,k,l fixed, all integrals from the cache + END_DOC + integer, intent(in) :: j,k,l, sze + double precision, intent(out) :: out_val(sze) + integer*8 :: ii + + ii = int(l-mo_integrals_cache_min,8) + ii = ior( shiftl(ii, mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) + ii = ior( shiftl(ii, mo_integrals_cache_shift), int(j-mo_integrals_cache_min,8)) + ii = shiftl(ii, mo_integrals_cache_shift) + out_val(mo_integrals_cache_min:mo_integrals_cache_max) = & + mo_integrals_cache(ii:ii+int(mo_integrals_cache_max-mo_integrals_cache_min,8)) + end subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) @@ -220,9 +369,32 @@ subroutine get_mo_two_e_integrals_ij(k,l,sze,out_array,map) integer :: j real(integral_kind), allocatable :: tmp_val(:) - do j=1,sze - call get_mo_two_e_integrals(j,k,l,sze,out_array(1,j),map) - enddo + if ( (mo_integrals_cache_min>1).or.(mo_integrals_cache_max1).or.(mo_integrals_cache_max1).or.(mo_integrals_cache_max1).or.(mo_integrals_cache_max MO integrals transformation' - print *, '---------------------------------' - print *, '' - call wall_time(wall_1) call cpu_time(cpu_1) - if(no_vvvv_integrals)then - call four_idx_novvvv_old + if (do_mo_cholesky) then + PROVIDE cholesky_mo_transp else if (do_ao_cholesky) then call add_integrals_to_map_cholesky + else if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then + call four_idx_dgemm else - if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then - call four_idx_dgemm - else - call add_integrals_to_map(full_ijkl_bitmask_4) - endif + call add_integrals_to_map(full_ijkl_bitmask_4) endif endif @@ -92,8 +99,15 @@ subroutine four_idx_dgemm double precision, allocatable :: a1(:,:,:,:) double precision, allocatable :: a2(:,:,:,:) + PROVIDE ao_two_e_integrals_in_map mo_coef + + print *, '' + print *, 'DGEMM-based AO->MO Transformation' + print *, '---------------------------------' + print *, '' + if (ao_num > 1289) then - print *, irp_here, ': Integer overflow in ao_num**3' + print *, irp_here, ': Integer overflow in ao_num**3. Set do_ao_cholesky=.True.' endif allocate (a1(ao_num,ao_num,ao_num,ao_num)) @@ -213,6 +227,12 @@ subroutine add_integrals_to_map(mask_ijkl) PROVIDE ao_two_e_integrals_in_map mo_coef + + print *, '' + print *, 'Sparse AO->MO Transformation' + print *, '----------------------------' + print *, '' + !Get list of MOs for i,j,k and l !------------------------------- @@ -453,6 +473,9 @@ subroutine add_integrals_to_map(mask_ijkl) end + + + subroutine add_integrals_to_map_cholesky use bitmasks implicit none @@ -469,6 +492,7 @@ subroutine add_integrals_to_map_cholesky integer(key_kind) , allocatable :: buffer_i(:) real(integral_kind), allocatable :: buffer_value(:) + PROVIDE cholesky_mo_transp call set_multiple_levels_omp(.False.) !$OMP PARALLEL DEFAULT(SHARED) & @@ -515,837 +539,7 @@ subroutine add_integrals_to_map_cholesky end -subroutine add_integrals_to_map_three_indices(mask_ijk) - use bitmasks - implicit none - - BEGIN_DOC - ! Adds integrals to the MO map according to some bitmask - END_DOC - - integer(bit_kind), intent(in) :: mask_ijk(N_int,3) - - integer :: i,j,k,l - integer :: i0,j0,k0,l0 - double precision :: c, cpu_1, cpu_2, wall_1, wall_2, wall_0 - - integer, allocatable :: list_ijkl(:,:) - integer :: n_i, n_j, n_k - integer :: m - integer, allocatable :: two_e_tmp_0_idx(:) - real(integral_kind), allocatable :: two_e_tmp_0(:,:) - double precision, allocatable :: two_e_tmp_1(:) - double precision, allocatable :: two_e_tmp_2(:,:) - double precision, allocatable :: two_e_tmp_3(:,:,:) - !DIR$ ATTRIBUTES ALIGN : 64 :: two_e_tmp_1, two_e_tmp_2, two_e_tmp_3 - - integer :: n_integrals - integer :: size_buffer - integer(key_kind),allocatable :: buffer_i(:) - real(integral_kind),allocatable :: buffer_value(:) - double precision :: map_mb - - integer :: i1,j1,k1,l1, ii1, kmax, thread_num - integer :: i2,i3,i4 - double precision,parameter :: thr_coef = 1.d-10 - - PROVIDE ao_two_e_integrals_in_map mo_coef - - !Get list of MOs for i,j,k and l - !------------------------------- - - allocate(list_ijkl(mo_num,4)) - call bitstring_to_list( mask_ijk(1,1), list_ijkl(1,1), n_i, N_int ) - call bitstring_to_list( mask_ijk(1,2), list_ijkl(1,2), n_j, N_int ) - call bitstring_to_list( mask_ijk(1,3), list_ijkl(1,3), n_k, N_int ) - j = 0 - do i = 1, N_int - j += popcnt(mask_ijk(i,1)) - enddo - if(j==0)then - return - endif - - j = 0 - do i = 1, N_int - j += popcnt(mask_ijk(i,2)) - enddo - if(j==0)then - return - endif - - j = 0 - do i = 1, N_int - j += popcnt(mask_ijk(i,3)) - enddo - if(j==0)then - return - endif - - if (ao_num > 1289) then - print *, irp_here, ': Integer overflow in ao_num**3' - endif - size_buffer = min(ao_num*ao_num*ao_num,16000000) - print*, 'Providing the molecular integrals ' - print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& - ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' - - call wall_time(wall_1) - call cpu_time(cpu_1) - !$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & - !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& - !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & - !$OMP wall_0,thread_num) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k, & - !$OMP mo_coef_transp, & - !$OMP mo_coef_transp_is_built, list_ijkl, & - !$OMP mo_coef_is_built, wall_1, & - !$OMP mo_coef,mo_integrals_threshold,mo_integrals_map) - n_integrals = 0 - wall_0 = wall_1 - allocate(two_e_tmp_3(mo_num, n_j, n_k), & - two_e_tmp_1(mo_num), & - two_e_tmp_0(ao_num,ao_num), & - two_e_tmp_0_idx(ao_num), & - two_e_tmp_2(mo_num, n_j), & - buffer_i(size_buffer), & - buffer_value(size_buffer) ) - - thread_num = 0 - !$ thread_num = omp_get_thread_num() - !$OMP DO SCHEDULE(guided) - do l1 = 1,ao_num - two_e_tmp_3 = 0.d0 - do k1 = 1,ao_num - two_e_tmp_2 = 0.d0 - do j1 = 1,ao_num - call get_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1)) - enddo - do j1 = 1,ao_num - kmax = 0 - do i1 = 1,ao_num - c = two_e_tmp_0(i1,j1) - if (c == 0.d0) then - cycle - endif - kmax += 1 - two_e_tmp_0(kmax,j1) = c - two_e_tmp_0_idx(kmax) = i1 - enddo - - if (kmax==0) then - cycle - endif - - two_e_tmp_1 = 0.d0 - ii1=1 - do ii1 = 1,kmax-4,4 - i1 = two_e_tmp_0_idx(ii1) - i2 = two_e_tmp_0_idx(ii1+1) - i3 = two_e_tmp_0_idx(ii1+2) - i4 = two_e_tmp_0_idx(ii1+3) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + & - mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) + & - mo_coef_transp(i,i2) * two_e_tmp_0(ii1+1,j1) + & - mo_coef_transp(i,i3) * two_e_tmp_0(ii1+2,j1) + & - mo_coef_transp(i,i4) * two_e_tmp_0(ii1+3,j1) - enddo ! i - enddo ! ii1 - - i2 = ii1 - do ii1 = i2,kmax - i1 = two_e_tmp_0_idx(ii1) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) - enddo ! i - enddo ! ii1 - c = 0.d0 - - do i = list_ijkl(1,1), list_ijkl(n_i,1) - c = max(c,abs(two_e_tmp_1(i))) - if (c>mo_integrals_threshold) exit - enddo - if ( c < mo_integrals_threshold ) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - c = mo_coef_transp(j,j1) - if (abs(c) < thr_coef) then - cycle - endif - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_2(i,j0) = two_e_tmp_2(i,j0) + c * two_e_tmp_1(i) - enddo ! i - enddo ! j - enddo !j1 - if ( maxval(abs(two_e_tmp_2)) < mo_integrals_threshold ) then - cycle - endif - - - do k0 = 1, n_k - k = list_ijkl(k0,3) - c = mo_coef_transp(k,k1) - if (abs(c) < thr_coef) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - do i = list_ijkl(1,1), k - two_e_tmp_3(i,j0,k0) = two_e_tmp_3(i,j0,k0) + c* two_e_tmp_2(i,j0) - enddo!i - enddo !j - - enddo !k - enddo !k1 - - - - do l0 = 1,n_j - l = list_ijkl(l0,2) - c = mo_coef_transp(l,l1) - if (abs(c) < thr_coef) then - cycle - endif - do k0 = 1, n_k - k = list_ijkl(k0,3) - i1 = shiftr((k*k-k),1) - two_e_tmp_1 = 0.d0 - j0 = l0 - j = list_ijkl(j0,2) - do i0 = 1, n_i - i = list_ijkl(i0,1) - if (i>k) then - exit - endif - two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0) - enddo - - do i0 = 1, n_i - i = list_ijkl(i0,1) - if (i>k) then !min(k,j1-i1) - exit - endif - if (abs(two_e_tmp_1(i)) < mo_integrals_threshold) then - cycle - endif - n_integrals += 1 - buffer_value(n_integrals) = two_e_tmp_1(i) - if(i==k .and. j==l .and. i.ne.j)then - buffer_value(n_integrals) = buffer_value(n_integrals) *0.5d0 - endif - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - n_integrals = 0 - endif - enddo - enddo - enddo - - do l0 = 1,n_j - l = list_ijkl(l0,2) - c = mo_coef_transp(l,l1) - if (abs(c) < thr_coef) then - cycle - endif - do k0 = 1, n_k - k = list_ijkl(k0,3) - i1 = shiftr((k*k-k),1) - two_e_tmp_1 = 0.d0 - j0 = k0 - j = list_ijkl(k0,2) - i0 = l0 - i = list_ijkl(i0,2) - if (k==l) then - cycle - endif - two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0) - - n_integrals += 1 - buffer_value(n_integrals) = two_e_tmp_1(i) - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - n_integrals = 0 - endif - enddo - enddo - - call wall_time(wall_2) - if (thread_num == 0) then - if (wall_2 - wall_0 > 1.d0) then - wall_0 = wall_2 - print*, 100.*float(l1)/float(ao_num), '% in ', & - wall_2-wall_1, 's', map_mb(mo_integrals_map) ,'MB' - endif - endif - enddo - !$OMP END DO NOWAIT - deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3) - - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - deallocate(buffer_i, buffer_value) - !$OMP END PARALLEL - call map_merge(mo_integrals_map) - - call wall_time(wall_2) - call cpu_time(cpu_2) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - - deallocate(list_ijkl) - - - print*,'Molecular integrals provided:' - print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' - print*,' Number of MO integrals: ', mo_map_size - print*,' cpu time :',cpu_2 - cpu_1, 's' - print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' - -end - -subroutine add_integrals_to_map_no_exit_34(mask_ijkl) - use bitmasks - implicit none - - BEGIN_DOC - ! Adds integrals to tha MO map according to some bitmask - END_DOC - - integer(bit_kind), intent(in) :: mask_ijkl(N_int,4) - - integer :: i,j,k,l - integer :: i0,j0,k0,l0 - double precision :: c, cpu_1, cpu_2, wall_1, wall_2, wall_0 - - integer, allocatable :: list_ijkl(:,:) - integer :: n_i, n_j, n_k, n_l - integer, allocatable :: two_e_tmp_0_idx(:) - real(integral_kind), allocatable :: two_e_tmp_0(:,:) - double precision, allocatable :: two_e_tmp_1(:) - double precision, allocatable :: two_e_tmp_2(:,:) - double precision, allocatable :: two_e_tmp_3(:,:,:) - !DIR$ ATTRIBUTES ALIGN : 64 :: two_e_tmp_1, two_e_tmp_2, two_e_tmp_3 - - integer :: n_integrals - integer :: size_buffer - integer(key_kind),allocatable :: buffer_i(:) - real(integral_kind),allocatable :: buffer_value(:) - double precision :: map_mb - - integer :: i1,j1,k1,l1, ii1, kmax, thread_num - integer :: i2,i3,i4 - double precision,parameter :: thr_coef = 1.d-10 - - PROVIDE ao_two_e_integrals_in_map mo_coef - - !Get list of MOs for i,j,k and l - !------------------------------- - - allocate(list_ijkl(mo_num,4)) - call bitstring_to_list( mask_ijkl(1,1), list_ijkl(1,1), n_i, N_int ) - call bitstring_to_list( mask_ijkl(1,2), list_ijkl(1,2), n_j, N_int ) - call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int ) - call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int ) - - if (ao_num > 1289) then - print *, irp_here, ': Integer overflow in ao_num**3' - endif - size_buffer = min(ao_num*ao_num*ao_num,16000000) - print*, 'Providing the molecular integrals ' - print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& - ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' - - call wall_time(wall_1) - call cpu_time(cpu_1) - - !$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & - !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& - !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & - !$OMP wall_0,thread_num) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l, & - !$OMP mo_coef_transp, & - !$OMP mo_coef_transp_is_built, list_ijkl, & - !$OMP mo_coef_is_built, wall_1, & - !$OMP mo_coef,mo_integrals_threshold,mo_integrals_map) - n_integrals = 0 - wall_0 = wall_1 - allocate(two_e_tmp_3(mo_num, n_j, n_k), & - two_e_tmp_1(mo_num), & - two_e_tmp_0(ao_num,ao_num), & - two_e_tmp_0_idx(ao_num), & - two_e_tmp_2(mo_num, n_j), & - buffer_i(size_buffer), & - buffer_value(size_buffer) ) - - thread_num = 0 - !$ thread_num = omp_get_thread_num() - !$OMP DO SCHEDULE(guided) - do l1 = 1,ao_num - !IRP_IF COARRAY - ! if (mod(l1-this_image(),num_images()) /= 0 ) then - ! cycle - ! endif - !IRP_ENDIF - two_e_tmp_3 = 0.d0 - do k1 = 1,ao_num - two_e_tmp_2 = 0.d0 - do j1 = 1,ao_num - call get_ao_two_e_integrals(j1,k1,l1,ao_num,two_e_tmp_0(1,j1)) - enddo - do j1 = 1,ao_num - kmax = 0 - do i1 = 1,ao_num - c = two_e_tmp_0(i1,j1) - if (c == 0.d0) then - cycle - endif - kmax += 1 - two_e_tmp_0(kmax,j1) = c - two_e_tmp_0_idx(kmax) = i1 - enddo - - if (kmax==0) then - cycle - endif - - two_e_tmp_1 = 0.d0 - ii1=1 - do ii1 = 1,kmax-4,4 - i1 = two_e_tmp_0_idx(ii1) - i2 = two_e_tmp_0_idx(ii1+1) - i3 = two_e_tmp_0_idx(ii1+2) - i4 = two_e_tmp_0_idx(ii1+3) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + & - mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) + & - mo_coef_transp(i,i2) * two_e_tmp_0(ii1+1,j1) + & - mo_coef_transp(i,i3) * two_e_tmp_0(ii1+2,j1) + & - mo_coef_transp(i,i4) * two_e_tmp_0(ii1+3,j1) - enddo ! i - enddo ! ii1 - - i2 = ii1 - do ii1 = i2,kmax - i1 = two_e_tmp_0_idx(ii1) - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_1(i) = two_e_tmp_1(i) + mo_coef_transp(i,i1) * two_e_tmp_0(ii1,j1) - enddo ! i - enddo ! ii1 - c = 0.d0 - - do i = list_ijkl(1,1), list_ijkl(n_i,1) - c = max(c,abs(two_e_tmp_1(i))) - if (c>mo_integrals_threshold) exit - enddo - if ( c < mo_integrals_threshold ) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - c = mo_coef_transp(j,j1) - if (abs(c) < thr_coef) then - cycle - endif - do i = list_ijkl(1,1), list_ijkl(n_i,1) - two_e_tmp_2(i,j0) = two_e_tmp_2(i,j0) + c * two_e_tmp_1(i) - enddo ! i - enddo ! j - enddo !j1 - if ( maxval(abs(two_e_tmp_2)) < mo_integrals_threshold ) then - cycle - endif - - - do k0 = 1, n_k - k = list_ijkl(k0,3) - c = mo_coef_transp(k,k1) - if (abs(c) < thr_coef) then - cycle - endif - - do j0 = 1, n_j - j = list_ijkl(j0,2) - do i = list_ijkl(1,1), k - two_e_tmp_3(i,j0,k0) = two_e_tmp_3(i,j0,k0) + c* two_e_tmp_2(i,j0) - enddo!i - enddo !j - - enddo !k - enddo !k1 - - - - do l0 = 1,n_l - l = list_ijkl(l0,4) - c = mo_coef_transp(l,l1) - if (abs(c) < thr_coef) then - cycle - endif - j1 = shiftr((l*l-l),1) - do j0 = 1, n_j - j = list_ijkl(j0,2) - if (j > l) then - exit - endif - j1 += 1 - do k0 = 1, n_k - k = list_ijkl(k0,3) - i1 = shiftr((k*k-k),1) - two_e_tmp_1 = 0.d0 - do i0 = 1, n_i - i = list_ijkl(i0,1) - if (i>k) then - exit - endif - two_e_tmp_1(i) = c*two_e_tmp_3(i,j0,k0) - enddo - - do i0 = 1, n_i - i = list_ijkl(i0,1) - if(i> k)then - exit - endif - - if (abs(two_e_tmp_1(i)) < mo_integrals_threshold) then - cycle - endif - n_integrals += 1 - buffer_value(n_integrals) = two_e_tmp_1(i) - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - n_integrals = 0 - endif - enddo - enddo - enddo - enddo - - call wall_time(wall_2) - if (thread_num == 0) then - if (wall_2 - wall_0 > 1.d0) then - wall_0 = wall_2 - print*, 100.*float(l1)/float(ao_num), '% in ', & - wall_2-wall_1, 's', map_mb(mo_integrals_map) ,'MB' - endif - endif - enddo - !$OMP END DO NOWAIT - deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3) - - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) - deallocate(buffer_i, buffer_value) - !$OMP END PARALLEL - !IRP_IF COARRAY - ! print*, 'Communicating the map' - ! call communicate_mo_integrals() - !IRP_ENDIF - call map_merge(mo_integrals_map) - - call wall_time(wall_2) - call cpu_time(cpu_2) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - - deallocate(list_ijkl) - - - print*,'Molecular integrals provided:' - print*,' Size of MO map ', map_mb(mo_integrals_map) ,'MB' - print*,' Number of MO integrals: ', mo_map_size - print*,' cpu time :',cpu_2 - cpu_1, 's' - print*,' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1), ')' - - -end - - - - BEGIN_PROVIDER [ double precision, mo_two_e_integral_jj_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_jj_exchange_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_jj_anti_from_ao, (mo_num,mo_num) ] - implicit none - BEGIN_DOC - ! mo_two_e_integral_jj_from_ao(i,j) = J_ij - ! mo_two_e_integrals_jj_exchange_from_ao(i,j) = J_ij - ! mo_two_e_integrals_jj_anti_from_ao(i,j) = J_ij - K_ij - END_DOC - - integer :: i,j,p,q,r,s - double precision :: c - real(integral_kind) :: integral - integer :: n, pp - real(integral_kind), allocatable :: int_value(:) - integer, allocatable :: int_idx(:) - - double precision, allocatable :: iqrs(:,:), iqsr(:,:), iqis(:), iqri(:) - - if (.not.do_direct_integrals) then - PROVIDE ao_two_e_integrals_in_map mo_coef - endif - - mo_two_e_integral_jj_from_ao = 0.d0 - mo_two_e_integrals_jj_exchange_from_ao = 0.d0 - - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: iqrs, iqsr - - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE (i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx, & - !$OMP iqrs, iqsr,iqri,iqis) & - !$OMP SHARED(mo_num,mo_coef_transp,ao_num, & - !$OMP ao_integrals_threshold,do_direct_integrals) & - !$OMP REDUCTION(+:mo_two_e_integral_jj_from_ao,mo_two_e_integrals_jj_exchange_from_ao) - - allocate( int_value(ao_num), int_idx(ao_num), & - iqrs(mo_num,ao_num), iqis(mo_num), iqri(mo_num), & - iqsr(mo_num,ao_num) ) - - !$OMP DO SCHEDULE (guided) - do s=1,ao_num - do q=1,ao_num - - do j=1,ao_num - do i=1,mo_num - iqrs(i,j) = 0.d0 - iqsr(i,j) = 0.d0 - enddo - enddo - - if (do_direct_integrals) then - double precision :: ao_two_e_integral - do r=1,ao_num - call compute_ao_two_e_integrals(q,r,s,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call compute_ao_two_e_integrals(q,s,r,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - - else - - do r=1,ao_num - call get_ao_two_e_integrals_non_zero(q,r,s,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call get_ao_two_e_integrals_non_zero(q,s,r,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i=1,mo_num - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - endif - iqis = 0.d0 - iqri = 0.d0 - do r=1,ao_num - do i=1,mo_num - iqis(i) += mo_coef_transp(i,r) * iqrs(i,r) - iqri(i) += mo_coef_transp(i,r) * iqsr(i,r) - enddo - enddo - do i=1,mo_num - do j=1,mo_num - c = mo_coef_transp(j,q)*mo_coef_transp(j,s) - mo_two_e_integral_jj_from_ao(j,i) += c * iqis(i) - mo_two_e_integrals_jj_exchange_from_ao(j,i) += c * iqri(i) - enddo - enddo - - enddo - enddo - !$OMP END DO NOWAIT - deallocate(iqrs,iqsr,int_value,int_idx) - !$OMP END PARALLEL - - mo_two_e_integrals_jj_anti_from_ao = mo_two_e_integral_jj_from_ao - mo_two_e_integrals_jj_exchange_from_ao - - -END_PROVIDER - - BEGIN_PROVIDER [ double precision, mo_two_e_integrals_vv_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_vv_exchange_from_ao, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_two_e_integrals_vv_anti_from_ao, (mo_num,mo_num) ] - implicit none - BEGIN_DOC - ! mo_two_e_integrals_vv_from_ao(i,j) = J_ij - ! mo_two_e_integrals_vv_exchange_from_ao(i,j) = J_ij - ! mo_two_e_integrals_vv_anti_from_ao(i,j) = J_ij - K_ij - ! but only for the virtual orbitals - END_DOC - - integer :: i,j,p,q,r,s - integer :: i0,j0 - double precision :: c - real(integral_kind) :: integral - integer :: n, pp - real(integral_kind), allocatable :: int_value(:) - integer, allocatable :: int_idx(:) - - double precision, allocatable :: iqrs(:,:), iqsr(:,:), iqis(:), iqri(:) - - if (.not.do_direct_integrals) then - PROVIDE ao_two_e_integrals_in_map mo_coef - endif - - mo_two_e_integrals_vv_from_ao = 0.d0 - mo_two_e_integrals_vv_exchange_from_ao = 0.d0 - - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: iqrs, iqsr - - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE (i0,j0,i,j,p,q,r,s,integral,c,n,pp,int_value,int_idx, & - !$OMP iqrs, iqsr,iqri,iqis) & - !$OMP SHARED(n_virt_orb,mo_num,list_virt,mo_coef_transp,ao_num, & - !$OMP ao_integrals_threshold,do_direct_integrals) & - !$OMP REDUCTION(+:mo_two_e_integrals_vv_from_ao,mo_two_e_integrals_vv_exchange_from_ao) - - allocate( int_value(ao_num), int_idx(ao_num), & - iqrs(mo_num,ao_num), iqis(mo_num), iqri(mo_num),& - iqsr(mo_num,ao_num) ) - - !$OMP DO SCHEDULE (guided) - do s=1,ao_num - do q=1,ao_num - - do j=1,ao_num - do i0=1,n_virt_orb - i = list_virt(i0) - iqrs(i,j) = 0.d0 - iqsr(i,j) = 0.d0 - enddo - enddo - - if (do_direct_integrals) then - double precision :: ao_two_e_integral - do r=1,ao_num - call compute_ao_two_e_integrals(q,r,s,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i = list_virt(i0) - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call compute_ao_two_e_integrals(q,s,r,ao_num,int_value) - do p=1,ao_num - integral = int_value(p) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i =list_virt(i0) - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - - else - - do r=1,ao_num - call get_ao_two_e_integrals_non_zero(q,r,s,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i =list_virt(i0) - iqrs(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - call get_ao_two_e_integrals_non_zero(q,s,r,ao_num,int_value,int_idx,n) - do pp=1,n - p = int_idx(pp) - integral = int_value(pp) - if (abs(integral) > ao_integrals_threshold) then - do i0=1,n_virt_orb - i = list_virt(i0) - iqsr(i,r) += mo_coef_transp(i,p) * integral - enddo - endif - enddo - enddo - endif - iqis = 0.d0 - iqri = 0.d0 - do r=1,ao_num - do i0=1,n_virt_orb - i = list_virt(i0) - iqis(i) += mo_coef_transp(i,r) * iqrs(i,r) - iqri(i) += mo_coef_transp(i,r) * iqsr(i,r) - enddo - enddo - do i0=1,n_virt_orb - i= list_virt(i0) - do j0=1,n_virt_orb - j = list_virt(j0) - c = mo_coef_transp(j,q)*mo_coef_transp(j,s) - mo_two_e_integrals_vv_from_ao(j,i) += c * iqis(i) - mo_two_e_integrals_vv_exchange_from_ao(j,i) += c * iqri(i) - enddo - enddo - - enddo - enddo - !$OMP END DO NOWAIT - deallocate(iqrs,iqsr,int_value,int_idx) - !$OMP END PARALLEL - - mo_two_e_integrals_vv_anti_from_ao = mo_two_e_integrals_vv_from_ao - mo_two_e_integrals_vv_exchange_from_ao - ! print*, '**********' - ! do i0 =1, n_virt_orb - ! i = list_virt(i0) - ! print*, mo_two_e_integrals_vv_from_ao(i,i) - ! enddo - ! print*, '**********' - - -END_PROVIDER BEGIN_PROVIDER [ double precision, mo_two_e_integrals_jj, (mo_num,mo_num) ] @@ -1362,7 +556,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) double precision :: get_two_e_integral - if (do_ao_cholesky) then + if (do_mo_cholesky) then double precision, allocatable :: buffer(:,:) allocate (buffer(cholesky_mo_num,mo_num)) do k=1,cholesky_mo_num diff --git a/src/mo_two_e_ints/no_vvvv.irp.f b/src/mo_two_e_ints/no_vvvv.irp.f deleted file mode 100644 index 48a7f5e2d..000000000 --- a/src/mo_two_e_ints/no_vvvv.irp.f +++ /dev/null @@ -1,88 +0,0 @@ - -subroutine four_idx_novvvv_old - use map_module - use bitmasks - implicit none - BEGIN_DOC - ! Retransform MO integrals for next CAS-SCF step - END_DOC - integer(bit_kind) :: mask_ijkl(N_int,4) - integer(bit_kind) :: mask_ijk(N_int,3) - - print*,'Using partial transformation' - print*,'It will not transform all integrals with at least 3 indices within the virtuals' - integer :: i,j,k,l - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I I I !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 4 - ! - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = core_inact_act_bitmask_4(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I V V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 2 (virt) ^2 - ! = J_iv - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = virt_bitmask(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - - ! (core+inact+act) ^ 2 (virt) ^2 - ! = (iv|iv) - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! V V V !!!!!!!!!!!!!!!!!!!!!!! -! if(.not.no_vvv_integrals)then - print*, '' - print*, ' and ' - do i = 1,N_int - mask_ijk(i,1) = virt_bitmask(i,1) - mask_ijk(i,2) = virt_bitmask(i,1) - mask_ijk(i,3) = virt_bitmask(i,1) - enddo - call add_integrals_to_map_three_indices(mask_ijk) -! endif - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I I I V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 3 (virt) ^1 - ! - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,3) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map(mask_ijkl) - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! I V V V !!!!!!!!!!!!!!!!!!!! - ! (core+inact+act) ^ 1 (virt) ^3 - ! -! if(.not.no_ivvv_integrals)then - print*, '' - print*, '' - do i = 1,N_int - mask_ijkl(i,1) = core_inact_act_bitmask_4(i,1) - mask_ijkl(i,2) = virt_bitmask(i,1) - mask_ijkl(i,3) = virt_bitmask(i,1) - mask_ijkl(i,4) = virt_bitmask(i,1) - enddo - call add_integrals_to_map_no_exit_34(mask_ijkl) -end diff --git a/src/mol_properties/multi_s_dipole_moment.irp.f b/src/mol_properties/multi_s_dipole_moment.irp.f index 913ae2f3f..8aae3bf47 100644 --- a/src/mol_properties/multi_s_dipole_moment.irp.f +++ b/src/mol_properties/multi_s_dipole_moment.irp.f @@ -18,7 +18,7 @@ -BEGIN_PROVIDER [double precision, multi_s_dipole_moment, (N_states, N_states)] + BEGIN_PROVIDER [double precision, multi_s_dipole_moment , (N_states, N_states)] &BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment, (N_states, N_states)] &BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment, (N_states, N_states)] &BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment, (N_states, N_states)] @@ -40,27 +40,153 @@ ! gamma^{nm}: density matrix \bra{\Psi^n} a^{\dagger}_a a_i \ket{\Psi^m} END_DOC - integer :: istate,jstate ! States - integer :: i,j ! general spatial MOs + integer :: istate, jstate ! States + integer :: i, j ! general spatial MOs double precision :: nuclei_part_x, nuclei_part_y, nuclei_part_z multi_s_x_dipole_moment = 0.d0 multi_s_y_dipole_moment = 0.d0 multi_s_z_dipole_moment = 0.d0 + + if(8.d0*mo_num*mo_num*n_states*n_states*1d-9 .lt. 200.d0) then - do jstate = 1, N_states - do istate = 1, N_states - - do i = 1, mo_num - do j = 1, mo_num - multi_s_x_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_x(j,i) - multi_s_y_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_y(j,i) - multi_s_z_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_z(j,i) - enddo + do jstate = 1, N_states + do istate = 1, N_states + do i = 1, mo_num + do j = 1, mo_num + multi_s_x_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_x(j,i) + multi_s_y_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_y(j,i) + multi_s_z_dipole_moment(istate,jstate) -= one_e_tr_dm_mo(j,i,istate,jstate) * mo_dipole_z(j,i) + enddo + enddo enddo - enddo - enddo + + else + + ! no enouph memory + ! on the fly scheme + + PROVIDE psi_det_alpha_unique psi_det_beta_unique + + integer :: l, k_a, k_b + integer :: occ(N_int*bit_kind_size,2) + integer :: h1, h2, p1, p2, degree + integer :: exc(0:2,2), n_occ(2) + integer :: krow, kcol, lrow, lcol + integer(bit_kind) :: tmp_det(N_int,2), tmp_det2(N_int) + double precision :: ck, ckl, phase + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j, l, k_a, k_b, istate, jstate, occ, ck, ckl, h1, h2, p1, p2, exc, & + !$OMP phase, degree, n_occ, krow, kcol, lrow, lcol, tmp_det, tmp_det2) & + !$OMP SHARED(N_int, N_states, elec_alpha_num, elec_beta_num, N_det, & + !$OMP psi_bilinear_matrix_rows, psi_bilinear_matrix_columns, & + !$OMP psi_bilinear_matrix_transp_rows, psi_bilinear_matrix_transp_columns, & + !$OMP psi_det_alpha_unique, psi_det_beta_unique, & + !$OMP psi_bilinear_matrix_values, psi_bilinear_matrix_transp_values, & + !$OMP mo_dipole_x, mo_dipole_y, mo_dipole_z, & + !$OMP multi_s_x_dipole_moment, multi_s_y_dipole_moment, multi_s_z_dipole_moment) + !$OMP DO COLLAPSE(2) + do istate = 1, N_states + do jstate = 1, N_states + + do k_a = 1, N_det + krow = psi_bilinear_matrix_rows (k_a) + kcol = psi_bilinear_matrix_columns(k_a) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + ck = psi_bilinear_matrix_values(k_a,istate)*psi_bilinear_matrix_values(k_a,jstate) + do l = 1, elec_alpha_num + j = occ(l,1) + multi_s_x_dipole_moment(istate,jstate) -= ck * mo_dipole_x(j,j) + multi_s_y_dipole_moment(istate,jstate) -= ck * mo_dipole_y(j,j) + multi_s_z_dipole_moment(istate,jstate) -= ck * mo_dipole_z(j,j) + enddo + + if (k_a == N_det) cycle + l = k_a + 1 + lrow = psi_bilinear_matrix_rows (l) + lcol = psi_bilinear_matrix_columns(l) + ! Fix beta determinant, loop over alphas + do while (lcol == kcol) + tmp_det2(:) = psi_det_alpha_unique(:,lrow) + call get_excitation_degree_spin(tmp_det(1,1), tmp_det2, degree, N_int) + if (degree == 1) then + exc = 0 + call get_single_excitation_spin(tmp_det(1,1), tmp_det2, exc, phase, N_int) + call decode_exc_spin(exc, h1, p1, h2, p2) + ckl = psi_bilinear_matrix_values(k_a,istate)*psi_bilinear_matrix_values(l,jstate) * phase + multi_s_x_dipole_moment(istate,jstate) -= ckl * mo_dipole_x(h1,p1) + multi_s_y_dipole_moment(istate,jstate) -= ckl * mo_dipole_y(h1,p1) + multi_s_z_dipole_moment(istate,jstate) -= ckl * mo_dipole_z(h1,p1) + ckl = psi_bilinear_matrix_values(k_a,jstate)*psi_bilinear_matrix_values(l,istate) * phase + multi_s_x_dipole_moment(istate,jstate) -= ckl * mo_dipole_x(p1,h1) + multi_s_y_dipole_moment(istate,jstate) -= ckl * mo_dipole_y(p1,h1) + multi_s_z_dipole_moment(istate,jstate) -= ckl * mo_dipole_z(p1,h1) + endif + l = l+1 + if (l > N_det) exit + lrow = psi_bilinear_matrix_rows (l) + lcol = psi_bilinear_matrix_columns(l) + enddo + enddo ! k_a + + do k_b = 1, N_det + krow = psi_bilinear_matrix_transp_rows (k_b) + kcol = psi_bilinear_matrix_transp_columns(k_b) + + tmp_det(1:N_int,1) = psi_det_alpha_unique(1:N_int,krow) + tmp_det(1:N_int,2) = psi_det_beta_unique (1:N_int,kcol) + + ! Diagonal part + call bitstring_to_list_ab(tmp_det, occ, n_occ, N_int) + ck = psi_bilinear_matrix_transp_values(k_b,istate)*psi_bilinear_matrix_transp_values(k_b,jstate) + do l = 1, elec_beta_num + j = occ(l,2) + multi_s_x_dipole_moment(istate,jstate) -= ck * mo_dipole_x(j,j) + multi_s_y_dipole_moment(istate,jstate) -= ck * mo_dipole_y(j,j) + multi_s_z_dipole_moment(istate,jstate) -= ck * mo_dipole_z(j,j) + enddo + + if (k_b == N_det) cycle + l = k_b+1 + lrow = psi_bilinear_matrix_transp_rows (l) + lcol = psi_bilinear_matrix_transp_columns(l) + ! Fix beta determinant, loop over alphas + do while (lrow == krow) + tmp_det2(:) = psi_det_beta_unique(:,lcol) + call get_excitation_degree_spin(tmp_det(1,2), tmp_det2, degree, N_int) + if (degree == 1) then + exc = 0 + call get_single_excitation_spin(tmp_det(1,2), tmp_det2, exc, phase, N_int) + call decode_exc_spin(exc, h1, p1, h2, p2) + ckl = psi_bilinear_matrix_transp_values(k_b,istate)*psi_bilinear_matrix_transp_values(l,jstate) * phase + multi_s_x_dipole_moment(istate,jstate) -= ckl * mo_dipole_x(h1,p1) + multi_s_y_dipole_moment(istate,jstate) -= ckl * mo_dipole_y(h1,p1) + multi_s_z_dipole_moment(istate,jstate) -= ckl * mo_dipole_z(h1,p1) + ckl = psi_bilinear_matrix_transp_values(k_b,jstate)*psi_bilinear_matrix_transp_values(l,istate) * phase + multi_s_x_dipole_moment(istate,jstate) -= ckl * mo_dipole_x(p1,h1) + multi_s_y_dipole_moment(istate,jstate) -= ckl * mo_dipole_y(p1,h1) + multi_s_z_dipole_moment(istate,jstate) -= ckl * mo_dipole_z(p1,h1) + endif + l = l+1 + if (l > N_det) exit + lrow = psi_bilinear_matrix_transp_rows (l) + lcol = psi_bilinear_matrix_transp_columns(l) + enddo + enddo ! k_b + + enddo ! istate + enddo ! jstate + !$OMP END DO + !$OMP END PARALLEL + + endif ! memory condition ! Nuclei part nuclei_part_x = 0.d0 @@ -91,3 +217,42 @@ enddo END_PROVIDER + +! --- + + BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment_eigenvec, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment_eigenvec, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment_eigenvec, (N_states, N_states)] +&BEGIN_PROVIDER [double precision, multi_s_x_dipole_moment_eigenval, (N_states)] +&BEGIN_PROVIDER [double precision, multi_s_y_dipole_moment_eigenval, (N_states)] +&BEGIN_PROVIDER [double precision, multi_s_z_dipole_moment_eigenval, (N_states)] + + implicit none + double precision, allocatable :: eigval(:), eigvec(:,:), A(:,:) + + PROVIDE multi_s_x_dipole_moment multi_s_y_dipole_moment multi_s_z_dipole_moment + + allocate(A(N_states,N_states), eigvec(N_states,N_states), eigval(N_states)) + + A = multi_s_x_dipole_moment + call lapack_diag(eigval(1), eigvec(1,1), A(1,1), N_states, N_states) + multi_s_x_dipole_moment_eigenval = eigval + multi_s_x_dipole_moment_eigenvec = eigvec + + A = multi_s_y_dipole_moment + call lapack_diag(eigval(1), eigvec(1,1), A(1,1), N_states, N_states) + multi_s_y_dipole_moment_eigenval = eigval + multi_s_y_dipole_moment_eigenvec = eigvec + + A = multi_s_z_dipole_moment + call lapack_diag(eigval(1), eigvec(1,1), A(1,1), N_states, N_states) + multi_s_z_dipole_moment_eigenval = eigval + multi_s_z_dipole_moment_eigenvec = eigvec + + deallocate(A, eigvec, eigval) + +END_PROVIDER + +! --- + + diff --git a/src/mol_properties/print_properties.irp.f b/src/mol_properties/print_properties.irp.f index af413a883..c392c1483 100644 --- a/src/mol_properties/print_properties.irp.f +++ b/src/mol_properties/print_properties.irp.f @@ -100,7 +100,7 @@ subroutine print_transition_dipole_moment dip_str = d_x**2 + d_y**2 + d_z**2 d = multi_s_dipole_moment(istate,jstate) f = 2d0/3d0 * d * d * dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)) - write(*,'(I4,I4,A4,I3,6(F12.6))') (istate-1), (jstate-1), ' ->', (istate-1), d_x, d_y, d_z, d, dip_str, f + write(*,'(I4,I4,A4,I3,6(F12.6))') (jstate -1) * (2*N_states-jstate)/2 + istate - jstate, (jstate-1), ' ->', (istate-1), d_x, d_y, d_z, d, dip_str, f enddo enddo @@ -117,7 +117,7 @@ subroutine print_transition_dipole_moment dip_str = d_x**2 + d_y**2 + d_z**2 f = 2d0/3d0 * d * d * dabs(ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)) d = multi_s_dipole_moment(istate,jstate) * au_to_D - write(*,'(I4,I4,A4,I3,6(F12.6))') (istate-1), (jstate-1), ' ->', (istate-1), d_x, d_y, d_z, d, dip_str, f + write(*,'(I4,I4,A4,I3,6(F12.6))') (jstate -1) * (2*N_states-jstate)/2 + istate - jstate, (jstate-1), ' ->', (istate-1), d_x, d_y, d_z, d, dip_str, f enddo enddo print*,'==============================================' @@ -181,10 +181,9 @@ subroutine print_oscillator_strength ! Mixed gauge f_m = 2d0/3d0 * d * v - write(*,'(A19,I3,A9,F10.6,A5,F7.1,A10,F9.6,A6,F9.6,A6,F9.6,A8,F7.3)') ' # Transition n.', (istate-1), ': Excit.=', dabs((ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)))*ha_to_ev, & + write(*,'(A19,I3,A9,F10.6,A5,F7.1,A10,F9.6,A6,F9.6,A6,F9.6,A8,F7.3)') ' # Transition n.', (jstate -1) * (2*N_states-jstate)/2 + istate - jstate, ': Excit.=', dabs((ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)))*ha_to_ev, & ' eV ( ',dabs((ci_energy_no_diag(istate) - ci_energy_no_diag(jstate)))*Ha_to_nm,' nm), f_l=',f_l, ', f_v=', f_v, ', f_m=', f_m, ', =', s2_values(istate) !write(*,'(I4,I4,A4,I3,A6,F6.1,A6,F6.1)') (istate-1), (jstate-1), ' ->', (istate-1), ', %T1=', percent_exc(2,istate), ', %T2=',percent_exc(3,istate) - enddo enddo diff --git a/src/mu_of_r/EZFIO.cfg b/src/mu_of_r/EZFIO.cfg index a66b00efa..7a39b0121 100644 --- a/src/mu_of_r/EZFIO.cfg +++ b/src/mu_of_r/EZFIO.cfg @@ -6,9 +6,9 @@ size: (becke_numerical_grid.n_points_final_grid,determinants.n_states) [mu_of_r_potential] type: character*(32) -doc: type of potential for the mu(r) interaction: can be [ hf| cas_full | cas_truncated | pure_act] +doc: type of potential for the mu(r) interaction: can be [ hf| hf_sparse | cas_full | cas_truncated | pure_act] interface: ezfio, provider, ocaml -default: hf +default: hf_sparse [io_mu_of_r] type: Disk_access diff --git a/src/mu_of_r/basis_def.irp.f b/src/mu_of_r/basis_def.irp.f index fff9f5810..e433f4d86 100644 --- a/src/mu_of_r/basis_def.irp.f +++ b/src/mu_of_r/basis_def.irp.f @@ -114,3 +114,48 @@ enddo enddo END_PROVIDER + +! BEGIN_PROVIDER [integer, n_docc_val_orb_for_cas] +!&BEGIN_PROVIDER [integer, n_max_docc_val_orb_for_cas] +! implicit none +! BEGIN_DOC +! ! Number of DOUBLY OCCUPIED VALENCE ORBITALS for the CAS wave function +! ! +! ! This determines the size of the space \mathcal{A} of Eqs. (15-16) of Phys.Chem.Lett.2019, 10, 2931 2937 +! END_DOC +! integer :: i +! n_docc_val_orb_for_cas = 0 +! ! You browse the BETA ELECTRONS and check if its not a CORE ORBITAL +! do i = 1, elec_beta_num +! if( trim(mo_class(i))=="Inactive" & +! .or. trim(mo_class(i))=="Active" & +! .or. trim(mo_class(i))=="Virtual" )then +! n_docc_val_orb_for_cas +=1 +! endif +! enddo +! n_max_docc_val_orb_for_cas = maxval(n_docc_val_orb_for_cas) +! +!END_PROVIDER +! +!BEGIN_PROVIDER [integer, list_doc_valence_orb_for_cas, (n_max_docc_val_orb_for_cas)] +! implicit none +! BEGIN_DOC +! ! List of OCCUPIED valence orbitals for each spin to build the f_{HF}(r_1,r_2) function +! ! +! ! This corresponds to ALL OCCUPIED orbitals in the HF wave function, except those defined as "core" +! ! +! ! This determines the space \mathcal{A} of Eqs. (15-16) of Phys.Chem.Lett.2019, 10, 2931 2937 +! END_DOC +! j = 0 +! ! You browse the BETA ELECTRONS and check if its not a CORE ORBITAL +! do i = 1, elec_beta_num +! if( trim(mo_class(i))=="Inactive" & +! .or. trim(mo_class(i))=="Active" & +! .or. trim(mo_class(i))=="Virtual" )then +! j +=1 +! list_doc_valence_orb_for_cas(j) = i +! endif +! enddo +! +!END_PROVIDER + diff --git a/src/mu_of_r/f_hf_cholesky.irp.f b/src/mu_of_r/f_hf_cholesky.irp.f new file mode 100644 index 000000000..179b80dd0 --- /dev/null +++ b/src/mu_of_r/f_hf_cholesky.irp.f @@ -0,0 +1,420 @@ +BEGIN_PROVIDER [integer, list_couple_hf_orb_r1, (2,n_couple_orb_r1)] + implicit none + integer :: ii,i,mm,m,itmp + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + list_couple_hf_orb_r1(1,itmp) = i + list_couple_hf_orb_r1(2,itmp) = m + enddo + enddo +END_PROVIDER + + +BEGIN_PROVIDER [integer, list_couple_hf_orb_r2, (2,n_couple_orb_r2)] + implicit none + integer :: ii,i,mm,m,itmp + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + list_couple_hf_orb_r2(1,itmp) = i + list_couple_hf_orb_r2(2,itmp) = m + enddo + enddo +END_PROVIDER + + +BEGIN_PROVIDER [integer, n_couple_orb_r1] + implicit none + BEGIN_DOC + ! number of couples of alpha occupied times any basis orbital + END_DOC + n_couple_orb_r1 = n_occ_val_orb_for_hf(1) * n_basis_orb +END_PROVIDER + +BEGIN_PROVIDER [integer, n_couple_orb_r2] + implicit none + BEGIN_DOC + ! number of couples of beta occupied times any basis orbital + END_DOC + n_couple_orb_r2 = n_occ_val_orb_for_hf(2) * n_basis_orb +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mos_times_cholesky_r1, (cholesky_mo_num,n_points_final_grid)] + implicit none + BEGIN_DOC + ! V1_AR = \sum_{I}V_AI Phi_IR where "R" specifies the index of the grid point and A the number of cholesky point + ! + ! here Phi_IR is phi_i(R)xphi_b(R) for r1 and V_AI = (ib|A) chollesky vector + END_DOC + double precision, allocatable :: mos_ib_r1(:,:),mo_chol_r1(:,:) + double precision, allocatable :: test(:,:) + double precision :: mo_i_r1,mo_b_r1 + integer :: ii,i,mm,m,itmp,ipoint,ll + allocate(mos_ib_r1(n_couple_orb_r1,n_points_final_grid)) + allocate(mo_chol_r1(cholesky_mo_num,n_couple_orb_r1)) + + do ipoint = 1, n_points_final_grid + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + itmp += 1 + mos_ib_r1(itmp,ipoint) = mo_i_r1 * mo_b_r1 + enddo + enddo + enddo + + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + do ll = 1, cholesky_mo_num + mo_chol_r1(ll,itmp) = cholesky_mo_transp(ll,m,i) + enddo + enddo + enddo + + call get_AB_prod(mo_chol_r1,cholesky_mo_num,n_couple_orb_r1,mos_ib_r1,n_points_final_grid,mos_times_cholesky_r1) + + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mos_times_cholesky_r2, (cholesky_mo_num,n_points_final_grid)] + implicit none + BEGIN_DOC + ! V1_AR = \sum_{I}V_AI Phi_IR where "R" specifies the index of the grid point and A the number of cholesky point + ! + ! here Phi_IR is phi_i(R)xphi_b(R) for r2 and V_AI = (ib|A) chollesky vector + END_DOC + double precision, allocatable :: mos_ib_r2(:,:),mo_chol_r2(:,:) + double precision, allocatable :: test(:,:) + double precision :: mo_i_r2,mo_b_r2 + integer :: ii,i,mm,m,itmp,ipoint,ll + allocate(mos_ib_r2(n_couple_orb_r2,n_points_final_grid)) + allocate(mo_chol_r2(cholesky_mo_num,n_couple_orb_r2)) + + do ipoint = 1, n_points_final_grid + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + mo_i_r2 = mos_in_r_array_omp(i,ipoint) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r2 = mos_in_r_array_omp(m,ipoint) + itmp += 1 + mos_ib_r2(itmp,ipoint) = mo_i_r2 * mo_b_r2 + enddo + enddo + enddo + + itmp = 0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + itmp += 1 + do ll = 1, cholesky_mo_num + mo_chol_r2(ll,itmp) = cholesky_mo_transp(ll,m,i) + enddo + enddo + enddo + + call get_AB_prod(mo_chol_r2,cholesky_mo_num,n_couple_orb_r2,mos_ib_r2,n_points_final_grid,mos_times_cholesky_r2) + +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, f_hf_cholesky, (n_points_final_grid)] + implicit none + integer :: ipoint,m,k + !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ + !! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ + !! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R) + !! = \sum_A V_AR G_AR + !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI + double precision :: u_dot_v,wall0,wall1 + if(elec_alpha_num == elec_beta_num)then + print*,'providing f_hf_cholesky ...' + call wall_time(wall0) + provide mos_times_cholesky_r1 + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,m) & + !$OMP ShARED (mos_times_cholesky_r1,cholesky_mo_num,f_hf_cholesky,n_points_final_grid) + do ipoint = 1, n_points_final_grid + f_hf_cholesky(ipoint) = 0.d0 + do m = 1, cholesky_mo_num + f_hf_cholesky(ipoint) = f_hf_cholesky(ipoint) + & + mos_times_cholesky_r1(m,ipoint) * mos_times_cholesky_r1(m,ipoint) + enddo + f_hf_cholesky(ipoint) *= 2.D0 + enddo + !$OMP END PARALLEL DO + + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky = ',wall1-wall0 + free mos_times_cholesky_r1 + else + print*,'providing f_hf_cholesky ...' + call wall_time(wall0) + provide mos_times_cholesky_r2 mos_times_cholesky_r1 + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,m) & + !$OMP ShARED (mos_times_cholesky_r2,mos_times_cholesky_r1,cholesky_mo_num,f_hf_cholesky,n_points_final_grid) + do ipoint = 1, n_points_final_grid + f_hf_cholesky(ipoint) = 0.D0 + do m = 1, cholesky_mo_num + f_hf_cholesky(ipoint) = f_hf_cholesky(ipoint) + & + mos_times_cholesky_r2(m,ipoint)*mos_times_cholesky_r1(m,ipoint) + enddo + f_hf_cholesky(ipoint) *= 2.D0 + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky = ',wall1-wall0 + free mos_times_cholesky_r2 mos_times_cholesky_r1 + endif +END_PROVIDER + +BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse, (n_points_final_grid)] + implicit none + integer :: ipoint,m,mm,i,ii,p + !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ + !! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ + !! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R) + !! = \sum_A V_AR G_AR + !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI + double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1 + double precision :: thresh_1,thresh_2 + double precision, allocatable :: accu_vec(:),delta_vec(:) + thresh_2 = ao_cholesky_threshold * 100.d0 + thresh_1 = dsqrt(thresh_2) + provide cholesky_mo_transp + if(elec_alpha_num == elec_beta_num)then + call wall_time(wall0) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE (accu_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp,thresh_1,thresh_2) & + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse,n_points_final_grid,cholesky_mo_transp,n_basis_orb) + allocate(accu_vec(cholesky_mo_num)) + !$OMP DO + do ipoint = 1, n_points_final_grid + f_hf_cholesky_sparse(ipoint) = 0.d0 + accu_vec = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint)*mo_i_r1 + if(dabs(mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + accu_vec(p) = accu_vec(p) + mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo + enddo + enddo + do p = 1, cholesky_mo_num + f_hf_cholesky_sparse(ipoint) = f_hf_cholesky_sparse(ipoint) + accu_vec(p) * accu_vec(p) + enddo + f_hf_cholesky_sparse(ipoint) *= 2.D0 + enddo + !$OMP END DO + deallocate(accu_vec) + !$OMP END PARALLEL + + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky_sparse = ',wall1-wall0 + else + call wall_time(wall0) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE (accu_vec,delta_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp,thresh_1,thresh_2) & + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse,n_points_final_grid,cholesky_mo_transp,n_basis_orb) + allocate(accu_vec(cholesky_mo_num),delta_vec(cholesky_mo_num)) + !$OMP DO + do ipoint = 1, n_points_final_grid + f_hf_cholesky_sparse(ipoint) = 0.d0 + accu_vec = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + accu_vec(p) = accu_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo + enddo + enddo + delta_vec = 0.d0 + do ii = n_occ_val_orb_for_hf(2)+1,n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + delta_vec(p) = delta_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo + enddo + enddo + do p = 1, cholesky_mo_num + f_hf_cholesky_sparse(ipoint) = f_hf_cholesky_sparse(ipoint) + accu_vec(p) * accu_vec(p) + accu_vec(p) * delta_vec(p) + enddo + f_hf_cholesky_sparse(ipoint) *= 2.D0 + enddo + !$OMP END DO + deallocate(accu_vec) + !$OMP END PARALLEL + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky_sparse = ',wall1-wall0 + endif +END_PROVIDER + +BEGIN_PROVIDER [ double precision, f_hf_cholesky_sparse_bis, (n_points_final_grid)] + implicit none + integer :: ipoint,m,mm,i,ii,p + !!f(R) = \sum_{I} \sum_{J} Phi_I(R) Phi_J(R) V_IJ + !! = \sum_{I}\sum_{J}\sum_A Phi_I(R) Phi_J(R) V_AI V_AJ + !! = \sum_A \sum_{I}Phi_I(R)V_AI \sum_{J}V_AJ Phi_J(R) + !! = \sum_A V_AR G_AR + !! V_AR = \sum_{I}Phi_IR V_AI = \sum_{I}Phi^t_RI V_AI + double precision :: u_dot_v,wall0,wall1,accu_1, accu_2,mo_i_r1,mo_b_r1 + double precision :: thresh_1,thresh_2 + double precision, allocatable :: accu_vec(:),delta_vec(:) + thresh_2 = ao_cholesky_threshold * 100.d0 + thresh_1 = dsqrt(thresh_2) + provide cholesky_mo_transp + if(elec_alpha_num == elec_beta_num)then + call wall_time(wall0) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE (accu_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,mos_in_r_array_omp,aos_in_r_array,thresh_1,thresh_2) & + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse_bis,n_points_final_grid,cholesky_semi_mo_transp_simple,ao_num) + allocate(accu_vec(cholesky_mo_num)) + !$OMP DO + do ipoint = 1, n_points_final_grid + f_hf_cholesky_sparse_bis(ipoint) = 0.d0 + accu_vec = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle + do mm = 1, ao_num ! electron 1 + mo_b_r1 = aos_in_r_array(mm,ipoint)*mo_i_r1 + if(dabs(mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + accu_vec(p) = accu_vec(p) + mo_b_r1 * cholesky_semi_mo_transp_simple(p,mm,i) + enddo + enddo + enddo + do p = 1, cholesky_mo_num + f_hf_cholesky_sparse_bis(ipoint) = f_hf_cholesky_sparse_bis(ipoint) + accu_vec(p) * accu_vec(p) + enddo + f_hf_cholesky_sparse_bis(ipoint) *= 2.D0 + enddo + !$OMP END DO + deallocate(accu_vec) + !$OMP END PARALLEL + + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky_sparse_bis = ',wall1-wall0 + else + call wall_time(wall0) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE (accu_vec,delta_vec,ipoint,p,ii,i,mm,m,mo_i_r1,mo_b_r1) & + !$OMP ShARED (n_occ_val_orb_for_hf,list_valence_orb_for_hf,list_basis,mos_in_r_array_omp,thresh_1,thresh_2) & + !$OMP ShARED (cholesky_mo_num,f_hf_cholesky_sparse_bis,n_points_final_grid,cholesky_mo_transp,n_basis_orb) + allocate(accu_vec(cholesky_mo_num),delta_vec(cholesky_mo_num)) + !$OMP DO + do ipoint = 1, n_points_final_grid + f_hf_cholesky_sparse_bis(ipoint) = 0.d0 + accu_vec = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + accu_vec(p) = accu_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo + enddo + enddo + delta_vec = 0.d0 + do ii = n_occ_val_orb_for_hf(2)+1,n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + mo_i_r1 = mos_in_r_array_omp(i,ipoint) + if(dabs(mo_i_r1).lt.thresh_1)cycle + do mm = 1, n_basis_orb ! electron 1 + m = list_basis(mm) + mo_b_r1 = mos_in_r_array_omp(m,ipoint) + if(dabs(mo_i_r1*mo_b_r1).lt.thresh_2)cycle + do p = 1, cholesky_mo_num + delta_vec(p) = delta_vec(p) + mo_i_r1 * mo_b_r1 * cholesky_mo_transp(p,m,i) + enddo + enddo + enddo + do p = 1, cholesky_mo_num + f_hf_cholesky_sparse_bis(ipoint) = f_hf_cholesky_sparse_bis(ipoint) + accu_vec(p) * accu_vec(p) + accu_vec(p) * delta_vec(p) + enddo + f_hf_cholesky_sparse_bis(ipoint) *= 2.D0 + enddo + !$OMP END DO + deallocate(accu_vec) + !$OMP END PARALLEL + call wall_time(wall1) + print*,'Time to provide f_hf_cholesky_sparse_bis = ',wall1-wall0 + endif +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, on_top_hf_grid, (n_points_final_grid)] + implicit none + integer :: ipoint,i,ii + double precision :: dm_a, dm_b,wall0,wall1 + print*,'providing on_top_hf_grid ...' + provide mos_in_r_array_omp + call wall_time(wall0) + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,dm_a,dm_b,ii,i) & + !$OMP ShARED (n_points_final_grid,n_occ_val_orb_for_hf,mos_in_r_array_omp,list_valence_orb_for_hf,on_top_hf_grid) + do ipoint = 1, n_points_final_grid + dm_a = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(1) + i = list_valence_orb_for_hf(ii,1) + dm_a += mos_in_r_array_omp(i,ipoint)*mos_in_r_array_omp(i,ipoint) + enddo + dm_b = 0.d0 + do ii = 1, n_occ_val_orb_for_hf(2) + i = list_valence_orb_for_hf(ii,2) + dm_b += mos_in_r_array_omp(i,ipoint)*mos_in_r_array_omp(i,ipoint) + enddo + on_top_hf_grid(ipoint) = 2.D0 * dm_a*dm_b + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide on_top_hf_grid = ',wall1-wall0 +END_PROVIDER + diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f index 6b49b9dfd..f2bb71459 100644 --- a/src/mu_of_r/mu_of_r_conditions.irp.f +++ b/src/mu_of_r/mu_of_r_conditions.irp.f @@ -13,7 +13,6 @@ integer :: ipoint,istate double precision :: wall0,wall1 print*,'providing mu_of_r ...' -! PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals call wall_time(wall0) if (read_mu_of_r) then @@ -26,6 +25,10 @@ do ipoint = 1, n_points_final_grid if(mu_of_r_potential.EQ."hf")then mu_of_r_prov(ipoint,istate) = mu_of_r_hf(ipoint) + else if(mu_of_r_potential.EQ."hf_old")then + mu_of_r_prov(ipoint,istate) = mu_of_r_hf_old(ipoint) + else if(mu_of_r_potential.EQ."hf_sparse")then + mu_of_r_prov(ipoint,istate) = mu_of_r_hf_sparse(ipoint) else if(mu_of_r_potential.EQ."cas_full".or.mu_of_r_potential.EQ."cas_truncated".or.mu_of_r_potential.EQ."pure_act")then mu_of_r_prov(ipoint,istate) = mu_of_r_psi_cas(ipoint,istate) else @@ -61,15 +64,87 @@ END_DOC integer :: ipoint double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi - PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals print*,'providing mu_of_r_hf ...' call wall_time(wall0) + PROVIDE f_hf_cholesky on_top_hf_grid + sqpi = dsqrt(dacos(-1.d0)) + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & + !$OMP ShARED (n_points_final_grid,mu_of_r_hf,f_hf_cholesky,on_top_hf_grid,sqpi) + do ipoint = 1, n_points_final_grid + f_hf = f_hf_cholesky(ipoint) + on_top = on_top_hf_grid(ipoint) + if(on_top.le.1.d-12.or.f_hf.le.0.d0.or.f_hf * on_top.lt.0.d0)then + w_hf = 1.d+10 + else + w_hf = f_hf / on_top + endif + mu_of_r_hf(ipoint) = w_hf * sqpi * 0.5d0 + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide mu_of_r_hf = ',wall1-wall0 + END_PROVIDER + + BEGIN_PROVIDER [double precision, mu_of_r_hf_sparse, (n_points_final_grid) ] + implicit none + BEGIN_DOC + ! mu(r) computed with a HF wave function (assumes that HF MOs are stored in the EZFIO) + ! + ! corresponds to Eq. (37) of J. Chem. Phys. 149, 194301 (2018) but for \Psi^B = HF^B + ! + ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals + ! + ! in the two-body density matrix are excluded + END_DOC + integer :: ipoint + double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi + print*,'providing mu_of_r_hf_sparse ...' + call wall_time(wall0) + sqpi = dsqrt(dacos(-1.d0)) + PROVIDE f_hf_cholesky_sparse on_top_hf_grid + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & + !$OMP ShARED (n_points_final_grid,mu_of_r_hf_sparse,f_hf_cholesky_sparse,on_top_hf_grid,sqpi) + do ipoint = 1, n_points_final_grid + f_hf = f_hf_cholesky_sparse(ipoint) + on_top = on_top_hf_grid(ipoint) + if(on_top.le.1.d-12.or.f_hf.le.0.d0.or.f_hf * on_top.lt.0.d0)then + w_hf = 1.d+10 + else + w_hf = f_hf / on_top + endif + mu_of_r_hf_sparse(ipoint) = w_hf * sqpi * 0.5d0 + enddo + !$OMP END PARALLEL DO + call wall_time(wall1) + print*,'Time to provide mu_of_r_hf_sparse = ',wall1-wall0 + END_PROVIDER + + BEGIN_PROVIDER [double precision, mu_of_r_hf_old, (n_points_final_grid) ] + implicit none + BEGIN_DOC + ! mu(r) computed with a HF wave function (assumes that HF MOs are stored in the EZFIO) + ! + ! corresponds to Eq. (37) of J. Chem. Phys. 149, 194301 (2018) but for \Psi^B = HF^B + ! + ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals + ! + ! in the two-body density matrix are excluded + END_DOC + integer :: ipoint + double precision :: wall0,wall1,f_hf,on_top,w_hf,sqpi + PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals + print*,'providing mu_of_r_hf_old ...' + call wall_time(wall0) sqpi = dsqrt(dacos(-1.d0)) provide f_psi_hf_ab !$OMP PARALLEL DO & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint,f_hf,on_top,w_hf) & - !$OMP ShARED (n_points_final_grid,mu_of_r_hf,f_psi_hf_ab,on_top_hf_mu_r,sqpi) + !$OMP ShARED (n_points_final_grid,mu_of_r_hf_old,f_psi_hf_ab,on_top_hf_mu_r,sqpi) do ipoint = 1, n_points_final_grid f_hf = f_psi_hf_ab(ipoint) on_top = on_top_hf_mu_r(ipoint) @@ -78,13 +153,14 @@ else w_hf = f_hf / on_top endif - mu_of_r_hf(ipoint) = w_hf * sqpi * 0.5d0 + mu_of_r_hf_old(ipoint) = w_hf * sqpi * 0.5d0 enddo !$OMP END PARALLEL DO call wall_time(wall1) - print*,'Time to provide mu_of_r_hf = ',wall1-wall0 + print*,'Time to provide mu_of_r_hf_old = ',wall1-wall0 END_PROVIDER + BEGIN_PROVIDER [double precision, mu_of_r_psi_cas, (n_points_final_grid,N_states) ] implicit none BEGIN_DOC diff --git a/src/mu_of_r/test_proj_op.irp.f b/src/mu_of_r/test_proj_op.irp.f index f9aba094e..94052b186 100644 --- a/src/mu_of_r/test_proj_op.irp.f +++ b/src/mu_of_r/test_proj_op.irp.f @@ -12,10 +12,27 @@ program projected_operators mu_of_r_potential = "cas_full" touch mu_of_r_potential print*,'Using Valence Only functions' -! call test_f_HF_valence_ab -! call routine_full_mos -! call test_f_ii_valence_ab - call test_f_ia_valence_ab + call test_f_HF_valence_ab + call routine_full_mos + call test_f_ii_valence_ab + call test_f_ia_valence_ab call test_f_ii_ia_aa_valence_ab + call test +end + + +subroutine test + implicit none + integer :: i_point + double precision :: ref, new, accu, weight + accu = 0.d0 + do i_point = 1, n_points_final_grid + ref = f_hf_cholesky_sparse(i_point) + new = f_hf_cholesky_sparse_bis(i_point) + weight = final_weight_at_r_vector(i_point) + accu += dabs(ref - new) * weight + enddo + print*,'accu = ',accu + end diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 3f5c85494..e0fe5319a 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -217,7 +217,7 @@ subroutine Roothaan_Hall_SCF do while (i -void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) +void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only, int single_node) { int fd; int result; @@ -21,7 +21,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) perror("Error opening mmap file for reading"); exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); + map = mmap(NULL, bytes, PROT_READ, MAP_PRIVATE, fd, 0); } else { @@ -39,8 +39,8 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) perror("Error calling lseek() to stretch the file"); exit(EXIT_FAILURE); } - - result = write(fd, "", 1); + + result = write(fd, " ", 1); if (result != 1) { close(fd); printf("%s:\n", filename); @@ -48,7 +48,17 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (single_node == 1) { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); +/* + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_POPULATE | MAP_NONBLOCK | MAP_NORESERVE, fd, 0); + if (map == MAP_FAILED) { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + } +*/ + } else { + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + } } if (map == MAP_FAILED) { diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 20386b30b..4e7ca87da 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1856,7 +1856,7 @@ subroutine pivoted_cholesky( A, rank, tol, ndim, U) ! ! matrix A is destroyed inside this subroutine ! Cholesky vectors are stored in U -! dimension of U: U(1:rank, 1:n) +! dimension of U: U(1:n, 1:rank) ! U is allocated inside this subroutine ! rank is the number of Cholesky vectors depending on tol ! diff --git a/src/utils/map_functions.irp.f b/src/utils/map_functions.irp.f index 97d0e8bf0..e3a62b079 100644 --- a/src/utils/map_functions.irp.f +++ b/src/utils/map_functions.irp.f @@ -21,13 +21,13 @@ subroutine map_save_to_disk(filename,map) stop 'map already consolidated' endif - call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .False., c_pointer(1)) + call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .False., .False., c_pointer(1)) call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size +2_8/)) - call mmap(trim(filename)//'_consolidated_key', (/ n_elements /), cache_key_kind, fd(2), .False., c_pointer(2)) + call mmap(trim(filename)//'_consolidated_key', (/ n_elements /), cache_key_kind, fd(2), .False., .False., c_pointer(2)) call c_f_pointer(c_pointer(2),map % consolidated_key, (/ n_elements /)) - call mmap(trim(filename)//'_consolidated_value', (/ n_elements /), integral_kind, fd(3), .False., c_pointer(3)) + call mmap(trim(filename)//'_consolidated_value', (/ n_elements /), integral_kind, fd(3), .False., .False., c_pointer(3)) call c_f_pointer(c_pointer(3),map % consolidated_value, (/ n_elements /)) if (.not.associated(map%consolidated_key)) then @@ -85,15 +85,15 @@ subroutine map_load_from_disk(filename,map) stop 'map already consolidated' endif - call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., c_pointer(1)) + call mmap(trim(filename)//'_consolidated_idx', (/ map % map_size + 2_8 /), 8, fd(1), .True., .False., c_pointer(1)) call c_f_pointer(c_pointer(1),map % consolidated_idx, (/ map % map_size + 2_8/)) map% n_elements = map % consolidated_idx (map % map_size+2_8)-1_8 - call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., c_pointer(2)) + call mmap(trim(filename)//'_consolidated_key', (/ map % n_elements /), cache_key_kind, fd(2), .True., .False., c_pointer(2)) call c_f_pointer(c_pointer(2),map % consolidated_key, (/ map % n_elements /)) - call mmap(trim(filename)//'_consolidated_value', (/ map % n_elements /), integral_kind, fd(3), .True., c_pointer(3)) + call mmap(trim(filename)//'_consolidated_value', (/ map % n_elements /), integral_kind, fd(3), .True., .False., c_pointer(3)) call c_f_pointer(c_pointer(3),map % consolidated_value, (/ map % n_elements /)) l = 0_8 diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index e69bf71e0..e2e8dd76d 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -6,7 +6,7 @@ character*(128) :: env integer, external :: get_total_available_memory - qp_max_mem = get_total_available_memory() + qp_max_mem = max(get_total_available_memory() - 1,3) call write_int(6,qp_max_mem,'Total available memory (GB)') call getenv('QP_MAXMEM',env) if (trim(env) /= '') then @@ -79,6 +79,26 @@ subroutine total_memory(value) call unlock_io() end function +double precision function memory_of_double8(n) + implicit none + BEGIN_DOC +! Computes the memory required for n double precision elements in gigabytes. + END_DOC + integer*8, intent(in) :: n + double precision, parameter :: f = 8.d0 / (1024.d0*1024.d0*1024.d0) + memory_of_double8 = dble(n) * f +end function + +double precision function memory_of_int8(n) + implicit none + BEGIN_DOC +! Computes the memory required for n double precision elements in gigabytes. + END_DOC + integer*8, intent(in) :: n + double precision, parameter :: f = 4.d0 / (1024.d0*1024.d0*1024.d0) + memory_of_int8 = dble(n) * f +end function + double precision function memory_of_double(n) implicit none BEGIN_DOC diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index 41e60224d..e342b4222 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -7,12 +7,13 @@ module mmap_module ! File descriptors ! ---------------- - type(c_ptr) function c_mmap_fortran(filename, length, fd, read_only) bind(c,name='mmap_fortran') + type(c_ptr) function c_mmap_fortran(filename, length, fd, read_only, single_node) bind(c,name='mmap_fortran') use iso_c_binding character(c_char), intent(in) :: filename(*) integer(c_size_t), intent(in), value :: length integer(c_int), intent(out) :: fd integer(c_int), intent(in), value :: read_only + integer(c_int), intent(in), value :: single_node end function subroutine c_munmap_fortran(length, fd, map) bind(c,name='munmap_fortran') @@ -33,31 +34,33 @@ subroutine c_msync_fortran(length, fd, map) bind(c,name='msync_fortran') contains - subroutine mmap(filename, shape, bytes, fd, read_only, map) + subroutine mmap(filename, shape, bytes, fd, read_only, single_node, map) use iso_c_binding implicit none character*(*), intent(in) :: filename ! Name of the mapped file integer*8, intent(in) :: shape(:) ! Shape of the array to map integer, intent(in) :: bytes ! Number of bytes per element logical, intent(in) :: read_only ! If true, mmap is read-only + logical, intent(in) :: single_node! If true, mmap is on a single node integer, intent(out) :: fd ! File descriptor type(c_ptr), intent(out) :: map ! C Pointer integer(c_size_t) :: length - integer(c_int) :: fd_ + integer(c_int) :: fd_, read_only_, single_node_ integer :: i + read_only_ = 0 + single_node_ = 0 + if (read_only) read_only_ = 1 + if (single_node) single_node_ = 1 + length = int(bytes,8) do i=1,size(shape) length = length * shape(i) enddo - if (read_only) then - map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1) - else - map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 0) - endif + map = c_mmap_fortran( trim(filename)//char(0), length, fd_, read_only_, single_node_) fd = fd_ end subroutine diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index de01656b2..29ec8ed45 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -576,7 +576,7 @@ logical function is_same_spin(sigma_1, sigma_2) is_same_spin = .false. endif -end function is_same_spin +end ! --- @@ -596,7 +596,7 @@ function Kronecker_delta(i, j) result(delta) delta = 0.d0 endif -end function Kronecker_delta +end ! --- @@ -634,7 +634,81 @@ subroutine diagonalize_sym_matrix(N, A, e) print*,'Problem in diagonalize_sym_matrix (dsyev)!!' endif -end subroutine diagonalize_sym_matrix +end + +! --- + + +subroutine give_degen(A, n, shift, list_degen, n_degen_list) + + BEGIN_DOC + ! returns n_degen_list :: the number of degenerated SET of elements (i.e. with |A(i)-A(i+1)| below shift) + ! + ! for each of these sets, list_degen(1,i) = first degenerate element of the set i, + ! + ! list_degen(2,i) = last degenerate element of the set i. + END_DOC + + implicit none + + double precision, intent(in) :: A(n) + double precision, intent(in) :: shift + integer, intent(in) :: n + integer, intent(out) :: list_degen(2,n), n_degen_list + + integer :: i, j, n_degen, k + logical :: keep_on + double precision, allocatable :: Aw(:) + + list_degen = -1 + allocate(Aw(n)) + Aw = A + i=1 + k = 0 + do while(i.lt.n) + if(dabs(Aw(i)-Aw(i+1)).lt.shift)then + k+=1 + j=1 + list_degen(1,k) = i + keep_on = .True. + do while(keep_on) + if(i+j.gt.n)then + keep_on = .False. + exit + endif + if(dabs(Aw(i)-Aw(i+j)).lt.shift)then + j+=1 + else + keep_on=.False. + exit + endif + enddo + n_degen = j + list_degen(2,k) = list_degen(1,k)-1 + n_degen + j=0 + keep_on = .True. + do while(keep_on) + if(i+j+1.gt.n)then + keep_on = .False. + exit + endif + if(dabs(Aw(i+j)-Aw(i+j+1)).lt.shift)then + Aw(i+j) += (j-n_degen/2) * shift + j+=1 + else + keep_on = .False. + exit + endif + enddo + Aw(i+n_degen-1) += (n_degen-1-n_degen/2) * shift + i+=n_degen + else + i+=1 + endif + enddo + n_degen_list = k + +end ! --- diff --git a/src/utils_cc/EZFIO.cfg b/src/utils_cc/EZFIO.cfg index fb6d90342..7d50d66a8 100644 --- a/src/utils_cc/EZFIO.cfg +++ b/src/utils_cc/EZFIO.cfg @@ -58,6 +58,12 @@ doc: If true, the CCSD(T) will be computed. interface: ezfio,ocaml,provider default: False +[cc_par_t_stop] +type: double precision +doc: Stops the calculation when the statistical error bar is below the given value. +interface: ezfio,ocaml,provider +default: 1.e-5 + [cc_dev] type: logical doc: Only for dev purposes.