From c285aaca49d26ed4f3019aa2b9adc900dbc9e443 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 31 Jan 2025 17:14:49 +0100 Subject: [PATCH 1/9] Stabilize PT2 when expected accuracy is low --- external/ezfio | 2 +- src/cipsi_utils/pt2_stoch_routines.irp.f | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/ezfio b/external/ezfio index d02132ea7..dba01c4fe 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d02132ea79217c16fd24242e8f8b8a6c3ff68091 +Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 diff --git a/src/cipsi_utils/pt2_stoch_routines.irp.f b/src/cipsi_utils/pt2_stoch_routines.irp.f index 144d052db..162ab02c2 100644 --- a/src/cipsi_utils/pt2_stoch_routines.irp.f +++ b/src/cipsi_utils/pt2_stoch_routines.irp.f @@ -530,7 +530,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_ avg = E0 + pt2_data_S(t) % pt2(pt2_stoch_istate) / dble(c) avg2 = v0 + pt2_data_S(t) % variance(pt2_stoch_istate) / dble(c) avg3(:) = n0(:) + pt2_data_S(t) % overlap(:,pt2_stoch_istate) / dble(c) - if ((avg /= 0.d0) .or. (n == N_det_generators) ) then + if (((c>=10).and.(avg /= 0.d0)) .or. (n == N_det_generators) ) then do_exit = .true. endif if (qp_stop()) then From 4b9939e738fbecf831847c9047a8bd11b99abd9b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Feb 2025 13:41:46 +0100 Subject: [PATCH 2/9] Fix qp_exc_energy.py:195: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated --- scripts/qp_exc_energy.py | 12 ++++++++++-- src/davidson/u0_hs2_u0.irp.f | 4 ++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/qp_exc_energy.py b/scripts/qp_exc_energy.py index 44136311c..e08866e38 100755 --- a/scripts/qp_exc_energy.py +++ b/scripts/qp_exc_energy.py @@ -157,11 +157,15 @@ def compute(data): B = np.array( [ [ data[-1][0] ], [ data[-2][0] ] ] ) E0 = np.linalg.solve(A,B)[1] +E0 = E0[0] + A = np.array( [ [ data[-1][4], 1. 
], [ data[-2][4], 1. ] ] ) B = np.array( [ [ data[-1][3] ], [ data[-2][3] ] ] ) E1 = np.linalg.solve(A,B)[1] +E1 = E1[0] + average_2 = (E1-E0)*to_eV A = np.array( [ [ data[-1][1], 1. ], @@ -170,14 +174,18 @@ def compute(data): B = np.array( [ [ data[-1][0] ], [ data[-2][0] ], [ data[-3][0] ] ] ) -E0 = np.linalg.lstsq(A,B,rcond=None)[0][1] +E0 = np.linalg.lstsq(A,B,rcond=None)[0] +E0 = E0[1][0] + A = np.array( [ [ data[-1][4], 1. ], [ data[-2][4], 1. ], [ data[-3][4], 1. ] ] ) B = np.array( [ [ data[-1][3] ], [ data[-2][3] ], [ data[-3][3] ] ] ) -E1 = np.linalg.lstsq(A,B,rcond=None)[0][1] +E1 = np.linalg.lstsq(A,B,rcond=None)[0] +E1 = E1[1][0] + average_3 = (E1-E0)*to_eV exc = ((data[-1][3] + data[-1][4]) - (data[-1][0] + data[-1][1])) * to_eV diff --git a/src/davidson/u0_hs2_u0.irp.f b/src/davidson/u0_hs2_u0.irp.f index 3afe4ec6f..f2ce7aa9a 100644 --- a/src/davidson/u0_hs2_u0.irp.f +++ b/src/davidson/u0_hs2_u0.irp.f @@ -291,7 +291,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend, ASSERT (istart > 0) ASSERT (istep > 0) - !$OMP DO SCHEDULE(guided,64) + !$OMP DO SCHEDULE(dynamic,64) do k_a=istart+ishift,iend,istep krow = psi_bilinear_matrix_rows(k_a) @@ -469,7 +469,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend, enddo !$OMP END DO - !$OMP DO SCHEDULE(guided,64) + !$OMP DO SCHEDULE(dynamic,64) do k_a=istart+ishift,iend,istep From dc75f495001a13dd38209a220d7230fb5ad3e450 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Feb 2025 14:05:28 +0100 Subject: [PATCH 3/9] Added selected MRCI module --- src/cipsi_utils/pt2_stoch_routines.irp.f | 1 + src/mo_basis/utils.irp.f | 2 +- src/mrci/EZFIO.cfg | 24 +++++++++ src/mrci/NEED | 4 ++ src/mrci/README.rst | 17 +++++++ src/mrci/class.irp.f | 8 +++ src/mrci/mrci.irp.f | 64 ++++++++++++++++++++++++ src/mrci/save_energy.irp.f | 10 ++++ 8 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 src/mrci/EZFIO.cfg create mode 100644 
src/mrci/NEED create mode 100644 src/mrci/README.rst create mode 100644 src/mrci/class.irp.f create mode 100644 src/mrci/mrci.irp.f create mode 100644 src/mrci/save_energy.irp.f diff --git a/src/cipsi_utils/pt2_stoch_routines.irp.f b/src/cipsi_utils/pt2_stoch_routines.irp.f index 162ab02c2..68a3c9fcb 100644 --- a/src/cipsi_utils/pt2_stoch_routines.irp.f +++ b/src/cipsi_utils/pt2_stoch_routines.irp.f @@ -212,6 +212,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) ipos += 1 endif enddo + call write_int(6,pt2_stoch_istate,'State') call write_int(6,sum(pt2_F),'Number of tasks') call write_int(6,ipos,'Number of fragmented tasks') diff --git a/src/mo_basis/utils.irp.f b/src/mo_basis/utils.irp.f index 987c394ac..3f83b518b 100644 --- a/src/mo_basis/utils.irp.f +++ b/src/mo_basis/utils.irp.f @@ -228,7 +228,7 @@ subroutine mo_as_svd_vectors_of_mo_matrix_eig(matrix,lda,m,n,eig,label) call dgemm('N','N',ao_num,m,m,1.d0,mo_coef_new,size(mo_coef_new,1),U,size(U,1),0.d0,mo_coef,size(mo_coef,1)) do i=1,m - if (eig(i) > 1.d-20) then + if (D(i) > 1.d-20) then eig(i) = D(i) else eig(i) = 0.d0 diff --git a/src/mrci/EZFIO.cfg b/src/mrci/EZFIO.cfg new file mode 100644 index 000000000..c8efd4b9f --- /dev/null +++ b/src/mrci/EZFIO.cfg @@ -0,0 +1,24 @@ +[energy] +type: double precision +doc: Calculated Selected CASSD energy +interface: ezfio +size: (determinants.n_states) + +[energy_pt2] +type: double precision +doc: Calculated CASSD energy + PT2 +interface: ezfio +size: (determinants.n_states) + + +[do_ddci] +type: logical +doc: If true, remove purely inactive double excitations +interface: ezfio,provider,ocaml +default: False + +[do_only_1h1p] +type: logical +doc: If true, do only one hole/one particle excitations +interface: ezfio,provider,ocaml +default: False diff --git a/src/mrci/NEED b/src/mrci/NEED new file mode 100644 index 000000000..ad99293fb --- /dev/null +++ b/src/mrci/NEED @@ -0,0 +1,4 @@ +cipsi +generators_cas +selectors_full +davidson_undressed diff 
--git a/src/mrci/README.rst b/src/mrci/README.rst new file mode 100644 index 000000000..ac3c0139d --- /dev/null +++ b/src/mrci/README.rst @@ -0,0 +1,17 @@ +==== +mrci +==== + + +|CIPSI| algorithm in the multi-reference CI space (CAS + Singles and Doubles). + + +This module is the same as the :ref:`fci` module, except for the choice of the +generator and selector determinants. + +The inactive, active and virtual |MOs| will need to be set with the +:ref:`qp_set_mo_class` program. + +.. seealso:: + + The documentation of the :ref:`fci` module. diff --git a/src/mrci/class.irp.f b/src/mrci/class.irp.f new file mode 100644 index 000000000..271eb9307 --- /dev/null +++ b/src/mrci/class.irp.f @@ -0,0 +1,8 @@ +BEGIN_PROVIDER [ logical, do_only_cas ] + implicit none + BEGIN_DOC + ! In the CAS+SD case, always false + END_DOC + do_only_cas = .False. +END_PROVIDER + diff --git a/src/mrci/mrci.irp.f b/src/mrci/mrci.irp.f new file mode 100644 index 000000000..ef4a2454d --- /dev/null +++ b/src/mrci/mrci.irp.f @@ -0,0 +1,64 @@ +program mrci + implicit none + BEGIN_DOC +! Selected CAS+Singles and Doubles with stochastic selection +! and PT2. +! +! This program performs a |CIPSI|-like selected |CI| using a +! stochastic scheme for both the selection of the important Slater +! determinants and the computation of the |PT2| correction. This +! |CIPSI|-like algorithm will be performed for the lowest states of +! the variational space (see :option:`determinants n_states`). The +! program will stop when reaching at least one the two following +! conditions: +! +! * number of Slater determinants > :option:`determinants n_det_max` +! * |PT2| < :option:`perturbation pt2_max` +! +! The following other options can be of interest: +! +! :option:`determinants read_wf` +! When set to |false|, the program starts with a ROHF-like Slater +! determinant as a guess wave function. When set to |true|, the +! program starts with the wave function(s) stored in the |EZFIO| +! 
directory as guess wave function(s). +! +! :option:`determinants s2_eig` +! When set to |true|, the selection will systematically add all the +! necessary Slater determinants in order to have a pure spin wave +! function with an |S^2| value corresponding to +! :option:`determinants expected_s2`. +! +! For excited states calculations, it is recommended to start with +! :ref:`.cis.` or :ref:`.cisd.` guess wave functions, eventually in +! a restricted set of |MOs|, and to set :option:`determinants s2_eig` +! to |true|. +! + END_DOC + + PROVIDE all_mo_integrals + if (.not.is_zmq_slave) then + PROVIDE psi_det psi_coef + + write(json_unit,json_array_open_fmt) 'fci' + + double precision, allocatable :: Ev(:),PT2(:) + allocate(Ev(N_states), PT2(N_states)) + if (do_pt2) then + call run_stochastic_cipsi(Ev,PT2) + else + call run_cipsi + endif + + write(json_unit,json_dict_uopen_fmt) + write(json_unit,json_dict_close_fmtx) + write(json_unit,json_array_close_fmtx) + call json_close + + else + PROVIDE pt2_min_parallel_tasks + + call run_slave_cipsi + + endif +end diff --git a/src/mrci/save_energy.irp.f b/src/mrci/save_energy.irp.f new file mode 100644 index 000000000..28840b6cc --- /dev/null +++ b/src/mrci/save_energy.irp.f @@ -0,0 +1,10 @@ +subroutine save_energy(E,pt2) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. 
+ END_DOC + double precision, intent(in) :: E(N_states), pt2(N_states) + call ezfio_set_mrci_energy(E(1:N_states)) + call ezfio_set_mrci_energy_pt2(E(1:N_states)+pt2(1:N_states)) +end + From 0647a9db5f7173189cf38451a7032436b21f0d57 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Feb 2025 14:49:39 +0100 Subject: [PATCH 4/9] Starting optimization of open-shell ccsd --- src/ccsd/ccsd_space_orb.irp.f | 5 +- src/ccsd/ccsd_spin_orb.irp.f | 2 - src/ccsd/ccsd_spin_orb_sub.irp.f | 444 ++++++++++++++--------------- src/utils_cc/mo_integrals_cc.irp.f | 16 ++ 4 files changed, 229 insertions(+), 238 deletions(-) diff --git a/src/ccsd/ccsd_space_orb.irp.f b/src/ccsd/ccsd_space_orb.irp.f index 53028ec0e..91f703a00 100644 --- a/src/ccsd/ccsd_space_orb.irp.f +++ b/src/ccsd/ccsd_space_orb.irp.f @@ -1,9 +1,10 @@ -! Code - program ccsd implicit none + BEGIN_DOC + ! Closed-shell CCSD + END_DOC read_wf = .True. touch read_wf diff --git a/src/ccsd/ccsd_spin_orb.irp.f b/src/ccsd/ccsd_spin_orb.irp.f index 6f2de11ce..04344fbba 100644 --- a/src/ccsd/ccsd_spin_orb.irp.f +++ b/src/ccsd/ccsd_spin_orb.irp.f @@ -1,5 +1,3 @@ -! 
Prog - program ccsd implicit none diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index 09d6a0fe0..fe202ebfb 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -11,9 +11,9 @@ subroutine run_ccsd_spin_orb double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:) double precision, allocatable :: r1(:,:), r2(:,:,:,:) double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:) - double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:), cW_vvvv(:,:,:,:) - - double precision, allocatable :: f_oo(:,:), f_ov(:,:), f_vv(:,:), f_o(:), f_v(:) + double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:) !, cW_vvvv(:,:,:,:) + + double precision, allocatable :: f_o(:), f_v(:) double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:) double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:) double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:) @@ -24,8 +24,7 @@ subroutine run_ccsd_spin_orb double precision, allocatable :: all_err(:,:), all_t(:,:) logical :: not_converged - integer, allocatable :: list_occ(:,:), list_vir(:,:) - integer :: nO,nV,nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4) + integer :: nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4) integer :: nb_iter, i,j,a,b double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi integer(bit_kind) :: det(N_int,2) @@ -33,7 +32,7 @@ subroutine run_ccsd_spin_orb det = psi_det(:,:,cc_ref) print*,'Reference determinant:' call print_det(det,N_int) - + ! Extract number of occ/vir alpha/beta spin orbitals !call extract_n_spin(det,n_spin) nOa = cc_nOa !n_spin(1) @@ -41,107 +40,83 @@ subroutine run_ccsd_spin_orb nVa = cc_nVa !n_spin(3) nVb = cc_nVb !n_spin(4) - ! Total number of occ/vir spin orb - nO = cc_nOab !nOa + nOb - nV = cc_nVab !nVa + nVb - ! Debug - !print*,nO,nV - ! 
Number of occ/vir spin orb per spin nO_S = cc_nO_S !(/nOa,nOb/) nV_S = cc_nV_S !(/nVa,nVb/) ! Debug !print*,nO_S,nV_S - ! Maximal number of occ/vir + ! Maximal number of occ/vir nO_m = cc_nO_m !max(nOa, nOb) nV_m = cc_nV_m !max(nVa, nVb) ! Debug !print*,nO_m,nV_m - - allocate(list_occ(nO_m,2), list_vir(nV_m,2)) - list_occ = cc_list_occ_spin - list_vir = cc_list_vir_spin - ! Debug - !call extract_list_orb_spin(det,nO_m,nV_m,list_occ,list_vir) - !print*,list_occ(:,1) - !print*,list_occ(:,2) - !print*,list_vir(:,1) - !print*,list_vir(:,2) ! Allocation - allocate(t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV), tau_t(nO,nO,nV,nV)) - allocate(r1(nO,nV), r2(nO,nO,nV,nV)) - allocate(cF_oo(nO,nO), cF_ov(nO,nV), cF_vv(nV,nV)) - allocate(cW_oooo(nO,nO,nO,nO), cW_ovvo(nO,nV,nV,nO))!, cW_vvvv(nV,nV,nV,nV)) - allocate(v_oooo(nO,nO,nO,nO)) - !allocate(v_vooo(nV,nO,nO,nO)) - allocate(v_ovoo(nO,nV,nO,nO)) - allocate(v_oovo(nO,nO,nV,nO)) - allocate(v_ooov(nO,nO,nO,nV)) - allocate(v_vvoo(nV,nV,nO,nO)) - !allocate(v_vovo(nV,nO,nV,nO)) - !allocate(v_voov(nV,nO,nO,nV)) - allocate(v_ovvo(nO,nV,nV,nO)) - allocate(v_ovov(nO,nV,nO,nV)) - allocate(v_oovv(nO,nO,nV,nV)) - !allocate(v_vvvo(nV,nV,nV,nO)) - !allocate(v_vvov(nV,nV,nO,nV)) - !allocate(v_vovv(nV,nO,nV,nV)) - !allocate(v_ovvv(nO,nV,nV,nV)) - !allocate(v_vvvv(nV,nV,nV,nV)) - allocate(f_o(nO), f_v(nV)) - allocate(f_oo(nO, nO)) - allocate(f_ov(nO, nV)) - allocate(f_vv(nV, nV)) - + allocate(t1(cc_nOab,cc_nVab), t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau_t(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) + allocate(r1(cc_nOab,cc_nVab), r2(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) + allocate(cF_oo(cc_nOab,cc_nOab), cF_ov(cc_nOab,cc_nVab), cF_vv(cc_nVab,cc_nVab)) + allocate(cW_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab), cW_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab))!, cW_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab)) + allocate(v_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab)) + !allocate(v_vooo(cc_nVab,cc_nOab,cc_nOab,cc_nOab)) + 
allocate(v_ovoo(cc_nOab,cc_nVab,cc_nOab,cc_nOab)) + allocate(v_oovo(cc_nOab,cc_nOab,cc_nVab,cc_nOab)) + allocate(v_ooov(cc_nOab,cc_nOab,cc_nOab,cc_nVab)) + allocate(v_vvoo(cc_nVab,cc_nVab,cc_nOab,cc_nOab)) + !allocate(v_vovo(cc_nVab,cc_nOab,cc_nVab,cc_nOab)) + !allocate(v_voov(cc_nVab,cc_nOab,cc_nOab,cc_nVab)) + allocate(v_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab)) + allocate(v_ovov(cc_nOab,cc_nVab,cc_nOab,cc_nVab)) + allocate(v_oovv(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) + !allocate(v_vvvo(cc_nVab,cc_nVab,cc_nVab,cc_nOab)) + !allocate(v_vvov(cc_nVab,cc_nVab,cc_nOab,cc_nVab)) + !allocate(v_vovv(cc_nVab,cc_nOab,cc_nVab,cc_nVab)) + !allocate(v_ovvv(cc_nOab,cc_nVab,cc_nVab,cc_nVab)) + !allocate(v_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab)) + allocate(f_o(cc_nOab), f_v(cc_nVab)) + ! Allocation for the diis if (cc_update_method == 'diis') then - allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) + allocate(all_err(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth), all_t(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth)) all_err = 0d0 all_t = 0d0 endif - ! Fock elements - call gen_f_spin(det, nO_m,nO_m, nO_S,nO_S, list_occ,list_occ, nO,nO, f_oo) - call gen_f_spin(det, nO_m,nV_m, nO_S,nV_S, list_occ,list_vir, nO,nV, f_ov) - call gen_f_spin(det, nV_m,nV_m, nV_S,nV_S, list_vir,list_vir, nV,nV, f_vv) - ! Diag elements - do i = 1, nO - f_o(i) = f_oo(i,i) + do i = 1, cc_nOab + f_o(i) = cc_spin_f_oo(i,i) enddo - do i = 1, nV - f_v(i) = f_vv(i,i) + do i = 1, cc_nVab + f_v(i) = cc_spin_f_vv(i,i) enddo ! Bi electronic integrals from list call wall_time(ti) ! OOOO - call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, list_occ,list_occ,list_occ,list_occ, nO,nO,nO,nO, v_oooo) + call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nOab,cc_nOab,cc_nOab,cc_nOab, v_oooo) ! 
OOO V - !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, list_vir,list_occ,list_occ,list_occ, nV,nO,nO,nO, v_vooo) - call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, list_occ,list_vir,list_occ,list_occ, nO,nV,nO,nO, v_ovoo) - call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, list_occ,list_occ,list_vir,list_occ, nO,nO,nV,nO, v_oovo) - call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, list_occ,list_occ,list_occ,list_vir, nO,nO,nO,nV, v_ooov) + !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nVab,cc_nOab,cc_nOab,cc_nOab, v_vooo) + call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nOab,cc_nVab,cc_nOab,cc_nOab, v_ovoo) + call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nOab,cc_nOab,cc_nVab,cc_nOab, v_oovo) + call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nOab,cc_nOab,cc_nOab,cc_nVab, v_ooov) ! 
OO VV - call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, list_vir,list_vir,list_occ,list_occ, nV,nV,nO,nO, v_vvoo) - !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, list_vir,list_occ,list_vir,list_occ, nV,nO,nV,nO, v_vovo) - !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, list_vir,list_occ,list_occ,list_vir, nV,nO,nO,nV, v_voov) - call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, list_occ,list_vir,list_vir,list_occ, nO,nV,nV,nO, v_ovvo) - call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, list_occ,list_vir,list_occ,list_vir, nO,nV,nO,nV, v_ovov) - call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, list_occ,list_occ,list_vir,list_vir, nO,nO,nV,nV, v_oovv) + call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nVab,cc_nVab,cc_nOab,cc_nOab, v_vvoo) + !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nVab,cc_nOab,cc_nVab,cc_nOab, v_vovo) + !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nVab,cc_nOab,cc_nOab,cc_nVab, v_voov) + call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nOab,cc_nVab,cc_nVab,cc_nOab, v_ovvo) + call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nOab,cc_nVab,cc_nOab,cc_nVab, v_ovov) + call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nOab,cc_nOab,cc_nVab,cc_nVab, v_oovv) ! 
O VVV - !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, list_vir,list_vir,list_vir,list_occ, nV,nV,nV,nO, v_vvvo) - !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, list_vir,list_vir,list_occ,list_vir, nV,nV,nO,nV, v_vvov) - !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, list_vir,list_occ,list_vir,list_vir, nV,nO,nV,nV, v_vovv) - !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, list_occ,list_vir,list_vir,list_vir, nO,nV,nV,nV, v_ovvv) + !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nVab,cc_nVab,cc_nVab,cc_nOab, v_vvvo) + !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nVab,cc_nVab,cc_nOab,cc_nVab, v_vvov) + !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nVab,cc_nOab,cc_nVab,cc_nVab, v_vovv) + !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nOab,cc_nVab,cc_nVab,cc_nVab, v_ovvv) ! VVVV - !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, list_vir,list_vir,list_vir,list_vir, nV,nV,nV,nV, v_vvvv) + !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nVab,cc_nVab,cc_nVab,cc_nVab, v_vvvv) call wall_time(tf) if (cc_dev) then print*,'Load bi elec int:',tf-ti,'s' @@ -149,11 +124,11 @@ subroutine run_ccsd_spin_orb ! Init of T t1 = 0d0 - call guess_t1(nO,nV,f_o,f_v,f_ov,t1) - call guess_t2(nO,nV,f_o,f_v,v_oovv,t2) - call compute_tau_spin(nO,nV,t1,t2,tau) - call compute_tau_t_spin(nO,nV,t1,t2,tau_t) - + call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,t1) + call guess_t2(cc_nOab,cc_nVab,f_o,f_v,v_oovv,t2) + call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau) + call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t) + ! 
Loop init nb_iter = 0 not_converged = .True. @@ -164,9 +139,9 @@ subroutine run_ccsd_spin_orb call det_energy(det,uncorr_energy) print*,'Det energy', uncorr_energy - call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,v_oovv,energy) print*,'guess energy', uncorr_energy+energy, energy - + write(*,'(A77)') ' -----------------------------------------------------------------------------' write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' write(*,'(A77)') ' -----------------------------------------------------------------------------' @@ -179,18 +154,18 @@ subroutine run_ccsd_spin_orb ! Intermediates call wall_time(tbi) call wall_time(ti) - call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) - call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) - call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,cF_vv) + call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,v_ooov,v_oovv,cF_oo) + call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,v_oovv,cF_ov) + call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,v_oovv,cF_vv) call wall_time(tf) if (cc_dev) then print*,'Compute cFs:',tf-ti,'s' endif - + call wall_time(ti) - call compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) - call compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) - !call compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) + call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) + call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) + !call compute_cW_vvvv(cc_nOab,cc_nVab,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) call wall_time(tf) if (cc_dev) then print*,'Compute cFs:',tf-ti,'s' @@ -198,13 +173,13 @@ subroutine run_ccsd_spin_orb ! 
Residuals call wall_time(ti) - call compute_r1_spin(nO,nV,t1,t2,f_o,f_v,F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) + call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) call wall_time(tf) if (cc_dev) then print*,'Compute r1:',tf-ti,'s' endif call wall_time(ti) - call compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) + call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) call wall_time(tf) if (cc_dev) then print*,'Compute r2:',tf-ti,'s' @@ -218,29 +193,29 @@ subroutine run_ccsd_spin_orb call wall_time(ti) ! Update if (cc_update_method == 'diis') then - !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) - !call update_t_ccsd_diis(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) - call update_t_ccsd_diis_v3(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + !call update_t_ccsd(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + !call update_t_ccsd_diis(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) + call update_t_ccsd_diis_v3(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) ! Standard update as T = T - Delta elseif (cc_update_method == 'none') then - call update_t1(nO,nV,f_o,f_v,r1,t1) - call update_t2(nO,nV,f_o,f_v,r2,t2) + call update_t1(cc_nOab,cc_nVab,f_o,f_v,r1,t1) + call update_t2(cc_nOab,cc_nVab,f_o,f_v,r2,t2) else print*,'Unkonw cc_method_method: '//cc_update_method endif - call compute_tau_spin(nO,nV,t1,t2,tau) - call compute_tau_t_spin(nO,nV,t1,t2,tau_t) + call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau) + call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t) call wall_time(tf) if (cc_dev) then print*,'Update:',tf-ti,'s' endif ! 
Print - call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) + call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,v_oovv,energy) call wall_time(tfi) - + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', & uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' if (cc_dev) then @@ -270,8 +245,8 @@ subroutine run_ccsd_spin_orb print*,'' if (write_amplitudes) then - call write_t1(nO,nV,t1) - call write_t2(nO,nV,t2) + call write_t1(cc_nOab,cc_nVab,t1) + call write_t2(cc_nOab,cc_nVab,t2) call ezfio_set_utils_cc_io_amplitudes('Read') endif @@ -286,20 +261,20 @@ subroutine run_ccsd_spin_orb deallocate(v_oooo) deallocate(v_ovoo,v_oovo) deallocate(v_ovvo,v_ovov,v_oovv) - + double precision :: t_corr t_corr = 0.d0 if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then print*,'CCSD(T) calculation...' call wall_time(ta) - !allocate(v_vvvo(nV,nV,nV,nO)) + !allocate(v_vvvo(cc_nVab,cc_nVab,cc_nVab,cc_nOab)) !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & ! cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & ! cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & - ! nV,nV,nV,nO, v_vvvo) + ! cc_nVab,cc_nVab,cc_nVab,cc_nOab, v_vvvo) - !call ccsd_par_t_spin(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,v_vvvo,t_corr) - call ccsd_par_t_spin_v2(nO,nV,t1,t2,f_o,f_v,f_ov,v_ooov,v_vvoo,t_corr) + !call ccsd_par_t_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,v_ooov,v_vvoo,v_vvvo,t_corr) + call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,v_ooov,v_vvoo,t_corr) !print*,'Working on it...' !call abort call wall_time(tb) @@ -313,12 +288,12 @@ subroutine run_ccsd_spin_orb endif call save_energy(uncorr_energy + energy, t_corr) - - deallocate(f_oo,f_ov,f_vv,f_o,f_v) + + deallocate(f_o,f_v) deallocate(v_ooov,v_vvoo,t1,t2) !deallocate(v_ovvv,v_vvvo,v_vovv) !deallocate(v_vvvv) - + end ! 
Energy @@ -354,7 +329,7 @@ subroutine ccsd_energy_spin(nO,nV,t1,t2,Fov,v_oovv,energy) do j=1,nO do a=1,nV do b=1,nV - energy = energy & + energy = energy & + 0.5d0 * v_oovv(i,j,a,b) * t1(i,a) * t1(j,b) & + 0.25d0 * v_oovv(i,j,a,b) * t2(i,j,a,b) end do @@ -375,7 +350,7 @@ subroutine compute_tau_spin(nO,nV,t1,t2,tau) double precision,intent(in) :: t2(nO,nO,nV,nV) double precision,intent(out) :: tau(nO,nO,nV,nV) - + integer :: i,j,k,l integer :: a,b,c,d @@ -463,7 +438,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov !$OMP v_ovov,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,f,m,n) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(1) do a=1,nV do i=1,nO @@ -494,7 +469,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov 1d0, t1 , size(t1,1), & cF_vv, size(cF_vv,1), & 1d0, r1 , size(r1,1)) - + !do a=1,nV ! do i=1,nO ! do m=1,nO @@ -531,7 +506,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov !$OMP SHARED(r1,t1,t2,X_vovf,v_ovvf,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,f,m,n) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) !do f = 1, nV @@ -546,28 +521,28 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov !enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('N','T', nO, nV, nO*nV, & -0.5d0, t2(1,1,1,f), size(t2,1), & X_vovf, size(X_vovf,1), & 1d0 , r1 , size(r1,1)) enddo - + !call dgemm('N','T', nO, nV, nO*nV*nV, & ! -0.5d0, t2 , size(t2,1), & ! X_vovv, size(X_vovv,1), & ! 1d0 , r1 , size(r1,1)) - + deallocate(X_vovf) !deallocate(X_vovv) allocate(X_oovv(nO,nO,nV,nV)) - + !$OMP PARALLEL & !$OMP SHARED(r1,t1,t2,X_oovv, & !$OMP f_o,f_v,v_oovo,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,f,m,n) & !$OMP DEFAULT(NONE) - + !do a=1,nV ! do i=1,nO ! do e=1,nV @@ -579,7 +554,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov ! end do ! 
end do !end do - + !$OMP DO collapse(3) do a = 1, nV do e = 1, nV @@ -592,12 +567,12 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('T','N', nO, nV, nO*nO*nV, & -0.5d0, v_oovo, size(v_oovo,1) * size(v_oovo,2) * size(v_oovo,3), & X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & 1d0 , r1 , size(r1,1)) - + !$OMP PARALLEL & !$OMP SHARED(r1,t1,X_oovv,f_o,f_v,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,f,m,n) & @@ -610,7 +585,7 @@ subroutine compute_r1_spin(nO,nV,t1,t2,f_o,f_v,Fov,cF_oo,cF_ov,cF_vv,v_oovo,v_ov enddo !$OMP END DO !$OMP END PARALLEL - + deallocate(X_oovv) end @@ -684,7 +659,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -697,7 +672,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ end do !$OMP END DO !$OMP END PARALLEL - + !deallocate(X_oovv) !do b=1,nV @@ -726,25 +701,25 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ 0.5d0, t2 , size(t2,1) * size(t2,2) * size(t2,3), & A_vv , size(A_vv,1), & 0d0 , X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) - + !$OMP PARALLEL & !$OMP SHARED(r2,v_oovv,X_oovv,nO,nV) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV do j=1,nO do i=1,nO - r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) end do end do end do end do !$OMP END DO !$OMP END PARALLEL - + deallocate(A_vv)!,X_oovv) !do b=1,nV @@ -766,7 +741,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & !$OMP PRIVATE(i,m,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -789,13 +764,13 @@ subroutine 
compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV do j=1,nO do i=1,nO - r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) end do end do end do @@ -821,17 +796,17 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ ! end do !end do allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) - + call dgemm('N','T', nO, nO, nV, & 1d0, t1 , size(t1,1), & cF_ov, size(cF_ov,1), & 0d0, A_oo , size(A_oo,1)) - + !$OMP PARALLEL & !$OMP SHARED(t2,B_oovv,nO,nV) & !$OMP PRIVATE(i,m,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b = 1, nV do a = 1, nV @@ -844,17 +819,17 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('N','N', nO, nO*nV*nV, nO, & 0.5d0, A_oo, size(A_oo,1), & B_oovv, size(B_oovv,1), & 0d0 , X_oovv, size(X_oovv,1)) - + !$OMP PARALLEL & !$OMP SHARED(r2,X_oovv,nO,nV) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -888,7 +863,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ 0.5d0, cW_oooo, size(cW_oooo,1) * size(cW_oooo,2), & tau , size(tau,1) * size(tau,2), & 1d0 , r2 , size(r2,1) * size(r2,2)) - + !do b=1,nV ! do a=1,nV ! do j=1,nO @@ -908,6 +883,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ ! 0.5d0, tau , size(tau,1) * size(tau,2), & ! cW_vvvv, size(cW_vvvv,1) * size(cW_vvvv,2), & ! 1d0 , r2 , size(r2,1) * size(r2,2)) + double precision :: ti,tf call wall_time(ti) call use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) @@ -915,7 +891,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ if (cc_dev) then print*,'cW_vvvv:',tf-ti,'s' endif - + !do b=1,nV ! do a=1,nV ! 
do j=1,nO @@ -923,7 +899,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ ! do e=1,nV ! do m=1,nO - ! r2(i,j,a,b) = r2(i,j,a,b) & + ! r2(i,j,a,b) = r2(i,j,a,b) & ! + t2(i,m,a,e)*cW_ovvo(m,b,e,j) & ! - t2(j,m,a,e)*cW_ovvo(m,b,e,i) & ! - t2(i,m,b,e)*cW_ovvo(m,a,e,j) & @@ -944,7 +920,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ !$OMP SHARED(t2,A_ovov,B_ovvo,cW_ovvo,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do a = 1, nV do i = 1, nO @@ -961,24 +937,24 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ do b = 1, nV do e = 1, nV do m = 1, nO - B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) + B_ovvo(m,e,b,j) = cW_ovvo(m,b,e,j) enddo enddo enddo enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('T','N', nO*nV, nV*nO, nO*nV, & 1d0, A_ovov, size(A_ovov,1) * size(A_ovov,2), & B_ovvo, size(B_ovvo,1) * size(B_ovvo,2), & 0d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2)) - + !$OMP PARALLEL & !$OMP SHARED(r2,X_ovvo,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b = 1, nV do a = 1, nV @@ -992,15 +968,15 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ enddo !$OMP END DO !$OMP END PARALLEL - + deallocate(A_ovov,B_ovvo,X_ovvo) allocate(A_vvoo(nV,nV,nO,nO), B_ovoo(nO,nV,nO,nO), C_ovov(nO,nV,nO,nV)) - + !$OMP PARALLEL & !$OMP SHARED(A_vvoo,v_ovvo,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do m = 1, nO do j = 1, nO @@ -1013,22 +989,22 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('N','N', nO, nV*nO*nO, nV, & 1d0, t1 , size(t1,1), & A_vvoo, size(A_vvoo,1), & 0d0, B_ovoo, size(B_ovoo,1)) - + call dgemm('N','N', nO*nV*nO, nV, nO, & 1d0, B_ovoo, size(B_ovoo,1) * size(B_ovoo,2) * size(B_ovoo,3), & t1 , size(t1,1), & 0d0, C_ovov, size(C_ovov,1) * 
size(C_ovov,2) * size(C_ovov,3)) - + !$OMP PARALLEL & !$OMP SHARED(r2,C_ovov,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -1042,9 +1018,9 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ end do !$OMP END DO !$OMP END PARALLEL - + deallocate(A_vvoo, B_ovoo, C_ovov) - + !do b=1,nV ! do a=1,nV ! do j=1,nO @@ -1065,12 +1041,12 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ call gen_v_spin_3idx_i_kl(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, b, cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & nV,nV,nO, v_vbvo) - + !$OMP PARALLEL & !$OMP SHARED(b,A_vbov,v_vbvo,nO,nV) & !$OMP PRIVATE(i,j,a,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(2) do e = 1, nV do j = 1, nO @@ -1093,12 +1069,12 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ ! 1d0, A_vvov, size(A_vvov,1) * size(A_vvov,2) * size(A_vvov,3), & ! t1 , size(t1,1), & ! 0d0, X_vvoo, size(X_vvoo,1) * size(X_vvoo,2) * size(X_vvoo,3)) - + !$OMP PARALLEL & !$OMP SHARED(b,r2,X_vboo,nO,nV) & !$OMP PRIVATE(i,j,a,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(2) !do b = 1, nV do a = 1, nV @@ -1113,7 +1089,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ !$OMP END DO !$OMP END PARALLEL enddo - + !deallocate(A_vvov)!,X_vvoo) deallocate(A_vbov, X_vboo, v_vbvo) allocate(X_vvoo(nV,nV,nO,nO)) @@ -1132,7 +1108,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ ! 
end do !end do !allocate(X_vvoo(nV,nV,nO,nO)) - + call dgemm('T','N', nV, nV*nO*nO, nO, & 1d0, t1 , size(t1,1), & v_ovoo, size(v_ovoo,1), & @@ -1142,7 +1118,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ !$OMP SHARED(r2,X_vvoo,f_o,f_v,t2,nO,nV) & !$OMP PRIVATE(i,j,a,b,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -1154,7 +1130,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ end do end do !$OMP END DO - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -1167,7 +1143,7 @@ subroutine compute_r2_spin(nO,nV,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ end do !$OMP END DO !$OMP END PARALLEL - + deallocate(X_vvoo) end @@ -1182,16 +1158,16 @@ subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2) double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau_t(nO,nO,nV,nV) double precision, intent(in) :: F_oo(nO,nV), F_ov(nO,nV) double precision, intent(in) :: v_ooov(nO,nO,nO,nV), v_oovv(nO,nO,nV,nV) - + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) - + double precision, allocatable :: cF_oo(:,:), X_oovv(:,:,:,:),Y_oovv(:,:,:,:) integer :: i,j,m,a,b allocate(cF_oo(nO,nO)) - + call compute_cF_oo(nO,nV,t1,tau_t,F_oo,F_ov,v_ooov,v_oovv,cF_oo) - + !do a=1,nV ! do i=1,nO ! do m=1,nO @@ -1218,13 +1194,13 @@ subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2) ! end do ! 
end do !end do - + allocate(Y_oovv(nO,nO,nV,nV),X_oovv(nO,nO,nV,nV)) !$OMP PARALLEL & !$OMP SHARED(t2,v_oovv,X_oovv,nO,nV) & !$OMP PRIVATE(i,m,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -1247,20 +1223,20 @@ subroutine use_cF_oo(nO,nV,t1,t2,tau_t,F_oo,F_ov,v_ooov,v_oovv,r1,r2) !$OMP SHARED(r2,v_oovv,Y_oovv,nO,nV) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV do j=1,nO do i=1,nO - r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) + r2(i,j,a,b) = r2(i,j,a,b) - Y_oovv(j,i,a,b) + Y_oovv(i,j,a,b) end do end do end do end do !$OMP END DO !$OMP END PARALLEL - + deallocate(cF_oo,X_oovv,Y_oovv) end @@ -1274,7 +1250,7 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) integer, intent(in) :: nO,nV double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) double precision, intent(in) :: F_ov(nO,nV), v_oovv(nO,nO,nV,nV) - + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) double precision, allocatable :: cF_ov(:,:), A_oo(:,:), A_vv(:,:) @@ -1282,14 +1258,14 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) integer :: i,j,a,b,e,m allocate(cF_ov(nO,nV)) - + call compute_cF_ov(nO,nV,t1,F_ov,v_oovv,cF_ov) !$OMP PARALLEL & !$OMP SHARED(r1,t2,cF_ov,nO,nV) & !$OMP PRIVATE(i,a,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(1) do a=1,nV do i=1,nO @@ -1334,22 +1310,22 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) !$OMP SHARED(nO,nV,r2,X_oovv) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV do j=1,nO do i=1,nO - r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) + r2(i,j,a,b) = r2(i,j,a,b) - X_oovv(i,j,a,b) + X_oovv(i,j,b,a) end do end do end do end do !$OMP END DO !$OMP END PARALLEL - + deallocate(A_vv) - + !do b=1,nV ! do a=1,nV ! do j=1,nO @@ -1367,17 +1343,17 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) ! 
end do !end do allocate(A_oo(nO,nO),B_oovv(nO,nO,nV,nV))!,X_oovv(nO,nO,nV,nV)) - + call dgemm('N','T', nO, nO, nV, & 1d0, t1 , size(t1,1), & cF_ov, size(cF_ov,1), & 0d0, A_oo , size(A_oo,1)) - + !$OMP PARALLEL & !$OMP SHARED(t2,B_oovv,nO,nV) & !$OMP PRIVATE(i,m,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b = 1, nV do a = 1, nV @@ -1390,7 +1366,7 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('N','N', nO, nO*nV*nV, nO, & 0.5d0, A_oo, size(A_oo,1), & B_oovv, size(B_oovv,1), & @@ -1400,7 +1376,7 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) !$OMP SHARED(r2,X_oovv,nO,nV) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b=1,nV do a=1,nV @@ -1413,9 +1389,9 @@ subroutine use_cF_ov(nO,nV,t1,t2,F_ov,v_oovv,r1,r2) end do !$OMP END DO !$OMP END PARALLEL - + deallocate(cF_ov,A_oo,B_oovv,X_oovv) - + end ! Use cF_vv @@ -1426,18 +1402,18 @@ subroutine use_cF_vv(nO,nV,t1,t2,r1,r2) integer, intent(in) :: nO,nV double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) - + double precision, intent(inout) :: r1(nO,nV), r2(nO,nO,nV,nV) double precision, allocatable :: cF_vv(:,:) integer :: i,j,a,b,e,m allocate(cF_vv(nV,nV)) - + !call compute_cF_vv(nO,nV,t1,tau_t,F_ov,F_vv,v_oovv,v_ovvv,cF_vv) deallocate(cF_vv) - + end ! Use cW_vvvd @@ -1450,7 +1426,7 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) double precision, intent(in) :: v_oovv(nO,nO,nV,nV) !double precision, intent(in) :: v_vovv(nV,nO,nV,nV) - + double precision, intent(inout) :: r2(nO,nO,nV,nV) double precision, allocatable :: cW_vvvf(:,:,:), v_vvvf(:,:,:), tau_f(:,:,:), v_vovf(:,:,:) @@ -1460,7 +1436,7 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) allocate(cW_vvvf(nV,nV,nV),v_vvvf(nV,nV,nV),tau_f(nO,nO,nV),v_vovf(nV,nO,nV)) !PROVIDE cc_nVab - + !do b=1,nV ! do a=1,nV ! 
do j=1,nO @@ -1476,14 +1452,14 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) ! end do ! end do !end do - + do f = 1, nV call wall_time(ti) !$OMP PARALLEL & !$OMP SHARED(tau,tau_f,f,nO,nV) & !$OMP PRIVATE(i,j,e) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(2) do e = 1, nV do j = 1, nO @@ -1515,7 +1491,7 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) if (cc_dev .and. f == 1) then print*,'vovf', tf-ti endif - + call wall_time(ti) call compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) call wall_time(tf) @@ -1535,7 +1511,7 @@ subroutine use_cW_vvvf(nO,nV,t1,t2,tau,v_oovv,r2) enddo deallocate(cW_vvvf,v_vvvf,v_vovf) - + end ! cF_oo @@ -1562,7 +1538,7 @@ subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo) !$OMP SHARED(cF_oo,Foo,t1,v_ooov,nO,nV) & !$OMP PRIVATE(i,m,n,e) & !$OMP DEFAULT(NONE) - + !do i=1,nO ! do m=1,nO ! cF_oo(m,i) = (1d0 - Kronecker_delta(m,i))*Foo(m,i) @@ -1580,7 +1556,7 @@ subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo) cF_oo(i,i) = 0d0 end do !$OMP END DO - + do e=1,nV do n=1,nO !$OMP DO collapse(1) @@ -1620,8 +1596,8 @@ subroutine compute_cF_oo(nO,nV,t1,tau_t,Foo,Fov,v_ooov,v_oovv,cF_oo) call dgemm('N','T', nO, nO, nO*nV*nV, & 0.5d0, v_oovv, size(v_oovv,1), & tau_t , size(tau_t,1), & - 1d0 , cF_oo , size(cF_oo,1)) - + 1d0 , cF_oo , size(cF_oo,1)) + end ! cF_ov @@ -1643,7 +1619,7 @@ subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov) !$OMP SHARED(cF_ov,Fov,t1,v_oovv,nO,nV) & !$OMP PRIVATE(i,a,m,n,e,f) & !$OMP DEFAULT(NONE) - + !cF_ov = Fov !$OMP DO collapse(1) @@ -1659,7 +1635,7 @@ subroutine compute_cF_ov(nO,nV,t1,Fov,v_oovv,cF_ov) end do !$OMP END DO !$OMP END PARALLEL - + end ! 
cF_vv @@ -1677,7 +1653,7 @@ subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv) !double precision,intent(in) :: v_ovvv(nO,nV,nV,nV) double precision,intent(out) :: cF_vv(nV,nV) - + double precision, allocatable :: v_ovfv(:,:,:),X_ovfv(:,:,:) integer :: i,j,m,n integer :: a,b,e,f @@ -1699,7 +1675,7 @@ subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv) enddo !$OMP END DO !$OMP END PARALLEL - + !do e=1,nV ! do a=1,nV ! do m=1,nO @@ -1711,7 +1687,7 @@ subroutine compute_cF_vv(nO,nV,t1,tau_t,Fov,Fvv,v_oovv,cF_vv) -0.5d0, t1 , size(t1,1), & Fov , size(Fov,1), & 1d0 , cF_vv, size(cF_vv,1)) - + !do e=1,nV ! do a=1,nV ! do m=1,nO @@ -1791,7 +1767,7 @@ subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) integer :: a,b,e,f double precision, allocatable :: X_oooo(:,:,:,:) - ! oooo block + ! oooo block !cW_oooo = v_oooo @@ -1809,7 +1785,7 @@ subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) ! end do !end do allocate(X_oooo(nO,nO,nO,nO)) - + call dgemm('N','T', nO*nO*nO, nO, nV, & 1d0, v_ooov, size(v_ooov,1) * size(v_ooov,2) * size(v_ooov,3), & t1 , size(t1,1), & @@ -1830,14 +1806,14 @@ subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) end do !$OMP END DO !$OMP END PARALLEL - + deallocate(X_oooo) - + !do m=1,nO ! do n=1,nO ! do i=1,nO ! do j=1,nO - ! + ! ! do e=1,nV ! do f=1,nV ! cW_oooo(m,n,i,j) = cW_oooo(m,n,i,j) + 0.25d0*tau(i,j,e,f)*v_oovv(m,n,e,f) @@ -1853,7 +1829,7 @@ subroutine compute_cW_oooo(nO,nV,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) 0.25d0, v_oovv , size(v_oovv,1) * size(v_oovv,2), & tau , size(tau,1) * size(tau,2), & 1.d0 , cW_oooo, size(cW_oooo,1) * size(cW_oooo,2)) - + end ! 
cW_ovvo @@ -1913,7 +1889,7 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) call gen_v_spin_3idx_ij_l(cc_nO_m,cc_nV_m,cc_nV_m,cc_nV_m, e, cc_nO_S,cc_nV_S,cc_nV_S,cc_nV_S, & cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, & nO,nV,nV, v_ovev) - + call dgemm('N','T', nO*nV, nO, nV, & 1.d0, v_ovev , size(v_ovev,1) * size(v_ovev,2), & t1 , size(t1,1), & @@ -1950,14 +1926,14 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) ! end do ! end do !end do - + allocate(A_oovo(nO,nO,nV,nO), B_vovo(nV,nO,nV,nO)) - + !$OMP PARALLEL & !$OMP SHARED(A_oovo,v_oovo,nO,nV) & !$OMP PRIVATE(j,e,m,n) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do j=1,nO do e=1,nV @@ -1970,17 +1946,17 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) end do !$OMP END DO !$OMP END PARALLEL - + call dgemm('T','N', nV, nO*nV*nO, nO, & 1d0, t1 , size(t1,1), & A_oovo, size(A_oovo,1), & 0d0, B_vovo, size(B_vovo,1)) - + !$OMP PARALLEL & !$OMP SHARED(cW_ovvo,B_vovo,nO,nV) & !$OMP PRIVATE(j,e,m,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do j=1,nO do e=1,nV @@ -2015,7 +1991,7 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) !$OMP SHARED(nO,nV,A_voov,B_voov,v_oovv,t2,t1) & !$OMP PRIVATE(f,n,m,e,j,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do b = 1, nV do j = 1, nO @@ -2039,19 +2015,19 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) enddo !$OMP END DO !$OMP END PARALLEL - + call dgemm('T','N', nO*nV, nV*nO, nV*nO, & 1d0, A_voov, size(A_voov,1) * size(A_voov,2), & B_voov, size(B_voov,1) * size(B_voov,2), & 0d0, C_ovov, size(C_ovov,1) * size(C_ovov,2)) - + deallocate(A_voov,B_voov) !$OMP PARALLEL & !$OMP SHARED(cW_ovvo,C_ovov,nO,nV) & !$OMP PRIVATE(j,e,m,b) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(3) do j = 1, nO do e = 1, nV @@ -2064,7 +2040,7 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) enddo !$OMP END DO !$OMP END 
PARALLEL - + deallocate(C_ovov) end @@ -2072,7 +2048,7 @@ subroutine compute_cW_ovvo(nO,nV,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) ! cW_vvvv subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) - + implicit none integer,intent(in) :: nO,nV @@ -2154,14 +2130,14 @@ subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) end do !$OMP END DO !$OMP END PARALLEL - + deallocate(A_ovvv,B_vvvv) !do a=1,nV ! do b=1,nV ! do e=1,nV ! do f=1,nV - ! + ! ! do m=1,nO ! do n=1,nO ! cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) @@ -2182,7 +2158,7 @@ subroutine compute_cW_vvvv(nO,nV,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) ! cW_vvvf subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) - + implicit none integer,intent(in) :: nO,nV,f @@ -2207,7 +2183,7 @@ subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) !$OMP SHARED(nO,nV,cW_vvvf,A_ovvf,v_vovf,v_vvvf,f) & !$OMP PRIVATE(a,b,c,d,e,m) & !$OMP DEFAULT(NONE) - + !$OMP DO collapse(2) do c = 1, nV do b = 1, nV @@ -2248,7 +2224,7 @@ subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) 1d0, t1 , size(t1,1), & A_ovvf, size(A_ovvf,1), & 0d0, B_vvvf, size(B_vvvf,1)) - + !$OMP PARALLEL & !$OMP SHARED(nO,nV,cW_vvvf,B_vvvf,v_oovf,v_oovv,f) & !$OMP PRIVATE(a,b,c,d,e,m,n) & @@ -2264,14 +2240,14 @@ subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) end do end do !$OMP END DO NOWAIT - + !deallocate(A_ovvf,B_vvvf) !do a=1,nV ! do b=1,nV ! do e=1,nV ! do f=1,nV - ! + ! ! do m=1,nO ! do n=1,nO ! 
cW_vvvv(a,b,e,f) = cW_vvvv(a,b,e,f) + 0.25d0*tau(m,n,a,b)*v_oovv(m,n,e,f) @@ -2292,13 +2268,13 @@ subroutine compute_cW_vvvf(nO,nV,t1,t2,tau,f,v_vvvf,v_vovf,v_oovv,cW_vvvf) enddo enddo !$OMP END DO - !$OMP END PARALLEL - + !$OMP END PARALLEL + call dgemm('T','N', nV*nV, nV, nO*nO, & 0.25d0, tau , size(tau,1) * size(tau,2), & v_oovf , size(v_oovf,1) * size(v_oovf,2), & 1.d0 , cW_vvvf, size(cW_vvvf,1) * size(cW_vvvf,2)) - + deallocate(v_oovf) deallocate(A_ovvf,B_vvvf) diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 6f21c316f..eebc84ca8 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -1006,6 +1006,22 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) END_PROVIDER + +BEGIN_PROVIDER [ double precision, cc_spin_f_oo, (cc_nOab, cc_nOab)] + implicit none + call gen_f_spin(psi_det(1,1,cc_ref), cc_nO_m, cc_nO_m, cc_nO_S, cc_nO_S, cc_list_occ_spin, cc_list_occ_spin, cc_nOab, cc_nOab, cc_spin_f_oo) +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cc_spin_f_ov, (cc_nOab, cc_nVab)] + implicit none + call gen_f_spin(psi_det(1,1,cc_ref), cc_nO_m, cc_nV_m, cc_nO_S, cc_nV_S, cc_list_occ_spin, cc_list_vir_spin, cc_nOab, cc_nVab, cc_spin_f_ov) +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cc_spin_f_vv, (cc_nVab, cc_nVab)] + implicit none + call gen_f_spin(psi_det(1,1,cc_ref), cc_nV_m, cc_nV_m, cc_nV_S, cc_nV_S, cc_list_vir_spin, cc_list_vir_spin, cc_nVab, cc_nVab, cc_spin_f_vv) +END_PROVIDER + ! 
Shift subroutine shift_idx_spin(s,n_S,shift) From e3e874879f47013b48963d729f8f312bf6ddf33b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Feb 2025 18:11:47 +0100 Subject: [PATCH 5/9] Introduced Cholesky in gen_spin_space --- src/utils_cc/mo_integrals_cc.irp.f | 1234 +++++++++++++++++++++------- 1 file changed, 947 insertions(+), 287 deletions(-) diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index eebc84ca8..813c186a8 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -47,7 +47,7 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k - if (do_ao_cholesky) then + if (do_mo_cholesky) then double precision, allocatable :: buffer(:,:,:,:) double precision, allocatable :: v1(:,:,:), v2(:,:,:) allocate(v1(cholesky_mo_num,n1,n3), v2(cholesky_mo_num,n2,n4)) @@ -132,7 +132,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1,i2,i3,i4 double precision, allocatable :: buffer(:,:,:) call set_multiple_levels_omp(.False.) 
@@ -190,7 +190,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -233,7 +233,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -277,7 +277,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -311,7 +311,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -345,7 +345,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -379,7 +379,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -422,7 +422,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -465,7 +465,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -499,7 +499,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -533,7 +533,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -567,7 +567,7 @@ subroutine 
gen_v_space_chol(n1,n3,list1,list3,v,ldv) implicit none - if (do_ao_cholesky) then + if (do_mo_cholesky) then integer :: i1, i2, i3, i4 integer :: n1, n2, n3, n4 @@ -1169,7 +1169,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, implicit none BEGIN_DOC - ! Compute the bi electronic integrals corresponding to four lists of spin orbitals. + ! Compute the 2e-integrals corresponding to four lists of spin orbitals. ! Ex: occ/occ/occ/occ, occ/vir/occ/vir, ... END_DOC @@ -1178,129 +1178,306 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, integer, intent(in) :: dim1, dim2, dim3, dim4 double precision, intent(out) :: v(dim1,dim2,dim3,dim4) - double precision :: mo_two_e_integral + double precision, external :: mo_two_e_integral integer :: i,j,k,l,idx_i,idx_j,idx_k,idx_l integer :: i_shift,j_shift,k_shift,l_shift integer :: tmp_i,tmp_j,tmp_k,tmp_l integer :: si,sj,sk,sl,s - PROVIDE cc_space_v + double precision, allocatable :: buffer(:,:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) + + if (do_mo_cholesky) then + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. 
s == 8) then + + allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,n2_S(sj),n4_S(sl))) + allocate(buffer(n1_S(si),n3_S(sk),n2_S(sj),n4_S(sl))) + + call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n2_S(sj),n4_S(sl),list2(1,sj),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si)*n3_S(sk), n2_S(sj)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l) + !$OMP DO collapse(3) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = buffer(i,k,j,l) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + allocate(v1(cholesky_mo_num,n2_S(sj),n3_S(sk)), v2(cholesky_mo_num,n1_S(si),n4_S(sl))) + allocate(buffer(n2_S(sj),n3_S(sk),n1_S(si),n4_S(sl))) + + call gen_v_space_chol(n2_S(sj),n3_S(sk),list2(1,sj),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n2_S(sj)*n3_S(sk), n1_S(si)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l) + !$OMP DO collapse(3) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = v(idx_i,idx_j,idx_k,idx_l) - buffer(j,k,i,l) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sk .and. 
sj == sl) then + + allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,n2_S(sj),n4_S(sl))) + allocate(buffer(n1_S(si),n3_S(sk),n2_S(sj),n4_S(sl))) + + call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n2_S(sj),n4_S(sl),list2(1,sj),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si)*n3_S(sk), n2_S(sj)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l) + !$OMP DO collapse(3) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = buffer(i,k,j,l) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sl .and. sj == sk) then + + allocate(v1(cholesky_mo_num,n2_S(sj),n3_S(sk)), v2(cholesky_mo_num,n1_S(si),n4_S(sl))) + allocate(buffer(n2_S(sj),n3_S(sk),n1_S(si),n4_S(sl))) + + call gen_v_space_chol(n2_S(sj),n3_S(sk),list2(1,sj),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n2_S(sj)*n3_S(sk), n1_S(si)*n4_S(sl), cholesky_mo_num, -1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l) + !$OMP DO collapse(3) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = buffer(j,k,i,l) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + else + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP 
PRIVATE(i,j,k,l,idx_i,idx_j,idx_k,idx_l) + !$OMP DO collapse(3) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = 0d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - !$OMP PARALLEL & - !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & - !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & - !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& - !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& - !$OMP DEFAULT(NONE) + endif - do sl = 1, 2 - call shift_idx_spin(sl,n4_S,l_shift) - do sk = 1, 2 - call shift_idx_spin(sk,n3_S,k_shift) - do sj = 1, 2 - call shift_idx_spin(sj,n2_S,j_shift) - do si = 1, 2 - call shift_idx_spin(si,n1_S,i_shift) + enddo + enddo + enddo + enddo - s = si+sj+sk+sl - ! or - if (s == 4 .or. s == 8) then - !$OMP DO collapse(3) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + else + !$OMP PARALLEL & + !$OMP SHARED(n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & + !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & + !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& + !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. 
s == 8) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) - v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + enddo enddo enddo enddo - enddo - !$OMP END DO - - ! or - elseif (si == sk .and. sj == sl) then - !$OMP DO collapse(3) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - v(idx_i,idx_j,idx_k,idx_l) = cc_space_v(i,j,k,l) + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + enddo enddo enddo enddo - enddo - !$OMP END DO - - ! or - elseif (si == sl .and. sj == sk) then - !$OMP DO collapse(3) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + !$OMP END DO + + ! or + elseif (si == sl .and. 
sj == sk) then + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) - v(idx_i,idx_j,idx_k,idx_l) = - cc_space_v(j,i,k,l) + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + enddo enddo enddo enddo - enddo - !$OMP END DO - else - !$OMP DO collapse(3) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - l = list4(tmp_l,sl) + !$OMP END DO + else + !$OMP DO collapse(3) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) idx_l = tmp_l + l_shift - k = list3(tmp_k,sk) idx_k = tmp_k + k_shift - j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - v(idx_i,idx_j,idx_k,idx_l) = 0d0 + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v(idx_i,idx_j,idx_k,idx_l) = 0d0 + enddo enddo enddo enddo - enddo - !$OMP END DO - endif + !$OMP END DO + endif + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL + !$OMP END PARALLEL + + endif end + ! 
V_3idx subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, dim1,dim2,dim3, v_l) @@ -1323,7 +1500,8 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, integer :: tmp_i,tmp_j,tmp_k,tmp_l integer :: si,sj,sk,sl,s - PROVIDE cc_space_v + double precision, allocatable :: buffer(:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) if (idx_l <= n4_S(1)) then sl = 1 @@ -1334,99 +1512,255 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, tmp_l = idx_l - l_shift l = list4(tmp_l,sl) - !$OMP PARALLEL & - !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & - !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & - !$OMP i,j,k,idx_i,idx_j,idx_k,& - !$OMP tmp_i,tmp_j,tmp_k)& - !$OMP DEFAULT(NONE) + if (do_mo_cholesky) then - do sk = 1, 2 - call shift_idx_spin(sk,n3_S,k_shift) - do sj = 1, 2 - call shift_idx_spin(sj,n2_S,j_shift) - do si = 1, 2 - call shift_idx_spin(si,n1_S,i_shift) - - s = si+sj+sk+sl - ! or - if (s == 4 .or. s == 8) then - !$OMP DO collapse(2) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) - k = list3(tmp_k,sk) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. 
s == 8) then + + allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,n2_S(sj),1)) + allocate(buffer(n1_S(si),n3_S(sk),n2_S(sj))) + + call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n2_S(sj),1,list2(1,sj),list4(tmp_l,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si)*n3_S(sk), n2_S(sj), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,idx_i,idx_j,idx_k) + !$OMP DO collapse(2) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_l(idx_i,idx_j,idx_k) = buffer(i,k,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + allocate(v1(cholesky_mo_num,n2_S(sj),n3_S(sk)), v2(cholesky_mo_num,n1_S(si),1)) + allocate(buffer(n2_S(sj),n3_S(sk),n1_S(si))) + + call gen_v_space_chol(n2_S(sj),n3_S(sk),list2(1,sj),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),1,list1(1,si),list4(tmp_l,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n2_S(sj)*n3_S(sk), n1_S(si), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,idx_i,idx_j,idx_k) + !$OMP DO collapse(2) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_l(idx_i,idx_j,idx_k) = v_l(idx_i,idx_j,idx_k) - buffer(j,k,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sk .and. 
sj == sl) then + + allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,n2_S(sj),1)) + allocate(buffer(n1_S(si),n3_S(sk),n2_S(sj))) + + call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n2_S(sj),1,list2(1,sj),list4(tmp_l,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si)*n3_S(sk), n2_S(sj), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,idx_i,idx_j,idx_k) + !$OMP DO collapse(2) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_l(idx_i,idx_j,idx_k) = buffer(i,k,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sl .and. sj == sk) then + + allocate(v1(cholesky_mo_num,n2_S(sj),n3_S(sk)), v2(cholesky_mo_num,n1_S(si),1)) + allocate(buffer(n2_S(sj),n3_S(sk),n1_S(si))) + + call gen_v_space_chol(n2_S(sj),n3_S(sk),list2(1,sj),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),1,list1(1,si),list4(tmp_l,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n2_S(sj)*n3_S(sk), n1_S(si), cholesky_mo_num, -1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,idx_i,idx_j,idx_k,idx_l) + !$OMP DO collapse(2) + do k = 1, n3_S(sk) + do j = 1, n2_S(sj) + idx_k = k + k_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_l(idx_i,idx_j,idx_k) = buffer(j,k,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + else + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) idx_k = tmp_k + k_shift - j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - 
mo_two_e_integral(j,i,k,l) - v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + do tmp_i = 1, n1_S(si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = 0d0 + enddo enddo enddo - enddo - !$OMP END DO - - ! or - elseif (si == sk .and. sj == sl) then - !$OMP DO collapse(2) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + !$OMP END DO + + endif + + enddo + enddo + enddo + + + else + + PROVIDE cc_space_v + + !$OMP PARALLEL & + !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & + !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & + !$OMP i,j,k,idx_i,idx_j,idx_k,& + !$OMP tmp_i,tmp_j,tmp_k)& + !$OMP DEFAULT(NONE) + + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo enddo enddo - enddo - !$OMP END DO - - ! or - elseif (si == sl .and. sj == sk) then - !$OMP DO collapse(2) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + !$OMP END DO + + ! or + elseif (si == sk .and. 
sj == sl) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) - v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l) + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = cc_space_v(i,j,k,l) + enddo enddo enddo - enddo - !$OMP END DO - else - !$OMP DO collapse(2) - do tmp_k = 1, n3_S(sk) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + !$OMP END DO + + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - v_l(idx_i,idx_j,idx_k) = 0d0 + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = - cc_space_v(j,i,k,l) + enddo enddo enddo - enddo - !$OMP END DO - endif + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_k = 1, n3_S(sk) + do tmp_j = 1, n2_S(sj) + idx_k = tmp_k + k_shift + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + idx_i = tmp_i + i_shift + v_l(idx_i,idx_j,idx_k) = 0d0 + enddo + enddo + enddo + !$OMP END DO + endif + enddo enddo enddo - enddo - !$OMP END PARALLEL + !$OMP END PARALLEL + + endif end @@ -1452,7 +1786,8 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l integer :: tmp_i,tmp_j,tmp_k,tmp_l integer :: si,sj,sk,sl,s - PROVIDE cc_space_v + double precision, allocatable :: buffer(:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) if (idx_k <= n3_S(1)) then sk = 1 @@ -1463,100 +1798,257 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l tmp_k = idx_k - k_shift k = list3(tmp_k,sk) - !$OMP PARALLEL & - !$OMP 
SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & - !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & - !$OMP i,j,l,idx_i,idx_j,idx_l,& - !$OMP tmp_i,tmp_j,tmp_l)& - !$OMP DEFAULT(NONE) + if (do_mo_cholesky) then + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) - do sl = 1, 2 - call shift_idx_spin(sl,n4_S,l_shift) - do sj = 1, 2 - call shift_idx_spin(sj,n2_S,j_shift) - do si = 1, 2 - call shift_idx_spin(si,n1_S,i_shift) - - s = si+sj+sk+sl - ! or - if (s == 4 .or. s == 8) then - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + + allocate(v1(cholesky_mo_num,n1_S(si),1), v2(cholesky_mo_num,n2_S(sj),n4_S(sl))) + allocate(buffer(n1_S(si),n2_S(sj),n4_S(sl))) + + call gen_v_space_chol(n1_S(si),1,list1(1,si),list3(tmp_k,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n2_S(sj),n4_S(sl),list2(1,sj),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si), n2_S(sj)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,l,idx_i,idx_j,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_k(idx_i,idx_j,idx_l) = buffer(i,j,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + allocate(v1(cholesky_mo_num,n2_S(sj),1), v2(cholesky_mo_num,n1_S(si),n4_S(sl))) + allocate(buffer(n2_S(sj),n1_S(si),n4_S(sl))) + + call gen_v_space_chol(n2_S(sj),1,list2(1,sj),list3(tmp_k,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n2_S(sj), n1_S(si)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, 
cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,l,idx_i,idx_j,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_k(idx_i,idx_j,idx_l) = v_k(idx_i,idx_j,idx_l) - buffer(j,i,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sk .and. sj == sl) then + + allocate(v1(cholesky_mo_num,n1_S(si),1), v2(cholesky_mo_num,n2_S(sj),n4_S(sl))) + allocate(buffer(n1_S(si),n2_S(sj),n4_S(sl))) + + call gen_v_space_chol(n1_S(si),1,list1(1,si),list3(tmp_k,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n2_S(sj),n4_S(sl),list2(1,sj),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si), n2_S(sj)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,l,idx_i,idx_j,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_k(idx_i,idx_j,idx_l) = buffer(i,j,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sl .and. 
sj == sk) then + + allocate(v1(cholesky_mo_num,n2_S(sj),1), v2(cholesky_mo_num,n1_S(si),n4_S(sl))) + allocate(buffer(n2_S(sj),n1_S(si),n4_S(sl))) + + call gen_v_space_chol(n2_S(sj),1,list2(1,sj),list3(tmp_k,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n2_S(sj), n1_S(si)*n4_S(sl), cholesky_mo_num, -1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n2_S(sj)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,l,idx_i,idx_j,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do j = 1, n2_S(sj) + idx_l = l + l_shift + idx_j = j + j_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_k(idx_i,idx_j,idx_l) = buffer(j,i,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) + idx_l = tmp_l + l_shift + idx_j = tmp_j + j_shift + do tmp_i = 1, n1_S(si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = 0d0 + enddo + enddo + enddo + !$OMP END DO + + endif + + enddo + enddo + enddo + + else + + PROVIDE cc_space_v + + !$OMP PARALLEL & + !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & + !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & + !$OMP i,j,l,idx_i,idx_j,idx_l,& + !$OMP tmp_i,tmp_j,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sj = 1, 2 + call shift_idx_spin(sj,n2_S,j_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. 
s == 8) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) - v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo enddo enddo - enddo - !$OMP END DO - - ! or - elseif (si == sk .and. sj == sl) then - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + !$OMP END DO + + ! or + elseif (si == sk .and. sj == sl) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = cc_space_v(i,j,k,l) + enddo enddo enddo - enddo - !$OMP END DO - - ! or - elseif (si == sl .and. sj == sk) then - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + !$OMP END DO + + ! or + elseif (si == sl .and. 
sj == sk) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) - v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l) + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = - cc_space_v(j,i,k,l) + enddo enddo enddo - enddo - !$OMP END DO - else - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_j = 1, n2_S(sj) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) idx_j = tmp_j + j_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - v_k(idx_i,idx_j,idx_l) = 0d0 + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_k(idx_i,idx_j,idx_l) = 0d0 + enddo enddo enddo - enddo - !$OMP END DO - endif + !$OMP END DO + endif + enddo enddo enddo - enddo - !$OMP END PARALLEL + !$OMP END PARALLEL + endif end ! 
V_3idx_i_kl @@ -1581,7 +2073,8 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l integer :: tmp_i,tmp_j,tmp_k,tmp_l integer :: si,sj,sk,sl,s - PROVIDE cc_space_v + double precision, allocatable :: buffer(:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) if (idx_j <= n2_S(1)) then sj = 1 @@ -1592,98 +2085,265 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l tmp_j = idx_j - j_shift j = list2(tmp_j,sj) - !$OMP PARALLEL & - !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & - !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & - !$OMP i,k,l,idx_i,idx_k,idx_l,& - !$OMP tmp_i,tmp_k,tmp_l)& - !$OMP DEFAULT(NONE) - do sl = 1, 2 - call shift_idx_spin(sl,n4_S,l_shift) - do sk = 1, 2 - call shift_idx_spin(sk,n3_S,k_shift) - do si = 1, 2 - call shift_idx_spin(si,n1_S,i_shift) - - s = si+sj+sk+sl - ! or - if (s == 4 .or. s == 8) then - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + if (do_mo_cholesky) then + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. 
s == 8) then + + allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,1,n4_S(sl))) + allocate(buffer(n1_S(si),n3_S(sk),n4_S(sl))) + + call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(1,n4_S(sl),list2(tmp_j,sj),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si)*n3_S(sk), n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + idx_l = l + l_shift + idx_k = k + k_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_j(idx_i,idx_k,idx_l) = buffer(i,k,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + allocate(v1(cholesky_mo_num,1,n3_S(sk)), v2(cholesky_mo_num,n1_S(si),n4_S(sl))) + allocate(buffer(n3_S(sk),n1_S(si),n4_S(sl))) + + call gen_v_space_chol(1,n3_S(sk),list2(tmp_j,sj),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n3_S(sk), n1_S(si)*n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + idx_l = l + l_shift + idx_k = k + k_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_j(idx_i,idx_k,idx_l) = v_j(idx_i,idx_k,idx_l) - buffer(k,i,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sk .and. 
sj == sl) then + + allocate(v1(cholesky_mo_num,n1_S(si),n3_S(sk)), v2(cholesky_mo_num,1,n4_S(sl))) + allocate(buffer(n1_S(si),n3_S(sk),n4_S(sl))) + + call gen_v_space_chol(n1_S(si),n3_S(sk),list1(1,si),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(1,n4_S(sl),list2(tmp_j,sj),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n1_S(si)*n3_S(sk), n4_S(sl), cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1_S(si)*n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + idx_l = l + l_shift + idx_k = k + k_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_j(idx_i,idx_k,idx_l) = buffer(i,k,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + ! or + elseif (si == sl .and. sj == sk) then + + allocate(v1(cholesky_mo_num,1,n3_S(sk)), v2(cholesky_mo_num,n1_S(si),n4_S(sl))) + allocate(buffer(n3_S(sk),n1_S(si),n4_S(sl))) + + call gen_v_space_chol(1,n3_S(sk),list2(tmp_j,sj),list3(1,sk),v1,cholesky_mo_num) + call gen_v_space_chol(n1_S(si),n4_S(sl),list1(1,si),list4(1,sl),v2,cholesky_mo_num) + + call dgemm('T','N', n3_S(sk), n1_S(si)*n4_S(sl), cholesky_mo_num, -1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n3_S(sk)) + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + idx_l = l + l_shift + idx_k = k + k_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_j(idx_i,idx_k,idx_l) = buffer(k,i,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(v1, v2, buffer) + + else + + !$OMP PARALLEL & + !$OMP DEFAULT(SHARED) & + !$OMP PRIVATE(i,k,l,idx_i,idx_k,idx_l) + !$OMP DO collapse(2) + do l = 1, n4_S(sl) + do k = 1, n3_S(sk) + idx_l = l + l_shift + idx_k = k + k_shift + do i = 1, n1_S(si) + idx_i = i + i_shift + v_j(idx_i,idx_k,idx_l) = 
0d0 + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + endif + + enddo + enddo + enddo + + + + else + + PROVIDE cc_space_v + + !$OMP PARALLEL & + !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & + !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & + !$OMP i,k,l,idx_i,idx_k,idx_l,& + !$OMP tmp_i,tmp_k,tmp_l)& + !$OMP DEFAULT(NONE) + + do sl = 1, 2 + call shift_idx_spin(sl,n4_S,l_shift) + do sk = 1, 2 + call shift_idx_spin(sk,n3_S,k_shift) + do si = 1, 2 + call shift_idx_spin(si,n1_S,i_shift) + + s = si+sj+sk+sl + ! or + if (s == 4 .or. s == 8) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) idx_k = tmp_k + k_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) - v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - mo_two_e_integral(j,i,k,l) + v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) - cc_space_v(j,i,k,l) + enddo enddo enddo - enddo - !$OMP END DO + !$OMP END DO - ! or - elseif (si == sk .and. sj == sl) then - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + ! or + elseif (si == sk .and. 
sj == sl) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) idx_k = tmp_k + k_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) - v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = mo_two_e_integral(i,j,k,l) + v_j(idx_i,idx_k,idx_l) = cc_space_v(i,j,k,l) + enddo enddo enddo - enddo - !$OMP END DO + !$OMP END DO - ! or - elseif (si == sl .and. sj == sk) then - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + ! or + elseif (si == sl .and. sj == sk) then + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) idx_k = tmp_k + k_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) - v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l) + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + !v(idx_i,idx_j,idx_k,idx_l) = - mo_two_e_integral(j,i,k,l) + v_j(idx_i,idx_k,idx_l) = - cc_space_v(j,i,k,l) + enddo enddo enddo - enddo - !$OMP END DO - else - !$OMP DO collapse(2) - do tmp_l = 1, n4_S(sl) - do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + !$OMP END DO + else + !$OMP DO collapse(2) + do tmp_l = 1, n4_S(sl) + do tmp_k = 1, n3_S(sk) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) idx_k = tmp_k + k_shift - i = list1(tmp_i,si) - idx_i = tmp_i + i_shift - v_j(idx_i,idx_k,idx_l) = 0d0 + do tmp_i = 1, n1_S(si) + i = list1(tmp_i,si) + idx_i = tmp_i + i_shift + v_j(idx_i,idx_k,idx_l) = 0d0 + enddo enddo enddo - enddo - !$OMP END DO - endif + !$OMP END DO + endif + enddo enddo enddo - enddo - !$OMP END PARALLEL + !$OMP END PARALLEL + endif end From 3d46cde2e46c8f6dd0eb0d56d5ff446d649907bb Mon Sep 17 00:00:00 
2001 From: Anthony Scemama Date: Wed, 5 Feb 2025 13:35:38 +0100 Subject: [PATCH 6/9] Cleaning ccsd_spin --- src/ccsd/ccsd_space_orb_sub.irp.f | 10 +- src/ccsd/ccsd_spin_orb_sub.irp.f | 156 ++++++----------------------- src/utils_cc/mo_integrals_cc.irp.f | 101 +++++++++++++++++++ 3 files changed, 131 insertions(+), 136 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 30f134fc2..7cd4c50c6 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -26,7 +26,6 @@ subroutine run_ccsd_space_orb double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) - integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nVa call set_multiple_levels_omp(.False.) @@ -38,9 +37,8 @@ subroutine run_ccsd_space_orb PROVIDE all_mo_integrals endif - det = psi_det(:,:,cc_ref) print*,'Reference determinant:' - call print_det(det,N_int) + call print_det(psi_det(1,1,cc_ref),N_int) nOa = cc_nOa nVa = cc_nVa @@ -57,10 +55,6 @@ subroutine run_ccsd_space_orb allocate(list_occ(nO),list_vir(nV)) list_occ = cc_list_occ list_vir = cc_list_vir - ! Debug - !call extract_list_orb_space(det,nO,nV,list_occ,list_vir) - !print*,'occ',list_occ - !print*,'vir',list_vir ! 
GPU arrays call gpu_allocate(d_cc_space_f_oo, nO, nO) @@ -186,7 +180,7 @@ subroutine run_ccsd_space_orb call update_tau_space(nO,nV,h_t1,t1,t2,tau) call update_tau_x_space(nO,nV,tau,tau_x) - call det_energy(det,uncorr_energy) + call det_energy(psi_det(1,1,cc_ref),uncorr_energy) print*,'Det energy', uncorr_energy call ccsd_energy_space_x(nO,nV,d_cc_space_v_oovv,d_cc_space_f_vo,tau_x,t1,energy) diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index fe202ebfb..fa0983cd2 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -14,65 +14,31 @@ subroutine run_ccsd_spin_orb double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:) !, cW_vvvv(:,:,:,:) double precision, allocatable :: f_o(:), f_v(:) - double precision, allocatable :: v_oooo(:,:,:,:), v_vooo(:,:,:,:), v_ovoo(:,:,:,:) - double precision, allocatable :: v_oovo(:,:,:,:), v_ooov(:,:,:,:), v_vvoo(:,:,:,:) - double precision, allocatable :: v_vovo(:,:,:,:), v_voov(:,:,:,:), v_ovvo(:,:,:,:) - double precision, allocatable :: v_ovov(:,:,:,:), v_oovv(:,:,:,:), v_vvvo(:,:,:,:) - double precision, allocatable :: v_vvov(:,:,:,:), v_vovv(:,:,:,:), v_ovvv(:,:,:,:) - double precision, allocatable :: v_vvvv(:,:,:,:) +! double precision, allocatable :: v_ovvv(:,:,:,:) double precision, allocatable :: all_err(:,:), all_t(:,:) logical :: not_converged - integer :: nOa,nOb,nVa,nVb,nO_m,nV_m,nO_S(2),nV_S(2),n_spin(4) + integer :: n_spin(4) integer :: nb_iter, i,j,a,b double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi - integer(bit_kind) :: det(N_int,2) - det = psi_det(:,:,cc_ref) + if (do_mo_cholesky) then + PROVIDE cholesky_mo_transp + FREE cholesky_ao + else + PROVIDE all_mo_integrals + endif + print*,'Reference determinant:' - call print_det(det,N_int) - - ! 
Extract number of occ/vir alpha/beta spin orbitals - !call extract_n_spin(det,n_spin) - nOa = cc_nOa !n_spin(1) - nOb = cc_nOb !n_spin(2) - nVa = cc_nVa !n_spin(3) - nVb = cc_nVb !n_spin(4) - - ! Number of occ/vir spin orb per spin - nO_S = cc_nO_S !(/nOa,nOb/) - nV_S = cc_nV_S !(/nVa,nVb/) - ! Debug - !print*,nO_S,nV_S - - ! Maximal number of occ/vir - nO_m = cc_nO_m !max(nOa, nOb) - nV_m = cc_nV_m !max(nVa, nVb) - ! Debug - !print*,nO_m,nV_m + call print_det(psi_det(1,1,cc_ref),N_int) ! Allocation allocate(t1(cc_nOab,cc_nVab), t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau_t(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) allocate(r1(cc_nOab,cc_nVab), r2(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) allocate(cF_oo(cc_nOab,cc_nOab), cF_ov(cc_nOab,cc_nVab), cF_vv(cc_nVab,cc_nVab)) allocate(cW_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab), cW_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab))!, cW_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab)) - allocate(v_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab)) - !allocate(v_vooo(cc_nVab,cc_nOab,cc_nOab,cc_nOab)) - allocate(v_ovoo(cc_nOab,cc_nVab,cc_nOab,cc_nOab)) - allocate(v_oovo(cc_nOab,cc_nOab,cc_nVab,cc_nOab)) - allocate(v_ooov(cc_nOab,cc_nOab,cc_nOab,cc_nVab)) - allocate(v_vvoo(cc_nVab,cc_nVab,cc_nOab,cc_nOab)) - !allocate(v_vovo(cc_nVab,cc_nOab,cc_nVab,cc_nOab)) - !allocate(v_voov(cc_nVab,cc_nOab,cc_nOab,cc_nVab)) - allocate(v_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab)) - allocate(v_ovov(cc_nOab,cc_nVab,cc_nOab,cc_nVab)) - allocate(v_oovv(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) - !allocate(v_vvvo(cc_nVab,cc_nVab,cc_nVab,cc_nOab)) - !allocate(v_vvov(cc_nVab,cc_nVab,cc_nOab,cc_nVab)) - !allocate(v_vovv(cc_nVab,cc_nOab,cc_nVab,cc_nVab)) - !allocate(v_ovvv(cc_nOab,cc_nVab,cc_nVab,cc_nVab)) - !allocate(v_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab)) + allocate(f_o(cc_nOab), f_v(cc_nVab)) ! Allocation for the diis @@ -90,45 +56,20 @@ subroutine run_ccsd_spin_orb f_v(i) = cc_spin_f_vv(i,i) enddo - ! Bi electronic integrals from list - call wall_time(ti) - ! 
OOOO - call gen_v_spin(nO_m,nO_m,nO_m,nO_m, nO_S,nO_S,nO_S,nO_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nOab,cc_nOab,cc_nOab,cc_nOab, v_oooo) - - ! OOO V - !call gen_v_spin(nV_m,nO_m,nO_m,nO_m, nV_S,nO_S,nO_S,nO_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nVab,cc_nOab,cc_nOab,cc_nOab, v_vooo) - call gen_v_spin(nO_m,nV_m,nO_m,nO_m, nO_S,nV_S,nO_S,nO_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nOab,cc_nVab,cc_nOab,cc_nOab, v_ovoo) - call gen_v_spin(nO_m,nO_m,nV_m,nO_m, nO_S,nO_S,nV_S,nO_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nOab,cc_nOab,cc_nVab,cc_nOab, v_oovo) - call gen_v_spin(nO_m,nO_m,nO_m,nV_m, nO_S,nO_S,nO_S,nV_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nOab,cc_nOab,cc_nOab,cc_nVab, v_ooov) - - ! OO VV - call gen_v_spin(nV_m,nV_m,nO_m,nO_m, nV_S,nV_S,nO_S,nO_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, cc_nVab,cc_nVab,cc_nOab,cc_nOab, v_vvoo) - !call gen_v_spin(nV_m,nO_m,nV_m,nO_m, nV_S,nO_S,nV_S,nO_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nVab,cc_nOab,cc_nVab,cc_nOab, v_vovo) - !call gen_v_spin(nV_m,nO_m,nO_m,nV_m, nV_S,nO_S,nO_S,nV_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nVab,cc_nOab,cc_nOab,cc_nVab, v_voov) - call gen_v_spin(nO_m,nV_m,nV_m,nO_m, nO_S,nV_S,nV_S,nO_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nOab,cc_nVab,cc_nVab,cc_nOab, v_ovvo) - call gen_v_spin(nO_m,nV_m,nO_m,nV_m, nO_S,nV_S,nO_S,nV_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nOab,cc_nVab,cc_nOab,cc_nVab, v_ovov) - call gen_v_spin(nO_m,nO_m,nV_m,nV_m, nO_S,nO_S,nV_S,nV_S, cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nOab,cc_nOab,cc_nVab,cc_nVab, v_oovv) - - ! 
O VVV - !call gen_v_spin(nV_m,nV_m,nV_m,nO_m, nV_S,nV_S,nV_S,nO_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, cc_nVab,cc_nVab,cc_nVab,cc_nOab, v_vvvo) - !call gen_v_spin(nV_m,nV_m,nO_m,nV_m, nV_S,nV_S,nO_S,nV_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, cc_nVab,cc_nVab,cc_nOab,cc_nVab, v_vvov) - !call gen_v_spin(nV_m,nO_m,nV_m,nV_m, nV_S,nO_S,nV_S,nV_S, cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nVab,cc_nOab,cc_nVab,cc_nVab, v_vovv) - !call gen_v_spin(nO_m,nV_m,nV_m,nV_m, nO_S,nV_S,nV_S,nV_S, cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nOab,cc_nVab,cc_nVab,cc_nVab, v_ovvv) - - ! VVVV - !call gen_v_spin(nV_m,nV_m,nV_m,nV_m, nV_S,nV_S,nV_S,nV_S, cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin, cc_nVab,cc_nVab,cc_nVab,cc_nVab, v_vvvv) - call wall_time(tf) - if (cc_dev) then - print*,'Load bi elec int:',tf-ti,'s' - endif ! Init of T t1 = 0d0 call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,t1) - call guess_t2(cc_nOab,cc_nVab,f_o,f_v,v_oovv,t2) + call guess_t2(cc_nOab,cc_nVab,f_o,f_v,cc_spin_v_oovv,t2) call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau) call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t) + call det_energy(psi_det(1,1,cc_ref),uncorr_energy) + print*,'Det energy', uncorr_energy + + call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy) + print*,'guess energy', uncorr_energy+energy, energy + ! Loop init nb_iter = 0 not_converged = .True. @@ -137,11 +78,6 @@ subroutine run_ccsd_spin_orb max_r1 = 0d0 max_r2 = 0d0 - call det_energy(det,uncorr_energy) - print*,'Det energy', uncorr_energy - call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,v_oovv,energy) - print*,'guess energy', uncorr_energy+energy, energy - write(*,'(A77)') ' -----------------------------------------------------------------------------' write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. 
T2 |' write(*,'(A77)') ' -----------------------------------------------------------------------------' @@ -152,38 +88,16 @@ subroutine run_ccsd_spin_orb do while (not_converged) ! Intermediates - call wall_time(tbi) - call wall_time(ti) - call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,v_ooov,v_oovv,cF_oo) - call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,v_oovv,cF_ov) - call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,v_oovv,cF_vv) - call wall_time(tf) - if (cc_dev) then - print*,'Compute cFs:',tf-ti,'s' - endif + call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,cc_spin_v_ooov,cc_spin_v_oovv,cF_oo) + call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,cc_spin_v_oovv,cF_ov) + call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,cc_spin_v_oovv,cF_vv) - call wall_time(ti) - call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,v_oooo,v_ooov,v_oovv,cW_oooo) - call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,v_ovvo,v_oovo,v_oovv,cW_ovvo) - !call compute_cW_vvvv(cc_nOab,cc_nVab,t1,t2,tau,v_vvvv,v_vovv,v_oovv,cW_vvvv) - call wall_time(tf) - if (cc_dev) then - print*,'Compute cFs:',tf-ti,'s' - endif + call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_oooo,cc_spin_v_ooov,cc_spin_v_oovv,cW_oooo) + call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_ovvo,cc_spin_v_oovo,cc_spin_v_oovv,cW_ovvo) ! 
Residuals - call wall_time(ti) - call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,v_oovo,v_ovov,r1) - call wall_time(tf) - if (cc_dev) then - print*,'Compute r1:',tf-ti,'s' - endif - call wall_time(ti) - call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,v_ovoo,v_oovv,v_ovvo,r2) - call wall_time(tf) - if (cc_dev) then - print*,'Compute r2:',tf-ti,'s' - endif + call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,cc_spin_v_oovo,cc_spin_v_ovov,r1) + call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,cc_spin_v_ovoo,cc_spin_v_oovv,cc_spin_v_ovvo,r2) ! Max elements in the residuals max_r1 = maxval(abs(r1(:,:))) @@ -213,7 +127,7 @@ subroutine run_ccsd_spin_orb endif ! Print - call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,v_oovv,energy) + call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy) call wall_time(tfi) write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', & @@ -258,25 +172,13 @@ subroutine run_ccsd_spin_orb deallocate(r1,r2) deallocate(cF_oo,cF_ov,cF_vv) deallocate(cW_oooo,cW_ovvo)!,cW_vvvv) - deallocate(v_oooo) - deallocate(v_ovoo,v_oovo) - deallocate(v_ovvo,v_ovov,v_oovv) double precision :: t_corr t_corr = 0.d0 if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then print*,'CCSD(T) calculation...' call wall_time(ta) - !allocate(v_vvvo(cc_nVab,cc_nVab,cc_nVab,cc_nOab)) - !call gen_v_spin(cc_nV_m,cc_nV_m,cc_nV_m,cc_nO_m, & - ! cc_nV_S,cc_nV_S,cc_nV_S,cc_nO_S, & - ! cc_list_vir_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & - ! cc_nVab,cc_nVab,cc_nVab,cc_nOab, v_vvvo) - - !call ccsd_par_t_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,v_ooov,v_vvoo,v_vvvo,t_corr) - call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,v_ooov,v_vvoo,t_corr) - !print*,'Working on it...' 
- !call abort + call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,cc_spin_v_ooov,cc_spin_v_vvoo,t_corr) call wall_time(tb) print*,'Done' print*,'Time: ',tb-ta, ' s' @@ -290,9 +192,7 @@ subroutine run_ccsd_spin_orb call save_energy(uncorr_energy + energy, t_corr) deallocate(f_o,f_v) - deallocate(v_ooov,v_vvoo,t1,t2) - !deallocate(v_ovvv,v_vvvo,v_vovv) - !deallocate(v_vvvv) + deallocate(t1,t2) end diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 813c186a8..4053c49bd 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -227,6 +227,19 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) END_PROVIDER +BEGIN_PROVIDER [double precision, cc_spin_v_oooo, (cc_nOab, cc_nOab, cc_nOab, cc_nOab)] + + implicit none + + !TODO + call gen_v_spin(cc_nO_m,cc_nO_m,cc_nO_m,cc_nO_m, & + cc_nO_S,cc_nO_S,cc_nO_S,cc_nO_S, & + cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin, & + cc_nOab,cc_nOab,cc_nOab,cc_nOab, & + cc_spin_v_oooo) + +END_PROVIDER + ! vooo BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_nOa)] @@ -305,6 +318,17 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) END_PROVIDER +BEGIN_PROVIDER [ double precision, cc_spin_v_ovoo, (cc_nOab,cc_nVab,cc_nOab,cc_nOab)] + implicit none + + call gen_v_spin(cc_nO_m,cc_nV_m,cc_nO_m,cc_nO_m, & + cc_nO_S,cc_nV_S,cc_nO_S,cc_nO_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, & + cc_nOab,cc_nVab,cc_nOab,cc_nOab, & + cc_spin_v_ovoo) + +END_PROVIDER + ! 
oovo BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_nOa)] @@ -339,6 +363,17 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) END_PROVIDER +BEGIN_PROVIDER [ double precision, cc_spin_v_oovo, (cc_nOab,cc_nOab,cc_nVab,cc_nOab)] + implicit none + + call gen_v_spin(cc_nO_m,cc_nO_m,cc_nV_m,cc_nO_m, & + cc_nO_S,cc_nO_S,cc_nV_S,cc_nO_S, & + cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin, & + cc_nOab,cc_nOab,cc_nVab,cc_nOab, & + cc_spin_v_oovo) +END_PROVIDER + + ! ooov BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_nVa)] @@ -373,6 +408,17 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) END_PROVIDER +BEGIN_PROVIDER [ double precision, cc_spin_v_ooov, (cc_nOab,cc_nOab,cc_nOab,cc_nVab)] + implicit none + + call gen_v_spin(cc_nO_m,cc_nO_m,cc_nO_m,cc_nV_m, & + cc_nO_S,cc_nO_S,cc_nO_S,cc_nV_S, & + cc_list_occ_spin,cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin, & + cc_nOab,cc_nOab,cc_nOab,cc_nVab, & + cc_spin_v_ooov) +END_PROVIDER + + ! vvoo BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_nOa)] @@ -416,6 +462,18 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) END_PROVIDER + +BEGIN_PROVIDER [ double precision, cc_spin_v_vvoo, (cc_nVab,cc_nVab,cc_nOab,cc_nOab)] + implicit none + + call gen_v_spin(cc_nV_m,cc_nV_m,cc_nO_m,cc_nO_m, & + cc_nV_S,cc_nV_S,cc_nO_S,cc_nO_S, & + cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_occ_spin, & + cc_nVab,cc_nVab,cc_nOab,cc_nOab, & + cc_spin_v_vvoo) +END_PROVIDER + + ! 
vovo BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_nOa)] @@ -527,6 +585,16 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) END_PROVIDER +BEGIN_PROVIDER [ double precision, cc_spin_v_ovvo, (cc_nOab,cc_nVab,cc_nVab,cc_nOab) ] + implicit none + + call gen_v_spin(cc_nO_m,cc_nV_m,cc_nV_m,cc_nO_m, & + cc_nO_S,cc_nV_S,cc_nV_S,cc_nO_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin,cc_list_occ_spin, & + cc_nOab,cc_nVab,cc_nVab,cc_nOab, & + cc_spin_v_ovvo) +END_PROVIDER + ! ovov BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_nVa)] @@ -561,6 +629,16 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) END_PROVIDER +BEGIN_PROVIDER [ double precision, cc_spin_v_ovov, (cc_nOab,cc_nVab,cc_nOab,cc_nVab) ] + implicit none + + call gen_v_spin(cc_nO_m,cc_nV_m,cc_nO_m,cc_nV_m, & + cc_nO_S,cc_nV_S,cc_nO_S,cc_nV_S, & + cc_list_occ_spin,cc_list_vir_spin,cc_list_occ_spin,cc_list_vir_spin, & + cc_nOab,cc_nVab,cc_nOab,cc_nVab, & + cc_spin_v_ovov) +END_PROVIDER + ! oovv BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_nVa)] @@ -595,6 +673,16 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) END_PROVIDER +BEGIN_PROVIDER [ double precision, cc_spin_v_oovv, (cc_nOab,cc_nOab,cc_nVab,cc_nVab) ] + implicit none + + call gen_v_spin(cc_nO_m,cc_nO_m,cc_nV_m,cc_nV_m, & + cc_nO_S,cc_nO_S,cc_nV_S,cc_nV_S, & + cc_list_occ_spin,cc_list_occ_spin,cc_list_vir_spin,cc_list_vir_spin, & + cc_nOab,cc_nOab,cc_nVab,cc_nVab, & + cc_spin_v_oovv) +END_PROVIDER + ! vvvo BEGIN_PROVIDER [double precision, cc_space_v_vvvo, (cc_nVa, cc_nVa, cc_nVa, cc_nOa)] @@ -625,6 +713,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) END_PROVIDER + ! 
ovvv BEGIN_PROVIDER [double precision, cc_space_v_ovvv, (cc_nOa, cc_nVa, cc_nVa, cc_nVa)] @@ -677,6 +766,18 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) END_PROVIDER +BEGIN_PROVIDER [double precision, cc_spin_v_oo_chol, (cholesky_mo_num, cc_nOab, cc_nOab)] + + implicit none + integer :: list_occ(cc_nOab) + + list_occ(1:cc_nOa) = cc_list_occ_spin(1:cc_nOa,1) + list_occ(cc_nOa+1:cc_nOab) = cc_list_occ_spin(1:cc_nOb,2) + call gen_v_space_chol(cc_nOab, cc_nOab, list_occ, list_occ, & + cc_spin_v_oo_chol, cholesky_mo_num) + +END_PROVIDER + ! ppqq BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] From 243ee0ed1457d0bb3ea725a2676620584e2b27db Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Feb 2025 13:59:37 +0100 Subject: [PATCH 7/9] Introduced GPU arrays --- src/ccsd/ccsd_space_orb_sub.irp.f | 5 +- src/ccsd/ccsd_spin_orb_sub.irp.f | 89 ++++++++++++++++++------------- 2 files changed, 55 insertions(+), 39 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 7cd4c50c6..af7e92855 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -177,9 +177,11 @@ subroutine run_ccsd_space_orb call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,h_t2) call gpu_upload(h_t2, t2) + deallocate(h_t1, h_t2) - call update_tau_space(nO,nV,h_t1,t1,t2,tau) + call update_tau_space(nO,nV,t1%f,t1,t2,tau) call update_tau_x_space(nO,nV,tau,tau_x) + call det_energy(psi_det(1,1,cc_ref),uncorr_energy) print*,'Det energy', uncorr_energy @@ -310,7 +312,6 @@ subroutine run_ccsd_space_orb call save_energy(uncorr_energy + energy, e_t) - deallocate(h_t1, h_t2) if (do_mo_cholesky) then call gpu_deallocate(d_cc_space_v_oo_chol) call gpu_deallocate(d_cc_space_v_ov_chol) diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index fa0983cd2..16062356e 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -1,6 +1,5 @@ -! 
Code - subroutine run_ccsd_spin_orb + use gpu implicit none @@ -8,8 +7,6 @@ subroutine run_ccsd_spin_orb ! CCSD in spin orbitals END_DOC - double precision, allocatable :: t1(:,:), t2(:,:,:,:), tau(:,:,:,:), tau_t(:,:,:,:) - double precision, allocatable :: r1(:,:), r2(:,:,:,:) double precision, allocatable :: cF_oo(:,:), cF_ov(:,:), cF_vv(:,:) double precision, allocatable :: cW_oooo(:,:,:,:), cW_ovvo(:,:,:,:) !, cW_vvvv(:,:,:,:) @@ -23,6 +20,9 @@ subroutine run_ccsd_spin_orb integer :: nb_iter, i,j,a,b double precision :: uncorr_energy, energy, max_r, max_r1, max_r2, cc, ta, tb,ti,tf,tbi,tfi + type(gpu_double4) :: t2, r2, tau, tau_t + type(gpu_double2) :: t1, r1 + if (do_mo_cholesky) then PROVIDE cholesky_mo_transp FREE cholesky_ao @@ -34,13 +34,18 @@ subroutine run_ccsd_spin_orb call print_det(psi_det(1,1,cc_ref),N_int) ! Allocation - allocate(t1(cc_nOab,cc_nVab), t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau(cc_nOab,cc_nOab,cc_nVab,cc_nVab), tau_t(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) - allocate(r1(cc_nOab,cc_nVab), r2(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) allocate(cF_oo(cc_nOab,cc_nOab), cF_ov(cc_nOab,cc_nVab), cF_vv(cc_nVab,cc_nVab)) allocate(cW_oooo(cc_nOab,cc_nOab,cc_nOab,cc_nOab), cW_ovvo(cc_nOab,cc_nVab,cc_nVab,cc_nOab))!, cW_vvvv(cc_nVab,cc_nVab,cc_nVab,cc_nVab)) allocate(f_o(cc_nOab), f_v(cc_nVab)) + call gpu_allocate(t1, cc_nOab,cc_nVab) + call gpu_allocate(r1, cc_nOab,cc_nVab) + call gpu_allocate(t2, cc_nOab,cc_nOab,cc_nVab,cc_nVab) + call gpu_allocate(r2, cc_nOab,cc_nOab,cc_nVab,cc_nVab) + call gpu_allocate(tau, cc_nOab,cc_nOab,cc_nVab,cc_nVab) + call gpu_allocate(tau_t, cc_nOab,cc_nOab,cc_nVab,cc_nVab) + ! Allocation for the diis if (cc_update_method == 'diis') then allocate(all_err(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth), all_t(cc_nOab*cc_nVab+cc_nOab*cc_nOab*cc_nVab*cc_nVab,cc_diis_depth)) @@ -58,23 +63,29 @@ subroutine run_ccsd_spin_orb ! 
Init of T - t1 = 0d0 - call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,t1) - call guess_t2(cc_nOab,cc_nVab,f_o,f_v,cc_spin_v_oovv,t2) - call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau) - call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t) + double precision, allocatable :: h_t1(:,:), h_t2(:,:,:,:) + allocate(h_t1(cc_nOab,cc_nVab), h_t2(cc_nOab,cc_nOab,cc_nVab,cc_nVab)) + h_t1 = 0d0 + call guess_t1(cc_nOab,cc_nVab,f_o,f_v,cc_spin_f_ov,h_t1) + call gpu_upload(h_t1, t1) + + call guess_t2(cc_nOab,cc_nVab,f_o,f_v,cc_spin_v_oovv,h_t2) + call gpu_upload(h_t2, t2) + + deallocate(h_t1,h_t2) + + call compute_tau_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau%f) + call compute_tau_t_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau_t%f) call det_energy(psi_det(1,1,cc_ref),uncorr_energy) print*,'Det energy', uncorr_energy - call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy) + call ccsd_energy_spin(cc_nOab,cc_nVab,t1%f,t2%f,cc_spin_F_ov,cc_spin_v_oovv,energy) print*,'guess energy', uncorr_energy+energy, energy ! Loop init nb_iter = 0 not_converged = .True. - r1 = 0d0 - r2 = 0d0 max_r1 = 0d0 max_r2 = 0d0 @@ -88,46 +99,46 @@ subroutine run_ccsd_spin_orb do while (not_converged) ! 
Intermediates - call compute_cF_oo(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_oo,cc_spin_F_ov,cc_spin_v_ooov,cc_spin_v_oovv,cF_oo) - call compute_cF_ov(cc_nOab,cc_nVab,t1,cc_spin_F_ov,cc_spin_v_oovv,cF_ov) - call compute_cF_vv(cc_nOab,cc_nVab,t1,tau_t,cc_spin_F_ov,cc_spin_F_vv,cc_spin_v_oovv,cF_vv) + call compute_cF_oo(cc_nOab,cc_nVab,t1%f,tau_t%f,cc_spin_F_oo,cc_spin_F_ov,cc_spin_v_ooov,cc_spin_v_oovv,cF_oo) + call compute_cF_ov(cc_nOab,cc_nVab,t1%f,cc_spin_F_ov,cc_spin_v_oovv,cF_ov) + call compute_cF_vv(cc_nOab,cc_nVab,t1%f,tau_t%f,cc_spin_F_ov,cc_spin_F_vv,cc_spin_v_oovv,cF_vv) - call compute_cW_oooo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_oooo,cc_spin_v_ooov,cc_spin_v_oovv,cW_oooo) - call compute_cW_ovvo(cc_nOab,cc_nVab,t1,t2,tau,cc_spin_v_ovvo,cc_spin_v_oovo,cc_spin_v_oovv,cW_ovvo) + call compute_cW_oooo(cc_nOab,cc_nVab,t1%f,t2%f,tau%f,cc_spin_v_oooo,cc_spin_v_ooov,cc_spin_v_oovv,cW_oooo) + call compute_cW_ovvo(cc_nOab,cc_nVab,t1%f,t2%f,tau%f,cc_spin_v_ovvo,cc_spin_v_oovo,cc_spin_v_oovv,cW_ovvo) ! Residuals - call compute_r1_spin(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,cc_spin_v_oovo,cc_spin_v_ovov,r1) - call compute_r2_spin(cc_nOab,cc_nVab,t1,t2,tau,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,cc_spin_v_ovoo,cc_spin_v_oovv,cc_spin_v_ovvo,r2) + call compute_r1_spin(cc_nOab,cc_nVab,t1%f,t2%f,f_o,f_v,cc_spin_F_ov,cF_oo,cF_ov,cF_vv,cc_spin_v_oovo,cc_spin_v_ovov,r1%f) + call compute_r2_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau%f,f_o,f_v,cF_oo,cF_ov,cF_vv,cW_oooo,cW_ovvo,cc_spin_v_ovoo,cc_spin_v_oovv,cc_spin_v_ovvo,r2%f) ! Max elements in the residuals - max_r1 = maxval(abs(r1(:,:))) - max_r2 = maxval(abs(r2(:,:,:,:))) + max_r1 = maxval(abs(r1%f(:,:))) + max_r2 = maxval(abs(r2%f(:,:,:,:))) max_r = max(max_r1,max_r2) call wall_time(ti) ! 
Update if (cc_update_method == 'diis') then - !call update_t_ccsd(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) - !call update_t_ccsd_diis(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) - call update_t_ccsd_diis_v3(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err,all_t) + !call update_t_ccsd(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1%f,r2%f,t1%f,t2%f,all_err1,all_err2,all_t1%f,all_t2) + !call update_t_ccsd_diis(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1%f,r2%f,t1%f,t2%f,all_err1,all_err2,all_t1%f,all_t2) + call update_t_ccsd_diis_v3(cc_nOab,cc_nVab,nb_iter,f_o,f_v,r1%f,r2%f,t1%f,t2%f,all_err,all_t) ! Standard update as T = T - Delta elseif (cc_update_method == 'none') then - call update_t1(cc_nOab,cc_nVab,f_o,f_v,r1,t1) - call update_t2(cc_nOab,cc_nVab,f_o,f_v,r2,t2) + call update_t1(cc_nOab,cc_nVab,f_o,f_v,r1%f,t1%f) + call update_t2(cc_nOab,cc_nVab,f_o,f_v,r2%f,t2%f) else print*,'Unkonw cc_method_method: '//cc_update_method endif - call compute_tau_spin(cc_nOab,cc_nVab,t1,t2,tau) - call compute_tau_t_spin(cc_nOab,cc_nVab,t1,t2,tau_t) + call compute_tau_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau%f) + call compute_tau_t_spin(cc_nOab,cc_nVab,t1%f,t2%f,tau_t%f) call wall_time(tf) if (cc_dev) then print*,'Update:',tf-ti,'s' endif ! 
Print - call ccsd_energy_spin(cc_nOab,cc_nVab,t1,t2,cc_spin_F_ov,cc_spin_v_oovv,energy) + call ccsd_energy_spin(cc_nOab,cc_nVab,t1%f,t2%f,cc_spin_F_ov,cc_spin_v_oovv,energy) call wall_time(tfi) write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', & @@ -159,8 +170,8 @@ subroutine run_ccsd_spin_orb print*,'' if (write_amplitudes) then - call write_t1(cc_nOab,cc_nVab,t1) - call write_t2(cc_nOab,cc_nVab,t2) + call write_t1(cc_nOab,cc_nVab,t1%f) + call write_t2(cc_nOab,cc_nVab,t2%f) call ezfio_set_utils_cc_io_amplitudes('Read') endif @@ -168,8 +179,6 @@ subroutine run_ccsd_spin_orb if (cc_update_method == 'diis') then deallocate(all_err,all_t) endif - deallocate(tau,tau_t) - deallocate(r1,r2) deallocate(cF_oo,cF_ov,cF_vv) deallocate(cW_oooo,cW_ovvo)!,cW_vvvv) @@ -178,7 +187,7 @@ subroutine run_ccsd_spin_orb if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then print*,'CCSD(T) calculation...' call wall_time(ta) - call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1,t2,f_o,f_v,cc_spin_f_ov,cc_spin_v_ooov,cc_spin_v_vvoo,t_corr) + call ccsd_par_t_spin_v2(cc_nOab,cc_nVab,t1%f,t2%f,f_o,f_v,cc_spin_f_ov,cc_spin_v_ooov,cc_spin_v_vvoo,t_corr) call wall_time(tb) print*,'Done' print*,'Time: ',tb-ta, ' s' @@ -192,7 +201,13 @@ subroutine run_ccsd_spin_orb call save_energy(uncorr_energy + energy, t_corr) deallocate(f_o,f_v) - deallocate(t1,t2) + + call gpu_deallocate(t1) + call gpu_deallocate(r1) + call gpu_deallocate(t2) + call gpu_deallocate(r2) + call gpu_deallocate(tau) + call gpu_deallocate(tau_t) end From 6b597c5ceeb7ccb12e32f477c36989c957c7189a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Feb 2025 11:49:42 +0100 Subject: [PATCH 8/9] Optimization in CASSCF --- src/casscf_cipsi/chol_bielec.irp.f | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/casscf_cipsi/chol_bielec.irp.f b/src/casscf_cipsi/chol_bielec.irp.f index f69832c11..a22ad7f32 100644 --- a/src/casscf_cipsi/chol_bielec.irp.f +++ 
b/src/casscf_cipsi/chol_bielec.irp.f @@ -191,10 +191,15 @@ double precision function bielec_PQxx_no(i_mo, j_mo, i_ca, j_ca) END_DOC integer, intent(in) :: i_ca, j_ca, i_mo, j_mo integer :: ii_ca, jj_ca - double precision :: bielec_no_basis ii_ca = list_core_inact_act(i_ca) jj_ca = list_core_inact_act(j_ca) - bielec_PQxx_no = bielec_no_basis(i_mo,j_mo,ii_ca,jj_ca) +! double precision :: bielec_no_basis +! bielec_PQxx_no = bielec_no_basis(i_mo,j_mo,ii_ca,jj_ca) + integer :: i + bielec_PQxx_no = 0.d0 + do i = 1, cholesky_mo_num + bielec_PQxx_no = bielec_PQxx_no + cholesky_no_total_transp(i,i_mo, j_mo) * cholesky_no_total_transp(i,ii_ca,jj_ca) + enddo end double precision function bielec_PxxQ_no(i_mo, j_ca, i_ca, j_mo) @@ -206,10 +211,15 @@ double precision function bielec_PxxQ_no(i_mo, j_ca, i_ca, j_mo) END_DOC integer, intent(in) :: i_ca, j_ca, i_mo, j_mo integer :: ii_ca, jj_ca - double precision :: bielec_no_basis ii_ca = list_core_inact_act(i_ca) jj_ca = list_core_inact_act(j_ca) - bielec_PxxQ_no = bielec_no_basis(i_mo, jj_ca, ii_ca, j_mo) + double precision :: bielec_no_basis +! 
bielec_PxxQ_no = bielec_no_basis(i_mo, jj_ca, ii_ca, j_mo) + integer :: i + bielec_PxxQ_no = 0.d0 + do i = 1, cholesky_mo_num + bielec_PxxQ_no = bielec_PxxQ_no + cholesky_no_total_transp(i,i_mo, jj_ca) * cholesky_no_total_transp(i,ii_ca,j_mo) + enddo end From 8c7184fb774706add1e56ed536cb3f18126aa79d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Feb 2025 11:50:11 +0100 Subject: [PATCH 9/9] Speed up cache in integrals --- src/davidson/u0_hs2_u0.irp.f | 2 +- src/mo_two_e_ints/map_integrals.irp.f | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/davidson/u0_hs2_u0.irp.f b/src/davidson/u0_hs2_u0.irp.f index f2ce7aa9a..dd5e01ebd 100644 --- a/src/davidson/u0_hs2_u0.irp.f +++ b/src/davidson/u0_hs2_u0.irp.f @@ -158,7 +158,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift, double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) - PROVIDE ref_bitmask_energy N_int + PROVIDE ref_bitmask_energy N_int all_mo_integrals select case (N_int) case (1) diff --git a/src/mo_two_e_ints/map_integrals.irp.f b/src/mo_two_e_ints/map_integrals.irp.f index 5b2338993..b5f78b7b8 100644 --- a/src/mo_two_e_ints/map_integrals.irp.f +++ b/src/mo_two_e_ints/map_integrals.irp.f @@ -81,11 +81,14 @@ subroutine insert_into_mo_integrals_map(n_integrals, & integer(key_kind) :: idx real(integral_kind) :: integral FREE ao_integrals_cache + if (do_mo_cholesky) then call set_multiple_levels_omp(.False.) 
- !$OMP PARALLEL DO PRIVATE (k,l,ii) + + + !$OMP PARALLEL DO PRIVATE(k,l,ii) SCHEDULE(dynamic) do l=mo_integrals_cache_min,mo_integrals_cache_max do k=mo_integrals_cache_min,mo_integrals_cache_max ii = int(l-mo_integrals_cache_min,8) ii = ior( shiftl(ii,mo_integrals_cache_shift), int(k-mo_integrals_cache_min,8)) @@ -101,7 +104,7 @@ subroutine insert_into_mo_integrals_map(n_integrals, & !$OMP END PARALLEL DO else - !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral) + !$OMP PARALLEL DO PRIVATE (i,j,k,l,idx,ii,integral) SCHEDULE(dynamic) do l=mo_integrals_cache_min,mo_integrals_cache_max do k=mo_integrals_cache_min,mo_integrals_cache_max do j=mo_integrals_cache_min,mo_integrals_cache_max