Version 5.2.0, Revision 23279, Hash 6ac3a88

MODIFIED * include/version/version.m4 dipoles/DIPOLE_overlaps.F modules/mod_interfaces.F wf_and_fft/WF_shift_kpoint.F wf_and_fft/WF_symm_kpoint.F

Bugs:
- [yambo] Fixed covariant dipoles with CUDA (issue yambo-code#106)

Patch sent by: Davide Sangalli <[email protected]>
sangallidavide · Jul 6, 2024 · 092f32e · 092f32e
1 parent 6ac3a88
commit 092f32e
Show file tree

Hide file tree

Showing 5 changed files with 56 additions and 54 deletions.
diff --git a/include/version/version.m4 b/include/version/version.m4
@@ -1,9 +1,9 @@
-AC_INIT(Yambo, 5.2.0 r.23260 h.b090d22a1, [email protected])
+AC_INIT(Yambo, 5.2.0 r.23279 h.6ac3a88014, [email protected])
 SVERSION="5"
 SSUBVERSION="2"
 SPATCHLEVEL="0"
-SREVISION="23260"
-SHASH="b090d22a1"
+SREVISION="23279"
+SHASH="6ac3a88014"
 AC_SUBST(SVERSION)
 AC_SUBST(SSUBVERSION)
 AC_SUBST(SPATCHLEVEL)

diff --git a/src/dipoles/DIPOLE_overlaps.F b/src/dipoles/DIPOLE_overlaps.F
@@ -22,6 +22,7 @@ subroutine DIPOLE_overlaps(Xk,Dip)
  use electrons,         ONLY:n_spinor,n_sp_pol
  use wrapper,           ONLY:Vstar_dot_V
  use wave_func,         ONLY:wf_ng_1st_BZ,wf_ng_overlaps,wf_ng
+ use deviceXlib_m,      ONLY:dev_memcpy
  use parallel_m,        ONLY:PAR_IND_DIPk_bz,PAR_IND_DIPk_bz_ID,PAR_COM_DIPk_ibz_A2A, &
 &                            PAR_IND_CON_BANDS_OVLP,PAR_IND_VAL_BANDS_OVLP,           &
 &                            PAR_IND_OVLPk_ibz,PAR_COM_DIPk_ibz_INDEX
@@ -40,9 +41,10 @@ subroutine DIPOLE_overlaps(Xk,Dip)
  integer           :: id,idx_kp(3),idx_k(3),shift(3),g0_idx(3,2)
  real(SP)          :: g0_length(3)
  !
- complex(SP), allocatable DEV_ATTR :: WF_symm(:,:,:,:)
- complex(SP), allocatable DEV_ATTR :: WF_ik(:,:,:,:)
- complex(SP), allocatable DEV_ATTR :: WF_ikp(:,:,:,:)
+ complex(SP), allocatable DEV_ATTR :: WF_symm(:,:,:)
+ complex(SP), allocatable, target DEV_ATTR :: WF_ik(:,:,:)
+ complex(SP), allocatable DEV_ATTR :: WF_ikp(:,:,:)
+ complex(SP), pointer DEV_ATTR :: WF_tmp(:,:,:)
  !
  logical           :: USE_shifted_wf
  !
@@ -73,9 +75,9 @@ subroutine DIPOLE_overlaps(Xk,Dip)
  !
  call PARALLEL_WF_index( )
  !
- YAMBO_ALLOC(WF_symm,     (wf_ng_1st_BZ,n_spinor,1,n_sp_pol))
- YAMBO_ALLOC(WF_ikp,    (wf_ng_overlaps,n_spinor,1,n_sp_pol))
- YAMBO_ALLOC(WF_ik,     (wf_ng_overlaps,n_spinor,Dip%ib(2),n_sp_pol))
+ YAMBO_ALLOC(WF_symm,   (wf_ng_1st_BZ,n_spinor,1))
+ YAMBO_ALLOC(WF_ikp,    (wf_ng_overlaps,n_spinor,1))
+ YAMBO_ALLOC(WF_ik,     (wf_ng_overlaps,n_spinor,Dip%ib(2)))
  !
 !$OMP WORKSHARE
  DIP_S=cZERO
@@ -94,13 +96,18 @@ subroutine DIPOLE_overlaps(Xk,Dip)
      ik = Xk%sstar(ikbz,1)
      is = Xk%sstar(ikbz,2)
      !
-     call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/ik,ik/),space='G',title='-Oscill/G space/Overlaps',quiet=.true.)
+     call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/ik,ik/),(/i_sp_pol,i_sp_pol/),&
+     &            space='G',title='-Oscill/G space/Overlaps',quiet=.true.)
      !
      do ib=1,Dip%ib(2)
        !
-       call WF_symm_kpoint((/ib,ib/),ikbz,Xk,WF_symm)
+       call WF_symm_kpoint((/ib,ib/),ikbz,i_sp_pol,Xk,WF_symm)
+       !
+       WF_tmp=>WF_ik(:,:,ib:ib)
        !  
-       call WF_shift_kpoint((/ib,ib/),ikbz,WF_shifts(ikbz,:),Xk,WF_symm,WF_ik(:,:,ib:ib,:))
+       call WF_shift_kpoint((/ib,ib/),ikbz,i_sp_pol,WF_shifts(ikbz,:),Xk,WF_symm,WF_tmp)
+       !
+       nullify(WF_tmp)
        !
      enddo
      !
@@ -128,7 +135,8 @@ subroutine DIPOLE_overlaps(Xk,Dip)
          ikp = Xk%sstar(ikbzp,1)
          isp = Xk%sstar(ikbzp,2)
          !
-         call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/ikp,ikp/),space='G',title='-Oscill/G space/Overlaps',quiet=.true.)
+         call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/ikp,ikp/),(/i_sp_pol,i_sp_pol/),&
+         &              space='G',title='-Oscill/G space/Overlaps',quiet=.true.)
          !
          ! Shift the wave-function by a G-vector if the neighbor is out of the BZ (USE_shifted_wf=.true.)
          shift=WF_shifts(ikbzp,:)
@@ -140,12 +148,12 @@ subroutine DIPOLE_overlaps(Xk,Dip)
          !
          do ibp=1,Dip%ib(2) 
            if(.not.PAR_IND_CON_BANDS_OVLP%element_1D(ibp)) cycle
-           call WF_symm_kpoint((/ibp,ibp/),ikbzp,Xk,WF_symm)
-           call WF_shift_kpoint((/ibp,ibp/),ikbzp,shift,Xk,WF_symm,WF_ikp)
+           call WF_symm_kpoint((/ibp,ibp/),ikbzp,i_sp_pol,Xk,WF_symm)
+           call WF_shift_kpoint((/ibp,ibp/),ikbzp,i_sp_pol,shift,Xk,WF_symm,WF_ikp)
            do ib=1,Dip%ib(2)
              if(.not.PAR_IND_VAL_BANDS_OVLP%element_1D(ib)) cycle
              DIP_S(ib,ibp,id+(istep-1)*3,ikbz,i_sp_pol)= &
-             &  Vstar_dot_V(wf_ng_overlaps*n_spinor,WF_ik(:,:,ib,i_sp_pol),WF_ikp(:,:,1,i_sp_pol))
+             &  Vstar_dot_V(wf_ng_overlaps*n_spinor,WF_ik(:,:,ib),WF_ikp(:,:,1))
            enddo ! ib
          enddo ! ibp
          !

diff --git a/src/modules/mod_interfaces.F b/src/modules/mod_interfaces.F
@@ -296,42 +296,42 @@ end function TDDFT_ALDA_eh_space_R_kernel
  !
  interface WF_shift_kpoint
    !
-   subroutine DEV_SUB_ALT(WF_shift_kpoint)(b_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k_out)
+   subroutine DEV_SUB_ALT(WF_shift_kpoint)(b_to_shift,ikbz,i_sp_pol,wf_shift,Xk,WF_k_in,WF_k_out)
      use pars,          ONLY:SP
-     use electrons,     ONLY:n_spinor,n_sp_pol
+     use electrons,     ONLY:n_spinor
      use wave_func,     ONLY:wf_ng_1st_BZ,wf_ng_overlaps
      use R_lattice,     ONLY:bz_samp
-     integer,       intent(in) :: wf_shift(3),ikbz,b_to_shift(2)
+     integer,       intent(in) :: wf_shift(3),ikbz,i_sp_pol,b_to_shift(2)
      type(bz_samp), intent(in) :: Xk
      complex(SP), intent(in)  DEV_ATTR :: &
-&         WF_k_in (wf_ng_1st_BZ,  n_spinor,b_to_shift(1):b_to_shift(2),n_sp_pol)
+&         WF_k_in (wf_ng_1st_BZ,  n_spinor,b_to_shift(1):b_to_shift(2))
      complex(SP), intent(out) DEV_ATTR :: &
-&         WF_k_out(wf_ng_overlaps,n_spinor,b_to_shift(1):b_to_shift(2),n_sp_pol)
+&         WF_k_out(wf_ng_overlaps,n_spinor,b_to_shift(1):b_to_shift(2))
    end subroutine
  end interface 
  !
  interface WF_symm_kpoint
    !
-   subroutine WF_symm_kpoint_cpu(b_to_load,ikbz,Xk,WF_k_out)
+   subroutine WF_symm_kpoint_cpu(b_to_load,ikbz,i_sp_pol,Xk,WF_k_out)
      use pars,          ONLY:SP 
      use R_lattice,     ONLY:bz_samp
      use wave_func,     ONLY:wf_ng_1st_BZ
-     use electrons,     ONLY:n_spinor,n_sp_pol
-     integer,       intent(in)  :: ikbz,b_to_load(2)
+     use electrons,     ONLY:n_spinor
+     integer,       intent(in)  :: ikbz,i_sp_pol,b_to_load(2)
      type(bz_samp), intent(in)  :: Xk
-     complex(SP),   intent(out) :: WF_k_out(wf_ng_1st_BZ,n_spinor,b_to_load(1):b_to_load(2),n_sp_pol)
+     complex(SP),   intent(out) :: WF_k_out(wf_ng_1st_BZ,n_spinor,b_to_load(1):b_to_load(2))
    end subroutine WF_symm_kpoint_cpu
    !
 #ifdef _CUDA
-   subroutine WF_symm_kpoint_gpu(b_to_load,ikbz,Xk,WF_k_out)
+   subroutine WF_symm_kpoint_gpu(b_to_load,ikbz,i_sp_pol,Xk,WF_k_out)
      use pars,          ONLY:SP 
      use R_lattice,     ONLY:bz_samp
      use wave_func,     ONLY:wf_ng_1st_BZ
-     use electrons,     ONLY:n_spinor,n_sp_pol
-     integer,       intent(in)  :: ikbz,b_to_load(2)
+     use electrons,     ONLY:n_spinor
+     integer,       intent(in)  :: ikbz,i_sp_pol,b_to_load(2)
      type(bz_samp), intent(in)  :: Xk
      complex(SP),   intent(out) DEV_ATTR :: &
-&       WF_k_out(wf_ng_1st_BZ,n_spinor,b_to_load(1):b_to_load(2),n_sp_pol)
+&       WF_k_out(wf_ng_1st_BZ,n_spinor,b_to_load(1):b_to_load(2))
    end subroutine WF_symm_kpoint_gpu
 #endif
  end interface

diff --git a/src/wf_and_fft/WF_shift_kpoint.F b/src/wf_and_fft/WF_shift_kpoint.F
@@ -7,7 +7,7 @@
 !
 #include<dev_defs.h>
 !
-subroutine DEV_SUB_ALT(WF_shift_kpoint)(b_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k_out)
+subroutine DEV_SUB_ALT(WF_shift_kpoint)(b_to_shift,ikbz,i_sp_pol,wf_shift,Xk,WF_k_in,WF_k_out)
  !
  use pars,          ONLY:SP,cZERO
  use electrons,     ONLY:n_spinor,n_sp_pol
@@ -18,18 +18,18 @@ subroutine DEV_SUB_ALT(WF_shift_kpoint)(b_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k
  !
  implicit none
  !
- integer,       intent(in) :: wf_shift(3),ikbz,b_to_shift(2)
+ integer,       intent(in) :: wf_shift(3),ikbz,i_sp_pol,b_to_shift(2)
  type(bz_samp), intent(in) :: Xk
- complex(SP), intent(in)  DEV_ATTR :: WF_k_in (wf_ng_1st_BZ,  n_spinor,b_to_shift(1):b_to_shift(2),n_sp_pol)
- complex(SP), intent(out) DEV_ATTR :: WF_k_out(wf_ng_overlaps,n_spinor,b_to_shift(1):b_to_shift(2),n_sp_pol)
+ complex(SP), intent(in)  DEV_ATTR :: WF_k_in (wf_ng_1st_BZ,  n_spinor,b_to_shift(1):b_to_shift(2))
+ complex(SP), intent(out) DEV_ATTR :: WF_k_out(wf_ng_overlaps,n_spinor,b_to_shift(1):b_to_shift(2))
  !
  ! Work space
  !
  integer :: id,ik,is
- complex(SP) DEV_ATTR :: WF_tmp(wf_ng_overlaps,n_spinor,b_to_shift(1):b_to_shift(2),n_sp_pol)
+ complex(SP) DEV_ATTR :: WF_tmp(wf_ng_overlaps,n_spinor,b_to_shift(1):b_to_shift(2))
  integer :: ig,igp,i_b_ref
  integer :: g0_idx(3,2),g0_idx_val
- integer :: i_spinor,i_b,i_sp_pol
+ integer :: i_spinor,i_b
  !
  ik = Xk%sstar(ikbz,1)
  is = Xk%sstar(ikbz,2)
@@ -48,21 +48,19 @@ subroutine DEV_SUB_ALT(WF_shift_kpoint)(b_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k
      g0_idx_val=k_map%g0_idx(id,WF_shift(id))
      !
 #if defined _CUDA
-     !$cuf kernel do(4) <<<*,*>>>
+     !$cuf kernel do(3) <<<*,*>>>
 #endif
-     do i_sp_pol=1,n_sp_pol
      do i_b=b_to_shift(1),b_to_shift(2)
      do i_spinor=1,n_spinor
      do ig=1,wf_ng_1st_BZ
        !
-       if(WF_tmp(ig,1,i_b_ref,1)==cZERO) cycle
+       if(WF_tmp(ig,1,i_b_ref)==cZERO) cycle
        igp=DEV_VAR(G_m_G)(ig,g0_idx_val)
-       WF_k_out(igp,i_spinor,i_b,i_sp_pol)=WF_tmp(ig,i_spinor,i_b,i_sp_pol)
+       WF_k_out(igp,i_spinor,i_b)=WF_tmp(ig,i_spinor,i_b)
        !
      enddo
      enddo
      enddo
-     enddo
      !
    else
      call dev_memcpy(WF_k_out,WF_tmp)

diff --git a/src/wf_and_fft/WF_symm_kpoint.F b/src/wf_and_fft/WF_symm_kpoint.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): MG CA DS AF
 !
-subroutine DEV_SUB_ALT(WF_symm_kpoint)(b_to_load,ikbz,Xk,WF_k_out)
+subroutine DEV_SUB_ALT(WF_symm_kpoint)(b_to_load,ikbz,i_sp_pol,Xk,WF_k_out)
  !
  ! INCLUDED in: WF_symm_kpoint_incl.F 
  !
@@ -18,14 +18,14 @@ subroutine DEV_SUB_ALT(WF_symm_kpoint)(b_to_load,ikbz,Xk,WF_k_out)
  !
  implicit none
  !
- integer,       intent(in)  :: ikbz,b_to_load(2)
+ integer,       intent(in)  :: ikbz,i_sp_pol,b_to_load(2)
  type(bz_samp), intent(in)  :: Xk
- complex(SP),   intent(out) DEV_ATTR :: WF_k_out(wf_ng_1st_BZ,n_spinor,b_to_load(1):b_to_load(2),n_sp_pol)
+ complex(SP),   intent(out) DEV_ATTR :: WF_k_out(wf_ng_1st_BZ,n_spinor,b_to_load(1):b_to_load(2))
  !
  ! Work space
  !
  complex(SP), pointer DEV_ATTR :: WF_p(:,:,:)
- integer :: ik,is,i_sp_pol,ib,iwf
+ integer :: ik,is,ib,iwf
  integer :: i_g,i_spinor
  !
  ik = Xk%sstar(ikbz,1)
@@ -36,18 +36,15 @@ subroutine DEV_SUB_ALT(WF_symm_kpoint)(b_to_load,ikbz,Xk,WF_k_out)
  !
  if(is==idt_index) then
    !  
-   do i_sp_pol=1,n_sp_pol
      do ib=b_to_load(1),b_to_load(2)
        !
        iwf=WF%index(ib,ik,i_sp_pol)
-       call dev_memcpy(WF_k_out(:,:,ib,i_sp_pol),DEV_VAR(WF%c)(:,:,iwf))
+       call dev_memcpy(WF_k_out(:,:,ib),DEV_VAR(WF%c)(:,:,iwf))
        !
      enddo
-   enddo
    !
  else
    !
-   do i_sp_pol=1,n_sp_pol
      do ib=b_to_load(1),b_to_load(2)
        !
        iwf=WF%index(ib,ik,i_sp_pol)
@@ -59,7 +56,7 @@ subroutine DEV_SUB_ALT(WF_symm_kpoint)(b_to_load,ikbz,Xk,WF_k_out)
 #endif
          do i_spinor=1,n_spinor
          do i_g=1,wf_ng
-           WF_k_out(DEV_VAR(g_rot)(i_g,is),i_spinor,ib,i_sp_pol)=WF_p(i_g,i_spinor,iwf)
+           WF_k_out(DEV_VAR(g_rot)(i_g,is),i_spinor,ib)=WF_p(i_g,i_spinor,iwf)
          enddo
          enddo
          !
@@ -69,15 +66,14 @@ subroutine DEV_SUB_ALT(WF_symm_kpoint)(b_to_load,ikbz,Xk,WF_k_out)
          !$cuf kernel do(1) <<<*,*>>>
 #endif
          do i_g=1,wf_ng
-           WF_k_out(DEV_VAR(g_rot)(i_g,is),1,ib,i_sp_pol)=DEV_VAR(spin_sop)(1,1,is)*WF_p(i_g,1,iwf)+ &
-&                                                         DEV_VAR(spin_sop)(1,2,is)*WF_p(i_g,2,iwf)
-           WF_k_out(DEV_VAR(g_rot)(i_g,is),2,ib,i_sp_pol)=DEV_VAR(spin_sop)(2,1,is)*WF_p(i_g,1,iwf)+ &
-&                                                         DEV_VAR(spin_sop)(2,2,is)*WF_p(i_g,2,iwf)
+           WF_k_out(DEV_VAR(g_rot)(i_g,is),1,ib)=DEV_VAR(spin_sop)(1,1,is)*WF_p(i_g,1,iwf)+ &
+&                                                DEV_VAR(spin_sop)(1,2,is)*WF_p(i_g,2,iwf)
+           WF_k_out(DEV_VAR(g_rot)(i_g,is),2,ib)=DEV_VAR(spin_sop)(2,1,is)*WF_p(i_g,1,iwf)+ &
+&                                                DEV_VAR(spin_sop)(2,2,is)*WF_p(i_g,2,iwf)
          enddo
        endif
        !
      enddo
-   enddo
    !
    if(is>nsym/(1+i_time_rev)) call dev_conjg(WF_k_out)
    !