Skip to content

Commit

Permalink
Version 5.2.0, Revision 23279, Hash 6ac3a88
Browse files Browse the repository at this point in the history
MODIFIED *  include/version/version.m4 dipoles/DIPOLE_overlaps.F modules/mod_interfaces.F wf_and_fft/WF_shift_kpoint.F wf_and_fft/WF_symm_kpoint.F

Bugs:
- [yambo] Fixed covariant dipoles with CUDA (issue yambo-code#106)

Patch sent by:  Davide Sangalli <[email protected]>
  • Loading branch information
sangallidavide committed Jul 6, 2024
1 parent 6ac3a88 commit 092f32e
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 54 deletions.
6 changes: 3 additions & 3 deletions include/version/version.m4
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
AC_INIT(Yambo, 5.2.0 r.23260 h.b090d22a1, [email protected])
AC_INIT(Yambo, 5.2.0 r.23279 h.6ac3a88014, [email protected])
SVERSION="5"
SSUBVERSION="2"
SPATCHLEVEL="0"
SREVISION="23260"
SHASH="b090d22a1"
SREVISION="23279"
SHASH="6ac3a88014"
AC_SUBST(SVERSION)
AC_SUBST(SSUBVERSION)
AC_SUBST(SPATCHLEVEL)
Expand Down
34 changes: 21 additions & 13 deletions src/dipoles/DIPOLE_overlaps.F
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ subroutine DIPOLE_overlaps(Xk,Dip)
use electrons, ONLY:n_spinor,n_sp_pol
use wrapper, ONLY:Vstar_dot_V
use wave_func, ONLY:wf_ng_1st_BZ,wf_ng_overlaps,wf_ng
use deviceXlib_m, ONLY:dev_memcpy
use parallel_m, ONLY:PAR_IND_DIPk_bz,PAR_IND_DIPk_bz_ID,PAR_COM_DIPk_ibz_A2A, &
& PAR_IND_CON_BANDS_OVLP,PAR_IND_VAL_BANDS_OVLP, &
& PAR_IND_OVLPk_ibz,PAR_COM_DIPk_ibz_INDEX
Expand All @@ -40,9 +41,10 @@ subroutine DIPOLE_overlaps(Xk,Dip)
integer :: id,idx_kp(3),idx_k(3),shift(3),g0_idx(3,2)
real(SP) :: g0_length(3)
!
complex(SP), allocatable DEV_ATTR :: WF_symm(:,:,:,:)
complex(SP), allocatable DEV_ATTR :: WF_ik(:,:,:,:)
complex(SP), allocatable DEV_ATTR :: WF_ikp(:,:,:,:)
complex(SP), allocatable DEV_ATTR :: WF_symm(:,:,:)
complex(SP), allocatable, target DEV_ATTR :: WF_ik(:,:,:)
complex(SP), allocatable DEV_ATTR :: WF_ikp(:,:,:)
complex(SP), pointer DEV_ATTR :: WF_tmp(:,:,:)
!
logical :: USE_shifted_wf
!
Expand Down Expand Up @@ -73,9 +75,9 @@ subroutine DIPOLE_overlaps(Xk,Dip)
!
call PARALLEL_WF_index( )
!
YAMBO_ALLOC(WF_symm, (wf_ng_1st_BZ,n_spinor,1,n_sp_pol))
YAMBO_ALLOC(WF_ikp, (wf_ng_overlaps,n_spinor,1,n_sp_pol))
YAMBO_ALLOC(WF_ik, (wf_ng_overlaps,n_spinor,Dip%ib(2),n_sp_pol))
YAMBO_ALLOC(WF_symm, (wf_ng_1st_BZ,n_spinor,1))
YAMBO_ALLOC(WF_ikp, (wf_ng_overlaps,n_spinor,1))
YAMBO_ALLOC(WF_ik, (wf_ng_overlaps,n_spinor,Dip%ib(2)))
!
!$OMP WORKSHARE
DIP_S=cZERO
Expand All @@ -94,13 +96,18 @@ subroutine DIPOLE_overlaps(Xk,Dip)
ik = Xk%sstar(ikbz,1)
is = Xk%sstar(ikbz,2)
!
call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/ik,ik/),space='G',title='-Oscill/G space/Overlaps',quiet=.true.)
call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/ik,ik/),(/i_sp_pol,i_sp_pol/),&
& space='G',title='-Oscill/G space/Overlaps',quiet=.true.)
!
do ib=1,Dip%ib(2)
!
call WF_symm_kpoint((/ib,ib/),ikbz,Xk,WF_symm)
call WF_symm_kpoint((/ib,ib/),ikbz,i_sp_pol,Xk,WF_symm)
!
WF_tmp=>WF_ik(:,:,ib:ib)
!
call WF_shift_kpoint((/ib,ib/),ikbz,WF_shifts(ikbz,:),Xk,WF_symm,WF_ik(:,:,ib:ib,:))
call WF_shift_kpoint((/ib,ib/),ikbz,i_sp_pol,WF_shifts(ikbz,:),Xk,WF_symm,WF_tmp)
!
nullify(WF_tmp)
!
enddo
!
Expand Down Expand Up @@ -128,7 +135,8 @@ subroutine DIPOLE_overlaps(Xk,Dip)
ikp = Xk%sstar(ikbzp,1)
isp = Xk%sstar(ikbzp,2)
!
call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/ikp,ikp/),space='G',title='-Oscill/G space/Overlaps',quiet=.true.)
call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/ikp,ikp/),(/i_sp_pol,i_sp_pol/),&
& space='G',title='-Oscill/G space/Overlaps',quiet=.true.)
!
! Shift the wave-function by a G-vector if the neighbor is out of the BZ (USE_shifted_wf=.true.)
shift=WF_shifts(ikbzp,:)
Expand All @@ -140,12 +148,12 @@ subroutine DIPOLE_overlaps(Xk,Dip)
!
do ibp=1,Dip%ib(2)
if(.not.PAR_IND_CON_BANDS_OVLP%element_1D(ibp)) cycle
call WF_symm_kpoint((/ibp,ibp/),ikbzp,Xk,WF_symm)
call WF_shift_kpoint((/ibp,ibp/),ikbzp,shift,Xk,WF_symm,WF_ikp)
call WF_symm_kpoint((/ibp,ibp/),ikbzp,i_sp_pol,Xk,WF_symm)
call WF_shift_kpoint((/ibp,ibp/),ikbzp,i_sp_pol,shift,Xk,WF_symm,WF_ikp)
do ib=1,Dip%ib(2)
if(.not.PAR_IND_VAL_BANDS_OVLP%element_1D(ib)) cycle
DIP_S(ib,ibp,id+(istep-1)*3,ikbz,i_sp_pol)= &
& Vstar_dot_V(wf_ng_overlaps*n_spinor,WF_ik(:,:,ib,i_sp_pol),WF_ikp(:,:,1,i_sp_pol))
& Vstar_dot_V(wf_ng_overlaps*n_spinor,WF_ik(:,:,ib),WF_ikp(:,:,1))
enddo ! ib
enddo ! ibp
!
Expand Down
26 changes: 13 additions & 13 deletions src/modules/mod_interfaces.F
Original file line number Diff line number Diff line change
Expand Up @@ -296,42 +296,42 @@ end function TDDFT_ALDA_eh_space_R_kernel
!
interface WF_shift_kpoint
!
subroutine DEV_SUB_ALT(WF_shift_kpoint)(b_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k_out)
subroutine DEV_SUB_ALT(WF_shift_kpoint)(b_to_shift,ikbz,i_sp_pol,wf_shift,Xk,WF_k_in,WF_k_out)
use pars, ONLY:SP
use electrons, ONLY:n_spinor,n_sp_pol
use electrons, ONLY:n_spinor
use wave_func, ONLY:wf_ng_1st_BZ,wf_ng_overlaps
use R_lattice, ONLY:bz_samp
integer, intent(in) :: wf_shift(3),ikbz,b_to_shift(2)
integer, intent(in) :: wf_shift(3),ikbz,i_sp_pol,b_to_shift(2)
type(bz_samp), intent(in) :: Xk
complex(SP), intent(in) DEV_ATTR :: &
& WF_k_in (wf_ng_1st_BZ, n_spinor,b_to_shift(1):b_to_shift(2),n_sp_pol)
& WF_k_in (wf_ng_1st_BZ, n_spinor,b_to_shift(1):b_to_shift(2))
complex(SP), intent(out) DEV_ATTR :: &
& WF_k_out(wf_ng_overlaps,n_spinor,b_to_shift(1):b_to_shift(2),n_sp_pol)
& WF_k_out(wf_ng_overlaps,n_spinor,b_to_shift(1):b_to_shift(2))
end subroutine
end interface
!
interface WF_symm_kpoint
!
subroutine WF_symm_kpoint_cpu(b_to_load,ikbz,Xk,WF_k_out)
subroutine WF_symm_kpoint_cpu(b_to_load,ikbz,i_sp_pol,Xk,WF_k_out)
use pars, ONLY:SP
use R_lattice, ONLY:bz_samp
use wave_func, ONLY:wf_ng_1st_BZ
use electrons, ONLY:n_spinor,n_sp_pol
integer, intent(in) :: ikbz,b_to_load(2)
use electrons, ONLY:n_spinor
integer, intent(in) :: ikbz,i_sp_pol,b_to_load(2)
type(bz_samp), intent(in) :: Xk
complex(SP), intent(out) :: WF_k_out(wf_ng_1st_BZ,n_spinor,b_to_load(1):b_to_load(2),n_sp_pol)
complex(SP), intent(out) :: WF_k_out(wf_ng_1st_BZ,n_spinor,b_to_load(1):b_to_load(2))
end subroutine WF_symm_kpoint_cpu
!
#ifdef _CUDA
subroutine WF_symm_kpoint_gpu(b_to_load,ikbz,Xk,WF_k_out)
subroutine WF_symm_kpoint_gpu(b_to_load,ikbz,i_sp_pol,Xk,WF_k_out)
use pars, ONLY:SP
use R_lattice, ONLY:bz_samp
use wave_func, ONLY:wf_ng_1st_BZ
use electrons, ONLY:n_spinor,n_sp_pol
integer, intent(in) :: ikbz,b_to_load(2)
use electrons, ONLY:n_spinor
integer, intent(in) :: ikbz,i_sp_pol,b_to_load(2)
type(bz_samp), intent(in) :: Xk
complex(SP), intent(out) DEV_ATTR :: &
& WF_k_out(wf_ng_1st_BZ,n_spinor,b_to_load(1):b_to_load(2),n_sp_pol)
& WF_k_out(wf_ng_1st_BZ,n_spinor,b_to_load(1):b_to_load(2))
end subroutine WF_symm_kpoint_gpu
#endif
end interface
Expand Down
20 changes: 9 additions & 11 deletions src/wf_and_fft/WF_shift_kpoint.F
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
!
#include<dev_defs.h>
!
subroutine DEV_SUB_ALT(WF_shift_kpoint)(b_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k_out)
subroutine DEV_SUB_ALT(WF_shift_kpoint)(b_to_shift,ikbz,i_sp_pol,wf_shift,Xk,WF_k_in,WF_k_out)
!
use pars, ONLY:SP,cZERO
use electrons, ONLY:n_spinor,n_sp_pol
Expand All @@ -18,18 +18,18 @@ subroutine DEV_SUB_ALT(WF_shift_kpoint)(b_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k
!
implicit none
!
integer, intent(in) :: wf_shift(3),ikbz,b_to_shift(2)
integer, intent(in) :: wf_shift(3),ikbz,i_sp_pol,b_to_shift(2)
type(bz_samp), intent(in) :: Xk
complex(SP), intent(in) DEV_ATTR :: WF_k_in (wf_ng_1st_BZ, n_spinor,b_to_shift(1):b_to_shift(2),n_sp_pol)
complex(SP), intent(out) DEV_ATTR :: WF_k_out(wf_ng_overlaps,n_spinor,b_to_shift(1):b_to_shift(2),n_sp_pol)
complex(SP), intent(in) DEV_ATTR :: WF_k_in (wf_ng_1st_BZ, n_spinor,b_to_shift(1):b_to_shift(2))
complex(SP), intent(out) DEV_ATTR :: WF_k_out(wf_ng_overlaps,n_spinor,b_to_shift(1):b_to_shift(2))
!
! Work space
!
integer :: id,ik,is
complex(SP) DEV_ATTR :: WF_tmp(wf_ng_overlaps,n_spinor,b_to_shift(1):b_to_shift(2),n_sp_pol)
complex(SP) DEV_ATTR :: WF_tmp(wf_ng_overlaps,n_spinor,b_to_shift(1):b_to_shift(2))
integer :: ig,igp,i_b_ref
integer :: g0_idx(3,2),g0_idx_val
integer :: i_spinor,i_b,i_sp_pol
integer :: i_spinor,i_b
!
ik = Xk%sstar(ikbz,1)
is = Xk%sstar(ikbz,2)
Expand All @@ -48,21 +48,19 @@ subroutine DEV_SUB_ALT(WF_shift_kpoint)(b_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k
g0_idx_val=k_map%g0_idx(id,WF_shift(id))
!
#if defined _CUDA
!$cuf kernel do(4) <<<*,*>>>
!$cuf kernel do(3) <<<*,*>>>
#endif
do i_sp_pol=1,n_sp_pol
do i_b=b_to_shift(1),b_to_shift(2)
do i_spinor=1,n_spinor
do ig=1,wf_ng_1st_BZ
!
if(WF_tmp(ig,1,i_b_ref,1)==cZERO) cycle
if(WF_tmp(ig,1,i_b_ref)==cZERO) cycle
igp=DEV_VAR(G_m_G)(ig,g0_idx_val)
WF_k_out(igp,i_spinor,i_b,i_sp_pol)=WF_tmp(ig,i_spinor,i_b,i_sp_pol)
WF_k_out(igp,i_spinor,i_b)=WF_tmp(ig,i_spinor,i_b)
!
enddo
enddo
enddo
enddo
!
else
call dev_memcpy(WF_k_out,WF_tmp)
Expand Down
24 changes: 10 additions & 14 deletions src/wf_and_fft/WF_symm_kpoint.F
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
!
! Authors (see AUTHORS file for details): MG CA DS AF
!
subroutine DEV_SUB_ALT(WF_symm_kpoint)(b_to_load,ikbz,Xk,WF_k_out)
subroutine DEV_SUB_ALT(WF_symm_kpoint)(b_to_load,ikbz,i_sp_pol,Xk,WF_k_out)
!
! INCLUDED in: WF_symm_kpoint_incl.F
!
Expand All @@ -18,14 +18,14 @@ subroutine DEV_SUB_ALT(WF_symm_kpoint)(b_to_load,ikbz,Xk,WF_k_out)
!
implicit none
!
integer, intent(in) :: ikbz,b_to_load(2)
integer, intent(in) :: ikbz,i_sp_pol,b_to_load(2)
type(bz_samp), intent(in) :: Xk
complex(SP), intent(out) DEV_ATTR :: WF_k_out(wf_ng_1st_BZ,n_spinor,b_to_load(1):b_to_load(2),n_sp_pol)
complex(SP), intent(out) DEV_ATTR :: WF_k_out(wf_ng_1st_BZ,n_spinor,b_to_load(1):b_to_load(2))
!
! Work space
!
complex(SP), pointer DEV_ATTR :: WF_p(:,:,:)
integer :: ik,is,i_sp_pol,ib,iwf
integer :: ik,is,ib,iwf
integer :: i_g,i_spinor
!
ik = Xk%sstar(ikbz,1)
Expand All @@ -36,18 +36,15 @@ subroutine DEV_SUB_ALT(WF_symm_kpoint)(b_to_load,ikbz,Xk,WF_k_out)
!
if(is==idt_index) then
!
do i_sp_pol=1,n_sp_pol
do ib=b_to_load(1),b_to_load(2)
!
iwf=WF%index(ib,ik,i_sp_pol)
call dev_memcpy(WF_k_out(:,:,ib,i_sp_pol),DEV_VAR(WF%c)(:,:,iwf))
call dev_memcpy(WF_k_out(:,:,ib),DEV_VAR(WF%c)(:,:,iwf))
!
enddo
enddo
!
else
!
do i_sp_pol=1,n_sp_pol
do ib=b_to_load(1),b_to_load(2)
!
iwf=WF%index(ib,ik,i_sp_pol)
Expand All @@ -59,7 +56,7 @@ subroutine DEV_SUB_ALT(WF_symm_kpoint)(b_to_load,ikbz,Xk,WF_k_out)
#endif
do i_spinor=1,n_spinor
do i_g=1,wf_ng
WF_k_out(DEV_VAR(g_rot)(i_g,is),i_spinor,ib,i_sp_pol)=WF_p(i_g,i_spinor,iwf)
WF_k_out(DEV_VAR(g_rot)(i_g,is),i_spinor,ib)=WF_p(i_g,i_spinor,iwf)
enddo
enddo
!
Expand All @@ -69,15 +66,14 @@ subroutine DEV_SUB_ALT(WF_symm_kpoint)(b_to_load,ikbz,Xk,WF_k_out)
!$cuf kernel do(1) <<<*,*>>>
#endif
do i_g=1,wf_ng
WF_k_out(DEV_VAR(g_rot)(i_g,is),1,ib,i_sp_pol)=DEV_VAR(spin_sop)(1,1,is)*WF_p(i_g,1,iwf)+ &
& DEV_VAR(spin_sop)(1,2,is)*WF_p(i_g,2,iwf)
WF_k_out(DEV_VAR(g_rot)(i_g,is),2,ib,i_sp_pol)=DEV_VAR(spin_sop)(2,1,is)*WF_p(i_g,1,iwf)+ &
& DEV_VAR(spin_sop)(2,2,is)*WF_p(i_g,2,iwf)
WF_k_out(DEV_VAR(g_rot)(i_g,is),1,ib)=DEV_VAR(spin_sop)(1,1,is)*WF_p(i_g,1,iwf)+ &
& DEV_VAR(spin_sop)(1,2,is)*WF_p(i_g,2,iwf)
WF_k_out(DEV_VAR(g_rot)(i_g,is),2,ib)=DEV_VAR(spin_sop)(2,1,is)*WF_p(i_g,1,iwf)+ &
& DEV_VAR(spin_sop)(2,2,is)*WF_p(i_g,2,iwf)
enddo
endif
!
enddo
enddo
!
if(is>nsym/(1+i_time_rev)) call dev_conjg(WF_k_out)
!
Expand Down

0 comments on commit 092f32e

Please sign in to comment.