Skip to content

Commit

Permalink
Merge pull request #27 from IvanMary69/main
Browse files Browse the repository at this point in the history
FastC: HPC_layer simplifiee
  • Loading branch information
vincentcasseau authored Oct 30, 2024
2 parents d6b5dc7 + 269bc15 commit 89139bc
Show file tree
Hide file tree
Showing 742 changed files with 6,993 additions and 10,555 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
*~
build
__pycache__
*.pyc
*.egg-info
core.*
setup.cfg
Expand All @@ -20,3 +21,5 @@ Data*
ValidData*
*.cgns
*.dat
*.swo
*.swp
12 changes: 4 additions & 8 deletions Fast/FastC/FastC/HPC_LAYER/INDICE_RANGE.for
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,13 @@
#endif
#endif
call indice_boucle_ssdom(ndo, extended_range,
& ibloc , jbloc , kbloc,
& icache, jcache, kcache,
& param_int(KFLUDOM),
& topo_s, ithread_sock,thread_pos_tmp,
& topo_s, thread_pos,
& size_cache,
& synchro_receive_sock,
& synchro_receive_th ,
& synchro_send_sock,
& synchro_send_th ,
& param_int(NIJK), param_int(IJKV),
& ind_dm_zone, ind_dm_socket,
& ind_dm_zone,
& ind_dm_omp, ijkv_sdm,
& ind_sdm , ind_coe,
& ind_grad, ind_rhs,
Expand All @@ -23,7 +19,7 @@

#if CHECK_BLOCK > 0
if(ithread.eq.param_int( IO_THREAD).and.nitrun.eq.0)then
if(ibloc*jbloc*kbloc.le.1) then
!if(ibloc*jbloc*kbloc.le.1) then
write(*,'(a,6i4)')'sdm =',ind_sdm
write(*,'(a,6i4)')'grad=',ind_grad
write(*,'(a,6i4)')'coe =',ind_coe
Expand All @@ -32,7 +28,7 @@
write(*,'(a,6i4)')'ssa =',ind_ssa
write(*,'(a,6i4)')'hrr =',ind_hrr
write(*,'(a,6i4)')'gcb =',ind_gcb
endif
!endif
endif
#endif

Expand Down
15 changes: 6 additions & 9 deletions Fast/FastC/FastC/HPC_LAYER/LOC_VAR_DECLARATION.for
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,15 @@

character*7 omp_init,omp_wait,omp_go,omp_wait_lu

INTEGER_E i, icache,jcache,kcache,lmin,
INTEGER_E i,l, icache,jcache,kcache,lmin,
& size_max, size_loc,thread_parsock,
& thread_parsock_actif,extended_range,
& lok_shap_sock(4),thread_topology(3),
& thread_topology(3),
& lok_shap(4), size_cache(3),
& socket_pos(3), synchro_receive_sock(3),synchro_send_sock(3),
& synchro_receive_th(3),synchro_send_th(3),ipt_lok_sock,
& ithread_sock,ipt_lok,size_max_sock,neq_lok,taille,
& ijkv_thread(3),kGbloc,jGbloc,iGbloc,ip,jp,kp,lth,
& ibloc,jbloc,kbloc,ijkvloc(3),skip(3),shift(3),test(3),lwait,lgo,
& size_thread(3),thread_pos(3),thread_pos_tmp(3),sens(3),
& size_target(3), cache(3)
& synchro_receive_th(3),synchro_send_th(3),
& ipt_lok,size_max_sock,neq_lok,taille,
& ijkvloc(3),skip(3),shift(3),test(3),lwait,lgo,
& size_thread(3),thread_pos(3),sens(3), size_target(3)

INTEGER_E ind_coe(6),ind_grad(6),ind_sdm(6),ind_rhs(6),ind_mjr(6),
& ind_ssa(6), ind_hrr(6), ind_gcb(6)
Expand Down
15 changes: 5 additions & 10 deletions Fast/FastC/FastC/HPC_LAYER/SYNCHRO_GO.for
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
call synchro_omp_scater(param_int, ithread,
& lth, sens,lgo,lwait,Nbre_socket,
& Nbre_thread_actif,thread_parsock,
& lok_shap_sock, lok_shap,neq_lok,
& socket , socket_topology, socket_pos,
& ithread, thread_topology,thread_pos_tmp,
& synchro_receive_sock, synchro_send_sock,
& synchro_receive_th , synchro_send_th,
& ibloc , jbloc , kbloc , ijkv_thread,
& Nbre_thread_actif,
& lok_shap, neq_lok,
& ithread, thread_topology, thread_pos,
& synchro_receive_th, synchro_send_th,
& icache, jcache, kcache, ijkv_sdm,
& size_cache,
& ind_dm_omp,
& lok(1),lok(ipt_lok_sock),
& lok(ipt_lok), omp_go )
& lok(ipt_lok), omp_go )
13 changes: 4 additions & 9 deletions Fast/FastC/FastC/HPC_LAYER/SYNCHRO_WAIT.for
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
call synchro_omp_scater(param_int, ithread,
& lth, sens,lgo,lwait,Nbre_socket,
& Nbre_thread_actif,thread_parsock,
& lok_shap_sock, lok_shap,neq_lok,
& socket , socket_topology, socket_pos,
& ithread, thread_topology,thread_pos_tmp,
& synchro_receive_sock, synchro_send_sock,
& synchro_receive_th , synchro_send_th,
& ibloc , jbloc , kbloc , ijkv_thread,
& Nbre_thread_actif,
& lok_shap, neq_lok,
& ithread, thread_topology, thread_pos,
& synchro_receive_th, synchro_send_th,
& icache, jcache, kcache, ijkv_sdm,
& size_cache,
& ind_dm_omp,
& lok(1),lok(ipt_lok_sock),
& lok(ipt_lok), omp_wait )
50 changes: 46 additions & 4 deletions Fast/FastC/FastC/HPC_LAYER/WORK_DISTRIBUTION_BEGIN.for
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,51 @@
omp_wait_lu = 'wait_lu'
lerr =.false.

thread_parsock = Nbre_thread_actif/Nbre_socket
#include "../FastC/FastC/HPC_LAYER/topo_cache.for"
#include "../FastC/FastC/HPC_LAYER/loopBloc_begin.for"
#include "../FastC/FastC/HPC_LAYER/loop_scater.for"
! Per-thread work-distribution setup: copy the thread topology, choose the
! cache-block sizes, locate this thread inside the thread grid, then call
! crsdm_scater to obtain the cache-block loop counts and the synchro flags.

! local copy of topo_s (one entry per direction)
thread_topology(1) =topo_s(1)
thread_topology(2) =topo_s(2)
thread_topology(3) =topo_s(3)

!target cache-block size applied to the OMP subzone
size_cache(1) = param_int(CACHEBLCKI)
size_cache(2) = param_int(CACHEBLCKJ)
size_cache(3) = param_int(CACHEBLCKK)

! Only in directions where thread_topology(i) differs from 1: cap the
! cache-block size at half the extent of ind_dm_omp along i (integer
! division), then force it to be at least 1.
! NOTE(review): presumably this guarantees at least two cache blocks per
! thread in a split direction - confirm against crsdm_scater.
do i=1,3
if(thread_topology(i).ne.1) then
l =(ind_dm_omp(2*i)-ind_dm_omp(2*i-1)+1)/2
size_cache(i)=min( l, size_cache(i) )
size_cache(i)=max( 1, size_cache(i) )
endif
enddo

! Convert the 1-based linear index ithread into a 1-based 3D position
! thread_pos in the topo_s grid; direction 1 varies fastest, direction 3
! slowest. l is the linear index remaining inside the current k-plane.
thread_pos(3) = 1 + (ithread-1)/(topo_s(1)*topo_s(2))
l = ithread -(thread_pos(3)-1)*topo_s(1)*topo_s(2)
thread_pos(2) = 1 + (l-1)/topo_s(1)
thread_pos(1) = l - (thread_pos(2)-1)*topo_s(1)
!determine thread synchronisation + number of sub-domains (cache blocks)

! The directive below is emitted only when __INTEL_COMPILER is defined and
! __INTEL_LLVM_COMPILER is not; it requests forced inlining of crsdm_scater.
#if defined(__INTEL_COMPILER)
#if not defined(__INTEL_LLVM_COMPILER)
!DIR$ ATTRIBUTES FORCEINLINE :: crsdm_scater
#endif
#endif
!in : ind_dm_zone: zone size, ind_dm_omp: OMP subdomain
!out: cache-block loop (ijkv_sdm), thread synchro flags
call crsdm_scater( ndo, topo_s, size_cache,
& synchro_receive_th, synchro_send_th,
& ind_dm_zone , ind_dm_omp, ijkv_sdm )

! Diagnostic dump, written only by the thread whose index equals
! param_int(IO_THREAD) and only when nitrun is 0.
! NOTE(review): this block is not wrapped in any preprocessor guard, so it
! prints in every build - confirm that this is intended.
if(ithread.eq.param_int( IO_THREAD).and.nitrun.eq.0)then
write(*,'(a,3i4)')'thread_pos =',thread_pos
!write(*,'(a,3i4)')'IJKV socket=',ijkv_thread
write(*,'(a,3i4)')'IJKV thread=',ijkv_sdm
! 9 integers: 3 values of thread_topology followed by 6 of ind_dm_omp
write(*,'(a,9i4)')'topo thread',thread_topology,ind_dm_omp
write(*,'(a,9i4)')'synchro_receive_th',synchro_receive_th
write(*,'(a,9i4)')'synchro_send_th',synchro_send_th
endif

ccc#include "../FastC/FastC/HPC_LAYER/topo_cache.for"
ccc#include "../FastC/FastC/HPC_LAYER/loopBloc_begin.for"
ccc#include "../FastC/FastC/HPC_LAYER/loop_scater.for"
#include "../FastC/FastC/HPC_LAYER/verif_loksize.for"

26 changes: 13 additions & 13 deletions Fast/FastC/FastC/HPC_LAYER/WORK_DISTRIBUTION_END.for
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
enddo
enddo
enddo !boucle patern bloc
c enddo
c enddo
c enddo !boucle patern bloc


9999 continue
c 9999 continue

#if CHECK_SPLIT > 0
#include "../FastC/FastC/HPC_LAYER/check_split1.for"
#endif
c#if CHECK_SPLIT > 0
c#include "../FastC/FastC/HPC_LAYER/check_split1.for"
c#endif

enddo
enddo
enddo !boucle bloc_thread
c enddo
c enddo
c enddo !boucle bloc_thread

#if CHECK_SPLIT > 0
#include "../FastC/FastC/HPC_LAYER/check_split2.for"
#endif
c#if CHECK_SPLIT > 0
c#include "../FastC/FastC/HPC_LAYER/check_split2.for"
c#endif
12 changes: 6 additions & 6 deletions Fast/FastC/FastC/HPC_LAYER/crsdm_scater.for
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ c $Date: 2011-12-07 15:30:38 +0100 (mer 07 déc 2011) $
c $Revision: 56 $
c $Author: MarcTerracol $
c***********************************************************************
subroutine crsdm_scater(ndo,ith,jth,kth, topo_th,
subroutine crsdm_scater(ndo, topo_th,
& size_cache, synchro_receive, synchro_send,
& ind_dm_glob, ind_dm, ijkv_sdm )
& ind_dm_zone, ind_dm, ijkv_sdm )
c***********************************************************************
c_P O N E R A
c ACT
Expand All @@ -20,9 +20,9 @@ c=======================================================================
include "omp_lib.h"
INTEGER_E ndo, ith,jth,kth, topo_th(3),
INTEGER_E ndo, topo_th(3),
& size_cache(3), synchro_receive(3),synchro_send(3),
& ind_dm_glob(6), ind_dm(6), ijkv_sdm(3)
& ind_dm_zone(6), ind_dm(6), ijkv_sdm(3)
C Var loc
INTEGER_E l,i,j,k,ivloc,iverbs
Expand All @@ -34,9 +34,9 @@ C Var loc
synchro_receive(i) = 0
synchro_send (i) = 0
if( ind_dm_glob(2*i ).ne. ind_dm(2*i).and.topo_th(i).ne.1)
if( ind_dm_zone(2*i ).ne. ind_dm(2*i).and.topo_th(i).ne.1)
& synchro_receive(i)= 1
if( ind_dm_glob(2*i-1).ne. ind_dm(2*i-1).and.topo_th(i).ne.1)
if( ind_dm_zone(2*i-1).ne. ind_dm(2*i-1).and.topo_th(i).ne.1)
& synchro_send(i) = 1
ivloc = ind_dm(2*i)- ind_dm(2*i-1) + 1
Expand Down
37 changes: 16 additions & 21 deletions Fast/FastC/FastC/HPC_LAYER/distributeThreads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,8 @@ void K_FASTC::distributeThreads_c( E_Int**& param_int, E_Float**& param_real, E_
FldArrayI newtab_nozone(mxzone); E_Int* ipt_nozone_new = newtab_nozone.begin();
FldArrayI newtab_nosszone(mxzone); E_Int* ipt_nosszone_new= newtab_nosszone.begin();

FldArrayF tab_HPC_CUPS(mxzone); E_Float* ipt_HPC_CUPS = tab_HPC_CUPS.begin();
FldArrayF newtab_HPC_CUPS(mxzone); E_Float* ipt_HPC_CUPS_new= newtab_HPC_CUPS.begin();
FldArrayF tab_hpccups(mxzone); E_Float* ipt_hpccups = tab_hpccups.begin();
FldArrayF newtab_hpccups(mxzone); E_Float* ipt_hpccups_new= newtab_hpccups.begin();

E_Int c = 0;
E_Int ndimt=0;
Expand All @@ -228,8 +228,8 @@ void K_FASTC::distributeThreads_c( E_Int**& param_int, E_Float**& param_real, E_
E_Int* nozone = ipt_nozone + c;
E_Int* nosszone = ipt_nosszone + c;

ipt_HPC_CUPS[c] = param_real[nd][HPC_CUPS];
if (ipt_HPC_CUPS[c] > CupsMax) CupsMax = ipt_HPC_CUPS[c];
ipt_hpccups[c] = param_real[nd][HPC_CUPS];
if (ipt_hpccups[c] > CupsMax) CupsMax = ipt_hpccups[c];


ijk_start[0]= ipt_ind_dm[nd][0+shift];
Expand Down Expand Up @@ -274,31 +274,28 @@ void K_FASTC::distributeThreads_c( E_Int**& param_int, E_Float**& param_real, E_
{
E_Int* ndimdx = ipt_ndimdx + c1;

E_Float* hpc_cups = ipt_HPC_CUPS+ c1;
E_Float* hpc_cups = ipt_hpccups+ c1;

//if(ndimdx[0] > ndimdx_max) { ndimdx_max= ndimdx[0]; c_tg = c1;}
if( float(ndimdx[0])/hpc_cups[0] > ndimdx_max) { ndimdx_max= float(ndimdx[0])/hpc_cups[0]; c_tg = c1;}
}// loop recherche plus grosse zone



E_Int* nijk = ipt_nijk + 3*c_tg;
E_Int* ijk_start= ipt_nijk + 3*c_tg +3*mxzone;
E_Int* ndimdx = ipt_ndimdx + c_tg;

E_Int* nozone = ipt_nozone + c_tg;
E_Int* nosszone = ipt_nosszone + c_tg;

E_Float* hpc_cups = ipt_HPC_CUPS+ c_tg;

//printf("zone %d %d %d %d %d \n", c, ndimdx_max, c_tg, nozone[0], nosszone[0]);
E_Float* hpc_cups = ipt_hpccups+ c_tg;

E_Int* nijk_new = ipt_nijk_new + 3*c;
E_Int* ijk_startnew = ipt_nijk_new + 3*c +3*mxzone;
E_Int* ndimdx_new = ipt_ndimdx_new + c;
E_Int* nozone_new = ipt_nozone_new + c;
E_Int* nosszone_new = ipt_nosszone_new + c;

E_Float* hpc_cups_new = ipt_HPC_CUPS_new + c;
E_Float* hpc_cups_new = ipt_hpccups_new + c;

nijk_new[0] = nijk[0];
nijk_new[1] = nijk[1];
Expand Down Expand Up @@ -337,7 +334,7 @@ void K_FASTC::distributeThreads_c( E_Int**& param_int, E_Float**& param_real, E_
E_Int* nozone_new = ipt_nozone_new + c;
//E_Int No_zone = nozone_new[0];

E_Float* hpc_cups = ipt_HPC_CUPS_new + c;
E_Float* hpc_cups = ipt_hpccups_new + c;

ind_dm[0]= ijk_start[0];
ind_dm[2]= ijk_start[1];
Expand All @@ -353,8 +350,6 @@ void K_FASTC::distributeThreads_c( E_Int**& param_int, E_Float**& param_real, E_
E_Int size_k = 0;
if(nijk[2] != 1) size_k = (nijk[2]+1)*nijk[0]*nijk[1];

//poids = cupsmoy/ipt_HPC_CUPS[c];
//poids = cupsmoy/ipt_HPC_CUPS[No_zone];
poids = cupsmoy/hpc_cups[0];
//if(nstep==1) printf("poids %f %f %f %d %d \n", poids, cupsmoy,ipt_HPC_CUPS[No_zone], c, No_zone);

Expand Down Expand Up @@ -411,7 +406,7 @@ void K_FASTC::distributeThreads_c( E_Int**& param_int, E_Float**& param_real, E_
E_Int No_sszone = nosszone_new[0];
//E_Int* ndimdx = ipt_ndimdx_new + c;

E_Float* hpc_cups = ipt_HPC_CUPS_new + c;
E_Float* hpc_cups = ipt_hpccups_new + c;

E_Int* ipt_nidom_loc = ipt_ind_dm[No_zone] + param_int[No_zone][ MXSSDOM_LU ]*6*nssiter + nssiter; //nidom_loc(nssiter)
E_Int nb_subzone = ipt_nidom_loc [nstep-1];
Expand All @@ -433,8 +428,8 @@ void K_FASTC::distributeThreads_c( E_Int**& param_int, E_Float**& param_real, E_
if(nstep==1) { for (E_Int socket = 0; socket < NBR_SOCKET; socket++) { numa_socket[socket] = 0;} }

poids = cupsmoy/hpc_cups[0];
//poids = cupsmoy/ipt_HPC_CUPS[c];
//poids = cupsmoy/ipt_HPC_CUPS[No_zone];
//poids = cupsmoy/ipt_hpccups[c];
//poids = cupsmoy/ipt_hpccups[No_zone];
//printf("Nozone %d %d %f %d \n",c, No_zone, hpc_cups[0], nstep);

E_Int size_c = nijk[0]*nijk[1]*nijk[2]*poids;
Expand Down Expand Up @@ -639,9 +634,9 @@ void K_FASTC::distributeThreads_c( E_Int**& param_int, E_Float**& param_real, E_
}
*/

//poids = cupsmoy/ipt_HPC_CUPS[No_zone];
//poids = cupsmoy/ipt_hpccups[No_zone];
poids = cupsmoy/hpc_cups[0];
//poids = cupsmoy/ipt_HPC_CUPS[c];
//poids = cupsmoy/ipt_hpccups[c];

E_Int res = (cc*dim_i[0]*dim_j[0]*dim_k[0] + ci*dim_i[0]*dim_j[0]*dim_k[0] + cj*dim_j[0]*dim_i[0]*dim_k[0] + ck*dim_k[0]*dim_i[0]*dim_j[0] )*poids - cells_tg_loc;
E_Float sign = 1.;
Expand Down Expand Up @@ -894,9 +889,9 @@ void K_FASTC::distributeThreads_c( E_Int**& param_int, E_Float**& param_real, E_
{ list_affected[th]=-1;
ipt_omp[PtTask + 2 + th] = -2;} //Thread inactif

//poids = cupsmoy/ipt_HPC_CUPS[No_zone];
//poids = cupsmoy/ipt_hpccups[No_zone];
poids = cupsmoy/hpc_cups[0];
//poids = cupsmoy/ipt_HPC_CUPS[c];
//poids = cupsmoy/ipt_hpccups[c];
//Recherche thread disponible
E_Int th = 0;
for (E_Int k = 0; k < topo_lu[2]; k++){
Expand Down
Loading

0 comments on commit 89139bc

Please sign in to comment.