Skip to content

Commit 1c537fa

Browse files
committed
Nbody6++GPU-Dec2019-silk
1 parent 15ec54c commit 1c537fa

16 files changed

+521
-494
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# make install
77
#
88

9-
RESULT = nbody6++.avx
9+
RESULT = nbody6++.sse.gpu.mpi
1010
INSTALLDIR = /usr/local
1111
EXTRATOOLS = nb6++dumpb2a libinitial.so libnb6out3.so nb6++snapshot
1212
ifeq ("x", "x $(EXTRATOOLS)")

config.status

+21-21
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ $config_files
417417
418418
Report bugs to <[email protected]>."
419419

420-
ac_cs_config="'--disable-mpi' '--disable-gpu' '--with-par=128k'"
420+
ac_cs_config="'--with-par=b1m' '--enable-simd=sse' '--enable-mcmodel=large'"
421421
ac_cs_version="\
422422
Nbody6++ config.status 1.0
423423
configured by ./configure, generated by GNU Autoconf 2.69,
@@ -427,9 +427,9 @@ Copyright (C) 2012 Free Software Foundation, Inc.
427427
This config.status script is free software; the Free Software Foundation
428428
gives unlimited permission to copy, distribute and modify it."
429429

430-
ac_pwd='/work/Tux1/spurzem/Nbody/Nbody6++GPU-Dec2019'
430+
ac_pwd='/p/home/jusers/spurzem2/jusuf/Nbody/Nbody6++GPU-Dec2019-bugfix'
431431
srcdir='.'
432-
AWK=''
432+
AWK='gawk'
433433
test -n "$AWK" || AWK=awk
434434
# The default lists apply if the user does not specify any file.
435435
ac_need_defaults=:
@@ -496,7 +496,7 @@ if $ac_cs_silent; then
496496
fi
497497

498498
if $ac_cs_recheck; then
499-
set X /bin/sh './configure' '--disable-mpi' '--disable-gpu' '--with-par=128k' $ac_configure_extra_args --no-create --no-recursion
499+
set X /bin/sh './configure' '--with-par=b1m' '--enable-simd=sse' '--enable-mcmodel=large' $ac_configure_extra_args --no-create --no-recursion
500500
shift
501501
$as_echo "running CONFIG_SHELL=/bin/sh $*" >&6
502502
CONFIG_SHELL='/bin/sh'
@@ -585,18 +585,18 @@ fi
585585
echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
586586
cat >>"$ac_tmp/subs1.awk" <<\_ACAWK &&
587587
S["LTLIBOBJS"]=""
588-
S["MMAX"]="1024"
588+
S["MMAX"]="2048"
589589
S["LMAX"]="600"
590-
S["KMAX"]="65536"
591-
S["NMAX"]="131072"
590+
S["KMAX"]="512000"
591+
S["NMAX"]="1510720"
592592
S["EXTRARESULT"]=""
593-
S["EXTRAOBJ"]=" ${AVX_OBJECTS}"
594-
S["EXTRASRC"]=" $(GPU_FSOURCES)"
595-
S["RESULT"]=".avx"
596-
S["CUFLAGS"]=""
597-
S["AWK"]=""
598-
S["SED"]=""
599-
S["NVCC"]=""
593+
S["EXTRAOBJ"]=" $(CUDA_OBJECTS) ${IRRSSE_OBJECTS}"
594+
S["EXTRASRC"]=" $(GPU_FSOURCES) $(MPI_FSOURCES)"
595+
S["RESULT"]=".sse.gpu.mpi"
596+
S["CUFLAGS"]=" -O3 -D CUDA_5 -I /p/software/jusuf/stages/2019a/software/CUDA/10.1.105/samples/common/inc"
597+
S["AWK"]="gawk"
598+
S["SED"]="/usr/bin/sed"
599+
S["NVCC"]="nvcc"
600600
S["LIBOBJS"]=""
601601
S["EGREP"]="/usr/bin/grep -E"
602602
S["CXXCPP"]="g++ -E"
@@ -607,19 +607,19 @@ S["CFLAGS"]="-g -O2"
607607
S["CC"]="gcc"
608608
S["ac_ct_CXX"]="g++"
609609
S["CPPFLAGS"]=""
610-
S["CXXFLAGS"]=" -O3 -fopenmp -I../include -march=native -fPIC -mcmodel=medium -mavx ${OMP_FLAGS}"
610+
S["CXXFLAGS"]=" -O3 -fopenmp -I../include -march=native -fPIC -mcmodel=large -msse ${OMP_FLAGS}"
611611
S["CXX"]="g++"
612612
S["OPENMP_FCFLAGS"]="-fopenmp"
613613
S["OBJEXT"]="o"
614614
S["EXEEXT"]=""
615615
S["ac_ct_FC"]="gfortran"
616-
S["LDFLAGS"]=""
617-
S["FCFLAGS"]=" -I../extra_inc/nompi -O3 -fPIC -mcmodel=medium -fopenmp -I../include ${SIMD_FLAGS} $(GPU_FLAGS) ${OMP_FLAGS}"
618-
S["FC"]="gfortran"
616+
S["LDFLAGS"]=" -L"
617+
S["FCFLAGS"]=" -O3 -fPIC -mcmodel=large -fopenmp -I../include $(GPU_FLAGS) $(MPI_FLAGS) ${SIMD_FLAGS} ${OMP_FLAGS}"
618+
S["FC"]="mpif77"
619619
S["target_alias"]=""
620620
S["host_alias"]=""
621621
S["build_alias"]=""
622-
S["LIBS"]=" -lstdc++"
622+
S["LIBS"]=" -lstdc++ -lcudart"
623623
S["ECHO_T"]=""
624624
S["ECHO_N"]="-n"
625625
S["ECHO_C"]=""
@@ -629,7 +629,7 @@ S["DEFS"]="-DPACKAGE_NAME=\\\"Nbody6++\\\" -DPACKAGE_TARNAME=\\\"nbody6--\\\" -D
629629
"E_H=1 -DHAVE_UNISTD_H=1 -DHAVE_STDLIB_H=1 -DHAVE_MALLOC=1 -DHAVE_GETTIMEOFDAY=1 -DHAVE_MEMSET=1 -DHAVE_SQRT=1 -DHAVE_STRCHR=1"
630630
S["mandir"]="${datarootdir}/man"
631631
S["localedir"]="${datarootdir}/locale"
632-
S["libdir"]="${exec_prefix}/lib64"
632+
S["libdir"]="${exec_prefix}/lib"
633633
S["psdir"]="${docdir}"
634634
S["pdfdir"]="${docdir}"
635635
S["dvidir"]="${docdir}"
@@ -643,7 +643,7 @@ S["sharedstatedir"]="${prefix}/com"
643643
S["sysconfdir"]="${prefix}/etc"
644644
S["datadir"]="${datarootdir}"
645645
S["datarootdir"]="${prefix}/share"
646-
S["libexecdir"]="${exec_prefix}/lib"
646+
S["libexecdir"]="${exec_prefix}/libexec"
647647
S["sbindir"]="${exec_prefix}/sbin"
648648
S["bindir"]="${exec_prefix}/bin"
649649
S["program_transform_name"]="s,x,x,"

include/params.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* ------------------
33
*
44
* Choose between small or large run.
5-
PARAMETER (NMAX=131072,KMAX=65536,LMAX=600,MMAX=1024,
5+
PARAMETER (NMAX=1510720,KMAX=512000,LMAX=600,MMAX=2048,
66
& MLD=22,MLR=600,MLV=200,MCL=10,NCMAX=10,NTMAX=100)
77
parameter (maxpe=1024,ithread=8)
88
*

src/Main/expel.f

100644100755
+14-4
Original file line numberDiff line numberDiff line change
@@ -171,12 +171,22 @@ SUBROUTINE EXPEL(J1,J2,ICASE)
171171
WHICH1 = ' HYPERB '
172172
NHYPC = NHYPC + 1
173173
END IF
174+
* if(rank.eq.0)
175+
* & WRITE (6,10) WHICH1, NAME(I1), NAME(I2), KSTAR(I1),KSTAR(I2),
176+
* & KW1, KW2, M1, M2, DM*ZMBAR, ECC0, ECC, R1, R2,
177+
* & SEMI0*SU, SEMI*SU
178+
* 10 FORMAT (A8,'CE NAM K0* K* M1 M2 DM E0 E R1 R2 A0 A ',
179+
* & 2I6,4I3,3F5.1,2F8.4,2F7.1,1P,E9.1,0P,F7.1)
180+
ICM = N + IPAIR
174181
if(rank.eq.0)
175-
& WRITE (6,10) WHICH1, NAME(I1), NAME(I2), KSTAR(I1),KSTAR(I2),
176-
& KW1, KW2, M1, M2, DM*ZMBAR, ECC0, ECC, R1, R2,
182+
& WRITE (6,1010) WHICH1,TTOT,NAME(I1),NAME(I2),NAME(IPAIR),
183+
& KSTAR(I1),KSTAR(I2),KSTAR(ICM),KW1,KW2,BODY(I1)*ZMBAR,
184+
& BODY(I2)*ZMBAR,M1,M2,DM*ZMBAR, ECC0, ECC, R1, R2,
177185
& SEMI0*SU, SEMI*SU
178-
10 FORMAT (A8,'CE NAM K0* K* M1 M2 DM E0 E R1 R2 A0 A ',
179-
& 2I6,4I3,3F5.1,2F8.4,2F7.1,1P,E9.1,0P,F7.1)
186+
1010 FORMAT(A8,'CE : T=',1P,E13.5,' N1/2/IP=',3I10,' KW1/2/IP=',
187+
& 3I4,' KW1,KW2=',2I4,' BODY1/2=',2E13.5,' M1/2,DM=',
188+
& 3E13.5,' E0,E=',2E13.5,' R1/2=',2E13.5,' A1/2=',
189+
& 2E13.5)
180190
*
181191
* Check common envelope condition again after circularization (09/08).
182192
IF (ECC0.GT.0.001D0.AND.ECC.LE.0.001D0) THEN

src/Main/gpunb.gpu.cu

100644100755
File mode changed.

src/Main/gpunb.velocity.cu

100644100755
File mode changed.

src/Main/hrplot.F

+4-2
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,10 @@ SUBROUTINE HRPLOT
134134
R2 = LOG10(RM2)
135135
ZL1 = LOG10(LUM)
136136
ZL2 = LOG10(LUM2)
137-
TE1 = 0.25*(ZL1 - 2.0*R1) + 3.7
138-
TE2 = 0.25*(ZL2 - 2.0*R2) + 3.7
137+
* TE1 = 0.25*(ZL1 - 2.0*R1) + 3.7
138+
* TE2 = 0.25*(ZL2 - 2.0*R2) + 3.7
139+
TE1 = 0.25*(ZL1 - 2.0*R1) + 3.761777537508
140+
TE2 = 0.25*(ZL2 - 2.0*R2) + 3.761777537508
139141
if(rank.eq.0)
140142
& WRITE (82,5) TTOT, J1, J2, NAME(J1), NAME(J2), KW, KW2,
141143
& KSTAR(ICM),

src/Main/intgrt.F

+14-16
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,6 @@ SUBROUTINE INTGRT
216216
* Find all particles due at next block time.
217217
1 CONTINUE
218218
*
219-
*
220219
* Redetermine TMIN after main change to catch new small steps after chain.
221220
ICALL = ICALL + 1
222221
IF (ICALL.EQ.2) GO TO 999
@@ -255,7 +254,6 @@ SUBROUTINE INTGRT
255254
call flush(6)
256255
call abort()
257256
END IF
258-
C IF(J.EQ.9951) print*,'L',L,'J',J,'T',TIME
259257
END DO
260258
DO L = NXTLEN+1,NXTLIMIT
261259
J = NXTLST(L)
@@ -268,7 +266,6 @@ SUBROUTINE INTGRT
268266
call flush(6)
269267
call abort()
270268
END IF
271-
C IF(J.EQ.9951) print*,'L',L,'J',J,'T',TIME
272269
END DO
273270
* --07/07/14 23:44-lwang-end----------------------------------------*
274271
#endif
@@ -351,7 +348,6 @@ SUBROUTINE INTGRT
351348

352349
call cputim(tttiend)
353350
ttintb = ttintb +(tttiend-tttbegin)*60
354-
355351
*
356352
* Include commensurability test (may be suppressed if no problems).
357353
* IF (STEP(IMIN).LT.1.0E-15.OR.DMOD(TIME,STEP(IMIN)).NE.0.0D0) THEN
@@ -393,7 +389,6 @@ SUBROUTINE INTGRT
393389
*
394390
IF (IQ.LT.0) GO TO 999
395391
END IF
396-
397392
*
398393
* Check regularization criterion for single particles.
399394
call cputim(tttbegin)
@@ -541,8 +536,8 @@ SUBROUTINE INTGRT
541536
call cputim(ttsimdc)
542537
ttsimdcalc = ttsimdcalc + (ttsimdc-ttnb1)*60.
543538
#else
544-
* print*,' before nbint,t,nxtlen,lst=',time,nxtlen,
545-
* & (nxtlst(k),k=1,nxtlen)
539+
print*,' before nbint,t,nxtlen,lst=',time,nxtlen,
540+
& (nxtlst(k),k=1,nxtlen)
546541
* print*,' step(lst)=',(step(nxtlst(k)),k=1,nxtlen)
547542
* print*,' x(1,lst)=',(x(1,nxtlst(k)),k=1,nxtlen)
548543
* print*,' x0(1,lst)=',(x0(1,nxtlst(k)),k=1,nxtlen)
@@ -730,19 +725,20 @@ SUBROUTINE INTGRT
730725
istrec = ista(irank)
731726
icnt2 = inum(irank)
732727
*
733-
* if(time.lt.0.1d0.and.icnt.gt.0)then
728+
* if(time.lt.0.1d0)then
734729
* print*,' NXT: rank t',rank,time,' ir ',ir,' send ',istsen,
735730
* * ' thru ',istsen+icnt-1,' to ',isend,' cnt ',icnt,
736731
* * ' istart,iend=',istart,iend
737732
* end if
738-
* if(time.lt.0.1d0.and.icnt2.gt.0)then
733+
* if(time.lt.0.1d0)then
739734
* print*,' NXT: rank t',rank,time,' ir ',ir,' recv ',istrec,
740735
* * ' thru ',istrec+icnt2-1,' fr ',irecv,' cnt2 ',icnt2,
741736
* * ' istart,iend=',istart,iend
742737
* end if
743738
*
744739
#ifdef PUREMPI
745740
call cputim(tta)
741+
print*,' INTGRT: MPI Irr icnt;2,*20=',icnt,icnt2,20*icnt
746742
CALL MPI_SENDRECV(XMPI(1,istsen),20*icnt,MPI_REAL8,isend,
747743
* rank,XMPI(1,istrec),20*icnt2,MPI_REAL8,irecv,irecv,
748744
* MPI_COMM_WORLD,status,ierr)
@@ -1083,20 +1079,22 @@ SUBROUTINE INTGRT
10831079
istrec = ista(irank)
10841080
icnt2 = inum(irank)
10851081
*
1086-
c$$$ if (ixxxx.le.10000000.and.time.gt.1.33) then
1087-
c$$$ print*,' INTGRT-R: rank t',rank,time,' ir ',ir,' send ',istsen,
1088-
c$$$ * ' thru ',istsen+icnt-1,' to ',isend,' cnt ',icnt
1089-
c$$$ print*,' INTGRT-R: rank t',rank,time,' ir ',ir,' recv ',istrec,
1090-
c$$$ * ' thru ',istrec+icnt2-1,' fr ',irecv,' cnt2 ',icnt2
1091-
c$$$ call flush(6)
1092-
c$$$ end if
1082+
* if (time.lt.0.1) then
1083+
* print*,' INTGRT-R: rank t',rank,time,' ir ',ir,' send ',istsen,
1084+
* * ' thru ',istsen+icnt-1,' to ',isend,' cnt ',icnt
1085+
* print*,' INTGRT-R: rank t',rank,time,' ir ',ir,' recv ',istrec,
1086+
* * ' thru ',istrec+icnt2-1,' fr ',irecv,' cnt2 ',icnt2
1087+
* call flush(6)
1088+
* end if
10931089
*
10941090
#ifdef PUREMPI
1091+
print*,' INTGRT: MPI Reg 1 icnt;2,*41=',icnt,icnt2,41*icnt
10951092
call cputim(tta)
10961093
CALL MPI_SENDRECV(YMPI(1,istsen),41*icnt,MPI_REAL8,isend,
10971094
* rank,YMPI(1,istrec),41*icnt2,MPI_REAL8,irecv,irecv,
10981095
* MPI_COMM_WORLD,status,ierr)
10991096
*
1097+
print*,' INTGRT: MPI Reg 2 icnt;2,*lmax=',icnt,icnt2,lmax*icnt
11001098
c$$$ call mpi_barrier(MPI_COMM_WORLD,ierr)
11011099
CALL MPI_SENDRECV(IMPI(1,istsen),lmax*icnt,MPI_INTEGER,
11021100
* isend,rank,IMPI(1,istrec),lmax*icnt2,MPI_INTEGER,

src/Main/mdot.F

+5-6
Original file line numberDiff line numberDiff line change
@@ -725,12 +725,11 @@ SUBROUTINE MDOT
725725
CALL GRRAD(MASS(1),MASS(2),SEP,ECC,JORB,DJGR,DELET)
726726
DJORB = DJT + DJGR
727727
DTGR = 0.02D0*JORB/ABS(DJORB)
728-
IGR = IGR + 1
729-
* changed output RS March 2019 test
730-
IF (rank.eq.0.and.IGR.LT.1000000)
731-
& WRITE (6,45)IGR,TIME,MASS,-1,-1,SEP,ECC,JORB,DJGR,DTM
732-
45 FORMAT (' GR BRAKE IGR T M1 M2 ? ? SEP ECC JORB DJ DTM ',
733-
& 1P,I8,3E14.5,2I4,5E14.5)
728+
* RSp updated output July 2020
729+
IF (rank.eq.0.and.DJGR.GT.1.E-06)
730+
& WRITE (6,45)TTOT,MASS,SEP,ECC,JORB,DJGR,DTM
731+
45 FORMAT (' GR BRAKE T M1 M2 SEP ECC JORB DJGR DTM ',
732+
& 1P,8E14.5)
734733
* if(rank.eq.0)
735734
* & write(*,*)' grrad ',sep,ecc,djgr,delet,dtxmin
736735
*

src/Main/nbody6.F

+25-17
Original file line numberDiff line numberDiff line change
@@ -69,21 +69,16 @@ PROGRAM NBODY6
6969
call flush(6)
7070
*
7171
* Read start/restart indicator & CPU time.
72-
IF(rank.eq.0)READ (5,*) KSTART, TCOMP, TCRITP,
73-
* isernb,iserreg,iserks
72+
* 19:41-lwang-debug-10/03/14-----------------------------*
73+
IF(rank.eq.0)THEN
7474
#ifdef DEBUG
75-
* --10/03/14 19:41-lwang-debug--------------------------------------*
76-
***** Note:------------------------------------------------------------**
77-
if(rank.eq.0) read(5,*) adtime,dumptime,dprintt,dtprint,namep
78-
#if MPIINIT
79-
CALL MPI_BCAST(adtime,1,MPI_REAL8,0,MPI_COMM_WORLD,ierr)
80-
CALL MPI_BCAST(dumptime,1,MPI_REAL8,0,MPI_COMM_WORLD,ierr)
81-
CALL MPI_BCAST(dprintt,1,MPI_REAL8,0,MPI_COMM_WORLD,ierr)
82-
CALL MPI_BCAST(dtprint,1,MPI_REAL8,0,MPI_COMM_WORLD,ierr)
83-
CALL MPI_BCAST(namep,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
84-
#endif
85-
* --10/03/14 19:41-lwang-end----------------------------------------*
75+
READ (5,*) KSTART, TCOMP, TCRITP,
76+
* isernb,iserreg,iserks,adtime,dumptime,dprintt,dtprint,namep
77+
#else
78+
READ (5,*) KSTART, TCOMP, TCRITP,
79+
* isernb,iserreg,iserks
8680
#endif
81+
END IF
8782
*
8883
#if MPIINIT
8984
CALL MPI_BCAST(isernb,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
@@ -92,14 +87,27 @@ PROGRAM NBODY6
9287
CALL MPI_BCAST(KSTART,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
9388
CALL MPI_BCAST(TCOMP,1,MPI_REAL8,0,MPI_COMM_WORLD,ierr)
9489
CALL MPI_BCAST(TCRITP,1,MPI_REAL8,0,MPI_COMM_WORLD,ierr)
90+
*
91+
#ifdef DEBUG
92+
CALL MPI_BCAST(adtime,1,MPI_REAL8,0,MPI_COMM_WORLD,ierr)
93+
CALL MPI_BCAST(dumptime,1,MPI_REAL8,0,MPI_COMM_WORLD,ierr)
94+
CALL MPI_BCAST(dprintt,1,MPI_REAL8,0,MPI_COMM_WORLD,ierr)
95+
CALL MPI_BCAST(dtprint,1,MPI_REAL8,0,MPI_COMM_WORLD,ierr)
96+
CALL MPI_BCAST(namep,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
97+
#endif
9598
*
9699
isernb = max(isize,isernb*icore)
97100
iserreg = max(isize,iserreg*icore)
98101
* iserks = max(isize,iserks*icore)
99-
IF(rank.eq.0) then
100-
PRINT*,' iserreg,isernb,iserks,ithread=',iserreg,isernb,iserks,
101-
& ithread
102-
end if
102+
103+
IF(rank.eq.0) THEN
104+
PRINT*,' MPI: iserreg,isernb,iserks,ithread=',
105+
& iserreg,isernb,iserks,ithread
106+
#ifdef DEBUG
107+
PRINT*,' DEBUG: adtime,dumptime,dprintt,dtprint,namep =',
108+
& adtime,dumptime,dprintt,dtprint,namep
109+
#endif
110+
END IF
103111
#endif
104112
*
105113
IF (KSTART.EQ.1) THEN

src/Main/out1.ps~

Whitespace-only changes.

src/Main/pot.sse.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ void gpupot(
122122

123123
double t1 = get_wtime();
124124
#ifdef PROFILE
125-
fprintf(stderr, "[R.%d SSE Pot.A] Ni %d NTOT %d pot(s) %f\n", rank,ni,n,t1 - t0);
125+
fprintf(stderr, "[R.%d AVX Pot.A] Ni %d NTOT %d pot(s) %f\n", rank,ni,n,t1 - t0);
126126
#endif
127127

128128
}

0 commit comments

Comments
 (0)