Skip to content

Commit dba1980

Browse files

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+14166
-1031
lines changed

Makefile

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#
2+
# Makefile for NBODY6++ June 2003 R.Sp., Changed by Long Wang on Aug 20, 2014
3+
#
4+
# Please use:
5+
# make
6+
# make install
7+
#
8+
9+
# Name of the main executable; it is built inside ./build.
RESULT     := nbody6++.sse.gpu.mpi
# Installation prefix used by the install/uninstall rules.
INSTALLDIR := /usr/local
# Optional helper programs and shared libraries.
EXTRATOOLS := nb6++dumpb2a libinitial.so libnb6out3.so nb6++snapshot

# 'install' pulls in 'installtools' only when the two strings below are equal.
ifeq ("x", "x $(EXTRATOOLS)")
  EXTRAOBJS := installtools
endif

# Let make search ./build for targets and prerequisites.
VPATH := ./build
17+
18+
# Build the main executable by delegating to the Makefile in ./build.
# The target is declared phony on purpose: it has no prerequisites here and
# VPATH points at ./build, so once ./build/$(RESULT) exists make would treat
# it as up to date and never run the sub-make again, even after the sources
# changed. The sub-make does the real dependency checking.
.PHONY: $(RESULT)
$(RESULT):
	$(MAKE) -C ./build
20+
21+
# Remove everything the build leaves in ./build: objects, shared libraries,
# the main executable and the extra tools listed in EXTRATOOLS.
# EXTRATARGET is not set anywhere in this Makefile; it is still honoured so
# that a value passed on the command line keeps working.
.PHONY: clean
clean:
	rm -f ./build/*.o ./build/*.so ./build/$(RESULT) $(addprefix ./build/,$(EXTRATOOLS)) $(EXTRATARGET)
23+
24+
# Install the main executable into $(INSTALLDIR)/bin and the manual into
# $(INSTALLDIR)/share/doc. When EXTRAOBJS is set, the extra tools are
# installed as well through the 'installtools' prerequisite.
# 'mkdir -p' also creates missing parent directories, so the rule works when
# $(INSTALLDIR) itself does not exist yet.
.PHONY: install
install: $(RESULT) $(EXTRAOBJS)
	@mkdir -p $(INSTALLDIR)/bin $(INSTALLDIR)/share/doc
	cp ./build/$(RESULT) $(INSTALLDIR)/bin
	cp ./doc/nbody6++_manual.pdf $(INSTALLDIR)/share/doc/
30+
31+
32+
# Install the optional helper programs, the two shared libraries and their
# headers below $(INSTALLDIR).
# 'mkdir -p' also creates missing parent directories, so the rule works when
# $(INSTALLDIR) itself does not exist yet.
.PHONY: installtools
installtools:
	@mkdir -p $(INSTALLDIR)/bin $(INSTALLDIR)/include $(INSTALLDIR)/lib
	cp ./build/nb6++dumpb2a $(INSTALLDIR)/bin
	cp ./build/nb6++snapshot $(INSTALLDIR)/bin
	cp ./include/initial.h $(INSTALLDIR)/include
	cp ./build/libinitial.so $(INSTALLDIR)/lib
	cp ./include/nb6out3.h $(INSTALLDIR)/include
	cp ./build/libnb6out3.so $(INSTALLDIR)/lib
42+
43+
# Remove every file that 'install' and 'installtools' copy into
# $(INSTALLDIR), including the two headers placed in include/.
# Directories are left in place because other packages may share them.
.PHONY: uninstall
uninstall:
	rm -f $(INSTALLDIR)/bin/$(RESULT)
	rm -f $(INSTALLDIR)/lib/libinitial.so $(INSTALLDIR)/lib/libnb6out3.so
	rm -f $(INSTALLDIR)/include/initial.h $(INSTALLDIR)/include/nb6out3.h
	rm -f $(INSTALLDIR)/share/doc/nbody6++_manual.pdf
	rm -f $(INSTALLDIR)/bin/nb6++dumpb2a $(INSTALLDIR)/bin/nb6++snapshot

Makefile.in

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#
2+
# Makefile for NBODY6++ June 2003 R.Sp., Changed by Long Wang on Aug 20, 2014
3+
#
4+
# Please use:
5+
# make
6+
# make install
7+
#
8+
9+
# Name of the main executable; the suffix is substituted by configure.
RESULT     := nbody6++@RESULT@
# Installation prefix, substituted by configure.
INSTALLDIR := @prefix@
# Optional helper programs and shared libraries.
EXTRATOOLS := nb6++dumpb2a libinitial.so libnb6out3.so nb6++snapshot

# 'install' pulls in 'installtools' only when the substituted EXTRARESULT
# value matches the EXTRATOOLS list.
ifeq ("x@EXTRARESULT@", "x $(EXTRATOOLS)")
  EXTRAOBJS := installtools
endif

# Let make search ./build for targets and prerequisites.
VPATH := ./build
17+
18+
# Build the main executable (and any configured extra targets) by delegating
# to the Makefile in ./build.
# The targets are declared phony on purpose: they have no prerequisites here
# and VPATH points at ./build, so once the files exist make would treat them
# as up to date and never run the sub-make again, even after the sources
# changed. The sub-make does the real dependency checking.
.PHONY: $(RESULT) @EXTRARESULT@
$(RESULT) @EXTRARESULT@:
	$(MAKE) -C ./build
20+
21+
# Remove everything the build leaves in ./build: objects, shared libraries,
# the main executable and the extra tools listed in EXTRATOOLS.
# EXTRATARGET is not set anywhere in this Makefile; it is still honoured so
# that a value passed on the command line keeps working.
.PHONY: clean
clean:
	rm -f ./build/*.o ./build/*.so ./build/$(RESULT) $(addprefix ./build/,$(EXTRATOOLS)) $(EXTRATARGET)
23+
24+
# Install the main executable into $(INSTALLDIR)/bin and the manual into
# $(INSTALLDIR)/share/doc. When EXTRAOBJS is set, the extra tools are
# installed as well through the 'installtools' prerequisite.
# 'mkdir -p' also creates missing parent directories, so the rule works when
# $(INSTALLDIR) itself does not exist yet.
.PHONY: install
install: $(RESULT) $(EXTRAOBJS)
	@mkdir -p $(INSTALLDIR)/bin $(INSTALLDIR)/share/doc
	cp ./build/$(RESULT) $(INSTALLDIR)/bin
	cp ./doc/nbody6++_manual.pdf $(INSTALLDIR)/share/doc/
30+
31+
32+
# Install the optional helper programs, the two shared libraries and their
# headers below $(INSTALLDIR). The configured extra targets are built first.
# 'mkdir -p' also creates missing parent directories, so the rule works when
# $(INSTALLDIR) itself does not exist yet.
.PHONY: installtools
installtools: @EXTRARESULT@
	@mkdir -p $(INSTALLDIR)/bin $(INSTALLDIR)/include $(INSTALLDIR)/lib
	cp ./build/nb6++dumpb2a $(INSTALLDIR)/bin
	cp ./build/nb6++snapshot $(INSTALLDIR)/bin
	cp ./include/initial.h $(INSTALLDIR)/include
	cp ./build/libinitial.so $(INSTALLDIR)/lib
	cp ./include/nb6out3.h $(INSTALLDIR)/include
	cp ./build/libnb6out3.so $(INSTALLDIR)/lib
42+
43+
# Remove every file that 'install' and 'installtools' copy into
# $(INSTALLDIR), including the two headers placed in include/.
# Directories are left in place because other packages may share them.
.PHONY: uninstall
uninstall:
	rm -f $(INSTALLDIR)/bin/$(RESULT)
	rm -f $(INSTALLDIR)/lib/libinitial.so $(INSTALLDIR)/lib/libnb6out3.so
	rm -f $(INSTALLDIR)/include/initial.h $(INSTALLDIR)/include/nb6out3.h
	rm -f $(INSTALLDIR)/share/doc/nbody6++_manual.pdf
	rm -f $(INSTALLDIR)/bin/nb6++dumpb2a $(INSTALLDIR)/bin/nb6++snapshot

README

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
2+
June 2021
3+
4+
NBODY6++GPU-Dec2020 - Beijing version Dec2020 - maintained by R. Spurzem and team [email protected]
5+
=====================================================================================================
6+
7+
Note that this version has a common origin with Long Wang's github version https://github.com/nbodyx/ ,
8+
but it differs in quite a number of technical and a few astrophysical issues.
9+
10+
This tarball distribution is preliminary and should be replaced by a proper svn service at the same
11+
location in the future: http://silkroad.bao.ac.cn/repos/Nbody6++GPU-Dec2020/
12+
13+
Recommended usage:
14+
15+
./configure --with-par=b1m --enable-simd=sse --enable-mcmodel=large
16+
make clean ; make -j
17+
18+
It is for up to one million bodies with many initial binaries. The configure script written by
19+
Long Wang has a multitude of further options, check in it or ask.
20+
21+
Sources are in src/Main/ . Due to urgent bugfixes, a few routines are later than Dec2020.
22+
23+
After make you find the executable and object files in build/ .
24+
25+
This tarball provides two run directories: 16k_sse/ and 100k_sse/ - just test runs for 16k and 100k
26+
single particles, 100 N-body time units. You have to copy the executable file to the run directories.
27+
There is a little script @nb6++.run, which shows an example of how to run the code on one (16k) or two
28+
(100k) MPI processes. The environment variable OMP_NUM_THREADS has to be set to the desired value of
29+
OpenMP threads per MPI process. (Maybe your system has it predefined.) I also recommend setting
30+
OMP_STACKSIZE=4096M in the shell where you run the code.
31+
32+
Documentation: Manual in doc/nbody6++_manual.pdf
33+
34+
This code and the documentation are given without warranty; hopefully they are helpful. All may contain errors.
35+
The code is an offspring of Sverre Aarseth's direct N-body codes see www.sverre.com .
36+
This is the code suitable for parallel and GPU accelerated runs on supercomputers and workstations.
37+
It is inefficient (and even more error prone) for particle numbers below about 50k-100k particles
38+
(depending on hardware). For smaller N you are advised to use Nbody6 and Nbody6GPU for single node/process.
39+
40+
The test runs provided are just creating Plummer models and have initially only single particles. For any
41+
other initial model (special density profiles, initial binaries) it is recommended to provide a dat.10
42+
file in N-body input format (see manual). Such a file can be produced by other programs, like Mccluster.
43+
44+
Selected References:
45+
https://ui.adsabs.harvard.edu/abs/1999PASP..111.1333A/abstract (Aarseth: NBODY1 to NBODY6)
46+
https://ui.adsabs.harvard.edu/abs/1999JCoAM.109..407S/abstract (Spurzem on NBODY6++)
47+
https://ui.adsabs.harvard.edu/abs/2005MNRAS.363..293H/abstract (Hurley+ on SSE/BSE, earlier references therein)
48+
https://ui.adsabs.harvard.edu/abs/2012MNRAS.424..545N/abstract (Nitadori+: NBODY6GPU)
49+
https://ui.adsabs.harvard.edu/abs/2015MNRAS.450.4070W/abstract (Wang+: NBODY6++GPU)
50+
https://ui.adsabs.harvard.edu/abs/2021arXiv210508067K/abstract (Kamlah+: More on current stellar evol.)
51+
52+
53+
Known Problems:
54+
55+
1. For systems with more than one GPU on one node the association of MPI rank id and GPU bus id is not
56+
well defined, will be improved in next version.
57+
2. Runs with a million or more bodies and huge numbers of binaries (5% or more) use extreme amounts of
58+
computing time for the KS binaries (much much more than should be expected). We work on this.
59+
3. On some systems heap and stack management when using OpenMP and MPI together seem to produce very
60+
strange errors and segmentation faults. The exact reason is not known; we work on this.
61+
62+
=======================================================================================================
63+
64+
65+
66+
67+
68+
69+
70+

build/Makefile

+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
#
2+
# Makefile for NBODY6++ June 2003 R.Sp., Changed by Long Wang on Aug 20, 2014
3+
#
4+
# Please use:
5+
# make
6+
# make install
7+
#
8+
# Extra Tools
9+
# dump_btoa (A small routine for transferring between unformatted
10+
# fort.1/2 and formatted dump.dat. dump.dat is very
11+
# useful when you want to restart the simulation in a
12+
# different system)
13+
14+
# --- Toolchain ---------------------------------------------------------------
FC   := mpif77
CXX  := g++
CC   := gcc
NVCC := nvcc

# Name of the executable linked by this Makefile.
RESULT := nbody6++.sse.gpu.mpi

# --- Preprocessor switches, one variable per optional feature ----------------
#FFLAGS = -O3 -fbounds-check -fbacktrace -fno-automatic -fmax-stack-var-size=0 -fPIC -mcmodel=large -Wall
OMP_FLAGS   := -D OMP
MPI_FLAGS   := -D PARALLEL -D PUREMPI
GPU_FLAGS   := -D GPU
SIMD_FLAGS  := -D SIMD
HDF5_FLAGS  := -D H5OUTPUT
DEBUG_FLAGS := -D DEBUG
TT_FLAGS    := -D TT
INTEL_FLAGS := -D __USE_INTEL -no-wrap-margin

# --- Flags actually passed to the compilers ----------------------------------
# Only the GPU, MPI, SIMD and OMP switches are enabled in this configuration.
FFLAGS   := -g -fbounds-check -fbacktrace -O3 -fPIC -mcmodel=large -fopenmp -I../include $(GPU_FLAGS) $(MPI_FLAGS) $(SIMD_FLAGS) $(OMP_FLAGS)
CXXFLAGS := -O3 -fopenmp -I../include -march=native -fPIC -mcmodel=large -msse $(OMP_FLAGS)
CUFLAGS  := -O3 -I ../extra_inc/cuda
# LDFLAGS only carries the output name of the link step.
LDFLAGS  := -o $(RESULT)
34+
35+
# --- Optional Fortran sources, grouped by feature ----------------------------
MPI_FSOURCES  := energy_mpi.f fpoly1_mpi.f fpoly2_mpi.f
GPU_FSOURCES  := phicor.f
HDF5_FSOURCES :=
TT_FSOURCES   := ttforce.f ttgalaxy.f ttinit.f ttcal.f jacobi_transform.f

# --- Pre-defined object groups for the SIMD and CUDA kernels -----------------
IRRAVX_OBJECTS      := irr.avx.o
IRRSSE_OBJECTS      := irr.sse.o
AVX_OBJECTS         := reg.avx.o pot.avx.o irr.avx.o
SSE_OBJECTS         := reg.sse.o pot.sse.o irr.sse.o
CUDA_OBJECTS        := gpunb.velocity.o gpupot.gpu.o
CUDA_OBJECTS_SINGLE := gpunb.gpu.o gpupot.gpu.o
#CUDAOBJECTS = gpunb.gpu.o gpupot.gpu.o gpupot.mdot.o
#CUDAOBJECTS = gpunb.velocity.o gpupot.gpu.o gpupot.mdot.o
#CUDAOBJECTS = gpunb.gpu.o gpupot.gpu.o

# Helper programs and libraries that have their own rules further down.
EXTRATOOLS := nb6++dumpb2a libinitial.so libnb6out3.so nb6++snapshot

# Selection used by this configuration: GPU + MPI Fortran sources, plus the
# CUDA kernels and the SSE irregular-force object.
EXTRASRC := $(GPU_FSOURCES) $(MPI_FSOURCES)
EXTRAOBJ := $(CUDA_OBJECTS) $(IRRSSE_OBJECTS)
53+
54+
#VISITFLAGS = -DVISIT
55+
#VISITLD = -L ./lvisit -llvisit_nbody -L/work/Tuc/spurzem/visit/visit/lvisit/lib -llvisit -L/work/Tuc/spurzem/visit/visit/visit20/lib -lvisit
56+
57+
#VPATH= ../src/Chain:../src/Nchain:../src/Init:../src/Main:../src/Cloud:../src/GPU:../src/SE:../src/SIMD:../src/Tidal:../src/Tlist:../src/Tools:../src/Ellan:../src/KS:../src/Output:../src/HIR:../include
58+
# Directories searched for sources and include files.
VPATH := ../src/Main:../src/Tools:../include

# Include files every Fortran object depends on.
INC := params.h common6.h timing.h commonc.h common2.h kspars.h
61+
62+
SOURCE = ${EXTRASRC} nbody6.f file_init.f ellan.f eigenvalue.f indexx.f \
63+
adjust.f assess.f bindat.f binev.f binout.f binpop.f block.f bodies.f \
64+
brake.f brake2.f brake3.f bsetid.f chaos0.f chaos.f \
65+
check.f checkl.f chrect.f clint.f cloud.f cloud0.f \
66+
cmbody.f cmcorr.f cmfirr.f cmfreg.f coal.f comenv.f core.f corerd.f \
67+
cputim.f data.f decide.f deform.f degen.f delay.f \
68+
dgcore.f dtchck.f eccmod.f ecirc.f edot.f efac2.f efac3.f \
69+
expel.f escape.f events.f expand.f fclose.f \
70+
fcloud.f fcorr.f fdisk.f fhalo.f ficorr.f findj.f findm.f \
71+
flyby.f fnuc.f fpcorr.f fpert.f fpoly1.f fpoly2.f \
72+
gcinit.f gcint.f giant.f giant3.f gntage.f grrad.f hcorr.f \
73+
hiarch.f hicirc.f hidat.f higrow.f himax.f himod.f \
74+
hipop.f hirect.f histab.f hivel.f hmdot.f hmdot2.f hotsys.f \
75+
hrdiag.f hrplot.f hut.f hut2.f iblock.f imf.f imfbd.f imf2.f \
76+
impact.f induce.f input.f insert.f instar.f intgrt.f \
77+
jacobi.f kick.f kick2.f ksapo.f kscorr.f \
78+
ksin2.f ksinit.f ksint.f kslist.f ksmod.f ksperi.f kspert.f \
79+
kspoly.f kspred.f ksrect.f ksreg.f ksres.f ksres2.f ksterm.f \
80+
kstide.f lagr.f lagr2.f levels.f magbrk.f matrix.f mdot.f merge.f \
81+
merge2.f mix.f mloss.f mlwind.f modify.f mrenv.f mydump.f \
82+
nbint.f nblist.f nbpot.f nbrem.f nbrest.f \
83+
newtev.f nstab.f offset.f orbit.f output.f peri.f permit.f \
84+
pfac.f poti.f proto_star.f qtides.f ran2.f regint.f \
85+
remove.f rename_ks.f reset.f reset2.f resolv.f rkint.f rl.f roche.f \
86+
rpmax.f rpmax2.f rpmin.f scale.f search.f setup.f short.f shrink.f \
87+
sort1.f spiral.f stability.f star.f start.f stepk.f steps.f stumpf.f \
88+
subint.f synch.f tcirc.f tides.f tides2.f \
89+
tides3.f touch.f tpert.f trdot.f trdot2.f trflow.f tstab.f tstep.f \
90+
units.f unpert.f update.f verify.f xtrnl0.f xtrnld.f xtrnlf.f xtrnlp.f \
91+
xtrnlv.f xvpred.f zare.f zcnsts.f zero.f zfuncs.f \
92+
triple.f derqp3.f difsy3.f erel3.f extend.f qpmod3.f stabl3.f \
93+
stablz.f start3.f subsys.f tperi.f trans3.f \
94+
quad.f derqp4.f difsy4.f endreg.f erel4.f ichain.f newreg.f newsys.f \
95+
qpmod4.f rchain.f rsort.f stabl4.f start4.f status.f trans4.f \
96+
cfuncs.f chain.f chstab.f const.f cstab2.f cstab3.f cstab4.f cstab5.f \
97+
derqp.f difsy1.f erel.f hpsort.f inclin.f invert.f ksphys.f physks.f \
98+
qforce.f qpmod.f r2sort.f recoil.f redraw.f select.f slow.f stablc.f \
99+
swcond.f switch.f transk.f transq.f transx.f vector.f xtf.f \
100+
ycopy.f ysave.f \
101+
absorb.f chaos2.f chdata.f chfind.f chfirr.f chinit.f chlist.f chmod_chain.f \
102+
chpot.f chterm.f expel2.f fchain.f ghost.f giant2.f kcpert.f \
103+
reduce.f reinit.f renew.f setsys.f tchain.f xcpred.f xtpert.f \
104+
xbpredall.f jpred.f jpred_int.f fpoly1_ks.f fpoly2_ks.f cmfirr_cor.f cmfirr_ucor.f \
105+
kcpert_cor.f ksparmpi.f remove_ks.f nbint_cor.f string_left.f \
106+
sort_tlist.f next_tlist.f shrink_tlist.f repair_tlist.f remove_tlist.f \
107+
add_tlist.f exchange_tlist.f replace_tlist.f delay_store_tlist.f \
108+
delay_add_tlist.f delay_remove_tlist.f rmesc_tlist.f shift_tlist.f k_step.f \
109+
imbhinit.f bhplot.f energy.f kspinit.f kspreg.f counter_reset.f util_gpu.f \
110+
regcor_gpu.f fpoly0.f tail0.f ntint.f xtrnlt.f fbulge.f steps2.f tstep2.f \
111+
custom_output.f custom_output_facility.f ksres_op.f kstran.f \
112+
global_output.f global_params_gether.f
113+
114+
# Add .o and .F to the list of known suffixes.
.SUFFIXES : .o .F

# One object file per Fortran source listed in SOURCE.
OBJECTS = $(SOURCE:.f=.o)

# Default goal. TARGET is only a label, never a file, so it is declared phony
# to keep a stray file of that name from interfering.
.PHONY: TARGET
TARGET: $(RESULT)

# Link the main executable. LDFLAGS supplies '-o $(RESULT)'; the C++ runtime
# and the CUDA runtime are linked in for the SIMD and GPU objects.
$(RESULT): $(OBJECTS) $(EXTRAOBJ)
	$(FC) $(FFLAGS) $(LDFLAGS) $(OBJECTS) $(EXTRAOBJ) -lstdc++ -lcudart
122+
123+
# Shared library built from initial.cpp; the header is listed only so that a
# change to it triggers a rebuild.
libinitial.so: initial.h initial.cpp
	$(CXX) -shared $(CXXFLAGS) ../src/Tools/initial.cpp -o $@

# Shared library built from nb6out3.cxx, same scheme as above.
libnb6out3.so: nb6out3.h nb6out3.cxx
	$(CXX) -shared $(CXXFLAGS) ../src/Tools/nb6out3.cxx -o $@

# Converter between the unformatted and the formatted dump files.
nb6++dumpb2a: dump_btoa.F
	$(FC) $(FFLAGS) $^ -o $@

# Snapshot tool; it links against the two shared libraries built above,
# which are looked up in the current directory.
nb6++snapshot: libinitial.so libnb6out3.so snapshot.cpp
	$(CXX) $(CXXFLAGS) ../src/Tools/snapshot.cpp -L./ -lnb6out3 -linitial -o $@

# Every Fortran object is rebuilt when one of the include files in INC changes.
$(OBJECTS): $(INC)
136+
137+
# Host-side SIMD kernels (AVX and SSE variants of irr, reg and pot).
# Each <name>.o is compiled from the <name>.cpp of the same stem.
irr.avx.o reg.avx.o pot.avx.o irr.sse.o reg.sse.o pot.sse.o: %.o: %.cpp
	$(CXX) $(CXXFLAGS) $< -c -o $@
154+
155+
# CUDA kernels: each <name>.o is compiled from the <name>.cu of the same stem.
# CXXFLAGS is handed to the host compiler through -Xcompiler. No -o is given,
# so the name of the object file is left to nvcc.
gpunb.velocity.o gpunb.gpu.o gpupot.gpu.o gpupot.mdot.o: %.o: %.cu
	$(NVCC) -c $(CUFLAGS) -Xcompiler "$(CXXFLAGS)" $<
166+

0 commit comments

Comments
 (0)