diff --git a/chapel/.gitignore b/chapel/.gitignore new file mode 100644 index 0000000..c60fea6 --- /dev/null +++ b/chapel/.gitignore @@ -0,0 +1,2 @@ +bude* +*.o diff --git a/chapel/Bude.chpl b/chapel/Bude.chpl new file mode 100644 index 0000000..a114703 --- /dev/null +++ b/chapel/Bude.chpl @@ -0,0 +1,475 @@ +// Use Chapel argument parser +module Bude { + use IO; + use Time; + use AutoMath; + + config param WGSIZE = 4, + DEFAULT_ITERS = 8, + DEFAULT_NPOSES = 65536, + REF_NPOSES = 65536, + DATA_DIR = "../data/bm1", + FILE_LIGAND = "/ligand.in", + FILE_PROTEIN = "/protein.in", + FILE_FORCEFIELD = "/forcefield.in", + FILE_POSES = "/poses.in", + FILE_REF_ENERGIES = "/ref_energies.out", + ATOM_SIZE = 16, + FFPARAMS_SIZE = 16; + + // Energy evaluation parameters + param CNSTNT: real(32) = 45.0; + param HBTYPE_F: real(32) = 70.0; + param HBTYPE_E: real(32) = 69.0; + param HARDNESS: real(32) = 38.0; + param NPNPDIST: real(32) = 5.5; + param NPPDIST: real(32) = 1.0; + + const WORK_GROUP = 0..= argc || parseInt(this.iterations, args[i+1]) < 0 { + writeln("Invalid number of iterations"); + exit(1); + } + i += 1; + } else if arg == "--numposes" || arg == "-n" { + if i + 1 >= argc || parseInt(this.nposes, args[i+1]) < 0 { + writeln("Invalid number of poses"); + exit(1); + } + i += 1; + } else if arg == "--help" || arg == "-h" { + writeln(""); + writeln("Usage: ./bude [OPTIONS]"); + writeln("Options:"); + writeln(" -h --help Print this message"); + writeln(" -i --iterations I Repeat kernel I times (default: ", DEFAULT_ITERS, ")"); + writeln(" -n --numposes N Compute energies for N poses (default: ", DEFAULT_NPOSES, ")"); + writeln(" --deck DECK Use the DECK directory as input deck (default: ", DATA_DIR, ")"); + writeln(""); + exit(0); + } else if arg == "--deck" { + if (i + 1 >= argc) { + writeln("Invalid deck"); + exit(1); + } + t_deckDir = args[i + 1]; + i += 1; + } else { + writeln("Unrecognized argument '", arg, "' (try '--help')\n"); + exit(1); + } + i += 1; + } + + 
this.deckDir = t_deckDir; + var length: int; + var aFile: file; + + /* init ligand array */ + aFile = openFile(this.deckDir, FILE_LIGAND, iomode.r, length); + this.natlig = length / ATOM_SIZE; + this.ligandDom = {0..(this.natlig-1)}; + + /* init protein array */ + aFile = openFile(this.deckDir, FILE_PROTEIN, iomode.r, length); + this.natpro = length / ATOM_SIZE; + this.proteinDom = {0..(this.natpro-1)}; + + /* init forcefield array */ + aFile = openFile(this.deckDir, FILE_FORCEFIELD, iomode.r, length); + this.ntypes = length / FFPARAMS_SIZE; + this.forcefieldDom = {0..(this.ntypes-1)}; + + /* init poses array */ + this.posesDom = { 0..5, (0..this.nposes-1) }; + } + + proc load() { + var length: int; + var aFile: file; + + /* load ligand */ + aFile = openFile(this.deckDir, FILE_LIGAND, iomode.r, length); + loadData(aFile, this.ligand, ATOM_SIZE); + + /* load protein */ + aFile = openFile(this.deckDir, FILE_PROTEIN, iomode.r, length); + loadData(aFile, this.protein, ATOM_SIZE); + + /* load forcefields */ + aFile = openFile(this.deckDir, FILE_FORCEFIELD, iomode.r, length); + loadData(aFile, this.forcefield, FFPARAMS_SIZE); + + /* load poses */ + aFile = openFile(this.deckDir, FILE_POSES, iomode.r, length); + var available = length / (6 * 4); + var cur_poses = 0, fetch, address = 0; + while (cur_poses < this.nposes) { + fetch = this.nposes - cur_poses; + if (fetch > available) { + fetch = available; + } + address = 0; // rewind + for i in 0..<6 { + address = i * available * 4; + for j in 0..(fetch-1) { + loadDataPiece(aFile, this.poses(i, cur_poses+j), address, 4); + address += 4; + } + } + cur_poses += fetch; + } + + this.nposes = cur_poses; + } + + /* Load data from file to record array */ + proc loadData(aFile: file, ref A: [] ?t, size: int) { + const n = A.size; + var readChannel = try! aFile.reader(kind=iokind.native, region=0..n*size); + try! readChannel.read(A); + try! 
readChannel.close(); + } + + /* Load data piece */ + proc loadDataPiece(aFile: file, ref A: ?t, base: int, offset: int) { + var r = try! aFile.reader(kind=iokind.native, region=base..base+offset); + try! r.read(A); + try! r.close(); + } + + /* Convert a string to integer */ + proc parseInt(ref x: int, s: string): int { + try { + x = s: int; + } catch { + return -1; + } + return x; + } + } + + proc openFile(parent: string, child: string, mode: iomode, ref length: int): file { + const name = parent + child; + var aFile: file; + + try { + aFile = open(name, mode); + length = aFile.size; + } catch { + try { + stderr.writeln("Failed to open '", name, "'"); + exit(0); + } catch { + exit(0); + } + } + + return aFile; + } + + var params: context; + + proc main(args: [] string) { + params = new context(args); + params.load(); + + // Show meta-information + writeln(""); + writeln("Poses : ", params.nposes); + writeln("Iterations: ", params.iterations); + writeln("Ligands : ", params.natlig); + writeln("Proteins : ", params.natpro); + writeln("Deck : ", params.deckDir); + + + var energiesChapel: [0.. REF_NPOSES) { + writeln("Only validating the first ", REF_NPOSES, " poses"); + n_ref_poses = REF_NPOSES; + } + + var reader = try! ref_energies.reader(); + for i in 0.. maxdiff) { + maxdiff = diff; + } + } + + writef("\nLargest difference was %{.###}%%.\n\n", 100 * maxdiff); + } + + + proc compute(results: [] real(32)) { + writeln("\nRunning Chapel"); + + var buffer: [0.. 
0.0; + + // Transform ligand atom + var lpos_x: [0..WGSIZE] real(32) = noinit; + var lpos_y: [0..WGSIZE] real(32) = noinit; + var lpos_z: [0..WGSIZE] real(32) = noinit; + + foreach l in WORK_GROUP { + lpos_x[l] = transform(0, 3, l) + + l_atom.x * transform(0, 0, l) + + l_atom.y * transform(0, 1, l) + + l_atom.z * transform(0, 2, l); + + lpos_y[l] = transform(1, 3, l) + + l_atom.x * transform(1, 0, l) + + l_atom.y * transform(1, 1, l) + + l_atom.z * transform(1, 2, l); + + lpos_z[l] = transform(2, 3, l) + + l_atom.x * transform(2, 0, l) + + l_atom.y * transform(2, 1, l) + + l_atom.z * transform(2, 2, l); + } + + foreach ip in 0.. 0; + const phphb_nz = p_params.hphb != 0; + + const p_hphb = p_params.hphb + * if phphb_ltz && lhphb_gtz then -1.0: real(32) else 1.0: real(32); + + const l_hphb = l_params.hphb + * if phphb_gtz && lhphb_ltz then -1.0: real(32) else 1.0: real(32); + + const distdslv = + if phphb_ltz + then ( + if lhphb_ltz + then NPNPDIST + else NPPDIST + ) else ( + if lhphb_ltz + then NPPDIST + else -max(real(32)) + ); + + const r_distdslv = 1.0 / distdslv; + const chrg_init = l_params.elsc * p_params.elsc; + const dslv_init = p_hphb + l_hphb; + + foreach l in WORK_GROUP { + // Calculate distance between atoms + const x = lpos_x(l) - p_atom.x; + const y = lpos_y(l) - p_atom.y; + const z = lpos_z(l) - p_atom.z; + const distij = sqrt(x * x + y * y + z* z); + + // Calculate the sum of the sphere radii + const distbb = distij - radij; + const zone1 = distbb < 0.0: real(32); + + // Calculate steric energy + etot[l] += (1.0 - distij * r_radij) + * if zone1 then 2.0: real(32) * HARDNESS else 0.0: real(32); + + // Calculate formal and dipole charge interactions + var chrg_e = + chrg_init * ( + if zone1 + then 1.0: real(32) + else 1.0: real(32) - distbb * elcdst1 + ) * ( + if distbb < elcdst + then 1.0: real(32) + else 0.0: real(32) + ); + + var neg_chrg_e = -abs(chrg_e); + chrg_e = if type_E then neg_chrg_e else chrg_e; + etot[l] += chrg_e * CNSTNT; + + const 
coeff = 1.0 - distbb * r_distdslv; + var dslv_e = dslv_init + * if distbb < distdslv && phphb_nz then 1.0: real(32) else 0.0: real(32); + + dslv_e *= if zone1 then 1.0: real(32) else coeff; + etot[l] += dslv_e; + } + } + } + + results[group * WGSIZE..<(group + 1) * WGSIZE] = 0.5 : real(32) * etot; + } +} \ No newline at end of file diff --git a/chapel/Makefile b/chapel/Makefile new file mode 100644 index 0000000..96e01a2 --- /dev/null +++ b/chapel/Makefile @@ -0,0 +1,44 @@ +SHELL := bash +.SHELLFLAGS := -eu -o pipefail -c +.DELETE_ON_ERROR: + +MAKEFLAGS += --warn-undefined-variables --no-builtin-rules + +WGSIZE = 64 +ARCH = native + +# ------- + +MACHINE = $(shell uname -m) + +ifeq ($(MACHINE), x86_64) + +# On Skylake and Cascade Lake, 256-bit vectors are used by default, but 512-bit is more beneficial +AVX512 = $(if $(filter skylake% cascadelake% %avx512, $(ARCH)),yes,) + +# Intel platforms benefit more from unrolling, specially Skylake and later +WGSIZE = 256 + +endif + +VI = 4 +ifeq ($(shell expr $(WGSIZE) \< 32), 1) + VI = 1 +endif + +CHPLFLAGS = --warnings --fast --no-ieee-float --mllvm "-force-vector-interleave=$(VI)" -s WGSIZE=$(WGSIZE) $(if $(AVX512),--mllvm "-force-vector-width=512") +CHPL = chpl + +# ------- +EXE = bude + + +.PHONY: all $(EXE) clean + +all: $(EXE) + +$(EXE): Bude.chpl + $(CHPL) $(CHPLFLAGS) Bude.chpl -o $@ + +clean: + rm -f $(EXE) diff --git a/chapel/README.md b/chapel/README.md new file mode 100644 index 0000000..b5c7761 --- /dev/null +++ b/chapel/README.md @@ -0,0 +1,31 @@ +# miniBUDE Chapel + +This is an implementation of miniBUDE using Chapel. + +## Building + +Prerequisites + + * Chapel >= 1.28 + +### Block Sizes + +This implementation includes a tunable block size similar to OpenCL workgroups. +The default value is `64`, which is suitable for 512-bit vectors. +Intel platforms benefit from more unrolling, so the default on x86 is `256`. 
+ +The block size can be set through the `WGSIZE` make variable, as follows: + + make WGSIZE=16 + +For AVX-512 targets, the 512-bit registers (`zmm`) are used by default, because this increases performance. +To disable this and fall back to the compiler's default, which is 256-bit vectors as of Cascade Lake, set `AVX512` to the empty string: + + make ARCH=skylake-avx512 AVX512='' + + +## Running + +This implementation has no special run-time options. +The `-n`, `-i` and `--deck` options are available, and the number of threads should be set through the `CHPL_RT_NUM_THREADS_PER_LOCALE` environment variable. +Run `./bude -h` for a help message.