Skip to content

Commit

Permalink
Add standalone n-body SoA and AoSoA for code comparison
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Jan 11, 2024
1 parent d7ea189 commit 2b94b4b
Show file tree
Hide file tree
Showing 5 changed files with 238 additions and 35 deletions.
14 changes: 11 additions & 3 deletions examples/nbody_code_comp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,22 @@

cmake_minimum_required (VERSION 3.18.3)

project(llama-nbody-baseline CXX)
add_executable(${PROJECT_NAME} nbody_baseline.cpp)
project(llama-nbody-aos-baseline CXX)
add_executable(${PROJECT_NAME} nbody-AoS-baseline.cpp)
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20)

project(llama-nbody-soa CXX)
add_executable(${PROJECT_NAME} nbody-SoA.cpp)
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20)

project(llama-nbody-aosoa CXX)
add_executable(${PROJECT_NAME} nbody-AoSoA.cpp)
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20)

project(llama-nbody-ported CXX)
if (NOT TARGET llama::llama)
find_package(llama CONFIG REQUIRED)
endif()
add_executable(${PROJECT_NAME} nbody_ported.cpp)
add_executable(${PROJECT_NAME} nbody-ported.cpp)
target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_20)
target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama)
Original file line number Diff line number Diff line change
Expand Up @@ -13,63 +13,48 @@ constexpr int steps = 5, problemSize = 64 * 1024;

struct Vec3 {
FP x, y, z;

auto operator*=(Vec3 v) -> Vec3& {
x *= v.x;
y *= v.y;
z *= v.z;
return *this;
}

auto operator+=(Vec3 v) -> Vec3& {
x += v.x;
y += v.y;
z += v.z;
return *this;
}

friend auto operator-(Vec3 a, Vec3 b) -> Vec3 {
return Vec3{a.x - b.x, a.y - b.y, a.z - b.z};
}

friend auto operator*(Vec3 a, FP s) -> Vec3 {
return Vec3{a.x * s, a.y * s, a.z * s};
}
};

struct Particle {
Vec3 pos, vel;
FP mass;
};

inline void pPInteraction(Particle& pi, const Particle& pj) {
auto dist = pi.pos - pj.pos;
dist *= dist;
auto dist = Vec3{pi.pos.x - pj.pos.x, pi.pos.y - pj.pos.y, pi.pos.z - pj.pos.z};
dist.x *= dist.x;
dist.y *= dist.y;
dist.z *= dist.z;
const auto distSqr = eps2 + dist.x + dist.y + dist.z;
const auto distSixth = distSqr * distSqr * distSqr;
const auto invDistCube = FP{1} / std::sqrt(distSixth);
const auto sts = pj.mass * timestep * invDistCube;
pi.vel += dist * sts;
pi.vel.x += dist.x * sts;
pi.vel.y += dist.y * sts;
pi.vel.z += dist.z * sts;
}

void update(std::span<Particle> particles) {
#pragma GCC ivdep
for(int i = 0; i < problemSize; i++) {
Particle pi = particles[i];
for(std::size_t j = 0; j < problemSize; ++j)
for(int j = 0; j < problemSize; ++j)
pPInteraction(pi, particles[j]);
particles[i].vel = pi.vel;
}
}

void move(std::span<Particle> particles) {
#pragma GCC ivdep
for(int i = 0; i < problemSize; i++)
particles[i].pos += particles[i].vel * timestep;
for(int i = 0; i < problemSize; i++) {
particles[i].pos.x += particles[i].vel.x * timestep;
particles[i].pos.y += particles[i].vel.y * timestep;
particles[i].pos.z += particles[i].vel.z * timestep;
}
}

auto main() -> int {
auto particles = std::vector<Particle>(problemSize);

std::default_random_engine engine;
std::normal_distribution<FP> dist(FP{0}, FP{1});
for(auto& p : particles) {
Expand All @@ -86,5 +71,6 @@ auto main() -> int {
update(particles);
::move(particles);
}

return 0;
}
103 changes: 103 additions & 0 deletions examples/nbody_code_comp/nbody-AoSoA.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright 2024 Bernhard Manfred Gruber
// SPDX-License-Identifier: MPL-2.0

// clang-format off

#include <random>
#include <span>
#include <vector>

// Floating-point type used for all particle data.
using FP = float;

// Simulation parameters.
constexpr FP timestep = 0.0001f;
constexpr FP eps2 = 0.01f; // added to every squared distance, which keeps it strictly positive
constexpr int steps = 5;
constexpr int problemSize = 64 * 1024;

// AoSoA blocking: particles are stored in blocks of `lanes` elements each.
constexpr int lanes = 16;
// Integer division below would silently drop the trailing particles otherwise.
static_assert(problemSize % lanes == 0, "problemSize must be a multiple of lanes");
constexpr int blocks = problemSize / lanes;

// The x, y and z components of `lanes` 3D vectors, stored component-wise:
// all x values first, then all y values, then all z values.
// The block is aligned to lanes * sizeof(FP) bytes.
struct alignas(lanes * sizeof(FP)) Vec3Block {
FP x[lanes];
FP y[lanes];
FP z[lanes];
};
// One block of `lanes` particles: their positions, their velocities and their masses.
// Element i of every array inside the block belongs to the same particle.
// The block is aligned to lanes * sizeof(FP) bytes.
struct alignas(lanes * sizeof(FP)) ParticleBlock {
Vec3Block pos, vel;
FP mass[lanes];
};

// Adds the contribution of particle j (position pjpos*, mass pjmass) to the
// velocity (pivelx, pively, pivelz) of particle i located at pipos*.
// Only the three velocity references are modified.
inline void pPInteraction(FP piposx, FP piposy, FP piposz, FP& pivelx, FP& pively, FP& pivelz,
                          FP pjposx, FP pjposy, FP pjposz, FP pjmass) {
    const FP dx = piposx - pjposx;
    const FP dy = piposy - pjposy;
    const FP dz = piposz - pjposz;

    const FP sqX = dx * dx;
    const FP sqY = dy * dy;
    const FP sqZ = dz * dz;

    // Summation and multiplication order is significant for the floating-point result; do not reassociate.
    const FP distSqr = eps2 + sqX + sqY + sqZ;
    const FP distSixth = distSqr * distSqr * distSqr;
    const FP invDistCube = FP{1} / std::sqrt(distSixth);
    const FP scale = pjmass * timestep * invDistCube;

    // NOTE(review): the update uses the squared coordinate differences, not the signed ones.
    // The other n-body variants of this example do the same; confirm this is intended.
    pivelx += sqX * scale;
    pively += sqY * scale;
    pivelz += sqZ * scale;
}

// Updates the velocities of all particles by accumulating the interaction of
// every particle with every particle. Positions and masses are only read.
// Exactly `blocks` blocks are processed, so `particles` must hold at least that many.
void update(std::span<ParticleBlock> particles) {
    for(int target = 0; target < blocks; ++target) {
        // Accumulate into a local copy of the block; only its velocities are written back.
        ParticleBlock current = particles[target];

        for(int source = 0; source < blocks; ++source) {
            const ParticleBlock& other = particles[source];
            for(int j = 0; j < lanes; ++j) {
                // The interacting particle j is the same for all lanes of the current block.
                const FP otherX = other.pos.x[j];
                const FP otherY = other.pos.y[j];
                const FP otherZ = other.pos.z[j];
                const FP otherMass = other.mass[j];
#pragma GCC ivdep
                for(int lane = 0; lane < lanes; ++lane) {
                    pPInteraction(
                        current.pos.x[lane], current.pos.y[lane], current.pos.z[lane],
                        current.vel.x[lane], current.vel.y[lane], current.vel.z[lane],
                        otherX, otherY, otherZ, otherMass);
                }
            }
        }

        particles[target].vel = current.vel;
    }
}

// Advances the position of every particle by its velocity multiplied by `timestep`.
// Exactly `blocks` blocks are processed, so `particles` must hold at least that many.
void move(std::span<ParticleBlock> particles) {
    for(int bi = 0; bi < blocks; bi++) {
        auto& block = particles[bi];
        // `lanes` is an int; use an int index like every other loop in this file
        // instead of mixing in std::size_t.
#pragma GCC ivdep
        for(int i = 0; i < lanes; ++i) {
            block.pos.x[i] += block.vel.x[i] * timestep;
            block.pos.y[i] += block.vel.y[i] * timestep;
            block.pos.z[i] += block.vel.z[i] * timestep;
        }
    }
}

auto main() -> int {
auto particles = std::vector<ParticleBlock>(blocks);

std::default_random_engine engine;
std::normal_distribution<FP> dist(FP{0}, FP{1});
for(int bi = 0; bi < blocks; ++bi) {
for(int i = 0; i < lanes; ++i) {
particles[bi].pos.x[i] = dist(engine);
particles[bi].pos.y[i] = dist(engine);
particles[bi].pos.z[i] = dist(engine);
particles[bi].vel.x[i] = dist(engine) / FP{10};
particles[bi].vel.y[i] = dist(engine) / FP{10};
particles[bi].vel.z[i] = dist(engine) / FP{10};
particles[bi].mass[i] = dist(engine) / FP{100};
}
}

for(int s = 0; s < steps; ++s) {
update(particles);
::move(particles);
}

return 0;
}
97 changes: 97 additions & 0 deletions examples/nbody_code_comp/nbody-SoA.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Copyright 2024 Bernhard Manfred Gruber
// SPDX-License-Identifier: MPL-2.0

// clang-format off

#include <cmath>
#include <cstddef>
#include <new>
#include <random>
#include <vector>

// Floating-point type used for all particle data.
using FP = float;

// Simulation parameters.
constexpr FP timestep = 0.0001f;
constexpr FP eps2 = 0.01f; // added to every squared distance, which keeps it strictly positive
constexpr int steps = 5;
constexpr int problemSize = 64 * 1024;

// Adds the contribution of particle j (position pjpos*, mass pjmass) to the
// velocity (pivelx, pively, pivelz) of particle i located at pipos*.
// Only the three velocity references are modified.
inline void pPInteraction(FP piposx, FP piposy, FP piposz, FP& pivelx, FP& pively, FP& pivelz,
                          FP pjposx, FP pjposy, FP pjposz, FP pjmass) {
    const FP dx = piposx - pjposx;
    const FP dy = piposy - pjposy;
    const FP dz = piposz - pjposz;

    const FP sqX = dx * dx;
    const FP sqY = dy * dy;
    const FP sqZ = dz * dz;

    // Summation and multiplication order is significant for the floating-point result; do not reassociate.
    const FP distSqr = eps2 + sqX + sqY + sqZ;
    const FP distSixth = distSqr * distSqr * distSqr;
    const FP invDistCube = FP{1} / std::sqrt(distSixth);
    const FP scale = pjmass * timestep * invDistCube;

    // NOTE(review): the update uses the squared coordinate differences, not the signed ones.
    // The other n-body variants of this example do the same; confirm this is intended.
    pivelx += sqX * scale;
    pively += sqY * scale;
    pivelz += sqZ * scale;
}

void update(const FP* posx, const FP* posy, const FP* posz, FP* velx, FP* vely, FP* velz, const FP* mass) {
#pragma GCC ivdep
for(int i = 0; i < problemSize; i++) {
const auto piposx = posx[i];
const auto piposy = posy[i];
const auto piposz = posz[i];
auto pivelx = velx[i];
auto pively = vely[i];
auto pivelz = velz[i];
for(int j = 0; j < problemSize; ++j)
pPInteraction(piposx, piposy, piposz, pivelx, pively, pivelz, posx[j], posy[j], posz[j], mass[j]);
velx[i] = pivelx;
vely[i] = pively;
velz[i] = pivelz;
}
}

// Advances the first `problemSize` positions by the corresponding velocity
// multiplied by `timestep`. The velocity arrays are only read.
// Every array must hold at least `problemSize` elements.
void move(FP* posx, FP* posy, FP* posz, const FP* velx, const FP* vely, const FP* velz) {
#pragma GCC ivdep
    for(int p = 0; p < problemSize; ++p) {
        const FP stepX = velx[p] * timestep;
        const FP stepY = vely[p] * timestep;
        const FP stepZ = velz[p] * timestep;
        posx[p] += stepX;
        posy[p] += stepY;
        posz[p] += stepZ;
    }
}

// Stateless allocator returning storage aligned to 64 bytes, usable with
// standard containers such as std::vector.
//
// allocate() returns raw, uninitialized storage obtained from the aligned
// global allocation function; constructing and destroying the elements is left
// to the container. Obtaining the storage through an array new-expression
// instead would construct the elements a first time and, for types with a
// non-trivial destructor, may prepend an array cookie, so the pointer later
// handed to the deallocation function would not be the one that was allocated.
template<typename T>
struct AlignedAllocator {
    using value_type = T;

    // Alignment in bytes of every allocation.
    static constexpr std::align_val_t alignment{64};

    constexpr AlignedAllocator() noexcept = default;

    // Converting constructor required so containers can rebind the allocator to another type.
    template<typename U>
    constexpr AlignedAllocator(const AlignedAllocator<U>&) noexcept {
    }

    // Returns uninitialized storage for n objects of type T.
    // Throws std::bad_array_new_length if n * sizeof(T) would overflow and
    // std::bad_alloc if the allocation itself fails.
    [[nodiscard]] auto allocate(std::size_t n) const -> T* {
        if(n > static_cast<std::size_t>(-1) / sizeof(T))
            throw std::bad_array_new_length{};
        return static_cast<T*>(::operator new(n * sizeof(T), alignment));
    }

    // Releases storage previously returned by allocate(). The element count is not needed.
    void deallocate(T* p, std::size_t) const noexcept {
        ::operator delete(p, alignment);
    }
};

// The allocator has no state, so all instances are interchangeable:
// storage allocated by one can be released by any other.
template<typename T, typename U>
constexpr auto operator==(const AlignedAllocator<T>&, const AlignedAllocator<U>&) noexcept -> bool {
    return true;
}

template<typename T, typename U>
constexpr auto operator!=(const AlignedAllocator<T>&, const AlignedAllocator<U>&) noexcept -> bool {
    return false;
}

auto main() -> int {
using Vector = std::vector<FP, AlignedAllocator<FP>>;
auto posx = Vector(problemSize);
auto posy = Vector(problemSize);
auto posz = Vector(problemSize);
auto velx = Vector(problemSize);
auto vely = Vector(problemSize);
auto velz = Vector(problemSize);
auto mass = Vector(problemSize);

std::default_random_engine engine;
std::normal_distribution<FP> dist(FP{0}, FP{1});
for(int i = 0; i < problemSize; ++i) {
posx[i] = dist(engine);
posy[i] = dist(engine);
posz[i] = dist(engine);
velx[i] = dist(engine) / FP{10};
vely[i] = dist(engine) / FP{10};
velz[i] = dist(engine) / FP{10};
mass[i] = dist(engine) / FP{100};
}

for(int s = 0; s < steps; ++s) {
update(posx.data(), posy.data(), posz.data(), velx.data(), vely.data(), velz.data(), mass.data());
move(posx.data(), posy.data(), posz.data(), velx.data(), vely.data(), velz.data());
}

return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
// clang-format off

#include "llama/llama.hpp"

#include <random>
#include <vector>

Expand All @@ -13,8 +12,16 @@ constexpr FP timestep = 0.0001f, eps2 = 0.01f;
constexpr int steps = 5, problemSize = 64 * 1024;

struct Pos{}; struct Vel{}; struct X{}; struct Y{}; struct Z{}; struct Mass{};
using V3 = llama::Record<llama::Field<X, FP>, llama::Field<Y, FP>, llama::Field<Z, FP>>;
using Particle = llama::Record<llama::Field<Pos, V3>, llama::Field<Vel, V3>, llama::Field<Mass, FP>>;
using Vec3 = llama::Record<
llama::Field<X, FP>,
llama::Field<Y, FP>,
llama::Field<Z, FP>
>;
using Particle = llama::Record<
llama::Field<Pos, Vec3>,
llama::Field<Vel, Vec3>,
llama::Field<Mass, FP>
>;

LLAMA_FN_HOST_ACC_INLINE void pPInteraction(auto&& pi, auto&& pj) {
auto dist = pi(Pos{}) - pj(Pos{});
Expand Down Expand Up @@ -47,6 +54,7 @@ auto main() -> int {
const auto extents = ArrayExtents{problemSize};
auto mapping = llama::mapping::AoS<ArrayExtents, Particle>{extents};
auto particles = llama::allocViewUninitialized(mapping);

std::default_random_engine engine;
std::normal_distribution<FP> dist(FP{0}, FP{1});
for(auto&& p : particles) {
Expand All @@ -63,5 +71,6 @@ auto main() -> int {
update(particles);
::move(particles);
}

return 0;
}

0 comments on commit 2b94b4b

Please sign in to comment.