From 9da3ca75f9ef7c192c52d6eb8c221f69d83350a8 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Sun, 5 Nov 2023 01:54:17 -0700 Subject: [PATCH] Pandas: `ParticleContainer_*.to_df()` Copy all particles into a `pandas.DataFrame`. Supports local and MPI-gathered results. --- .github/workflows/ubuntu.yml | 5 +- .github/workflows/windows.yml | 3 +- README.md | 5 ++ docs/source/install/dependencies.rst | 9 +-- src/Particle/ParticleContainer.H | 10 +++ src/amrex/ParticleContainer.py | 104 +++++++++++++++++++++++++++ src/amrex/space1d/__init__.py | 2 + src/amrex/space2d/__init__.py | 2 + src/amrex/space3d/__init__.py | 2 + tests/test_particleContainer.py | 37 ++++++++++ 10 files changed, 172 insertions(+), 7 deletions(-) create mode 100644 src/amrex/ParticleContainer.py diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 9b9b9145..ba9118cd 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -82,7 +82,7 @@ jobs: export CXX=$(which g++-10) python3 -m pip install -U pip setuptools wheel python3 -m pip install -U cmake - python3 -m pip install -U pytest mpi4py + python3 -m pip install -U pandas pytest mpi4py cmake -S . -B build \ -DCMAKE_BUILD_TYPE=Debug \ @@ -174,7 +174,8 @@ jobs: export CCACHE_MAXSIZE=300M ccache -z - python3 -m pip install -U pip pytest + python3 -m pip install -U pip + python3 -m pip install -U pandas pytest python3 -m pip install -v . python3 -c "import amrex.space1d as amr; print(amr.__version__)" python3 -c "import amrex.space2d as amr; print(amr.__version__)" diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 6682c9dd..b4b8a474 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -18,7 +18,8 @@ jobs: python-version: '3.x' - name: Build & Install run: | - python3 -m pip install -U pip pytest + python3 -m pip install -U pip + python3 -m pip install -U pandas pytest python3 -m pip install -v . if(!$?) 
{ Exit $LASTEXITCODE } diff --git a/README.md b/README.md index c4945f83..dc85c629 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ Optional dependencies include: - [mpi4py](https://mpi4py.readthedocs.io) 2.1+: for multi-node and/or multi-GPU execution - [CCache](https://ccache.dev): to speed up rebuilds (for CUDA support, needs 3.7.9+ and 4.2+ is recommended) - further [optional dependencies of AMReX](https://github.com/AMReX-Codes/amrex/) +- [pandas](https://pandas.pydata.org/) 2+: for DataFrame support - [pytest](https://docs.pytest.org/en/stable/) 6.2+: for running unit tests Optional CUDA-capable dependencies for tests include: @@ -105,6 +106,10 @@ If you wish to run unit tests, then please install `pytest` python3 -m pip install -U pytest ``` +Some of our tests depend on optional third-party modules (e.g., `pandas`, `cupy`, `numba`, and/or `pytorch`). +If these are not installed then their tests will be skipped. + + ### Configure your compiler For example, using the Clang compiler: diff --git a/docs/source/install/dependencies.rst b/docs/source/install/dependencies.rst index dd51b0fe..d43a6f15 100644 --- a/docs/source/install/dependencies.rst +++ b/docs/source/install/dependencies.rst @@ -28,10 +28,11 @@ Optional dependencies include: - further `optional dependencies of AMReX `__ - `Python dependencies `__ - - `mpi4py `__ - - `cupy `__ 11.2+ - - `numba `__ 0.56+ - - `torch `__ 1.12+ + - `mpi4py 2.1+ `__: for multi-node and/or multi-GPU execution + - `cupy 11.2+ `__ + - `numba 0.56+ `__ + - `pandas 2+ `__: for DataFrame support + - `torch 1.12+ `__ For all other systems, we recommend to use a **package dependency manager**: Pick *one* of the installation methods below to install all dependencies for pyAMReX development in a consistent manner. 
diff --git a/src/Particle/ParticleContainer.H b/src/Particle/ParticleContainer.H index d283d85b..114144b8 100644 --- a/src/Particle/ParticleContainer.H +++ b/src/Particle/ParticleContainer.H @@ -68,6 +68,8 @@ void make_Base_Iterators (py::module &m, std::string allocstr) py::return_value_policy::reference_internal) .def_property_readonly_static("is_soa_particle", [](const py::object&){ return ParticleType::is_soa_particle;}) + .def_property_readonly("size", &iterator_base::numParticles, + "the number of particles on this tile") .def_property_readonly("num_particles", &iterator_base::numParticles) .def_property_readonly("num_real_particles", &iterator_base::numRealParticles) .def_property_readonly("num_neighbor_particles", &iterator_base::numNeighborParticles) @@ -382,6 +384,14 @@ void make_ParticleContainer_and_Iterators (py::module &m, std::string allocstr) make_Iterators< false, iterator, Allocator >(m, allocstr); using const_iterator = amrex::ParConstIter_impl; make_Iterators< true, const_iterator, Allocator >(m, allocstr); + + // simpler particle iterator loops: return types of this particle box + py_pc + .def_property_readonly_static("iterator", [](py::object /* pc */){ return py::type::of(); }, + "amrex iterator for particle boxes") + .def_property_readonly_static("const_iterator", [](py::object /* pc */){ return py::type::of(); }, + "amrex constant iterator for particle boxes (read-only)") + ; } /** Create ParticleContainers and Iterators diff --git a/src/amrex/ParticleContainer.py b/src/amrex/ParticleContainer.py new file mode 100644 index 00000000..266496d9 --- /dev/null +++ b/src/amrex/ParticleContainer.py @@ -0,0 +1,104 @@ +""" +This file is part of pyAMReX + +Copyright 2023 AMReX community +Authors: Axel Huebl +License: BSD-3-Clause-LBNL +""" + + +def pc_to_df(self, local=True, comm=None, root_rank=0): + """ + Copy all particles into a pandas.DataFrame + + Parameters + ---------- + self : amrex.ParticleContainer_* + A ParticleContainer class in pyAMReX
+ local : bool + MPI-local particles + comm : MPI Communicator + if local is False, this defaults to mpi4py.MPI.COMM_WORLD + root_rank : MPI root rank to gather to + if local is False, this defaults to 0 + + Returns + ------- + A concatenated pandas.DataFrame with particles from all levels (AoS particles are indexed by cpuid). + + Returns None if no particles were found. + If local=False, then all ranks but the root_rank will return None. + """ + import pandas as pd + + # create a DataFrame per particle box and append it to the list of + # local DataFrame(s) + dfs_local = [] + for lvl in range(self.finest_level + 1): + for pti in self.const_iterator(self, level=lvl): + if pti.size == 0: + continue + + if self.is_soa_particle: + next_df = pd.DataFrame() + else: + # AoS: set_index returns a new frame, so rebind it to index rows by cpuid + aos_np = pti.aos().to_numpy(copy=True) + next_df = pd.DataFrame(aos_np) + next_df = next_df.set_index("cpuid") + next_df.index.name = "cpuid" + + # SoA + soa_view = pti.soa().to_numpy(copy=True) + soa_np_real = soa_view.real + soa_np_int = soa_view.int + + for idx, array in enumerate(soa_np_real): + next_df[f"SoA_real_{idx}"] = array + for idx, array in enumerate(soa_np_int): + next_df[f"SoA_int_{idx}"] = array + + dfs_local.append(next_df) + + # MPI Gather to root rank if requested + if local: + if not dfs_local: + df = None + else: + df = pd.concat(dfs_local) + else: + from mpi4py import MPI + + if comm is None: + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + + # a list for each rank's list of DataFrame(s) + df_list_list = comm.gather(dfs_local, root=root_rank) + + if rank == root_rank: + flattened_list = [df for sublist in df_list_list for df in sublist] + + if not flattened_list: + df = None  # same "no particles" result as the local branch and the docstring + else: + df = pd.concat(flattened_list, ignore_index=self.is_soa_particle)  # keep the cpuid index of AoS particles + else: + df = None + + return df + + +def register_ParticleContainer_extension(amr): + """ParticleContainer helper methods""" + import inspect + import sys + + # register member functions for every ParticleContainer_* type + for _, ParticleContainer_type in
inspect.getmembers( + sys.modules[amr.__name__], + lambda member: inspect.isclass(member) + and member.__module__ == amr.__name__ + and member.__name__.startswith("ParticleContainer_"), + ): + ParticleContainer_type.to_df = pc_to_df diff --git a/src/amrex/space1d/__init__.py b/src/amrex/space1d/__init__.py index 060a451d..ab4d70f7 100644 --- a/src/amrex/space1d/__init__.py +++ b/src/amrex/space1d/__init__.py @@ -48,6 +48,7 @@ def Print(*args, **kwargs): from ..ArrayOfStructs import register_AoS_extension from ..MultiFab import register_MultiFab_extension from ..PODVector import register_PODVector_extension +from ..ParticleContainer import register_ParticleContainer_extension from ..StructOfArrays import register_SoA_extension register_Array4_extension(amrex_1d_pybind) @@ -55,3 +56,4 @@ def Print(*args, **kwargs): register_PODVector_extension(amrex_1d_pybind) register_SoA_extension(amrex_1d_pybind) register_AoS_extension(amrex_1d_pybind) +register_ParticleContainer_extension(amrex_1d_pybind) diff --git a/src/amrex/space2d/__init__.py b/src/amrex/space2d/__init__.py index 6c654c52..799e9c7c 100644 --- a/src/amrex/space2d/__init__.py +++ b/src/amrex/space2d/__init__.py @@ -48,6 +48,7 @@ def Print(*args, **kwargs): from ..ArrayOfStructs import register_AoS_extension from ..MultiFab import register_MultiFab_extension from ..PODVector import register_PODVector_extension +from ..ParticleContainer import register_ParticleContainer_extension from ..StructOfArrays import register_SoA_extension register_Array4_extension(amrex_2d_pybind) @@ -55,3 +56,4 @@ def Print(*args, **kwargs): register_PODVector_extension(amrex_2d_pybind) register_SoA_extension(amrex_2d_pybind) register_AoS_extension(amrex_2d_pybind) +register_ParticleContainer_extension(amrex_2d_pybind) diff --git a/src/amrex/space3d/__init__.py b/src/amrex/space3d/__init__.py index baa757e7..9c1b1de9 100644 --- a/src/amrex/space3d/__init__.py +++ b/src/amrex/space3d/__init__.py @@ -48,6 +48,7 @@ def Print(*args, 
**kwargs): from ..ArrayOfStructs import register_AoS_extension from ..MultiFab import register_MultiFab_extension from ..PODVector import register_PODVector_extension +from ..ParticleContainer import register_ParticleContainer_extension from ..StructOfArrays import register_SoA_extension register_Array4_extension(amrex_3d_pybind) @@ -55,3 +56,4 @@ def Print(*args, **kwargs): register_PODVector_extension(amrex_3d_pybind) register_SoA_extension(amrex_3d_pybind) register_AoS_extension(amrex_3d_pybind) +register_ParticleContainer_extension(amrex_3d_pybind) diff --git a/tests/test_particleContainer.py b/tests/test_particleContainer.py index 668ed42c..9f1897e4 100644 --- a/tests/test_particleContainer.py +++ b/tests/test_particleContainer.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import importlib + import numpy as np import pytest @@ -272,3 +274,38 @@ def test_per_cell(empty_particle_container, std_geometry, std_particle): assert pc.TotalNumberOfParticles() == pc.NumberOfParticlesAtLevel(0) == ncells print("npts * real_1", ncells * std_particle.real_array_data[1]) assert ncells * std_particle.real_array_data[1] == sum_1 + + +@pytest.mark.skipif( + importlib.util.find_spec("pandas") is None, reason="pandas is not available" +) +def test_pc_df(particle_container, Npart): + pc = particle_container + print(f"pc={pc}") + df = pc.to_df() + print(df.columns) + print(df) + + +@pytest.mark.skipif( + importlib.util.find_spec("pandas") is None, reason="pandas is not available" +) +def test_pc_empty_df(empty_particle_container, Npart): + pc = empty_particle_container + print(f"pc={pc}") + df = pc.to_df() + assert df is None + + +@pytest.mark.skipif( + importlib.util.find_spec("pandas") is None, reason="pandas is not available" +) +@pytest.mark.skipif(not amr.Config.have_mpi, reason="Requires AMReX_MPI=ON") +def test_pc_df_mpi(particle_container, Npart): + pc = particle_container + print(f"pc={pc}") + df = pc.to_df(local=False) + if df is not None: + # only rank 0 + 
print(df.columns) + print(df)