From 3cf749a42bdfefffa54d793b772daef6418c0a3a Mon Sep 17 00:00:00 2001
From: Karol Sierocinski
Date: Fri, 1 Apr 2022 13:55:30 +0200
Subject: [PATCH] decl_hdf5 MPIO Collective or Independent, fix #419

---
 example/decl_hdf5.yml                       |   1 +
 plugins/decl_hdf5/AUTHORS                   |   3 +
 plugins/decl_hdf5/README.md                 |   3 +-
 plugins/decl_hdf5/dataset_op.cxx            |  16 +-
 plugins/decl_hdf5/dataset_op.h              |   9 +-
 plugins/decl_hdf5/file_op.cxx               |  10 +-
 plugins/decl_hdf5/tests/CMakeLists.txt      |  13 +-
 .../decl_hdf5/tests/decl_hdf5_mpi_test_07.c | 232 ++++++++++++++++++
 8 files changed, 273 insertions(+), 14 deletions(-)
 create mode 100644 plugins/decl_hdf5/tests/decl_hdf5_mpi_test_07.c

diff --git a/example/decl_hdf5.yml b/example/decl_hdf5.yml
index 2d4e28b5f..0779f8ce8 100644
--- a/example/decl_hdf5.yml
+++ b/example/decl_hdf5.yml
@@ -28,6 +28,7 @@ pdi:
       write:
         main_field: # the name of the data to write
           dataset: data
+          mpio: COLLECTIVE # or INDEPENDENT
           when: '$iter<10' # do only write the first 10 iterations (0...9)
           memory_selection: # exclude ghosts from the data in memory
             size: ['$dsize[0]-2', '$dsize[1]-2']
diff --git a/plugins/decl_hdf5/AUTHORS b/plugins/decl_hdf5/AUTHORS
index 7aeb5fb44..fc5562033 100644
--- a/plugins/decl_hdf5/AUTHORS
+++ b/plugins/decl_hdf5/AUTHORS
@@ -11,6 +11,9 @@ Julien Bigot - CEA (julien.bigot@cea.fr)
 * Run tests that depend on the filesystem in their own temporary directory
 * Buildsystem
 
+Yacine Ould Rouis - CNRS (yacine.ould-rouis@inria.fr)
+* Contribution to feature design and validation
+
 Thomas Padioleau - CEA (thomas.padioleau@cea.fr)
 * Fixed a bug with parallel file deletion
 
diff --git a/plugins/decl_hdf5/README.md b/plugins/decl_hdf5/README.md
index acf02947e..885954c3e 100644
--- a/plugins/decl_hdf5/README.md
+++ b/plugins/decl_hdf5/README.md
@@ -145,6 +145,8 @@ The possible values for the keys are as follow:
   See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5P.html#Property-SetFletcher32
   for more information.
 
+* `mpio`: a string expression that selects the MPI-IO transfer mode used for
+  the operation, either `COLLECTIVE` (the default) or `INDEPENDENT`.
 
 ### SELECTION_DESC
 
@@ -162,7 +164,6 @@ Memory selection default values:
 * If the `size` is not specified, it defaults to size of the whole data in each
   dimension.
 * If the `start` is not specified it defaults to 0 in all dimensions.
-
 Dataset selection default values:
 * If the `size` is not specified:
   * if the number of dimensions match that of the memory, the size defaults to
diff --git a/plugins/decl_hdf5/dataset_op.cxx b/plugins/decl_hdf5/dataset_op.cxx
index 7da392e9e..a0c9a0e8c 100644
--- a/plugins/decl_hdf5/dataset_op.cxx
+++ b/plugins/decl_hdf5/dataset_op.cxx
@@ -1,6 +1,6 @@
 /*******************************************************************************
  * Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA)
- * Copyright (C) 2021 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
+ * Copyright (C) 2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -164,6 +164,14 @@ Dataset_op::Dataset_op(Direction dir, string name, Expression default_when, PC_t
 			m_fletcher = value;
 		} else if (key == "attributes") {
 			// pass
+		} else if (key == "mpio") {
+			if (to_string(value) == "INDEPENDENT") {
+				m_mpio = H5FD_MPIO_INDEPENDENT;
+			} else if (to_string(value) == "COLLECTIVE") {
+				m_mpio = H5FD_MPIO_COLLECTIVE;
+			} else {
+				throw Config_error{key_tree, "Invalid mpio value: `{}'", to_string(value)};
+			}
 		} else if (key == "collision_policy") {
 			m_collision_policy = to_collision_policy(to_string(value));
 		} else {
@@ -203,8 +211,12 @@ void Dataset_op::fletcher(Context& ctx, Expression value)
 	}
 }
 
-void Dataset_op::execute(Context& ctx, hid_t h5_file, hid_t xfer_lst, const unordered_map& dsets)
+void Dataset_op::execute(Context& ctx, hid_t h5_file, bool use_mpio, const unordered_map& dsets)
 {
+	Raii_hid xfer_lst = make_raii_hid(H5Pcreate(H5P_DATASET_XFER), H5Pclose);
+	if (use_mpio) {
+		if (0 > H5Pset_dxpl_mpio(xfer_lst, m_mpio)) handle_hdf5_err();
+	}
 	if (m_direction == READ)
 		do_read(ctx, h5_file, xfer_lst);
 	else
diff --git a/plugins/decl_hdf5/dataset_op.h b/plugins/decl_hdf5/dataset_op.h
index b5d3be40e..30961c99d 100644
--- a/plugins/decl_hdf5/dataset_op.h
+++ b/plugins/decl_hdf5/dataset_op.h
@@ -1,6 +1,6 @@
 /*******************************************************************************
  * Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA)
- * Copyright (C) 2021 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
+ * Copyright (C) 2021-2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -67,6 +67,8 @@ class Dataset_op
 	/// direction of the transfer (read or write)
 	Direction m_direction;
 
+	H5FD_mpio_xfer_t m_mpio = H5FD_MPIO_COLLECTIVE;
+
 	/// the name of the dataset where to transfer
 	PDI::Expression m_dataset;
 
@@ -182,11 +184,10 @@ class Dataset_op
 	 *
 	 * \param ctx the context in which to operate
 	 * \param h5_file the already opened HDF5 file id
-	 * \param xfer_lst the already created transfer property list including any
-	 *        parallel HDF5 required property.
+	 * \param use_mpio whether the HDF5 read/write is done in parallel
 	 * \param dsets the type of the explicitly typed datasets
 	 */
-	void execute(PDI::Context& ctx, hid_t h5_file, hid_t xfer_lst, const std::unordered_map& dsets);
+	void execute(PDI::Context& ctx, hid_t h5_file, bool use_mpio, const std::unordered_map& dsets);
 
 private:
 	void do_read(PDI::Context& ctx, hid_t h5_file, hid_t read_lst);
diff --git a/plugins/decl_hdf5/file_op.cxx b/plugins/decl_hdf5/file_op.cxx
index 1f084b9cd..61f296823 100644
--- a/plugins/decl_hdf5/file_op.cxx
+++ b/plugins/decl_hdf5/file_op.cxx
@@ -1,6 +1,6 @@
 /*******************************************************************************
  * Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA)
- * Copyright (C) 2021 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
+ * Copyright (C) 2021-2022 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -280,7 +280,7 @@ void File_op::execute(Context& ctx)
 	std::string filename = m_file.to_string(ctx);
 
 	Raii_hid file_lst = make_raii_hid(H5Pcreate(H5P_FILE_ACCESS), H5Pclose);
-	Raii_hid xfer_lst = make_raii_hid(H5Pcreate(H5P_DATASET_XFER), H5Pclose);
+	bool use_mpio = false;
 #ifdef H5_HAVE_PARALLEL
 	MPI_Comm comm = MPI_COMM_SELF;
 	if (communicator()) {
@@ -288,7 +288,7 @@
 	}
 	if (comm != MPI_COMM_SELF) {
 		if (0 > H5Pset_fapl_mpio(file_lst, comm, MPI_INFO_NULL)) handle_hdf5_err();
-		if (0 > H5Pset_dxpl_mpio(xfer_lst, H5FD_MPIO_COLLECTIVE)) handle_hdf5_err();
+		use_mpio = true;
 		ctx.logger().debug("Opening `{}' file in parallel mode", filename);
 	}
 #endif
@@ -337,10 +337,10 @@ void File_op::execute(Context& ctx)
 	Raii_hid h5_file = make_raii_hid(h5_file_raw, H5Fclose, ("Cannot open `" + filename + "' file").c_str());
 
 	for (auto&& one_dset_op: dset_writes) {
-		one_dset_op.execute(ctx, h5_file, xfer_lst, m_datasets);
+		one_dset_op.execute(ctx, h5_file, use_mpio, m_datasets);
 	}
 	for (auto&& one_dset_op: dset_reads) {
-		one_dset_op.execute(ctx, h5_file, xfer_lst, m_datasets);
+		one_dset_op.execute(ctx, h5_file, use_mpio, m_datasets);
 	}
 	for (auto&& one_attr_op: attr_writes) {
 		one_attr_op.execute(ctx, h5_file);
diff --git a/plugins/decl_hdf5/tests/CMakeLists.txt b/plugins/decl_hdf5/tests/CMakeLists.txt
index 092984773..35e1db68f 100644
--- a/plugins/decl_hdf5/tests/CMakeLists.txt
+++ b/plugins/decl_hdf5/tests/CMakeLists.txt
@@ -1,7 +1,7 @@
 #=============================================================================
 # Copyright (C) 2015-2024 Commissariat a l'energie atomique et aux energies alternatives (CEA)
 # Copyright (C) 2021 Institute of Bioorganic Chemistry Polish Academy of Science (PSNC)
-#
+# Copyright (C) 2022 Centre National de Recherche Scientifique (CNRS)
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -92,7 +92,7 @@ if("${BUILD_HDF5_PARALLEL}")
 	set_property(TEST decl_hdf5_mpi_04_C PROPERTY PROCESSORS 4)
 endif()
 
-# bcommunicator as a reference
+# communicator as a reference
 if("${BUILD_HDF5_PARALLEL}")
 	add_executable(decl_hdf5_mpi_05_C decl_hdf5_mpi_test_05.c)
 	target_link_libraries(decl_hdf5_mpi_05_C PDI::PDI_C MPI::MPI_C)
@@ -110,6 +110,15 @@ if("${BUILD_HDF5_PARALLEL}")
 	set_property(TEST decl_hdf5_mpi_06_C PROPERTY PROCESSORS 4)
 endif()
 
+# mpio dataset attribute
+if("${BUILD_HDF5_PARALLEL}")
+	add_executable(decl_hdf5_mpi_07_C decl_hdf5_mpi_test_07.c)
+	target_link_libraries(decl_hdf5_mpi_07_C PDI::PDI_C MPI::MPI_C)
+	add_test(NAME decl_hdf5_mpi_07_C COMMAND "${RUNTEST_DIR}" "${MPIEXEC}" "${MPIEXEC_NUMPROC_FLAG}" 4 ${MPIEXEC_PREFLAGS} "$<TARGET_FILE:decl_hdf5_mpi_07_C>" ${MPIEXEC_POSTFLAGS})
+	set_property(TEST decl_hdf5_mpi_07_C PROPERTY TIMEOUT 15)
+	set_property(TEST decl_hdf5_mpi_07_C PROPERTY PROCESSORS 4)
+endif()
+
 add_executable(decl_hdf5_IO_options_C decl_hdf5_test_IO_options.c)
 target_link_libraries(decl_hdf5_IO_options_C PDI::PDI_C ${HDF5_DEPS})
 add_test(NAME decl_hdf5_IO_options_C COMMAND "${RUNTEST_DIR}" "$<TARGET_FILE:decl_hdf5_IO_options_C>")
diff --git a/plugins/decl_hdf5/tests/decl_hdf5_mpi_test_07.c b/plugins/decl_hdf5/tests/decl_hdf5_mpi_test_07.c
new file mode 100644
index 000000000..26d988bd1
--- /dev/null
+++ b/plugins/decl_hdf5/tests/decl_hdf5_mpi_test_07.c
@@ -0,0 +1,232 @@
+/*******************************************************************************
+ * Copyright (C) 2015-2019 Commissariat a l'energie atomique et aux energies alternatives (CEA)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ * * Neither the name of CEA nor the names of its contributors may be used to
+ *   endorse or promote products derived from this software without specific
+ *   prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include <mpi.h>
+#include <stdio.h>
+#include <pdi.h>
+
+#define IMX 5
+#define JMX 4
+#define NI_GHOST 1
+#define NJ_GHOST 2
+#define DIM 2
+
+const char* CONFIG_YAML
+	= "logging: trace \n"
+	  "metadata: \n"
+	  "  input: int \n"
+	  "  ni: int \n"
+	  "  nj: int \n"
+	  "  nig: int \n"
+	  "  njg: int \n"
+	  "  nit: int \n"
+	  "  njt: int \n"
+	  "  istart: int \n"
+	  "  jstart: int \n"
+	  "data: \n"
+	  "  reals: \n"
+	  "    type: array \n"
+	  "    subtype: double \n"
+	  "    size: [$nj + 2*$njg, $ni + 2*$nig] \n"
+	  "    subsize: [$nj, $ni] \n"
+	  "    start: [$njg, $nig] \n"
+	  "  values: \n"
+	  "    type: array \n"
+	  "    subtype: int \n"
+	  "    size: [$nj + 2*$njg, $ni + 2*$nig] \n"
+	  "    subsize: [$nj, $ni] \n"
+	  "    start: [$njg, $nig] \n"
+	  "plugins: \n"
+	  "  mpi: \n"
+	  "  decl_hdf5: \n"
+	  "    file: decl_hdf5_mpi_test_06_C.h5 \n"
+	  "    communicator: $MPI_COMM_WORLD \n"
+	  "    datasets: \n"
+	  "      reals: {type: array, subtype: double, size: [$njt, $nit]} \n"
+	  "      values: {type: array, subtype: int, size: [$njt, $nit]} \n"
+	  "    write: \n"
+	  "      reals: \n"
+	  "        when: $input=0 \n"
+	  "        mpio: INDEPENDENT \n"
+	  "        dataset_selection: {start: [$jstart, $istart]} \n"
+	  "      values: \n"
+	  "        mpio: COLLECTIVE \n"
+	  "        when: $input=0 \n"
+	  "        dataset_selection: {start: [$jstart, $istart]} \n"
+	  "    read: \n"
+	  "      reals: \n"
+	  "        mpio: COLLECTIVE \n"
+	  "        when: $input=1 \n"
+	  "        dataset_selection: {start: [$jstart, $istart]} \n"
+	  "      values: \n"
+	  "        mpio: INDEPENDENT \n"
+	  "        when: $input=1 \n"
+	  "        dataset_selection: {start: [$jstart, $istart]} \n";
+
+int main(int argc, char* argv[])
+{
+	const int icst = -1; /// constant values in the ghost nodes
+	const double rcst = -1.01;
+
+	int nig = NI_GHOST, njg = NJ_GHOST;
+	int ni = IMX, nj = JMX;
+	int values[JMX + 2 * NJ_GHOST][IMX + NI_GHOST * 2] = {{0}}, cp_values[JMX + 2 * NJ_GHOST][IMX + NI_GHOST * 2] = {{0}};
+	double reals[JMX + 2 * NJ_GHOST][IMX + NI_GHOST * 2] = {{0}}, cp_reals[JMX + 2 * NJ_GHOST][IMX + NI_GHOST * 2] = {{0}};
+	int i, j, input;
+	int nit, njt;
+
+	/// MPI and parallel data or info
+	int dims[DIM], coord[DIM], periodic[DIM];
+	int istart, jstart;
+	MPI_Comm comm2D;
+	periodic[0] = 0;
+	periodic[1] = 0;
+	dims[0] = 2;
+	dims[1] = 2;
+
+
+	MPI_Init(&argc, &argv);
+	PC_tree_t conf = PC_parse_string(CONFIG_YAML);
+	MPI_Comm world = MPI_COMM_WORLD;
+	PDI_init(conf);
+	int rank;
+	MPI_Comm_rank(world, &rank);
+
+	if (0 == rank) {
+		remove("decl_hdf5_mpi_test_06_C.h5");
+	}
+
+	{
+		/// setting nb of procs.
+		int size;
+		MPI_Comm_size(world, &size);
+		if (size != 4) {
+			printf("Run on 4 procs only.");
+			MPI_Abort(MPI_COMM_WORLD, -1);
+		}
+		PDI_expose("nproc", &size, PDI_OUT);
+	}
+
+
+	MPI_Cart_create(world, DIM, dims, periodic, 0, &comm2D);
+	MPI_Cart_coords(comm2D, rank, DIM, coord);
+
+	istart = coord[1] * ni;
+	jstart = coord[0] * nj;
+
+	nit = 2 * ni;
+	njt = 2 * nj;
+
+	PDI_expose("nig", &nig, PDI_OUT); /// Ghost cells
+	PDI_expose("njg", &njg, PDI_OUT);
+
+	PDI_expose("ni", &ni, PDI_OUT); /// Size of the portion of the array for a given MPI task
+	PDI_expose("nj", &nj, PDI_OUT);
+
+	PDI_expose("nit", &nit, PDI_OUT); /// size of the distributed array
+	PDI_expose("njt", &njt, PDI_OUT);
+
+	PDI_expose("istart", &istart, PDI_OUT); /// offset
+	PDI_expose("jstart", &jstart, PDI_OUT);
+
+	// Fill arrays
+	for (j = 0; j < nj + 2 * njg; ++j) {
+		for (i = 0; i < ni + 2 * nig; ++i) {
+			cp_values[j][i] = icst;
+			cp_reals[j][i] = rcst; /// array initialized with const values
+		}
+	}
+	/// Values and reals == 0 in the ghost.
+	double cst = -rcst;
+	for (j = njg; j < nj + njg; ++j) {
+		for (i = nig; i < ni + nig; ++i) {
+			values[j][i] = (i + coord[1] * ni - nig) + (j + coord[0] * nj - njg) * 10;
+			reals[j][i] = (i + coord[1] * ni - nig) * cst + (j + coord[0] * nj - njg) * 10 * cst;
+		}
+	}
+
+	input = 0;
+	PDI_expose("rank", &rank, PDI_OUT);
+	PDI_expose("input", &input, PDI_OUT);
+
+	/// Test that export/exchange works
+	PDI_expose("input", &input, PDI_OUT);
+	PDI_expose("reals", &reals, PDI_OUT); // output real
+	PDI_expose("values", &values, PDI_INOUT); // output integers
+
+	input = 1;
+	/// Import should also work
+	PDI_expose("input", &input, PDI_OUT); // update metadata => HDF5 now import only
+	PDI_expose("reals", &cp_reals, PDI_IN); // input real
+	PDI_expose("values", &cp_values, PDI_INOUT); // input integers
+
+	/// So the data should be the same
+	fprintf(stderr, "Data exported | Data imported\n");
+
+	for (int j = njg; j < nj + njg; ++j) { // Should be the same inside
+		for (int i = nig; i < ni + nig; i++) {
+			if ((values[j][i] != cp_values[j][i]) || (reals[j][i] != cp_reals[j][i])) {
+				fprintf(stderr, "Ghost: integer (export) / integer(imported) :: %3d %3d\n", values[j][i], cp_values[j][i]);
+				fprintf(stderr, "Ghost: reals (export) / reals (imported) :: %6f %6f\n", reals[j][i], cp_reals[j][i]);
+				MPI_Abort(MPI_COMM_WORLD, -1);
+			}
+		}
+	}
+	for (int j = 0; j < njg; j++) { // and should be icst/rcst outside
+		for (int i = 0; i < nig; i++) {
+			if ((icst != cp_values[j][i]) || (rcst != cp_reals[j][i])) {
+				fprintf(stderr, "Ghost: integer (export) / integer(imported) :: %3d %3d\n", icst, cp_values[j][i]);
+				fprintf(stderr, "Ghost: reals (export) / reals (imported) :: %6f %6f\n", rcst, cp_reals[j][i]);
+				MPI_Abort(MPI_COMM_WORLD, -1);
+			}
+		}
+		for (int i = ni + nig; i < ni + 2 * nig; ++i) {
+			if ((icst != cp_values[j][i]) || (rcst != cp_reals[j][i])) {
+				fprintf(stderr, "Ghost: integer (export) / integer(imported) :: %3d %3d\n", icst, cp_values[j][i]);
+				fprintf(stderr, "Ghost: reals (export) / reals (imported) :: %6f %6f\n", rcst, cp_reals[j][i]);
+				MPI_Abort(MPI_COMM_WORLD, -1);
+			}
+		}
+	}
+	for (int j = nj + njg; j < nj + 2 * njg; ++j) {
+		for (int i = 0; i < nig; i++) {
+			if ((icst != cp_values[j][i]) || (rcst != cp_reals[j][i])) {
+				fprintf(stderr, "Ghost: integer (export) / integer(imported) :: %3d %3d\n", icst, cp_values[j][i]);
+				fprintf(stderr, "Ghost: reals (export) / reals (imported) :: %6f %6f\n", rcst, cp_reals[j][i]);
+				MPI_Abort(MPI_COMM_WORLD, -1);
+			}
+		}
+		for (int i = ni + nig; i < ni + 2 * nig; ++i) {
+			if ((icst != cp_values[j][i]) || (rcst != cp_reals[j][i])) {
+				fprintf(stderr, "Ghost: integer (export) / integer(imported) :: %3d %3d\n", icst, cp_values[j][i]);
+				fprintf(stderr, "Ghost: reals (export) / reals (imported) :: %6f %6f\n", rcst, cp_reals[j][i]);
+				MPI_Abort(MPI_COMM_WORLD, -1);
+			}
+		}
+	}
+
+	PDI_finalize();
+	PC_tree_destroy(&conf);
+	MPI_Finalize();
+}
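
A minimal usage sketch (illustrative only, not part of the patch above): with this change applied, the `mpio` key documented in the README can be set per write or read operation, and falls back to `COLLECTIVE` when omitted. The file name, data name and sizes below are placeholders:

    plugins:
      mpi:
      decl_hdf5:
        file: example.h5
        communicator: $MPI_COMM_WORLD
        datasets:
          temperature: {type: array, subtype: double, size: [$njt, $nit]}
        write:
          temperature:
            mpio: INDEPENDENT # overrides the COLLECTIVE default for this one operation
            dataset_selection: {start: [$jstart, $istart]}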