Skip to content

Commit

Permalink
ops: embiggen the ops framework to take a count argument of type size_t.
Browse files Browse the repository at this point in the history

related to #12226

related to #9194

Signed-off-by: Howard Pritchard <[email protected]>
  • Loading branch information
hppritcha committed Jan 3, 2025
1 parent 0bccfcd commit b047013
Show file tree
Hide file tree
Showing 18 changed files with 244 additions and 213 deletions.
4 changes: 3 additions & 1 deletion ompi/mca/op/aarch64/op_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
* reserved.
* Copyright (c) 2019 Arm Ltd. All rights reserved.
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2025 Triad National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -31,7 +33,7 @@ BEGIN_C_DECLS
*/
typedef struct {
/** The base op component struct */
ompi_op_base_component_1_0_0_t super;
ompi_op_base_component_2_0_0_t super;

/* What follows is aarch64-component-specific cached information. We
tend to use this scheme (caching information on the aarch64
Expand Down
10 changes: 6 additions & 4 deletions ompi/mca/op/aarch64/op_aarch64_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2024 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2025 Triad National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -34,7 +36,7 @@ static int mca_op_aarch64_component_open(void);
static int mca_op_aarch64_component_close(void);
static int mca_op_aarch64_component_init_query(bool enable_progress_threads,
bool enable_mpi_thread_multiple);
static struct ompi_op_base_module_1_0_0_t *
static struct ompi_op_base_module_2_0_0_t *
mca_op_aarch64_component_op_query(struct ompi_op_t *op, int *priority);
static int mca_op_aarch64_component_register(void);

Expand All @@ -43,7 +45,7 @@ ompi_op_aarch64_component_t mca_op_aarch64_component = {
information about the component itself */
{
.opc_version = {
OMPI_OP_BASE_VERSION_1_0_0,
OMPI_OP_BASE_VERSION_2_0_0,

.mca_component_name = "aarch64",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
Expand Down Expand Up @@ -164,7 +166,7 @@ ompi_op_aarch64_3buff_functions_sve[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TY
/*
* Query whether this component can be used for a specific op
*/
static struct ompi_op_base_module_1_0_0_t *
static struct ompi_op_base_module_2_0_0_t *
mca_op_aarch64_component_op_query(struct ompi_op_t *op, int *priority)
{
/* Sanity check -- although the framework should never invoke the
Expand Down Expand Up @@ -242,5 +244,5 @@ static struct ompi_op_base_module_1_0_0_t *
if (NULL != module) {
*priority = 50;
}
return (ompi_op_base_module_1_0_0_t *) module;
return (ompi_op_base_module_2_0_0_t *) module;
}
28 changes: 15 additions & 13 deletions ompi/mca/op/aarch64/op_aarch64_functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2024 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2025 Triad National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -114,11 +116,11 @@ _Generic((*(out)), \
#if defined(GENERATE_NEON_CODE)
#define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op) \
static void OP_CONCAT(ompi_op_aarch64_2buff_##name##_##type##type_size##_t, \
APPEND)(const void *_in, void *_out, int *count, \
APPEND)(const void *_in, void *_out, size_t *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
struct ompi_op_base_module_2_0_0_t *module) \
{ \
int left_over = *count; \
size_t left_over = *count; \
type##type_size##_t *in = (type##type_size##_t *) _in, \
*out = (type##type_size##_t *) _out; \
OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##x##type_cnt##_t) vsrc, vdst; \
Expand All @@ -138,12 +140,12 @@ _Generic((*(out)), \
#elif defined(GENERATE_SVE_CODE)
#define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op) \
static void OP_CONCAT(ompi_op_aarch64_2buff_##name##_##type##type_size##_t, APPEND) \
(const void *_in, void *_out, int *count, \
(const void *_in, void *_out, size_t *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
struct ompi_op_base_module_2_0_0_t *module) \
{ \
const int types_per_step = svcnt(*((type##type_size##_t *) _in)); \
const int cnt = *count; \
const size_t cnt = *count; \
type##type_size##_t *in = (type##type_size##_t *) _in, \
*out = (type##type_size##_t *) _out; \
OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
Expand Down Expand Up @@ -279,11 +281,11 @@ _Generic((*(out)), \
#if defined(GENERATE_NEON_CODE)
#define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op) \
static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
(const void *_in1, const void *_in2, void *_out, int *count, \
(const void *_in1, const void *_in2, void *_out, size_t *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
struct ompi_op_base_module_2_0_0_t *module) \
{ \
int left_over = *count; \
size_t left_over = *count; \
type##type_size##_t *in1 = (type##type_size##_t *) _in1, \
*in2 = (type##type_size##_t *) _in2, \
*out = (type##type_size##_t *) _out; \
Expand All @@ -304,17 +306,17 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
#elif defined(GENERATE_SVE_CODE)
#define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op) \
static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
(const void *_in1, const void *_in2, void *_out, int *count, \
(const void *_in1, const void *_in2, void *_out, size_t *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
struct ompi_op_base_module_2_0_0_t *module) \
{ \
const int types_per_step = svcnt(*((type##type_size##_t *) _in1)); \
type##type_size##_t *in1 = (type##type_size##_t *) _in1, \
*in2 = (type##type_size##_t *) _in2, \
*out = (type##type_size##_t *) _out; \
const int cnt = *count; \
const size_t cnt = *count; \
OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
for (int idx=0; idx < cnt; idx += types_per_step) { \
for (size_t idx=0; idx < cnt; idx += types_per_step) { \
svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
vsrc = svld1(pred, &in1[idx]); \
vdst = svld1(pred, &in2[idx]); \
Expand Down
4 changes: 3 additions & 1 deletion ompi/mca/op/avx/op_avx.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
* Copyright (c) 2019-2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2025 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -37,7 +39,7 @@ BEGIN_C_DECLS
*/
typedef struct {
/** The base op component struct */
ompi_op_base_component_1_0_0_t super;
ompi_op_base_component_2_0_0_t super;

/* What follows is avx-component-specific cached information. We
tend to use this scheme (caching information on the avx
Expand Down
10 changes: 6 additions & 4 deletions ompi/mca/op/avx/op_avx_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* Copyright (c) 2020 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2025 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -32,7 +34,7 @@ static int avx_component_open(void);
static int avx_component_close(void);
static int avx_component_init_query(bool enable_progress_threads,
bool enable_mpi_thread_multiple);
static struct ompi_op_base_module_1_0_0_t *
static struct ompi_op_base_module_2_0_0_t *
avx_component_op_query(struct ompi_op_t *op, int *priority);
static int avx_component_register(void);

Expand Down Expand Up @@ -131,7 +133,7 @@ static uint32_t has_intel_AVX_features(void)
ompi_op_avx_component_t mca_op_avx_component = {
{
.opc_version = {
OMPI_OP_BASE_VERSION_1_0_0,
OMPI_OP_BASE_VERSION_2_0_0,

.mca_component_name = "avx",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
Expand Down Expand Up @@ -250,7 +252,7 @@ avx_component_init_query(bool enable_progress_threads,
/*
* Query whether this component can be used for a specific op
*/
static struct ompi_op_base_module_1_0_0_t*
static struct ompi_op_base_module_2_0_0_t*
avx_component_op_query(struct ompi_op_t *op, int *priority)
{
ompi_op_base_module_t *module = NULL;
Expand Down Expand Up @@ -325,5 +327,5 @@ avx_component_op_query(struct ompi_op_t *op, int *priority)
if (NULL != module) {
*priority = 50;
}
return (ompi_op_base_module_1_0_0_t *) module;
return (ompi_op_base_module_2_0_0_t *) module;
}
Loading

0 comments on commit b047013

Please sign in to comment.