@@ -6,6 +6,8 @@
  * Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
  * Copyright (c) 2024 Research Organization for Information Science
  *                    and Technology (RIST). All rights reserved.
+ * Copyright (c) 2025 Triad National Security, LLC. All rights
+ *                    reserved.
  *
  * $COPYRIGHT$
  *
@@ -114,11 +116,11 @@ _Generic((*(out)), \
 #if defined(GENERATE_NEON_CODE)
 #define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op)         \
     static void OP_CONCAT(ompi_op_aarch64_2buff_##name##_##type##type_size##_t, \
-                          APPEND)(const void *_in, void *_out, int *count,      \
+                          APPEND)(const void *_in, void *_out, size_t *count,   \
                                   struct ompi_datatype_t **dtype,               \
-                                  struct ompi_op_base_module_1_0_0_t *module)   \
+                                  struct ompi_op_base_module_2_0_0_t *module)   \
     {                                                                           \
-        int left_over = *count;                                                 \
+        size_t left_over = *count;                                              \
         type##type_size##_t *in = (type##type_size##_t *) _in,                  \
             *out = (type##type_size##_t *) _out;                                \
         OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##x##type_cnt##_t) vsrc, vdst; \
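For context, a minimal hand-expanded sketch of what the NEON 2buff template generates for an int32_t sum after this change. The function name is hypothetical and the unused dtype/module parameters are dropped for brevity; the point is the int -> size_t count, which is what lets reductions run over more than INT_MAX elements.

    #include <arm_neon.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Illustrative only -- not the actual macro expansion. */
    static void example_neon_2buff_sum_int32(const void *_in, void *_out,
                                             size_t *count)
    {
        size_t left_over = *count;      /* was int; size_t avoids truncation */
        const int32_t *in = (const int32_t *) _in;
        int32_t *out = (int32_t *) _out;

        /* Vector body: 4 int32 lanes per 128-bit NEON register. */
        for (; left_over >= 4; left_over -= 4, in += 4, out += 4) {
            int32x4_t vsrc = vld1q_s32(in);
            int32x4_t vdst = vld1q_s32(out);
            vst1q_s32(out, vaddq_s32(vsrc, vdst));
        }
        /* Scalar tail for the remaining 0-3 elements. */
        for (; left_over > 0; left_over--, in++, out++) {
            *out += *in;
        }
    }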
@@ -138,12 +140,12 @@ _Generic((*(out)), \
 #elif defined(GENERATE_SVE_CODE)
 #define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op)                 \
 static void OP_CONCAT(ompi_op_aarch64_2buff_##name##_##type##type_size##_t, APPEND)     \
-    (const void *_in, void *_out, int *count,                                           \
+    (const void *_in, void *_out, size_t *count,                                        \
      struct ompi_datatype_t **dtype,                                                    \
-     struct ompi_op_base_module_1_0_0_t *module)                                        \
+     struct ompi_op_base_module_2_0_0_t *module)                                        \
 {                                                                                       \
     const int types_per_step = svcnt(*((type##type_size##_t *) _in));                   \
-    const int cnt = *count;                                                             \
+    const size_t cnt = *count;                                                          \
     type##type_size##_t *in = (type##type_size##_t *) _in,                              \
         *out = (type##type_size##_t *) _out;                                            \
     OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst;                    \
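The SVE variant needs no scalar tail: the svwhilelt predicate masks off lanes past the end of the buffer. A hypothetical hand-expansion for int32_t, again with illustrative names and the dtype/module parameters omitted:

    #include <arm_sve.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Illustrative only -- not the actual macro expansion. */
    static void example_sve_2buff_sum_int32(const void *_in, void *_out,
                                            size_t *count)
    {
        const size_t cnt = *count;      /* size_t, matching the new signature */
        const int32_t *in = (const int32_t *) _in;
        int32_t *out = (int32_t *) _out;
        const uint64_t step = svcntw(); /* int32 lanes per SVE vector */

        for (size_t idx = 0; idx < cnt; idx += step) {
            /* Active lanes are those with idx + lane < cnt. */
            svbool_t pred = svwhilelt_b32_u64(idx, cnt);
            svint32_t vsrc = svld1_s32(pred, &in[idx]);
            svint32_t vdst = svld1_s32(pred, &out[idx]);
            svst1_s32(pred, &out[idx], svadd_s32_m(pred, vdst, vsrc));
        }
    }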
@@ -279,11 +281,11 @@ _Generic((*(out)), \
 #if defined(GENERATE_NEON_CODE)
 #define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op)           \
 static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND)     \
-    (const void *_in1, const void *_in2, void *_out, int *count,                        \
+    (const void *_in1, const void *_in2, void *_out, size_t *count,                     \
      struct ompi_datatype_t **dtype,                                                    \
-     struct ompi_op_base_module_1_0_0_t *module)                                        \
+     struct ompi_op_base_module_2_0_0_t *module)                                        \
 {                                                                                       \
-    int left_over = *count;                                                             \
+    size_t left_over = *count;                                                          \
     type##type_size##_t *in1 = (type##type_size##_t *) _in1,                            \
         *in2 = (type##type_size##_t *) _in2,                                            \
         *out = (type##type_size##_t *) _out;                                            \
@@ -304,17 +306,17 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
 #elif defined(GENERATE_SVE_CODE)
 #define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op)           \
 static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND)     \
-    (const void *_in1, const void *_in2, void *_out, int *count,                        \
+    (const void *_in1, const void *_in2, void *_out, size_t *count,                     \
      struct ompi_datatype_t **dtype,                                                    \
-     struct ompi_op_base_module_1_0_0_t *module)                                        \
+     struct ompi_op_base_module_2_0_0_t *module)                                        \
 {                                                                                       \
     const int types_per_step = svcnt(*((type##type_size##_t *) _in1));                  \
     type##type_size##_t *in1 = (type##type_size##_t *) _in1,                            \
         *in2 = (type##type_size##_t *) _in2,                                            \
         *out = (type##type_size##_t *) _out;                                            \
-    const int cnt = *count;                                                             \
+    const size_t cnt = *count;                                                          \
     OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst;                    \
-    for (int idx=0; idx < cnt; idx += types_per_step) {                                 \
+    for (size_t idx=0; idx < cnt; idx += types_per_step) {                              \
         svbool_t pred = svwhilelt_b##type_size(idx, cnt);                               \
         vsrc = svld1(pred, &in1[idx]);                                                  \
         vdst = svld1(pred, &in2[idx]);                                                  \
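The 3buff form reads two input buffers and writes a third, for the case where the output does not alias either input. A hypothetical expansion for int32_t; note the loop index is also size_t now, since a 32-bit int index could never reach a count above INT_MAX:

    #include <arm_sve.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Illustrative only -- not the actual macro expansion. */
    static void example_sve_3buff_sum_int32(const void *_in1, const void *_in2,
                                            void *_out, size_t *count)
    {
        const int32_t *in1 = (const int32_t *) _in1;
        const int32_t *in2 = (const int32_t *) _in2;
        int32_t *out = (int32_t *) _out;
        const size_t cnt = *count;
        const uint64_t step = svcntw();

        for (size_t idx = 0; idx < cnt; idx += step) {
            svbool_t pred = svwhilelt_b32_u64(idx, cnt);
            svint32_t vsrc = svld1_s32(pred, &in1[idx]);
            svint32_t vdst = svld1_s32(pred, &in2[idx]);
            svst1_s32(pred, &out[idx], svadd_s32_m(pred, vdst, vsrc));
        }
    }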