-
Notifications
You must be signed in to change notification settings - Fork 285
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gpu: misc fixes #5176
gpu: misc fixes #5176
Changes from all commits
f35b82a
5e07de3
d5357df
90f2dbe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
/* | ||
* Copyright (C) by Argonne National Laboratory | ||
* See COPYRIGHT in top-level directory | ||
*/ | ||
|
||
#ifndef MPIR_GPU_UTIL_H_INCLUDED | ||
#define MPIR_GPU_UTIL_H_INCLUDED | ||
|
||
/* If buf is device memory, allocate a host buffer that can hold the data, | ||
* return the host buffer, or NULL if buf is not device memory */ | ||
MPL_STATIC_INLINE_PREFIX void *MPIR_gpu_host_alloc(const void *buf, | ||
MPI_Aint count, MPI_Datatype datatype) | ||
{ | ||
MPL_pointer_attr_t attr; | ||
MPIR_GPU_query_pointer_attr(buf, &attr); | ||
|
||
if (attr.type != MPL_GPU_POINTER_DEV) { | ||
return NULL; | ||
} else { | ||
MPI_Aint extent, true_lb, true_extent; | ||
MPIR_Datatype_get_extent_macro(datatype, extent); | ||
MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); | ||
|
||
MPI_Aint shift = true_lb; | ||
MPI_Aint size = true_extent; | ||
if (count > 1) { | ||
if (extent >= 0) { | ||
size += extent * (count - 1); | ||
} else { | ||
MPI_Aint size_before = (-extent) * (count - 1); | ||
/* extra counts are being packed *before*, so need *increase* the size */ | ||
size += size_before; | ||
/* the allocated pointer will be *already* shifted by this amount */ | ||
shift -= size_before; | ||
} | ||
} | ||
void *host_buf = MPL_malloc(size, MPL_MEM_OTHER); | ||
MPIR_Assert(host_buf); | ||
|
||
host_buf = (char *) host_buf - shift; | ||
return host_buf; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @raffenet Thought about it a bit more. The previous fix didn't account for the case when Could you review this commit again? Meanwhile, I'll rebase and test it in PR #5000 -- although I don't think DTPool will generate any datatype with negative extent. |
||
} | ||
} | ||
|
||
MPL_STATIC_INLINE_PREFIX void MPIR_gpu_host_free(void *host_buf, | ||
MPI_Aint count, MPI_Datatype datatype) | ||
{ | ||
MPI_Aint extent, true_lb, true_extent; | ||
MPIR_Datatype_get_extent_macro(datatype, extent); | ||
MPIR_Type_get_true_extent_impl(datatype, &true_lb, &true_extent); | ||
|
||
MPI_Aint shift = true_lb; | ||
if (count > 1 && extent < 0) { | ||
MPI_Aint size_before = (-extent) * (count - 1); | ||
shift -= size_before; | ||
} | ||
/* The pointers was previously shifted */ | ||
host_buf = (char *) host_buf + shift; | ||
MPL_free(host_buf); | ||
} | ||
|
||
/* If buf is device memory, allocate a host buffer and copy the content, and return the host | ||
* buffer. Return NULL if buf is not a device memory */ | ||
MPL_STATIC_INLINE_PREFIX void *MPIR_gpu_host_swap(const void *buf, | ||
MPI_Aint count, MPI_Datatype datatype) | ||
{ | ||
void *host_buf = MPIR_gpu_host_alloc(buf, count, datatype); | ||
if (host_buf) { | ||
MPIR_Localcopy(buf, count, datatype, host_buf, count, datatype); | ||
} | ||
|
||
return host_buf; | ||
} | ||
|
||
MPL_STATIC_INLINE_PREFIX void MPIR_gpu_swap_back(void *host_buf, void *gpu_buf, | ||
MPI_Aint count, MPI_Datatype datatype) | ||
{ | ||
MPIR_Localcopy(host_buf, count, datatype, gpu_buf, count, datatype); | ||
MPIR_gpu_host_free(host_buf, count, datatype); | ||
} | ||
|
||
#endif /* MPIR_GPU_UTIL_H_INCLUDED */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So
true_lb
is always <= 0 whencount == 1
orextent >= 0
? Am I reading this right?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No.
true_lb
works whether it's positive or negative, it is an adjustment from the buffer pointer and the arithmetic will work for either positive or negative. However, thetrue_lb
returned fromMPIR_Type_get_true_extent_impl
is thetrue_lb
for a single count datatype. Whenextent
is positive, thetrue_lb
remain the same for multiple count. so no adjustment is needed. However, whenextent
is negative, thetrue_lb
with multiple count will be further extended into the negative direction, thus requires further adjustment.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What I'm trying to understand is we are returning an allocated buffer, but with the address potentially shifted outside of the malloc'd region in the case of positive
true_lb
. Wouldn't writing to or dereferencing that address lead to a potential crash? How do we guard against these scenarios? Is it becauseMPIR_Localcopy
will only be used with a datatype that would not do this?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The dataype packing/unpacking routine should only access the actual data location specified by the MPI datatype. Only a bug will result in a wrong access.
PS: think about
MPI_BOTTOM
, it is okay as long as thedatatype
is correctly constructed.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, only accessing this with a datatype-aware copy routine makes sense. I think it should be OK. I'd still like to see it go thru Jenkins again.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code has already passed Jenkins both here #5176 (comment) and in the gpu testing #5000 (comment) . The last push is only a rebase with PR #5005
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Per my previous comments, I am going to go ahead merge this, then get #5000 rebased. Any issues should pop up in that PR anyway.