Skip to content

Commit

Permalink
Move asynchronous zero (#226)
Browse files Browse the repository at this point in the history
  • Loading branch information
csbnw authored Oct 2, 2023
1 parent 36fb614 commit 9edfc05
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
### Added
### Changed
- Made the library header only
- Moved asynchronous `::zero` from `DeviceMemory` to `Stream`
- Replaced `include_cuda_code` helper with `target_embed_source`
### Removed

Expand Down
10 changes: 4 additions & 6 deletions include/cudawrappers/cu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,8 +415,6 @@ class DeviceMemory : public Wrapper<CUdeviceptr> {

// Writes zero into `size` bytes starting at the wrapped device pointer using
// cuMemsetD8; the driver's return code is handed to checkCudaCall.
void zero(size_t size) {
  const unsigned char fillByte = 0;
  checkCudaCall(cuMemsetD8(_obj, fillByte, size));
}

void zero(size_t size, Stream &stream);

const void *parameter()
const // used to construct parameter list for launchKernel();
{
Expand Down Expand Up @@ -484,6 +482,10 @@ class Stream : public Wrapper<CUstream> {
checkCudaCall(cuMemPrefetchAsync(devPtr, size, dstDevice, _obj));
}

// Enqueues a memset on this stream (cuMemsetD8Async) that writes zero into
// the `size` bytes starting at `devPtr`. The driver's return code is handed
// to checkCudaCall.
void zero(CUdeviceptr devPtr, size_t size) {
  const unsigned char fillByte = 0;
  checkCudaCall(cuMemsetD8Async(devPtr, fillByte, size, _obj));
}

void launchKernel(Function &function, unsigned gridX, unsigned gridY,
unsigned gridZ, unsigned blockX, unsigned blockY,
unsigned blockZ, unsigned sharedMemBytes,
Expand Down Expand Up @@ -534,10 +536,6 @@ class Stream : public Wrapper<CUstream> {
}
};

// Out-of-class definition of the stream-taking overload: issues
// cuMemsetD8Async with a zero fill over `size` bytes at the wrapped device
// pointer, passing `stream` as the stream argument.
// NOTE(review): `stream` is passed as-is, so this relies on Stream being
// convertible to CUstream; that conversion is not visible here, confirm.
inline void DeviceMemory::zero(size_t size, Stream &stream) {
  const unsigned char fillByte = 0;
  checkCudaCall(cuMemsetD8Async(_obj, fillByte, size, stream));
}

// Records this event on `stream` via cuEventRecord, using the stream's
// underlying handle; the driver's return code is handed to checkCudaCall.
inline void Event::record(Stream &stream) {
  const auto status = cuEventRecord(_obj, stream._obj);
  checkCudaCall(status);
}
Expand Down
2 changes: 1 addition & 1 deletion tests/test_cu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ TEST_CASE("Test zeroing cu::DeviceMemory", "[zero]") {

cu::Stream stream;
stream.memcpyHtoDAsync(mem, src, size);
mem.zero(size, stream);
stream.zero(mem, size);
stream.memcpyDtoHAsync(tgt, mem, size);
stream.synchronize();

Expand Down

0 comments on commit 9edfc05

Please sign in to comment.