Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] remove dependencies from memcpy, memset and kernel launch #2075

Draft
wants to merge 1 commit into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/alpaka/dev/DevGenericSycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ namespace alpaka
for(auto& q_ptr : m_queues)
{
if(auto ptr = q_ptr.lock(); ptr != nullptr)
ptr->register_dependency(event);
ptr->getNativeHandle().ext_oneapi_submit_barrier({event});
}
}

Expand Down
8 changes: 4 additions & 4 deletions include/alpaka/event/EventGenericSycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ namespace alpaka::trait
{
static auto enqueue(QueueGenericSyclNonBlocking<TDev>& queue, EventGenericSycl<TDev>& event)
{
event.setEvent(queue.m_spQueueImpl->get_last_event());
event.setEvent(queue.getNativeHandle().ext_oneapi_submit_barrier());
}
};

Expand All @@ -97,7 +97,7 @@ namespace alpaka::trait
{
static auto enqueue(QueueGenericSyclBlocking<TDev>& queue, EventGenericSycl<TDev>& event)
{
event.setEvent(queue.m_spQueueImpl->get_last_event());
event.setEvent(queue.getNativeHandle().ext_oneapi_submit_barrier());
}
};

Expand All @@ -120,7 +120,7 @@ namespace alpaka::trait
{
static auto waiterWaitFor(QueueGenericSyclNonBlocking<TDev>& queue, EventGenericSycl<TDev> const& event)
{
queue.m_spQueueImpl->register_dependency(event.getNativeHandle());
queue.getNativeHandle().ext_oneapi_submit_barrier({event.getNativeHandle()});
}
};

Expand All @@ -130,7 +130,7 @@ namespace alpaka::trait
{
static auto waiterWaitFor(QueueGenericSyclBlocking<TDev>& queue, EventGenericSycl<TDev> const& event)
{
queue.m_spQueueImpl->register_dependency(event.getNativeHandle());
queue.getNativeHandle().ext_oneapi_submit_barrier({event.getNativeHandle()});
}
};

Expand Down
14 changes: 6 additions & 8 deletions include/alpaka/mem/buf/sycl/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ namespace alpaka::detail

using TaskCopySyclBase<TDim, TViewDst, TViewSrc, TExtent>::TaskCopySyclBase;

auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
auto operator()(sycl::queue& queue) const -> sycl::event
{
ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

Expand Down Expand Up @@ -126,8 +126,7 @@ namespace alpaka::detail
this->m_srcMemNative
+ (castVec<SrcSize>(idx) * srcPitchBytesWithoutOutmost)
.foldrAll(std::plus<SrcSize>())),
static_cast<std::size_t>(this->m_extentWidthBytes),
requirements));
static_cast<std::size_t>(this->m_extentWidthBytes)));
});
}

Expand All @@ -144,7 +143,7 @@ namespace alpaka::detail
using TaskCopySyclBase<DimInt<1u>, TViewDst, TViewSrc, TExtent>::TaskCopySyclBase;
using Elem = alpaka::Elem<TViewSrc>;

auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
auto operator()(sycl::queue& queue) const -> sycl::event
{
ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

Expand All @@ -156,8 +155,7 @@ namespace alpaka::detail
return queue.memcpy(
reinterpret_cast<void*>(this->m_dstMemNative),
reinterpret_cast<void const*>(this->m_srcMemNative),
sizeof(Elem) * static_cast<std::size_t>(this->m_extent.prod()),
requirements);
sizeof(Elem) * static_cast<std::size_t>(this->m_extent.prod()));
}
else
{
Expand Down Expand Up @@ -187,9 +185,9 @@ namespace alpaka::detail
ALPAKA_ASSERT(getExtentVec(viewSrc).prod() == 1u);
}

auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
auto operator()(sycl::queue& queue) const -> sycl::event
{
return queue.memcpy(m_dstMemNative, m_srcMemNative, sizeof(Elem), requirements);
return queue.memcpy(m_dstMemNative, m_srcMemNative, sizeof(Elem));
}

void* m_dstMemNative;
Expand Down
14 changes: 6 additions & 8 deletions include/alpaka/mem/buf/sycl/Set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ namespace alpaka

using TaskSetSyclBase<TDim, TView, TExtent>::TaskSetSyclBase;

auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
auto operator()(sycl::queue& queue) const -> sycl::event
{
ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

Expand Down Expand Up @@ -112,8 +112,7 @@ namespace alpaka
+ (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost)
.foldrAll(std::plus<DstSize>())),
this->m_byte,
static_cast<std::size_t>(this->m_extentWidthBytes),
requirements));
static_cast<std::size_t>(this->m_extentWidthBytes)));
});
}

Expand All @@ -128,7 +127,7 @@ namespace alpaka
{
using TaskSetSyclBase<DimInt<1u>, TView, TExtent>::TaskSetSyclBase;

auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
auto operator()(sycl::queue& queue) const -> sycl::event
{
ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

Expand All @@ -140,8 +139,7 @@ namespace alpaka
return queue.memset(
reinterpret_cast<void*>(this->m_dstMemNative),
this->m_byte,
static_cast<std::size_t>(this->m_extentWidthBytes),
requirements);
static_cast<std::size_t>(this->m_extentWidthBytes));
}
else
{
Expand Down Expand Up @@ -178,14 +176,14 @@ namespace alpaka
}
# endif

auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
auto operator()(sycl::queue& queue) const -> sycl::event
{
ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
printDebug();
# endif
return queue.memset(reinterpret_cast<void*>(m_dstMemNative), m_byte, sizeof(Elem), requirements);
return queue.memset(reinterpret_cast<void*>(m_dstMemNative), m_byte, sizeof(Elem));
}

std::uint8_t const m_byte;
Expand Down
54 changes: 3 additions & 51 deletions include/alpaka/queue/sycl/QueueGenericSyclBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#include <algorithm>
#include <exception>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <type_traits>
#include <utility>
#include <vector>
Expand Down Expand Up @@ -73,36 +71,9 @@ namespace alpaka::detail
}
}

// Don't call this without locking first!
auto clean_dependencies() -> void
{
// Clean up completed events
auto const start = std::begin(m_dependencies);
auto const old_end = std::end(m_dependencies);
auto const new_end = std::remove_if(
start,
old_end,
[](sycl::event ev) {
return ev.get_info<sycl::info::event::command_execution_status>()
== sycl::info::event_command_status::complete;
});

m_dependencies.erase(new_end, old_end);
}

auto register_dependency(sycl::event event) -> void
{
std::lock_guard<std::shared_mutex> lock{m_mutex};

clean_dependencies();
m_dependencies.push_back(event);
}

auto empty() const -> bool
{
std::shared_lock<std::shared_mutex> lock{m_mutex};
return m_last_event.get_info<sycl::info::event::command_execution_status>()
== sycl::info::event_command_status::complete;
return m_queue.ext_oneapi_empty();
}

auto wait() -> void
Expand All @@ -111,41 +82,26 @@ namespace alpaka::detail
m_queue.wait_and_throw();
}

auto get_last_event() const -> sycl::event
{
std::shared_lock<std::shared_mutex> lock{m_mutex};
return m_last_event;
}

template<bool TBlocking, typename TTask>
auto enqueue(TTask const& task) -> void
{
{
std::lock_guard<std::shared_mutex> lock{m_mutex};

clean_dependencies();

// Execute task
if constexpr(is_sycl_task<TTask> && !is_sycl_kernel<TTask>) // Copy / Fill
{
m_last_event = task(m_queue, m_dependencies); // Will call queue.{copy, fill} internally
task(m_queue); // Will call queue.{copy, fill} internally
}
else
{
m_last_event = m_queue.submit(
m_queue.submit(
[this, &task](sycl::handler& cgh)
{
if(!m_dependencies.empty())
cgh.depends_on(m_dependencies);

if constexpr(is_sycl_kernel<TTask>) // Kernel
task(cgh); // Will call cgh.parallel_for internally
else // Host
cgh.host_task(task);
});
}

m_dependencies.clear();
}

if constexpr(TBlocking)
Expand All @@ -157,10 +113,6 @@ namespace alpaka::detail
return m_queue;
}

std::vector<sycl::event> m_dependencies;
sycl::event m_last_event;
std::shared_mutex mutable m_mutex;

private:
sycl::queue m_queue;
};
Expand Down