From fc4d42ac385c2fc6cc59f526ab7d54bd9368d997 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Mon, 18 Dec 2023 13:38:25 +0100 Subject: [PATCH] Track USM allocations --- include/simsycl/detail/allocation.hh | 8 + include/simsycl/detail/coordinate.hh | 5 +- include/simsycl/detail/reference_type.hh | 47 +++--- include/simsycl/sycl/allocator.hh | 2 - include/simsycl/sycl/async_handler.hh | 4 +- include/simsycl/sycl/binary_ops.hh | 4 +- include/simsycl/sycl/buffer.hh | 5 + include/simsycl/sycl/context.hh | 11 +- include/simsycl/sycl/device.hh | 11 +- include/simsycl/sycl/event.hh | 31 ++-- include/simsycl/sycl/forward.hh | 4 +- include/simsycl/sycl/platform.hh | 9 +- include/simsycl/sycl/queue.hh | 65 ++++---- include/simsycl/sycl/usm.hh | 145 ++++++++---------- include/simsycl/system.hh | 4 +- src/simsycl/device.cc | 16 +- src/simsycl/platform.cc | 10 +- src/simsycl/system.cc | 181 +++++++++++++++++++++-- 18 files changed, 369 insertions(+), 193 deletions(-) diff --git a/include/simsycl/detail/allocation.hh b/include/simsycl/detail/allocation.hh index ef5210d..581c841 100644 --- a/include/simsycl/detail/allocation.hh +++ b/include/simsycl/detail/allocation.hh @@ -1,7 +1,11 @@ #pragma once +#include "../sycl/enums.hh" +#include "../sycl/forward.hh" + #include #include +#include #include @@ -25,6 +29,10 @@ inline void aligned_free(void *ptr) { #endif } +void *usm_alloc(const sycl::context &context, sycl::usm::alloc kind, std::optional opt_device, + size_t size_bytes, size_t alignment_bytes); +void usm_free(void *ptr, const sycl::context &context); + // floats and doubles filled with this pattern show up as "-nan" inline constexpr std::byte uninitialized_memory_pattern = std::byte(0xff); diff --git a/include/simsycl/detail/coordinate.hh b/include/simsycl/detail/coordinate.hh index 80b01e9..38bc3a8 100644 --- a/include/simsycl/detail/coordinate.hh +++ b/include/simsycl/detail/coordinate.hh @@ -2,9 +2,8 @@ #include "check.hh" +#include #include -#include -#include namespace simsycl::detail { @@ -159,7 +158,7 @@ class coordinate { #undef SIMSYCL_DETAIL_DEFINE_COORDINATE_UNARY_POSTFIX_OPERATOR private: - size_t m_values[Dimensions]; + size_t m_values[Dimensions]{}; // interface type construction helper to use in friend operators // (because friendship is not transitive) diff --git a/include/simsycl/detail/reference_type.hh b/include/simsycl/detail/reference_type.hh index 542a72c..3ab4a76 100644 --- a/include/simsycl/detail/reference_type.hh +++ b/include/simsycl/detail/reference_type.hh @@ -3,6 +3,7 @@ #include "check.hh" #include +#include namespace simsycl::detail { @@ -13,23 +14,15 @@ class reference_type; template struct std::hash> { - size_t operator()(const Derived &rt) { return static_cast(reinterpret_cast(rt.m_state.get())); } + size_t operator()(const Derived &rt) const { + return static_cast(reinterpret_cast(rt.m_state.get())); + } }; namespace simsycl::detail { -template -class weak_ref { - public: - weak_ref() = default; - - weak_ref(std::weak_ptr &&state) : m_state(std::move(state)) {} - - Derived lock() const { return Derived(m_state.lock()); } - - private: - std::weak_ptr m_state; -}; +template +class weak_ref; template class reference_type { @@ -62,18 +55,34 @@ class reference_type { return *m_state; } - detail::weak_ref weak_ref() { - SIMSYCL_CHECK(m_state != nullptr); - return detail::weak_ref(std::weak_ptr(m_state)); - } - private: friend struct std::hash>; - template + template friend class weak_ref; std::shared_ptr m_state; }; +template +class weak_ref { + private: + using state_type = typename Derived::state_type; + + public: + weak_ref() = default; + + explicit weak_ref(const Derived &ref) : m_state(ref.m_state) { + static_assert(std::is_base_of_v, Derived>); + } + + std::optional lock() const { + if(auto state = m_state.lock(); state != nullptr) { return Derived(std::move(state)); } + return std::nullopt; + } + + private: + std::weak_ptr m_state; +}; + } // namespace simsycl::detail diff --git a/include/simsycl/sycl/allocator.hh b/include/simsycl/sycl/allocator.hh index 14493d4..06247c3 100644 --- a/include/simsycl/sycl/allocator.hh +++ b/include/simsycl/sycl/allocator.hh @@ -3,8 +3,6 @@ #include #include -namespace simsycl::detail {} // namespace simsycl::detail - namespace simsycl::sycl { template diff --git a/include/simsycl/sycl/async_handler.hh b/include/simsycl/sycl/async_handler.hh index 02adf33..cb279fe 100644 --- a/include/simsycl/sycl/async_handler.hh +++ b/include/simsycl/sycl/async_handler.hh @@ -25,7 +25,7 @@ class exception_list : private std::vector { using async_handler = std::function; -} +} // namespace simsycl::sycl namespace simsycl::detail { @@ -33,4 +33,4 @@ namespace simsycl::detail { void call_async_handler(const sycl::async_handler &handler_opt, sycl::exception_list exceptions); -} +} // namespace simsycl::detail diff --git a/include/simsycl/sycl/binary_ops.hh b/include/simsycl/sycl/binary_ops.hh index 586d9bf..f8323a6 100644 --- a/include/simsycl/sycl/binary_ops.hh +++ b/include/simsycl/sycl/binary_ops.hh @@ -78,7 +78,7 @@ struct minimum { template<> struct minimum { template - decltype(auto) operator()(T &&x, U &&y) const { + decltype(auto) operator()(T && x, U && y) const { return x < y ? std::forward(x) : std::forward(y); } }; @@ -97,7 +97,7 @@ struct maximum { template<> struct maximum { template - decltype(auto) operator()(T &&x, U &&y) const { + decltype(auto) operator()(T && x, U && y) const { return x > y ? std::forward(x) : std::forward(y); } }; diff --git a/include/simsycl/sycl/buffer.hh b/include/simsycl/sycl/buffer.hh index bd4e3a5..a6312bf 100644 --- a/include/simsycl/sycl/buffer.hh +++ b/include/simsycl/sycl/buffer.hh @@ -250,10 +250,15 @@ class buffer final reinterpret() const; private: + template + friend class detail::weak_ref; + template friend U *simsycl::detail::get_buffer_data(sycl::buffer &buf); using reference_type::state; + + buffer(std::shared_ptr &&state) : reference_type(std::move(state)) {} }; // Deduction guides diff --git a/include/simsycl/sycl/context.hh b/include/simsycl/sycl/context.hh index 014a59c..97f593c 100644 --- a/include/simsycl/sycl/context.hh +++ b/include/simsycl/sycl/context.hh @@ -50,10 +50,19 @@ class context final : public detail::reference_type + friend class detail::weak_ref; + struct internal_t { } inline static constexpr internal{}; - explicit context(internal_t, const std::vector &devices, const async_handler &async_handler, const property_list &prop_list); + explicit context(internal_t, const std::vector &devices, const async_handler &async_handler, + const property_list &prop_list); + context(std::shared_ptr &&state) : reference_type(std::move(state)) {} }; } // namespace simsycl::sycl + +template<> +struct std::hash + : public std::hash> {}; diff --git a/include/simsycl/sycl/device.hh b/include/simsycl/sycl/device.hh index 9901814..73049ac 100644 --- a/include/simsycl/sycl/device.hh +++ b/include/simsycl/sycl/device.hh @@ -107,10 +107,13 @@ class device final : public detail::reference_type static std::vector get_devices(info::device_type device_type = info::device_type::all); private: + template + friend class detail::weak_ref; + friend device simsycl::create_device(sycl::platform &platform, const device_config &config); - device(detail::device_state state); device(const detail::device_selector &selector); + device(std::shared_ptr &&state) : reference_type(std::move(state)) {} }; template @@ -123,4 +126,8 @@ inline constexpr bool any_device_has_v = any_device_has::value; template inline constexpr bool all_devices_have_v = all_devices_have::value; -} // namespace simsycl::sycl \ No newline at end of file +} // namespace simsycl::sycl + +template<> +struct std::hash + : public std::hash> {}; diff --git a/include/simsycl/sycl/event.hh b/include/simsycl/sycl/event.hh index 3b5b7fb..32be4cf 100644 --- a/include/simsycl/sycl/event.hh +++ b/include/simsycl/sycl/event.hh @@ -12,20 +12,20 @@ namespace simsycl::detail { -struct execution_status { +struct event_state { std::chrono::steady_clock::time_point t_submit{}; std::chrono::steady_clock::time_point t_start{}; std::chrono::steady_clock::time_point t_end{}; void start() { t_start = std::chrono::steady_clock::now(); } - [[nodiscard]] static execution_status submit() { - execution_status status; + [[nodiscard]] static event_state submit() { + event_state status; status.t_submit = std::chrono::steady_clock::now(); return status; } - [[nodiscard]] static execution_status submit_and_start() { + [[nodiscard]] static event_state submit_and_start() { auto status = submit(); status.start(); return status; @@ -45,7 +45,7 @@ uint64_t nanoseconds_since_epoch(std::chrono::time_point time_point) namespace simsycl::sycl { -class event : detail::reference_type { +class event : detail::reference_type { public: event() = default; @@ -89,23 +89,32 @@ class event : detail::reference_type { } private: - friend event detail::make_event(const detail::execution_status &status); + template + friend class detail::weak_ref; - explicit event(const detail::execution_status &status) - : detail::reference_type(std::in_place, status) {} + friend event detail::make_event(std::shared_ptr &&state); + + explicit event(std::shared_ptr &&state) + : detail::reference_type(std::move(state)) {} }; } // namespace simsycl::sycl + +template<> +struct std::hash + : public std::hash> {}; + namespace simsycl::detail { -inline sycl::event make_event(const execution_status &status) { return sycl::event(status); } +inline sycl::event make_event(std::shared_ptr &&state) { return sycl::event(std::move(state)); } +inline sycl::event make_event(const event_state &state) { return make_event(std::make_shared(state)); } -inline sycl::event execution_status::end() { +inline sycl::event event_state::end() { t_end = std::chrono::steady_clock::now(); return make_event(*this); } -inline sycl::event execution_status::instant() { return submit_and_start().end(); } +inline sycl::event event_state::instant() { return submit_and_start().end(); } } // namespace simsycl::detail diff --git a/include/simsycl/sycl/forward.hh b/include/simsycl/sycl/forward.hh index 45e0c25..cb5ed56 100644 --- a/include/simsycl/sycl/forward.hh +++ b/include/simsycl/sycl/forward.hh @@ -139,8 +139,8 @@ sycl::handler make_handler(); void **require_local_memory(sycl::handler &cgh, size_t size, size_t align); -struct execution_status; +struct event_state; -sycl::event make_event(const execution_status &status); +sycl::event make_event(std::shared_ptr &&state); } // namespace simsycl::detail diff --git a/include/simsycl/sycl/platform.hh b/include/simsycl/sycl/platform.hh index ee90dc4..4ec7f25 100644 --- a/include/simsycl/sycl/platform.hh +++ b/include/simsycl/sycl/platform.hh @@ -55,17 +55,20 @@ class platform final : public detail::reference_type get_platforms(); private: - template + template friend class detail::weak_ref; friend sycl::platform simsycl::create_platform(const platform_config &config); friend device simsycl::create_device(platform &platform, const device_config &config); - platform(detail::platform_state state); platform(const detail::device_selector &selector); - platform(std::shared_ptr &&state); + platform(std::shared_ptr &&state) : reference_type(std::move(state)) {} void add_device(const device &dev); }; } // namespace simsycl::sycl + +template<> +struct std::hash + : public std::hash> {}; diff --git a/include/simsycl/sycl/queue.hh b/include/simsycl/sycl/queue.hh index b597d3a..221770e 100644 --- a/include/simsycl/sycl/queue.hh +++ b/include/simsycl/sycl/queue.hh @@ -91,7 +91,7 @@ class queue final : public detail::reference_type, template event submit(T cgf) { - auto status = detail::execution_status::submit(); + auto status = detail::event_state::submit(); auto cgh = simsycl::detail::make_handler(); status.start(); cgf(cgh); @@ -112,63 +112,63 @@ class queue final : public detail::reference_type, template event single_task(const KernelType &kernel_func) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); kernel_func(); return status.end(); } template event single_task(event /* dep_event */, const KernelType &kernel_func) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); kernel_func(); return status.end(); } template event single_task(const std::vector & /* dep_events */, const KernelType &kernel_func) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); kernel_func(); return status.end(); } template 0), int> = 0> event parallel_for(range num_work_items, Rest &&...rest) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); simsycl::detail::parallel_for(num_work_items, std::forward(rest)...); return status.end(); } template 0), int> = 0> event parallel_for(range num_work_items, event /* dep_event */, Rest &&...rest) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); simsycl::detail::parallel_for(num_work_items, std::forward(rest)...); return status.end(); } template 0), int> = 0> event parallel_for(range num_work_items, const std::vector & /* dep_events */, Rest &&...rest) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); simsycl::detail::parallel_for(num_work_items, std::forward(rest)...); return status.end(); } template 0), int> = 0> event parallel_for(nd_range execution_range, Rest &&...rest) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); simsycl::detail::parallel_for(execution_range, std::forward(rest)...); return status.end(); } template 0), int> = 0> event parallel_for(nd_range execution_range, event /* dep_event */, Rest &&...rest) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); simsycl::detail::parallel_for(execution_range, std::forward(rest)...); return status.end(); } template 0), int> = 0> event parallel_for(nd_range execution_range, const std::vector & /* dep_events */, Rest &&...rest) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); simsycl::detail::parallel_for(execution_range, std::forward(rest)...); return status.end(); } @@ -176,26 +176,26 @@ class queue final : public detail::reference_type, /* -- USM functions -- */ event memcpy(void *dest, const void *src, size_t num_bytes) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); ::memcpy(dest, src, num_bytes); return status.end(); } event memcpy(void *dest, const void *src, size_t num_bytes, event /* dep_event */) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); ::memcpy(dest, src, num_bytes); return status.end(); } event memcpy(void *dest, const void *src, size_t num_bytes, const std::vector & /* dep_events */) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); ::memcpy(dest, src, num_bytes); return status.end(); } template event copy(const T *src, T *dest, size_t count) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); std::copy_n(src, count, dest); return status.end(); } @@ -203,7 +203,7 @@ class queue final : public detail::reference_type, template event copy(const T *src, T *dest, size_t count, event dep_event) { (void)(dep_event); - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); std::copy_n(src, count, dest); return status.end(); } @@ -211,71 +211,71 @@ class queue final : public detail::reference_type, template event copy(const T *src, T *dest, size_t count, const std::vector &dep_events) { (void)(dep_events); - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); std::copy_n(src, count, dest); return status.end(); } event memset(void *ptr, int value, size_t num_bytes) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); ::memset(ptr, value, num_bytes); return status.end(); } event memset(void *ptr, int value, size_t num_bytes, event /* dep_event */) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); ::memset(ptr, value, num_bytes); return status.end(); } event memset(void *ptr, int value, size_t num_bytes, const std::vector & /* dep_events */) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); ::memset(ptr, value, num_bytes); return status.end(); } template event fill(void *ptr, const T &pattern, size_t count) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); std::fill_n(ptr, count, pattern); return status.end(); } template event fill(void *ptr, const T &pattern, size_t count, event /* dep_event */) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); std::fill_n(ptr, count, pattern); return status.end(); } template event fill(void *ptr, const T &pattern, size_t count, const std::vector & /* dep_events */) { - auto status = detail::execution_status::submit_and_start(); + auto status = detail::event_state::submit_and_start(); std::fill_n(ptr, count, pattern); return status.end(); } - event prefetch(void * /* ptr */, size_t /* num_bytes */) { return detail::execution_status::instant(); } + event prefetch(void * /* ptr */, size_t /* num_bytes */) { return detail::event_state::instant(); } event prefetch(void * /* ptr */, size_t /* num_bytes */, event /* dep_event */) { - return detail::execution_status::instant(); + return detail::event_state::instant(); } event prefetch(void * /* ptr */, size_t /* num_bytes */, const std::vector & /* dep_events */) { - return detail::execution_status::instant(); + return detail::event_state::instant(); } event mem_advise(void * /* ptr */, size_t /* num_bytes */, int /* advice */) { - return detail::execution_status::instant(); + return detail::event_state::instant(); } event mem_advise(void * /* ptr */, size_t /* num_bytes */, int /* advice */, event /* dep_event */) { - return detail::execution_status::instant(); + return detail::event_state::instant(); } event mem_advise( void * /* ptr */, size_t /* num_bytes */, int /* advice */, const std::vector & /* dep_events */) { - return detail::execution_status::instant(); + return detail::event_state::instant(); } /// Placeholder accessor shortcuts @@ -315,9 +315,14 @@ class queue final : public detail::reference_type, SIMSYCL_STOP_IGNORING_DEPRECATIONS private: + template + friend class detail::weak_ref; + struct internal_t { } inline static constexpr internal{}; + queue(std::shared_ptr &&state) : reference_type(std::move(state)) {} + explicit queue(internal_t /* tag */, const detail::device_selector &selector, const async_handler &async_handler, const property_list &prop_list); @@ -329,3 +334,7 @@ class queue final : public detail::reference_type, }; } // namespace simsycl::sycl + +template<> +struct std::hash + : public std::hash> {}; diff --git a/include/simsycl/sycl/usm.hh b/include/simsycl/sycl/usm.hh index c507a94..4f886b5 100644 --- a/include/simsycl/sycl/usm.hh +++ b/include/simsycl/sycl/usm.hh @@ -1,11 +1,14 @@ #pragma once +#include "device.hh" #include "enums.hh" #include "forward.hh" #include "property.hh" +#include "queue.hh" #include "simsycl/detail/allocation.hh" + namespace simsycl::sycl { template @@ -70,259 +73,227 @@ class usm_allocator { inline void *malloc_device( size_t num_bytes, const device &sycl_device, const context &sycl_context, const property_list &prop_list = {}) { - (void)sycl_device; - (void)sycl_context; (void)prop_list; - return detail::aligned_alloc(1, num_bytes); + return detail::usm_alloc(sycl_context, usm::alloc::device, sycl_device, num_bytes, 1); } template T *malloc_device( size_t count, const device &sycl_device, const context &sycl_context, const property_list &prop_list = {}) { - (void)sycl_device; - (void)sycl_context; (void)prop_list; - return static_cast(detail::aligned_alloc(alignof(T), count * sizeof(T))); + return static_cast( + detail::usm_alloc(sycl_context, usm::alloc::device, sycl_device, count * sizeof(T), alignof(T))); } inline void *malloc_device(size_t num_bytes, const queue &sycl_queue, const property_list &prop_list = {}) { - (void)sycl_queue; (void)prop_list; - return detail::aligned_alloc(1, num_bytes); + return detail::usm_alloc(sycl_queue.get_context(), usm::alloc::device, sycl_queue.get_device(), num_bytes, 1); } template T *malloc_device(size_t count, const queue &sycl_queue, const property_list &prop_list = {}) { - (void)sycl_queue; (void)prop_list; - return static_cast(detail::aligned_alloc(alignof(T), count * sizeof(T))); + return static_cast(detail::usm_alloc( + sycl_queue.get_context(), usm::alloc::device, sycl_queue.get_device(), count * sizeof(T), alignof(T))); } inline void *aligned_alloc_device(size_t alignment, size_t num_bytes, const device &sycl_device, const context &sycl_context, const property_list &prop_list = {}) { - (void)sycl_device; - (void)sycl_context; (void)prop_list; - return detail::aligned_alloc(alignment, num_bytes); + return detail::usm_alloc(sycl_context, usm::alloc::device, sycl_device, num_bytes, alignment); } template T *aligned_alloc_device(size_t alignment, size_t count, const device &sycl_device, const context &sycl_context, const property_list &prop_list = {}) { - (void)sycl_device; - (void)sycl_context; (void)prop_list; - return static_cast(detail::aligned_alloc(alignment, count * sizeof(T))); + return static_cast( + detail::usm_alloc(sycl_context, usm::alloc::device, sycl_device, count * sizeof(T), alignment)); } inline void *aligned_alloc_device( size_t alignment, size_t num_bytes, const queue &sycl_queue, const property_list &prop_list = {}) { - (void)sycl_queue; (void)prop_list; - return detail::aligned_alloc(alignment, num_bytes); + return detail::usm_alloc( + sycl_queue.get_context(), usm::alloc::device, sycl_queue.get_device(), num_bytes, alignment); } template T *aligned_alloc_device(size_t alignment, size_t count, const queue &sycl_queue, const property_list &prop_list = {}) { - (void)sycl_queue; (void)prop_list; - return static_cast(detail::aligned_alloc(alignment, count * sizeof(T))); + return static_cast(detail::usm_alloc( + sycl_queue.get_context(), usm::alloc::device, sycl_queue.get_device(), count * sizeof(T), alignment)); }; inline void *malloc_host(size_t num_bytes, const context &sycl_context, const property_list &prop_list = {}) { - (void)sycl_context; (void)prop_list; - return detail::aligned_alloc(1, num_bytes); + return detail::usm_alloc(sycl_context, usm::alloc::host, std::nullopt, num_bytes, 1); } template T *malloc_host(size_t count, const context &sycl_context, const property_list &prop_list = {}) { - (void)sycl_context; (void)prop_list; - return static_cast(detail::aligned_alloc(alignof(T), count * sizeof(T))); + return static_cast( + detail::usm_alloc(sycl_context, usm::alloc::host, std::nullopt, count * sizeof(T), alignof(T))); } inline void *malloc_host(size_t num_bytes, const queue &sycl_queue, const property_list &prop_list = {}) { - (void)sycl_queue; (void)prop_list; - return detail::aligned_alloc(1, num_bytes); + return detail::usm_alloc(sycl_queue.get_context(), usm::alloc::host, std::nullopt, num_bytes, 1); } template T *malloc_host(size_t count, const queue &sycl_queue, const property_list &prop_list = {}) { - (void)sycl_queue; (void)prop_list; - return static_cast(detail::aligned_alloc(alignof(T), count * sizeof(T))); + return static_cast( + detail::usm_alloc(sycl_queue.get_context(), usm::alloc::host, std::nullopt, count * sizeof(T), alignof(T))); } inline void *aligned_alloc_host( size_t alignment, size_t num_bytes, const context &sycl_context, const property_list &prop_list = {}) { - (void)sycl_context; (void)prop_list; - return detail::aligned_alloc(alignment, num_bytes); + return detail::usm_alloc(sycl_context, usm::alloc::host, std::nullopt, num_bytes, alignment); } template T *aligned_alloc_host( size_t alignment, size_t count, const context &sycl_context, const property_list &prop_list = {}) { - (void)sycl_context; (void)prop_list; - return static_cast(detail::aligned_alloc(alignment, count * sizeof(T))); + return static_cast( + detail::usm_alloc(sycl_context, usm::alloc::host, std::nullopt, count * sizeof(T), alignment)); } inline void *aligned_alloc_host( size_t alignment, size_t num_bytes, const queue &sycl_queue, const property_list &prop_list = {}) { - (void)sycl_queue; (void)prop_list; - return detail::aligned_alloc(alignment, num_bytes); + return detail::usm_alloc(sycl_queue.get_context(), usm::alloc::host, std::nullopt, num_bytes, alignment); } template void *aligned_alloc_host(size_t alignment, size_t count, const queue &sycl_queue, const property_list &prop_list = {}) { - (void)sycl_queue; (void)prop_list; - return static_cast(detail::aligned_alloc(alignment, count * sizeof(T))); + return static_cast( + detail::usm_alloc(sycl_queue.get_context(), usm::alloc::host, std::nullopt, count * sizeof(T), alignment)); } inline void *malloc_shared( size_t num_bytes, const device &sycl_device, const context &sycl_context, const property_list &prop_list = {}) { - (void)sycl_device; - (void)sycl_context; (void)prop_list; - return detail::aligned_alloc(1, num_bytes); + return detail::usm_alloc(sycl_context, usm::alloc::shared, sycl_device, num_bytes, 1); } template T *malloc_shared( size_t count, const device &sycl_device, const context &sycl_context, const property_list &prop_list = {}) { - (void)sycl_device; - (void)sycl_context; (void)prop_list; - return static_cast(detail::aligned_alloc(alignof(T), count * sizeof(T))); + return static_cast( + detail::usm_alloc(sycl_context, usm::alloc::shared, sycl_device, count * sizeof(T), alignof(T))); } inline void *malloc_shared(size_t num_bytes, const queue &sycl_queue, const property_list &prop_list = {}) { - (void)sycl_queue; (void)prop_list; - return detail::aligned_alloc(1, num_bytes); + return detail::usm_alloc(sycl_queue.get_context(), usm::alloc::shared, sycl_queue.get_device(), num_bytes, 1); } template T *malloc_shared(size_t count, const queue &sycl_queue, const property_list &prop_list = {}) { - (void)sycl_queue; (void)prop_list; - return static_cast(detail::aligned_alloc(alignof(T), count * sizeof(T))); + return static_cast(detail::usm_alloc( + sycl_queue.get_context(), usm::alloc::shared, sycl_queue.get_device(), count * sizeof(T), alignof(T))); } inline void *aligned_alloc_shared(size_t alignment, size_t num_bytes, const device &sycl_device, const context &sycl_context, const property_list &prop_list = {}) { - (void)sycl_device; - (void)sycl_context; (void)prop_list; - return detail::aligned_alloc(alignment, num_bytes); + return detail::usm_alloc(sycl_context, usm::alloc::shared, sycl_device, num_bytes, alignment); } template T *aligned_alloc_shared(size_t alignment, size_t count, const device &sycl_device, const context &sycl_context, const property_list &prop_list = {}) { - (void)sycl_device; - (void)sycl_context; (void)prop_list; - return static_cast(detail::aligned_alloc(alignment, count * sizeof(T))); + return static_cast( + detail::usm_alloc(sycl_context, usm::alloc::shared, sycl_device, count * sizeof(T), alignment)); } inline void *aligned_alloc_shared( size_t alignment, size_t num_bytes, const queue &sycl_queue, const property_list &prop_list = {}) { - (void)sycl_queue; (void)prop_list; - return detail::aligned_alloc(alignment, num_bytes); + return detail::usm_alloc( + sycl_queue.get_context(), usm::alloc::shared, sycl_queue.get_device(), num_bytes, alignment); } template T *aligned_alloc_shared(size_t alignment, size_t count, const queue &sycl_queue, const property_list &prop_list = {}) { - (void)sycl_queue; (void)prop_list; - return static_cast(detail::aligned_alloc(alignment, count * sizeof(T))); -} + return static_cast(detail::usm_alloc( + sycl_queue.get_context(), usm::alloc::shared, sycl_queue.get_device(), count * sizeof(T), alignment)); +}; inline void *malloc(size_t num_bytes, const device &sycl_device, const context &sycl_context, usm::alloc kind, const property_list &prop_list = {}) { - (void)sycl_device; - (void)sycl_context; - (void)kind; (void)prop_list; - return detail::aligned_alloc(1, num_bytes); + return detail::usm_alloc(sycl_context, kind, sycl_device, num_bytes, 1); } template T *malloc(size_t count, const device &sycl_device, const context &sycl_context, usm::alloc kind, const property_list &prop_list = {}) { - (void)sycl_device; - (void)sycl_context; - (void)kind; (void)prop_list; - return static_cast(detail::aligned_alloc(alignof(T), count * sizeof(T))); + return static_cast(detail::usm_alloc(sycl_context, kind, sycl_device, count * sizeof(T), alignof(T))); } inline void *malloc(size_t num_bytes, const queue &sycl_queue, usm::alloc kind, const property_list &prop_list = {}) { - (void)sycl_queue; - (void)kind; (void)prop_list; - return detail::aligned_alloc(1, num_bytes); + return detail::usm_alloc(sycl_queue.get_context(), kind, + kind != usm::alloc::host ? std::optional(sycl_queue.get_device()) : std::nullopt, num_bytes, 1); } template T *malloc(size_t count, const queue &sycl_queue, usm::alloc kind, const property_list &prop_list = {}) { - (void)sycl_queue; - (void)kind; (void)prop_list; - return static_cast(detail::aligned_alloc(alignof(T), count * sizeof(T))); + return static_cast(detail::usm_alloc(sycl_queue.get_context(), kind, + kind != usm::alloc::host ? std::optional(sycl_queue.get_device()) : std::nullopt, count * sizeof(T), + alignof(T))); } inline void *aligned_alloc(size_t alignment, size_t num_bytes, const device &sycl_device, const context &sycl_context, usm::alloc kind, const property_list &prop_list = {}) { - (void)sycl_device; - (void)sycl_context; - (void)kind; (void)prop_list; - return detail::aligned_alloc(alignment, num_bytes); + return detail::usm_alloc(sycl_context, kind, sycl_device, num_bytes, alignment); } template T *aligned_alloc(size_t alignment, size_t count, const device &sycl_device, const context &sycl_context, usm::alloc kind, const property_list &prop_list = {}) { - (void)sycl_device; - (void)sycl_context; - (void)kind; (void)prop_list; - return static_cast(detail::aligned_alloc(alignment, count * sizeof(T))); + return static_cast(detail::usm_alloc(sycl_context, kind, sycl_device, count * sizeof(T), alignment)); } inline void *aligned_alloc( size_t alignment, size_t num_bytes, const queue &sycl_queue, usm::alloc kind, const property_list &prop_list = {}) { - (void)sycl_queue; - (void)kind; (void)prop_list; - return detail::aligned_alloc(alignment, num_bytes); + return detail::usm_alloc(sycl_queue.get_context(), kind, + kind != usm::alloc::host ? std::optional(sycl_queue.get_device()) : std::nullopt, num_bytes, alignment); } template T *aligned_alloc( size_t alignment, size_t count, const queue &sycl_queue, usm::alloc kind, const property_list &prop_list = {}) { - (void)sycl_queue; - (void)kind; (void)prop_list; - return static_cast(detail::aligned_alloc(alignment, count * sizeof(T))); + return static_cast(detail::usm_alloc(sycl_queue.get_context(), kind, + kind != usm::alloc::host ? std::optional(sycl_queue.get_device()) : std::nullopt, count * sizeof(T), + alignment)); } inline void free(void *ptr, const context &sycl_context) { (void)sycl_context; - detail::aligned_free(ptr); + detail::usm_free(ptr, sycl_context); } inline void free(void *ptr, const queue &sycl_queue) { (void)sycl_queue; - detail::aligned_free(ptr); + detail::usm_free(ptr, sycl_queue.get_context()); } usm::alloc get_pointer_type(const void *ptr, const context &sycl_context); diff --git a/include/simsycl/system.hh b/include/simsycl/system.hh index 32fc9ca..9340fb4 100644 --- a/include/simsycl/system.hh +++ b/include/simsycl/system.hh @@ -109,8 +109,8 @@ struct system_config { std::vector devices{}; }; -const system_config &get_system(); -void set_system(system_config system); +const system_config &get_system_config(); +void configure_system(system_config system); } // namespace simsycl diff --git a/src/simsycl/device.cc b/src/simsycl/device.cc index 64e9ccc..3a5f6e9 100644 --- a/src/simsycl/device.cc +++ b/src/simsycl/device.cc @@ -8,7 +8,7 @@ namespace simsycl::detail { struct device_state { device_config config; - weak_ref platform; + weak_ref platform; }; int default_selector::operator()(const sycl::device &device) const { @@ -25,8 +25,6 @@ int accelerator_selector::operator()(const sycl::device &device) const { return namespace simsycl::sycl { -device::device(detail::device_state state) : reference_type(std::in_place, std::move(state)) {} - device::device() : device(default_selector_v) {} device::device(const detail::device_selector &device_selector) : device(detail::select_device(device_selector)) {} @@ -365,7 +363,7 @@ std::vector device::get_info template<> sycl::platform device::get_info() const { - return state().platform.lock(); + return state().platform.lock().value(); } template<> @@ -462,7 +460,7 @@ bool device::has(aspect asp) const { } std::vector device::get_devices(info::device_type type) { - auto &system = get_system(); + auto &system = get_system_config(); std::vector result; std::copy_if(system.devices.begin(), system.devices.end(), std::back_inserter(result), [type](const device &dev) { return dev.get_info() == type; }); @@ -475,10 +473,10 @@ std::vector device::get_devices(info::device_type type) { namespace simsycl { sycl::device create_device(sycl::platform &platform, const device_config &config) { - detail::device_state state; - state.config = config; - state.platform = platform.weak_ref(); - sycl::device device(state); + auto state = std::make_shared(); + state->config = config; + state->platform = detail::weak_ref(platform); + sycl::device device(std::move(state)); platform.add_device(device); return device; } diff --git a/src/simsycl/platform.cc b/src/simsycl/platform.cc index b0fef7d..4210000 100644 --- a/src/simsycl/platform.cc +++ b/src/simsycl/platform.cc @@ -16,15 +16,11 @@ struct platform_state { namespace simsycl::sycl { -platform::platform(detail::platform_state state) : reference_type(std::in_place, std::move(state)) {} - platform::platform() : platform(default_selector_v) {} platform::platform(const detail::device_selector &selector) : platform(detail::select_device(selector).get_platform()) {} -platform::platform(std::shared_ptr &&state) : reference_type(std::move(state)) {} - std::vector platform::get_devices(info::device_type type) const { std::vector result; std::copy_if(state().devices.begin(), state().devices.end(), std::back_inserter(result), @@ -64,7 +60,7 @@ bool platform::has_extension(const std::string &extension) const { != state().config.extensions.end(); } -std::vector platform::get_platforms() { return get_system().platforms; } +std::vector platform::get_platforms() { return get_system_config().platforms; } void platform::add_device(const device &dev) { state().devices.push_back(dev); } @@ -73,8 +69,8 @@ void platform::add_device(const device &dev) { state().devices.push_back(dev); } namespace simsycl { sycl::platform create_platform(const platform_config &config) { - detail::platform_state state; - state.config = std::move(config); + auto state = std::make_shared(); + state->config = std::move(config); return sycl::platform(std::move(state)); } diff --git a/src/simsycl/system.cc b/src/simsycl/system.cc index b055f42..ab7f8e1 100644 --- a/src/simsycl/system.cc +++ b/src/simsycl/system.cc @@ -1,14 +1,19 @@ #include "simsycl/system.hh" +#include "simsycl/detail/allocation.hh" +#include "simsycl/detail/check.hh" #include "simsycl/sycl/device.hh" #include "simsycl/sycl/platform.hh" #include "simsycl/templates.hh" #include +#include +#include + namespace simsycl::detail { sycl::device select_device(const device_selector &selector) { - auto &system = simsycl::get_system(); + auto &system = simsycl::get_system_config(); SIMSYCL_CHECK(!system.devices.empty()); int max_rating = std::numeric_limits::lowest(); for(const auto &device : system.devices) { @@ -69,7 +74,139 @@ void call_async_handler(const sycl::async_handler &handler_opt, sycl::exception_ handler_opt ? handler_opt(exceptions) : default_async_handler(exceptions); } -std::optional system; +class usm_allocation { + public: + usm_allocation(const sycl::context &ctx, sycl::usm::alloc kind, std::optional device, + void *const begin, void *const end) + : m_ctx(ctx), m_kind(kind), m_device(std::move(device)), m_begin(begin), m_end(end) { + assert(begin < end); + } + + sycl::usm::alloc get_kind() const { return m_kind; }; + void *get_pointer() const { return m_begin; } + size_t get_size_bytes() const { return static_cast(m_end) - static_cast(m_begin); } + std::optional get_context() const { return m_ctx.lock(); } + const std::optional &get_device() const { return m_device; } + + private: + friend struct usm_allocation_order; + weak_ref m_ctx; + sycl::usm::alloc m_kind; + std::optional m_device; + void *m_begin, *m_end; +}; + +struct usm_allocation_order { + using is_transparent = std::true_type; + + bool operator()(const usm_allocation &lhs, const usm_allocation &rhs) const { + SIMSYCL_CHECK((lhs.m_end <= rhs.m_begin || rhs.m_end <= lhs.m_begin) + || (lhs.m_begin == rhs.m_begin && lhs.m_end == rhs.m_end)); + return lhs.m_begin < rhs.m_begin; + } + + bool operator()(const usm_allocation &lhs, const void *rhs) const { return lhs.m_end < rhs; } + bool operator()(const void *lhs, const usm_allocation &rhs) const { return lhs < rhs.m_begin; } +}; + +struct memory_state { + sycl::usm::alloc type; + size_t bytes_free = 0; + std::set allocations; + + explicit memory_state(sycl::usm::alloc type, size_t bytes_free) : type(type), bytes_free(bytes_free) {} +}; + +struct system_state { + system_config config; + std::unordered_map device_bytes_free; + std::set usm_allocations; + + explicit system_state(system_config config) : config(std::move(config)) { + for(const auto &device : this->config.devices) { + device_bytes_free.emplace(device, device.get_info()); + } + } +}; + +std::optional system; + +system_state &get_system() { + if(!detail::system.has_value()) { + system_config config; + auto platform = config.platforms.emplace_back(create_platform(simsycl::templates::platform::cuda_12_2)); + for(int i = 0; i < 4; ++i) { + config.devices.push_back(create_device(platform, simsycl::templates::device::nvidia::rtx_3090)); + } + configure_system(std::move(config)); + } + return system.value(); +} + +void *usm_alloc(const sycl::context &context, sycl::usm::alloc kind, std::optional device, + size_t size_bytes, size_t alignment_bytes) { + SIMSYCL_CHECK(kind != sycl::usm::alloc::unknown); + SIMSYCL_CHECK((kind == sycl::usm::alloc::host) == (!device.has_value())); + + if(size_bytes == 0) { size_bytes = 1; } + + auto &system = get_system(); + + size_t *bytes_free = nullptr; + if(device.has_value()) { + const auto context_devices = context.get_devices(); + if(std::find(context_devices.begin(), context_devices.end(), *device) == context_devices.end()) { + throw sycl::exception(sycl::errc::invalid, "Device not associated with context"); + } + + bytes_free = &system.device_bytes_free.at(*device); + if(*bytes_free < size_bytes) { + throw sycl::exception(sycl::errc::memory_allocation, "Not enough memory available"); + } + } + +#if defined(_MSC_VER) + // MSVC does not have std::aligned_alloc because the pointers it returns cannot be freed with std::free + void *const ptr = _aligned_malloc(size_bytes, alignment_bytes); +#else + void *const ptr = std::aligned_alloc(alignment_bytes, size_bytes); +#endif + if(ptr == nullptr) { throw sycl::exception(sycl::errc::memory_allocation, "Not enough memory available"); } + + std::memset(ptr, static_cast(uninitialized_memory_pattern), size_bytes); + + if(bytes_free != nullptr) { *bytes_free -= size_bytes; } + system.usm_allocations.emplace(context, kind, std::move(device), ptr, static_cast(ptr) + size_bytes); + + return ptr; +} + +void usm_free(void *ptr, const sycl::context &context) { + if(ptr == nullptr) return; + + auto &system = get_system(); + const auto iter = system.usm_allocations.find(ptr); + if(iter == system.usm_allocations.end()) { + throw sycl::exception(sycl::errc::invalid, "Pointer does not point to an allocation"); + } + if(iter->get_pointer() != ptr) { + throw sycl::exception(sycl::errc::invalid, "Pointer points to the inside of an allocation"); + } + if(iter->get_context() != context) { + throw sycl::exception(sycl::errc::invalid, "Pointer is not associated with the given context"); + } + +#if defined(_MSC_VER) + _aligned_free(ptr); +#else + std::free(ptr); +#endif + + if(iter->get_device().has_value()) { + system.device_bytes_free.at(iter->get_device().value()) += iter->get_size_bytes(); + } + system.usm_allocations.erase(iter); +} } // namespace simsycl::detail @@ -79,21 +216,39 @@ std::error_code make_error_code(errc e) noexcept { return {static_cast(e), const std::error_category &sycl_category() noexcept { return detail::error_category_v; } -} // namespace simsycl::sycl +usm::alloc get_pointer_type(const void *ptr, const context &sycl_context) { + auto &system = detail::get_system(); + if(const auto iter = system.usm_allocations.find(ptr); iter != system.usm_allocations.end()) { + return iter->get_context() == sycl_context ? iter->get_kind() : usm::alloc::unknown; + } + return usm::alloc::unknown; +} -namespace simsycl { +device get_pointer_device(const void *ptr, const context &sycl_context) { + auto &system = detail::get_system(); + const auto iter = system.usm_allocations.find(ptr); + if(iter == system.usm_allocations.end()) { + throw sycl::exception(sycl::errc::invalid, "Pointer does not point to an allocation"); + } -const system_config &get_system() { - if(!detail::system.has_value()) { - auto &system = detail::system.emplace(); // gpuc3 - auto platform = system.platforms.emplace_back(create_platform(simsycl::templates::platform::cuda_12_2)); - for(int i = 0; i < 4; ++i) { - system.devices.push_back(create_device(platform, simsycl::templates::device::nvidia::rtx_3090)); - } + if(iter->get_kind() == usm::alloc::host) { return sycl_context.get_devices().at(0); } + + assert(iter->get_device().has_value()); + const auto &device = *iter->get_device(); + + const auto context_devices = sycl_context.get_devices(); + if(std::find(context_devices.begin(), context_devices.end(), device) == context_devices.end()) { + throw sycl::exception(sycl::errc::invalid, "Device not associated with context"); } - return *detail::system; + return device; } -void set_system(system_config system) { detail::system = std::move(system); } +} // namespace simsycl::sycl + +namespace simsycl { + +const system_config &get_system_config() { return detail::get_system().config; } + +void configure_system(system_config system) { detail::system.emplace(std::move(system)); } } // namespace simsycl