Skip to content

Commit

Permalink
Support for USM; queue profiling
Browse files Browse the repository at this point in the history
  • Loading branch information
fknorr committed Dec 12, 2023
1 parent 05e4c95 commit b8ac69c
Show file tree
Hide file tree
Showing 11 changed files with 612 additions and 55 deletions.
1 change: 1 addition & 0 deletions include/simsycl/sycl.hh
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@
#include "sycl/reduction.hh"
#include "sycl/sub_group.hh"
#include "sycl/type_traits.hh"
#include "sycl/usm.hh"
17 changes: 14 additions & 3 deletions include/simsycl/sycl/enums.hh
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#pragma once

#include <cstdint>


namespace simsycl::sycl {

enum class addressing_mode { mirrored_repeat, repeat, clamp_to_edge, clamp, none };
Expand Down Expand Up @@ -307,9 +310,17 @@ struct command_execution_status;

namespace simsycl::sycl::info::event_profiling {

struct command_submit;
struct command_start;
struct command_end;
struct command_submit {
using return_type = uint64_t;
};

struct command_start {
using return_type = uint64_t;
};

struct command_end {
using return_type = uint64_t;
};

} // namespace simsycl::sycl::info::event_profiling

Expand Down
106 changes: 90 additions & 16 deletions include/simsycl/sycl/event.hh
Original file line number Diff line number Diff line change
@@ -1,36 +1,110 @@
#pragma once

#include "forward.hh"
#include "type_traits.hh"

#include "../detail/reference_type.hh"

#include <chrono>
#include <vector>


namespace simsycl::detail {

struct execution_status {
std::chrono::steady_clock::time_point t_submit{};
std::chrono::steady_clock::time_point t_start{};
std::chrono::steady_clock::time_point t_end{};

void start() { t_start = std::chrono::steady_clock::now(); }

[[nodiscard]] static execution_status submit() {
execution_status status;
status.t_submit = std::chrono::steady_clock::now();
return status;
}

[[nodiscard]] static execution_status submit_and_start() {
auto status = submit();
status.start();
return status;
}

[[nodiscard]] sycl::event end();

[[nodiscard]] static sycl::event instant();
};

template <typename Clock, typename Dur>
uint64_t nanoseconds_since_epoch(std::chrono::time_point<Clock, Dur> time_point) {
return std::chrono::duration_cast<std::chrono::nanoseconds>(time_point.time_since_epoch()).count();
}

} // namespace simsycl::detail

namespace simsycl::sycl {

class event {
public:
event() = default;
class event : detail::reference_type<event, detail::execution_status> {
public:
event() = default;

/* -- common interface members -- */
backend get_backend() const noexcept;

backend get_backend() const noexcept;
std::vector<event> get_wait_list();

std::vector<event> get_wait_list();
void wait() {}

void wait() {}
static void wait(const std::vector<event> & /* event_list */) {}

static void wait(const std::vector<event>& /* event_list */) {}
void wait_and_throw() {}

void wait_and_throw() {}
static void wait_and_throw(const std::vector<event> & /* event_list */) {}

static void wait_and_throw(const std::vector<event>& /* event_list */) {}
template <typename Param>
typename Param::return_type get_info() const {
if constexpr(std::is_same_v<Param, info::event::command_execution_status>) {
return info::event_command_status::complete;
} else {
static_assert(detail::always_false<Param>, "Unknown event::get_info() parameter");
}
}

template <typename Param> typename Param::return_type get_info() const;
template <typename Param>
typename Param::return_type get_backend_info() const {
static_assert(detail::always_false<Param>, "Unknown event::get_backend_info() parameter");
}

template <typename Param>
typename Param::return_type get_backend_info() const;
template <typename Param>
typename Param::return_type get_profiling_info() const {
if constexpr(std::is_same_v<Param, info::event_profiling::command_submit>) {
return detail::nanoseconds_since_epoch(state().t_submit);
} else if constexpr(std::is_same_v<Param, info::event_profiling::command_start>) {
return detail::nanoseconds_since_epoch(state().t_start);
} else if constexpr(std::is_same_v<Param, info::event_profiling::command_end>) {
return detail::nanoseconds_since_epoch(state().t_end);
} else {
static_assert(detail::always_false<Param>, "Unknown event::get_profiling_info() parameter");
}
}

template <typename Param>
typename Param::return_type get_profiling_info() const;
private:
friend event detail::make_event(const detail::execution_status &status);

explicit event(const detail::execution_status &status)
: detail::reference_type<event, detail::execution_status>(std::in_place, status) {}
};

} // namespace sycl
} // namespace simsycl::sycl

namespace simsycl::detail {

inline sycl::event make_event(const execution_status &status) { return sycl::event(status); }

inline sycl::event execution_status::end() {
t_end = std::chrono::steady_clock::now();
return make_event(*this);
}

inline sycl::event execution_status::instant() { return submit_and_start().end(); }

} // namespace simsycl::detail
4 changes: 4 additions & 0 deletions include/simsycl/sycl/forward.hh
Original file line number Diff line number Diff line change
Expand Up @@ -110,4 +110,8 @@ sycl::handler make_handler();

void **require_local_memory(sycl::handler &cgh, size_t size, size_t align);

struct execution_status;

sycl::event make_event(const execution_status &status);

} // namespace simsycl::detail
4 changes: 4 additions & 0 deletions include/simsycl/sycl/group_algorithms.hh
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ T permute_group(G g, T x, typename G::linear_id_type mask) {
}});
}

template <typename Group, typename T>
T permute_group_by_xor(Group g, T x, typename Group::linear_id_type mask); // TODO


// select

template <SubGroup G, TriviallyCopyable T>
Expand Down
33 changes: 32 additions & 1 deletion include/simsycl/sycl/multi_ptr.hh
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#pragma once

#include "accessor.hh"
#include "enums.hh"
#include "forward.hh"

Expand Down Expand Up @@ -306,4 +305,36 @@ using global_ptr = multi_ptr<ElementType, access::address_space::global_space, I
template <typename ElementType, access::decorated IsDecorated = access::decorated::legacy>
using local_ptr = multi_ptr<ElementType, access::address_space::local_space, IsDecorated>;

// Deprecated in SYCL 2020
template <typename ElementType>

using constant_ptr [[deprecated]]
= multi_ptr<ElementType, access::address_space::constant_space, access::decorated::legacy>;

template <typename ElementType, access::decorated IsDecorated = access::decorated::legacy>
using private_ptr = multi_ptr<ElementType, access::address_space::private_space, IsDecorated>;

// Template specialization aliases for different pointer address spaces.
// The interface exposes non-decorated pointer while keeping the address space information internally.

template <typename ElementType>
using raw_global_ptr = multi_ptr<ElementType, access::address_space::global_space, access::decorated::no>;

template <typename ElementType>
using raw_local_ptr = multi_ptr<ElementType, access::address_space::local_space, access::decorated::no>;

template <typename ElementType>
using raw_private_ptr = multi_ptr<ElementType, access::address_space::private_space, access::decorated::no>;

// Template specialization aliases for different pointer address spaces.
// The interface exposes decorated pointer.
template <typename ElementType>
using decorated_global_ptr = multi_ptr<ElementType, access::address_space::global_space, access::decorated::yes>;

template <typename ElementType>
using decorated_local_ptr = multi_ptr<ElementType, access::address_space::local_space, access::decorated::yes>;

template <typename ElementType>
using decorated_private_ptr = multi_ptr<ElementType, access::address_space::private_space, access::decorated::yes>;

} // namespace simsycl::sycl
46 changes: 46 additions & 0 deletions include/simsycl/sycl/nd_item.hh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "group.hh"
#include "id.hh"
#include "multi_ptr.hh"
#include "range.hh"
#include "sub_group.hh"

Expand Down Expand Up @@ -41,6 +42,11 @@ sycl::nd_item<Dimensions> make_nd_item(const sycl::item<Dimensions, false> &glob

namespace simsycl::sycl {

class device_event {
public:
void wait() noexcept {}
};

template <int Dimensions>
class nd_item {
public:
Expand Down Expand Up @@ -104,6 +110,46 @@ class nd_item {
SIMSYCL_NOT_IMPLEMENTED(access_space);
}

// Deprecated in SYCL 2020.
template <typename DataT>
[[deprecated]] device_event async_work_group_copy(
local_ptr<DataT> dest, global_ptr<DataT> src, size_t num_elements) const;

// Deprecated in SYCL 2020.
template <typename DataT>
[[deprecated]] device_event async_work_group_copy(
global_ptr<DataT> dest, local_ptr<DataT> src, size_t num_elements) const;

// Deprecated in SYCL 2020.
template <typename DataT>
[[deprecated]] device_event async_work_group_copy(
local_ptr<DataT> dest, global_ptr<DataT> src, size_t num_elements, size_t src_stride) const;

// Deprecated in SYCL 2020.
template <typename DataT>
[[deprecated]] device_event async_work_group_copy(
global_ptr<DataT> dest, local_ptr<DataT> src, size_t num_elements, size_t dest_stride) const;

template <typename DestDataT, typename SrcDataT>
requires(std::is_same_v<DestDataT, std::remove_const_t<SrcDataT>>)
device_event async_work_group_copy(
decorated_local_ptr<DestDataT> dest, decorated_global_ptr<SrcDataT> src, size_t num_elements) const;

template <typename DestDataT, typename SrcDataT>
requires(std::is_same_v<DestDataT, std::remove_const_t<SrcDataT>>)
device_event async_work_group_copy(
decorated_global_ptr<DestDataT> dest, decorated_local_ptr<SrcDataT> src, size_t num_elements) const;

template <typename DestDataT, typename SrcDataT>
requires(std::is_same_v<DestDataT, std::remove_const_t<SrcDataT>>)
device_event async_work_group_copy(decorated_local_ptr<DestDataT> dest, decorated_global_ptr<SrcDataT> src,
size_t num_elements, size_t src_stride) const;

template <typename DestDataT, typename SrcDataT>
requires(std::is_same_v<DestDataT, std::remove_const_t<SrcDataT>>)
device_event async_work_group_copy(decorated_global_ptr<DestDataT> dest, decorated_local_ptr<SrcDataT> src,
size_t num_elements, size_t dest_stride) const;

template <typename... Events>
void wait_for(Events... events) const {
m_group.wait_for(events...);
Expand Down
16 changes: 16 additions & 0 deletions include/simsycl/sycl/property.hh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,22 @@ class property_list {
requires(is_property_v<Properties> && ...)
property_list(Properties... props) : m_properties{props...} {}

// Implemented by hipSYCL and DPC++ although the spec does not mention any members beside the constructor
template <typename Property>
bool has_property() const noexcept {
return std::any_of(m_properties.begin(), m_properties.end(),
[](const std::any &prop) { return prop.type() == typeid(Property); });
}

// Implemented by hipSYCL and DPC++ although the spec does not mention any members beside the constructor
template <typename Property>
Property get_property() const {
const auto iter = std::find_if(m_properties.begin(), m_properties.end(),
[](const std::any &prop) { return prop.type() == typeid(Property); });
SIMSYCL_CHECK(iter != m_properties.end());
return std::any_cast<Property>(*iter);
}

private:
friend class detail::property_interface;

Expand Down
Loading

0 comments on commit b8ac69c

Please sign in to comment.