diff --git a/.github/workflows/simsycl_ci.yml b/.github/workflows/simsycl_ci.yml index cb26d57..35fde59 100644 --- a/.github/workflows/simsycl_ci.yml +++ b/.github/workflows/simsycl_ci.yml @@ -13,33 +13,13 @@ jobs: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - build_type: [Release, Debug] - c_compiler: [gcc, clang, cl] + os: [windows-latest] + build_type: [Debug] + c_compiler: [cl] include: - os: windows-latest c_compiler: cl cpp_compiler: cl - - os: ubuntu-latest - c_compiler: gcc - cpp_compiler: g++ - - os: ubuntu-latest - c_compiler: clang - cpp_compiler: clang++ - - os: macos-latest - c_compiler: gcc - cpp_compiler: g++-13 - exclude: - - os: windows-latest - c_compiler: gcc - - os: windows-latest - c_compiler: clang - - os: ubuntu-latest - c_compiler: cl - - os: macos-latest - c_compiler: cl - - os: macos-latest - c_compiler: clang steps: - uses: actions/checkout@v3 @@ -79,8 +59,7 @@ jobs: # Build your program with the given configuration. Note that --config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} - - name: Test + - name: Test deprecated parallel_for signature working-directory: ${{ steps.strings.outputs.build-output-dir }} - # Execute tests defined by the CMake configuration. Note that --build-config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). - # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail - run: ctest --build-config ${{ matrix.build_type }} + timeout-minutes: 1 + run: D:\a\SimSYCL\SimSYCL\build\test\Debug\tests.exe "Calls to the deprecated parallel_for signature are not ambiguous" diff --git a/include/simsycl/detail/parallel_for.hh b/include/simsycl/detail/parallel_for.hh index 2d00bf9..a356c92 100644 --- a/include/simsycl/detail/parallel_for.hh +++ b/include/simsycl/detail/parallel_for.hh @@ -68,7 +68,9 @@ void execute_parallel_for(const sycl::range &range, const Offset &of const KernelFunc &func, Reducers &...reducers) // { + printf("execute_parallel_for 71\n"); register_kernel_on_static_construction(); + printf("execute_parallel_for 73\n"); simple_kernel> kernel; if constexpr(std::is_invocable_v>, Reducers &..., @@ -79,7 +81,9 @@ void execute_parallel_for(const sycl::range &range, const Offset &of std::is_invocable_v>, Reducers &...>); kernel = [&](const sycl::item &item) { func(item, reducers...); }; } - sequential_for(range, offset, kernel); + printf("execute_parallel_for 84\n"); + sequential_for(range, offset, kernel); + printf("execute_parallel_for 86\n"); } template @@ -137,6 +141,7 @@ void parallel_for(sycl::range num_work_items, sycl::kernel_handler k template void parallel_for(sycl::range num_work_items, sycl::id work_item_offset, sycl::kernel_handler kh, const KernelFunc &kernel_func) { + printf("parallel_for 140\n"); execute_parallel_for(num_work_items, work_item_offset, kh, kernel_func); } diff --git a/include/simsycl/detail/preprocessor.hh b/include/simsycl/detail/preprocessor.hh index 0757880..3248928 100644 --- a/include/simsycl/detail/preprocessor.hh +++ b/include/simsycl/detail/preprocessor.hh @@ -21,3 +21,7 @@ #define SIMSYCL_DETAIL_DEPRECATED_IN_SYCL #define SIMSYCL_DETAIL_DEPRECATED_IN_SYCL_V(message) #endif + +#define SIMSYCL_STRINGIZE_DETAIL(x) #x +#define SIMSYCL_STRINGIZE(x) SIMSYCL_STRINGIZE_DETAIL(x) +#define SIMSYCL_LINE_STRING SIMSYCL_STRINGIZE(__LINE__) diff --git a/include/simsycl/sycl/handler.hh b/include/simsycl/sycl/handler.hh index 668355e..06f065f 100644 --- a/include/simsycl/sycl/handler.hh +++ b/include/simsycl/sycl/handler.hh @@ -91,6 +91,7 @@ class handler { template SIMSYCL_DETAIL_DEPRECATED_IN_SYCL void parallel_for( range num_work_items, id work_item_offset, KernelType &&kernel_func) { + printf("parallel_for 94\n"); detail::parallel_for(num_work_items, work_item_offset, kernel_handler(this), kernel_func); } diff --git a/src/simsycl/schedule.cc b/src/simsycl/schedule.cc index 93e4b24..93cbc7b 100644 --- a/src/simsycl/schedule.cc +++ b/src/simsycl/schedule.cc @@ -43,27 +43,39 @@ namespace simsycl::detail { template void sequential_for(const sycl::range &range, const Offset &offset, const simple_kernel> &kernel) { + printf("sequential_for " SIMSYCL_LINE_STRING "\n"); // limit the number of work items scheduled at a time to avoid allocating huge index buffers constexpr size_t max_schedule_chunk_size = 16 << 10; + printf("sequential_for " SIMSYCL_LINE_STRING "\n"); const auto schedule_chunk_size = std::min(range.size(), max_schedule_chunk_size); + printf("sequential_for " SIMSYCL_LINE_STRING "\n"); const auto &schedule = get_cooperative_schedule(); + printf("sequential_for " SIMSYCL_LINE_STRING "\n"); std::vector order(schedule_chunk_size); + printf("sequential_for " SIMSYCL_LINE_STRING "\n"); auto schedule_state = schedule.init(order); - + printf("sequential_for " SIMSYCL_LINE_STRING "\n"); + for(size_t schedule_offset = 0; schedule_offset < range.size(); schedule_offset += max_schedule_chunk_size) { + printf("sequential_for " SIMSYCL_LINE_STRING "\n"); for(size_t schedule_id = 0; schedule_id < schedule_chunk_size; ++schedule_id) { + printf("sequential_for " SIMSYCL_LINE_STRING "\n"); const auto linear_id = schedule_offset + order[schedule_id]; if(linear_id < range.size()) { if constexpr(with_offset_v) { const auto id = offset + linear_index_to_id(range, linear_id); - kernel(make_item(id, range, offset)); + printf("sequential_for 61\n"); + kernel(make_item(id, range, offset)); + printf("sequential_for 63\n"); } else { const auto id = linear_index_to_id(range, linear_id); kernel(make_item(id, range)); } } } + printf("sequential_for 68\n"); schedule_state = schedule.update(schedule_state, order); + printf("sequential_for 70\n"); } } diff --git a/src/simsycl/system.cc b/src/simsycl/system.cc index d7a1a8d..0d5001d 100644 --- a/src/simsycl/system.cc +++ b/src/simsycl/system.cc @@ -291,12 +291,18 @@ std::shared_ptr void parse_environment() { if(g_environment_parsed) return; + printf("parse_environment " SIMSYCL_LINE_STRING "\n"); auto prefix = env::prefix("SIMSYCL"); const auto system = prefix.register_variable( - "SYSTEM", [](const std::string_view repr) { return read_system_config(std::string(repr)); }); + "SYSTEM", [](const std::string_view repr) { + printf("parse_environment " SIMSYCL_LINE_STRING "\n"); + return read_system_config(std::string(repr)); + }); + printf("parse_environment " SIMSYCL_LINE_STRING "\n"); const auto schedule = prefix.register_variable>( "SCHEDULE", [](const std::string_view repr) -> std::shared_ptr { + printf("parse_environment " SIMSYCL_LINE_STRING "\n"); if(repr == "rr") return std::make_unique(); if(repr == "shuffle") return std::make_unique(); if(repr.starts_with("shuffle:")) { @@ -306,11 +312,14 @@ void parse_environment() { throw env::parser_error{ fmt::format("Invalid schedule '{}', permitted values are 'rr', 'shuffle', and 'shuffle:'", repr)}; }); + printf("parse_environment " SIMSYCL_LINE_STRING "\n"); if(const auto parsed = prefix.parse_and_validate(); parsed.ok()) { + printf("parse_environment " SIMSYCL_LINE_STRING "\n"); g_env_system_config = parsed.get(system); g_env_cooperative_schedule = parsed.get_or(schedule, nullptr); } else { + printf("parse_environment " SIMSYCL_LINE_STRING "\n"); std::cerr << parsed.warning_message() << parsed.error_message(); } g_environment_parsed = true; @@ -325,13 +334,17 @@ std::shared_ptr g_cooperative_schedule; namespace simsycl { const cooperative_schedule &get_cooperative_schedule() { + printf("get_cooperative_schedule " SIMSYCL_LINE_STRING "\n"); detail::parse_environment(); + printf("get_cooperative_schedule " SIMSYCL_LINE_STRING "\n"); if(detail::g_cooperative_schedule == nullptr) { + printf("get_cooperative_schedule " SIMSYCL_LINE_STRING "\n"); if(detail::g_env_cooperative_schedule != nullptr) { detail::g_cooperative_schedule = detail::g_env_cooperative_schedule; } else { detail::g_cooperative_schedule = std::make_shared(); } + printf("get_cooperative_schedule " SIMSYCL_LINE_STRING "\n"); } return *detail::g_cooperative_schedule; } diff --git a/src/simsycl/system_config.cc b/src/simsycl/system_config.cc index ac26240..3e21cad 100644 --- a/src/simsycl/system_config.cc +++ b/src/simsycl/system_config.cc @@ -354,6 +354,7 @@ void from_json(const nlohmann::json &json, system_config &system) { system_config read_system_config(const std::string &path_to_json_file) { std::ifstream ifs(path_to_json_file); + printf("read_system_config " SIMSYCL_LINE_STRING "\n"); return nlohmann::json::parse(ifs).get(); } diff --git a/test/ambiguity_tests.cc b/test/ambiguity_tests.cc index 7d7827a..21749ec 100644 --- a/test/ambiguity_tests.cc +++ b/test/ambiguity_tests.cc @@ -2,28 +2,361 @@ #include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma comment(lib, "psapi.lib") +#pragma comment(lib, "dbghelp.lib") + +// Some versions of imagehlp.dll lack the proper packing directives themselves +// so we need to do it. +#pragma pack( push, before_imagehlp, 8 ) +#include +#pragma pack( pop, before_imagehlp ) + +struct module_data { + std::string image_name; + std::string module_name; + void *base_address; + DWORD load_size; +}; +typedef std::vector ModuleList; + +HANDLE thread_ready; + +bool show_stack(std::ostream &, HANDLE hThread, CONTEXT& c); +DWORD Filter( EXCEPTION_POINTERS *ep ); +void *load_modules_symbols( HANDLE hProcess, DWORD pid ); + + +// if you use C++ exception handling: install a translator function +// with set_se_translator(). In the context of that function (but *not* +// afterwards), you can either do your stack dump, or save the CONTEXT +// record as a local copy. Note that you must do the stack dump at the +// earliest opportunity, to avoid the interesting stack-frames being gone +// by the time you do the dump. +DWORD Filter(EXCEPTION_POINTERS *ep) { + HANDLE thread; + + DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), + GetCurrentProcess(), &thread, 0, false, DUPLICATE_SAME_ACCESS); + std::cout << "Walking stack."; + show_stack(std::cout, thread, *(ep->ContextRecord)); + std::cout << "\nEnd of stack walk.\n"; + CloseHandle(thread); + + return EXCEPTION_EXECUTE_HANDLER; +} + +class SymHandler { + HANDLE p; +public: + SymHandler(HANDLE process, char const *path=NULL, bool intrude = false) : p(process) { + if (!SymInitialize(p, path, intrude)) + throw(std::logic_error("Unable to initialize symbol handler")); + } + ~SymHandler() { SymCleanup(p); } +}; + +#ifdef _M_X64 +STACKFRAME64 init_stack_frame(CONTEXT c) { + STACKFRAME64 s; + s.AddrPC.Offset = c.Rip; + s.AddrPC.Mode = AddrModeFlat; + s.AddrStack.Offset = c.Rsp; + s.AddrStack.Mode = AddrModeFlat; + s.AddrFrame.Offset = c.Rbp; + s.AddrFrame.Mode = AddrModeFlat; + return s; +} +#else +STACKFRAME64 init_stack_frame(CONTEXT c) { + STACKFRAME64 s; + s.AddrPC.Offset = c.Eip; + s.AddrPC.Mode = AddrModeFlat; + s.AddrStack.Offset = c.Esp; + s.AddrStack.Mode = AddrModeFlat; + s.AddrFrame.Offset = c.Ebp; + s.AddrFrame.Mode = AddrModeFlat; + return s; +} +#endif + +void sym_options(DWORD add, DWORD remove=0) { + DWORD symOptions = SymGetOptions(); + symOptions |= add; + symOptions &= ~remove; + SymSetOptions(symOptions); +} + +//Returns the last Win32 error, in string format. Returns an empty string if there is no error. +std::string GetLastErrorAsString() +{ + //Get the error message ID, if any. + DWORD errorMessageID = ::GetLastError(); + if(errorMessageID == 0) { + return std::string(); //No error message has been recorded + } + + LPSTR messageBuffer = nullptr; + + //Ask Win32 to give us the string version of that message ID. + //The parameters we pass in, tell Win32 to create the buffer that holds the message for us (because we don't yet know how long the message string will be). + size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, errorMessageID, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); + + //Copy the error message into a std::string. + std::string message(messageBuffer, size); + + //Free the Win32's string's buffer. + LocalFree(messageBuffer); + + return message; +} + +class symbol { + typedef IMAGEHLP_SYMBOL64 sym_type; + sym_type *sym; + static const int max_name_len = 1024; + bool bad_sym = false; +public: + symbol(HANDLE process, DWORD64 address) : sym((sym_type *)::operator new(sizeof(*sym) + max_name_len)) { + memset(sym, '\0', sizeof(*sym) + max_name_len); + sym->SizeOfStruct = sizeof(*sym); + sym->MaxNameLength = max_name_len; + DWORD64 displacement; + + if(!SymGetSymFromAddr64(process, address, &displacement, sym)) { + bad_sym = true; + } + } + + std::string name() { return bad_sym ? "bad_sym" : std::string(sym->Name); } + std::string undecorated_name() { + if(bad_sym) return "bad_sym"; + std::vector und_name(max_name_len); + UnDecorateSymbolName(sym->Name, &und_name[0], max_name_len, UNDNAME_COMPLETE); + return std::string(&und_name[0], strlen(&und_name[0])); + } +}; + +bool show_stack(std::ostream &os, HANDLE hThread, CONTEXT& c) { + HANDLE process = GetCurrentProcess(); + int frame_number=0; + DWORD offset_from_symbol=0; + IMAGEHLP_LINE64 line = {0}; + + SymHandler handler(process, NULL, true); + + sym_options(SYMOPT_LOAD_LINES | SYMOPT_UNDNAME); + + void *base = load_modules_symbols(process, GetCurrentProcessId()); + + STACKFRAME64 s = init_stack_frame(c); + + line.SizeOfStruct = sizeof line; + + IMAGE_NT_HEADERS *h = ImageNtHeader(base); + DWORD image_type = h->FileHeader.Machine; + + do { + if (!StackWalk64(image_type, process, hThread, &s, &c, NULL, SymFunctionTableAccess64, SymGetModuleBase64, NULL)) + return false; + + os << std::setw(3) << "\n" << frame_number << "\t"; + if ( s.AddrPC.Offset != 0 ) { + std::cout << symbol(process, s.AddrPC.Offset).undecorated_name(); + + if (SymGetLineFromAddr64( process, s.AddrPC.Offset, &offset_from_symbol, &line ) ) + os << "\t" << line.FileName << "(" << line.LineNumber << ")"; + } + else + os << "(No Symbols: PC == 0)"; + ++frame_number; + } while (s.AddrReturn.Offset != 0); + return true; +} + +class get_mod_info { + HANDLE process; + static const int buffer_length = 4096; +public: + get_mod_info(HANDLE h) : process(h) {} + + module_data operator()(HMODULE module) { + module_data ret; + char temp[buffer_length]; + MODULEINFO mi; + + GetModuleInformation(process, module, &mi, sizeof(mi)); + ret.base_address = mi.lpBaseOfDll; + ret.load_size = mi.SizeOfImage; + + GetModuleFileNameEx(process, module, temp, sizeof(temp)); + ret.image_name = temp; + GetModuleBaseName(process, module, temp, sizeof(temp)); + ret.module_name = temp; + std::vector img(ret.image_name.begin(), ret.image_name.end()); + std::vector mod(ret.module_name.begin(), ret.module_name.end()); + SymLoadModule64(process, 0, &img[0], &mod[0], (DWORD64)ret.base_address, ret.load_size); + return ret; + } +}; + +void *load_modules_symbols(HANDLE process, DWORD pid) { + ModuleList modules; + + DWORD cbNeeded; + std::vector module_handles(1); + + EnumProcessModules(process, &module_handles[0], module_handles.size() * sizeof(HMODULE), &cbNeeded); + module_handles.resize(cbNeeded/sizeof(HMODULE)); + EnumProcessModules(process, &module_handles[0], module_handles.size() * sizeof(HMODULE), &cbNeeded); + + std::transform(module_handles.begin(), module_handles.end(), std::back_inserter(modules), get_mod_info(process)); + return modules[0].base_address; +} + +std::optional libenvpp_convert_string(const std::wstring& str) +{ + const auto buffer_size = + WideCharToMultiByte(CP_UTF8, 0, str.c_str(), static_cast(str.length()), nullptr, 0, nullptr, nullptr); + if (buffer_size == 0) { + return {}; + } + auto buffer = std::string(buffer_size, '\0'); + [[maybe_unused]] const auto res = WideCharToMultiByte(CP_UTF8, 0, str.c_str(), static_cast(str.length()), + buffer.data(), buffer_size, nullptr, nullptr); + assert(res == buffer_size); + return buffer; +} + +std::optional libenvpp_convert_string(const std::string& str) +{ + const auto buffer_size = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), static_cast(str.length()), nullptr, 0); + if (buffer_size == 0) { + return {}; + } + auto buffer = std::wstring(buffer_size, L'\0'); + [[maybe_unused]] const auto res = + MultiByteToWideChar(CP_UTF8, 0, str.c_str(), static_cast(str.length()), buffer.data(), buffer_size); + assert(res == buffer_size); + return buffer; +} + +[[nodiscard]] std::unordered_map libenvpp_get_environment() +{ + printf("libenvpp_get_environment " SIMSYCL_LINE_STRING "\n"); + auto env_map = std::unordered_map{}; + + const auto environment = GetEnvironmentStringsW(); + if (!environment) { + return env_map; + } + printf("libenvpp_get_environment " SIMSYCL_LINE_STRING "\n"); + + for (const auto* var = environment; *var; ++var) { + printf("libenvpp_get_environment " SIMSYCL_LINE_STRING "\n"); + auto var_name_value = std::array{}; + auto idx = std::size_t{0}; + for (; *var; ++var) { + if (idx == 0 && *var == L'=') { + ++idx; + } else { + var_name_value[idx] += *var; + } + } + printf("libenvpp_get_environment " SIMSYCL_LINE_STRING "\n"); + if (!var_name_value[0].empty()) { + env_map[*libenvpp_convert_string(var_name_value[0])] = *libenvpp_convert_string(var_name_value[1]); + } + printf("libenvpp_get_environment " SIMSYCL_LINE_STRING "\n"); + } + + [[maybe_unused]] const auto env_strings_were_freed = FreeEnvironmentStringsW(environment); + assert(env_strings_were_freed); + + return env_map; +} + + using namespace sycl; SIMSYCL_START_IGNORING_DEPRECATIONS TEST_CASE("Calls to the deprecated parallel_for signature are not ambiguous", "[ambiguity][parallel_for]") { + // spawn a thread which prints the backtrace of the current thread after 10 seconds + auto this_tread = GetCurrentThread(); + std::atomic stopped = false; + auto t = std::thread([&]() { + Sleep(5000); + if(!stopped) { + CONTEXT c; + memset(&c, 0, sizeof(CONTEXT)); + c.ContextFlags = CONTEXT_FULL; + GetThreadContext(this_tread, &c); + show_stack(std::cout, this_tread, c); + exit(0); + } + }); + + auto env = libenvpp_get_environment(); + for(auto& [k, v] : env) { + printf("%s=%s\n", k.c_str(), v.c_str()); + } + printf("\n\n"); + + printf("START\n"); queue q; + printf("q\n"); constexpr int offset = 7; + printf("1D\n"); SECTION("1D") { + printf("1D A\n"); q.submit([&](handler &cgh) { - cgh.parallel_for(range<1>{1}, id<1>{offset}, [=](id<1> i) { CHECK(i[0] == offset); }); + printf("1D B\n"); + cgh.parallel_for(range<1>{1}, id<1>{offset}, [=](id<1> i) { + printf("1D C\n"); + CHECK(i[0] == offset); + printf("1D D\n"); + }); + printf("1D E\n"); }); + printf("1D F\n"); } + printf("2D\n"); SECTION("2D") { + printf("2D A\n"); q.submit([&](handler &cgh) { - cgh.parallel_for(range<2>{1, 1}, id<2>{0, offset}, [=](id<2> i) { CHECK(i == id<2>{0, offset}); }); + printf("2D B\n"); + cgh.parallel_for(range<2>{1, 1}, id<2>{0, offset}, [=](id<2> i) { + printf("2D C\n"); + CHECK(i == id<2>{0, offset}); + printf("2D D\n"); + }); + printf("2D F\n"); }); } + printf("3D\n"); SECTION("3D") { q.submit([&](handler &cgh) { cgh.parallel_for(range<3>{1, 1, 1}, id<3>{0, offset, 0}, [=](id<3> i) { CHECK(i == id<3>{0, offset, 0}); }); }); } + printf("END\n"); + + stopped = true; + t.join(); } SIMSYCL_STOP_IGNORING_DEPRECATIONS