diff --git a/include/dwarfs/util.h b/include/dwarfs/util.h index cbdd767e..eda54763 100644 --- a/include/dwarfs/util.h +++ b/include/dwarfs/util.h @@ -55,6 +55,7 @@ void utf8_sanitize(std::string& str); void shorten_path_string(std::string& path, char separator, size_t max_len); std::filesystem::path canonical_path(std::filesystem::path p); +std::string path_to_utf8_string_sanitized(std::filesystem::path const& p); /* declares the helper whose definition this patch adds to src/util.cpp */ bool getenv_is_enabled(char const* var); diff --git a/include/dwarfs/writer/internal/entry.h b/include/dwarfs/writer/internal/entry.h index bca8fd24..1ce53f2e 100644 --- a/include/dwarfs/writer/internal/entry.h +++ b/include/dwarfs/writer/internal/entry.h @@ -121,6 +121,9 @@ class entry : public entry_interface { private: std::u8string u8name() const; +#ifdef _WIN32 + std::filesystem::path path_; /* compiled only under _WIN32; set in the entry constructor and read by fs_path() (see the entry.cpp hunks of this patch) */ +#endif std::string name_; std::weak_ptr parent_; file_stat stat_; diff --git a/src/util.cpp b/src/util.cpp index eac79ada..7d0427bd 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -26,6 +26,7 @@ #include #include #include +#include /* NOTE(review): the header name is missing on this and the surrounding #include lines, and __has_include() has no argument; presumably stripped in extraction; confirm against the repository */ #if __has_include() #include @@ -264,6 +265,25 @@ std::filesystem::path canonical_path(std::filesystem::path p) { return p; } +std::string path_to_utf8_string_sanitized(std::filesystem::path const& p) { /* Converts p to a std::string. Under _WIN32 the if-constexpr branch converts p.native() via WideCharToMultiByte(CP_UTF8, ...); otherwise the result is u8string_to_string(p.u8string()). */ +#ifdef _WIN32 + if constexpr (std::is_same_v) { /* NOTE(review): std::is_same_v has no template arguments here; presumably stripped in extraction; confirm against the repository */ + auto const& in = p.native(); + if (in.empty()) { + return {}; /* empty native string: return an empty std::string without calling WideCharToMultiByte */ + } + int size_needed = ::WideCharToMultiByte( /* sizing call: the output buffer is NULL with length 0 and the result is used as the size of out below */ + CP_UTF8, 0, in.data(), (int)in.size(), NULL, 0, NULL, NULL); /* NOTE(review): in.size() is narrowed to int and the returned count is not checked before use */ + std::string out(size_needed, 0); + ::WideCharToMultiByte(CP_UTF8, 0, in.data(), (int)in.size(), &out[0], /* second call writes into out; its return value is discarded */ + size_needed, NULL, NULL); + return out; + } +#endif + + return u8string_to_string(p.u8string()); +} + bool getenv_is_enabled(char const* var) { if (auto val = std::getenv(var)) { if (auto maybeBool = try_to(val); maybeBool && *maybeBool) { diff --git a/src/writer/internal/entry.cpp b/src/writer/internal/entry.cpp index 201cee16..99035b84 100644 --- a/src/writer/internal/entry.cpp +++ 
b/src/writer/internal/entry.cpp @@ -58,20 +58,19 @@ bool is_root_path(std::string_view path) { #endif } -std::string entry_name(fs::path const& path, bool has_parent) { - if (has_parent) { - return u8string_to_string(path.filename().u8string()); - } - return u8string_to_string(path.u8string()); -} - } // namespace entry::entry(fs::path const& path, std::shared_ptr parent, file_stat const& st) - : name_{entry_name(path, static_cast(parent))} +#ifdef _WIN32 + : path_{parent ? path.filename() : path} /* only the filename component is stored when a parent exists; otherwise the path is stored unchanged */ + , name_{path_to_utf8_string_sanitized(path_)} /* path_ is declared before name_ in entry.h, so it is initialised before being read here */ +#else + : name_{path_to_utf8_string_sanitized(parent ? path.filename() : path)} /* same filename-or-whole-path choice as the _WIN32 branch, without storing the path */ +#endif , parent_{std::move(parent)} - , stat_{st} {} + , stat_{st} { +} bool entry::has_parent() const { if (parent_.lock()) { @@ -88,11 +87,17 @@ void entry::set_name(const std::string& name) { name_ = name; } std::u8string entry::u8name() const { return string_to_u8string(name_); } fs::path entry::fs_path() const { +#ifdef _WIN32 + fs::path self = path_; /* _WIN32 builds use the stored path_ instead of converting name_ */ +#else + fs::path self = name_; +#endif + if (auto parent = parent_.lock()) { - return parent->fs_path() / u8name(); + return parent->fs_path() / self; } - return fs::path(u8name()); + return self; } std::string entry::path_as_string() const { diff --git a/src/writer/scanner.cpp b/src/writer/scanner.cpp index eece194b..dd5a91a5 100644 --- a/src/writer/scanner.cpp +++ b/src/writer/scanner.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -330,6 +331,7 @@ class scanner_ final : public scanner::impl { os_access const& os_; std::vector> filters_; std::vector> transformers_; + std::unordered_set invalid_filenames_; /* filled in add_entry with the sanitized string of each path whose filename().u8string() threw */ }; template @@ -362,6 +364,27 @@ scanner_::add_entry(std::filesystem::path const& name, file_scanner& fs, bool debug_filter) { try { auto pe = entry_factory_.create(os_, name, parent); + + if constexpr (!std::is_same_v) { + try { + auto tmp [[maybe_unused]] = name.filename().u8string(); /* the value is unused; the call is made so that a std::system_error from the conversion reaches the catch below */ + } catch (std::system_error const& e) { + LOG_ERROR << fmt::format( + 
"invalid file name in \"{}\", storing as \"{}\": {}", + path_to_utf8_string_sanitized(name.parent_path()), pe->name(), + e.what()); + + prog.errors++; /* the error counter is bumped even when execution continues with this entry */ + + if (!invalid_filenames_.emplace(path_to_utf8_string_sanitized(name)) + .second) { /* emplace(...).second is false when this sanitized string was recorded before */ + LOG_ERROR << fmt::format( + "cannot store \"{}\" as the name already exists", pe->name()); + return nullptr; + } + } + } + bool const exclude = std::any_of(filters_.begin(), filters_.end(), [&pe](auto const& f) { return f->filter(*pe) == filter_action::remove; @@ -452,7 +475,8 @@ scanner_::add_entry(std::filesystem::path const& name, return pe; } catch (const std::system_error& e) { - LOG_ERROR << fmt::format("error reading entry (path={}): {}", name.string(), + LOG_ERROR << fmt::format("error reading entry (path={}): {}", + path_to_utf8_string_sanitized(name), /* used here in place of name.string() */ exception_str(e)); prog.errors++; }