Skip to content

Commit

Permalink
Improve cstring cache internals (p4lang#4780)
Browse files Browse the repository at this point in the history
* Switch to abseil node_hash_set for cstring internal hash. Reduce number of lookups while there.

Signed-off-by: Anton Korobeynikov <[email protected]>

* Import get_cached method

Signed-off-by: Anton Korobeynikov <[email protected]>

* Usual "unbreak Bazel" commit

Signed-off-by: Anton Korobeynikov <[email protected]>

* Apply suggestions from code review

Co-authored-by: Vladimír Štill <[email protected]>
Signed-off-by: Anton Korobeynikov <[email protected]>

---------

Signed-off-by: Anton Korobeynikov <[email protected]>
Co-authored-by: Vladimír Štill <[email protected]>
  • Loading branch information
asl and vlstill authored Jul 4, 2024
1 parent a484745 commit db050c6
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 20 deletions.
2 changes: 2 additions & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ cc_library(
":config_h",
"@boost//:format",
"@boost//:multiprecision",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:node_hash_set",
"@com_google_absl//absl/numeric:bits",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
Expand Down
60 changes: 40 additions & 20 deletions lib/cstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ limitations under the License.

#include "cstring.h"

#include "absl/container/node_hash_set.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_replace.h"

Expand Down Expand Up @@ -135,45 +136,64 @@ class table_entry {
return length() == other.length() && std::memcmp(string(), other.string(), length()) == 0;
}

bool operator==(std::string_view other) const {
return length() == other.length() && std::memcmp(string(), other.data(), length()) == 0;
}

private:
bool is_inplace() const {
return (m_flags & table_entry_flags::inplace) == table_entry_flags::inplace;
}
};
} // namespace

namespace std {
template <>
struct hash<table_entry> {
std::size_t operator()(const table_entry &entry) const {
// We'd make Util::Hash to be transparent instead. However, this would enable
// transparent hashing globally and in some cases in very undesired manner. So
// for now aim for more fine-grained approach.
struct TableEntryHash {
using is_transparent = void;

// IMPORTANT: These two hashes MUST match in order for heterogenous
// lookup to work properly
size_t operator()(const table_entry &entry) const {
return Util::hash(entry.string(), entry.length());
}

size_t operator()(std::string_view entry) const {
return Util::hash(entry.data(), entry.length());
}
};
} // namespace std

namespace {
std::unordered_set<table_entry> &cache() {
static std::unordered_set<table_entry> g_cache;
auto &cache() {
// We need node_hash_set due to SSO: we return address of embedded string
// that should be stable
static absl::node_hash_set<table_entry, TableEntryHash, std::equal_to<>> g_cache;

return g_cache;
}

const char *save_to_cache(const char *string, std::size_t length, table_entry_flags flags) {
if ((flags & table_entry_flags::no_need_copy) == table_entry_flags::no_need_copy) {
return cache().emplace(string, length, flags).first->string();
}
// Checks if string is already cached and if not, calls ctor to construct in
// place. As a result, only a single lookup is performed regardless whether
// entry is in cache or not.
return cache()
.lazy_emplace(table_entry(string, length, table_entry_flags::no_need_copy),
[string, length, flags](const auto &ctor) { ctor(string, length, flags); })
->string();
}

// temporary table_entry, used for searching only. no need to copy string
auto found = cache().find(table_entry(string, length, table_entry_flags::no_need_copy));
} // namespace

if (found == cache().end()) {
return cache().emplace(string, length, flags).first->string();
}
bool cstring::is_cached(std::string_view s) { return cache().contains(s); }

return found->string();
}
cstring cstring::get_cached(std::string_view s) {
auto entry = cache().find(s);
if (entry == cache().end()) return nullptr;

} // namespace
cstring res;
res.str = entry->string();

return res;
}

void cstring::construct_from_shared(const char *string, std::size_t length) {
str = save_to_cache(string, length, table_entry_flags::none);
Expand Down
5 changes: 5 additions & 0 deletions lib/cstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@ class cstring {
return result;
}

/// @return true if a given string is interned (contained in cstring cache)
static bool is_cached(std::string_view s);
/// @return corresponding cstring if it was interned, null cstring otherwise
static cstring get_cached(std::string_view s);

private:
// passed string is shared, we not unique owners
void construct_from_shared(const char *string, std::size_t length);
Expand Down
15 changes: 15 additions & 0 deletions test/gtest/cstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,19 @@ TEST(cstring, literalSuffix) {
EXPECT_TRUE((std::is_same_v<cstring, decltype(""_cs)>));
}

TEST(cstring, is_cached) {
[[maybe_unused]] cstring test = "test"_cs;
EXPECT_FALSE(
cstring::is_cached("we really do not expect that this string is already in cstring cache"));
EXPECT_TRUE(cstring::is_cached("test"));
}

TEST(cstring, get_cached) {
[[maybe_unused]] cstring test = "test"_cs;
EXPECT_TRUE(
cstring::get_cached("we really do not expect that this string is already in cstring cache")
.isNull());
EXPECT_FALSE(cstring::get_cached("test").isNullOrEmpty());
}

} // namespace Test

0 comments on commit db050c6

Please sign in to comment.