Skip to content

Commit

Permalink
rename asin_str2int to fixed_len_10_str2int
Browse files Browse the repository at this point in the history
  • Loading branch information
vpung committed Aug 14, 2024
1 parent a0a3fd8 commit cc1fd3f
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .github/style_type_check_cfg/.flake8
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[flake8]
ignore = E203,E501,W605,F541
extend-ignore = E203,E501,W605,F541
max_line_length = 100
6 changes: 3 additions & 3 deletions pecos/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2070,7 +2070,7 @@ def link_mmap_hashmap_methods(self):
Specify C-lib's Memory-mappable Hashmap methods arguments and return types.
"""
fn_prefix = "mmap_hashmap"
map_type_list = ["str2int", "fixed_len_str2int", "asin_str2int", "int2int"]
map_type_list = ["str2int", "fixed_len_str2int", "fixed_len_10_str2int", "int2int"]
key_args_dict = {
"str2int": [
c_char_p, # pointer of key string
Expand All @@ -2080,7 +2080,7 @@ def link_mmap_hashmap_methods(self):
c_char_p, # pointer of key string
c_uint32, # length of key string
],
"asin_str2int": [
"fixed_len_10_str2int": [
c_char_p, # pointer of key string
c_uint32, # length of key string
],
Expand All @@ -2097,7 +2097,7 @@ def link_mmap_hashmap_methods(self):
c_void_p, # List of pointer of key string
POINTER(c_uint32), # List of length of key string
],
"asin_str2int": [
"fixed_len_10_str2int": [
c_void_p, # List of pointer of key string
POINTER(c_uint32), # List of length of key string
],
Expand Down
22 changes: 11 additions & 11 deletions pecos/core/libpecos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ extern "C" {

typedef pecos::mmap_hashmap::Str2IntMap<pecos::mmap_hashmap::details_::AnkerlStr2IntMmapableVector> mmap_hashmap_str2int;
typedef pecos::mmap_hashmap::Str2IntMap<pecos::mmap_hashmap::details_::AnkerlFixedLenStr2IntMmapableVector> mmap_hashmap_fixed_len_str2int;
typedef pecos::mmap_hashmap::Str2IntMap<pecos::mmap_hashmap::details_::AnkerlAsinStr2IntMmapableVector> mmap_hashmap_asin_str2int;
typedef pecos::mmap_hashmap::Str2IntMap<pecos::mmap_hashmap::details_::AnkerlFixedLen10Str2IntMmapableVector> mmap_hashmap_fixed_len_10_str2int;
typedef pecos::mmap_hashmap::Int2IntMap mmap_hashmap_int2int;

// New
Expand All @@ -672,7 +672,7 @@ extern "C" {
return static_cast<void*>(new mmap_hashmap_ ## SUFFIX()); }
MMAP_MAP_NEW(str2int)
MMAP_MAP_NEW(fixed_len_str2int)
MMAP_MAP_NEW(asin_str2int)
MMAP_MAP_NEW(fixed_len_10_str2int)
MMAP_MAP_NEW(int2int)

// Destruct
Expand All @@ -681,7 +681,7 @@ extern "C" {
delete static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr); }
MMAP_MAP_DESTRUCT(str2int)
MMAP_MAP_DESTRUCT(fixed_len_str2int)
MMAP_MAP_DESTRUCT(asin_str2int)
MMAP_MAP_DESTRUCT(fixed_len_10_str2int)
MMAP_MAP_DESTRUCT(int2int)

// Save
Expand All @@ -690,7 +690,7 @@ extern "C" {
static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->save(map_dir); }
MMAP_MAP_SAVE(str2int)
MMAP_MAP_SAVE(fixed_len_str2int)
MMAP_MAP_SAVE(asin_str2int)
MMAP_MAP_SAVE(fixed_len_10_str2int)
MMAP_MAP_SAVE(int2int)

// Load
Expand All @@ -701,7 +701,7 @@ extern "C" {
return static_cast<void *>(map_ptr); }
MMAP_MAP_LOAD(str2int)
MMAP_MAP_LOAD(fixed_len_str2int)
MMAP_MAP_LOAD(asin_str2int)
MMAP_MAP_LOAD(fixed_len_10_str2int)
MMAP_MAP_LOAD(int2int)

// Size
Expand All @@ -710,7 +710,7 @@ extern "C" {
return static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->size(); }
MMAP_MAP_SIZE(str2int)
MMAP_MAP_SIZE(fixed_len_str2int)
MMAP_MAP_SIZE(asin_str2int)
MMAP_MAP_SIZE(fixed_len_10_str2int)
MMAP_MAP_SIZE(int2int)

// Insert
Expand All @@ -720,7 +720,7 @@ extern "C" {
static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->insert(FUNC_CALL_KEY, val); }
MMAP_MAP_INSERT(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_INSERT(fixed_len_str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_INSERT(asin_str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_INSERT(fixed_len_10_str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_INSERT(int2int, uint64_t key, key)

// Get
Expand All @@ -729,23 +729,23 @@ extern "C" {
return static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->get(FUNC_CALL_KEY); }
MMAP_MAP_GET(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_GET(fixed_len_str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_GET(asin_str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_GET(fixed_len_10_str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_GET(int2int, uint64_t key, key)

// Get with default
// Generates the C-linkage wrapper mmap_hashmap_get_w_default_<SUFFIX>.
//   SUFFIX        - map type suffix; selects the mmap_hashmap_<SUFFIX> type that map_ptr is cast to.
//   KEY           - parameter declaration(s) for the key as they appear in the wrapper's signature.
//   FUNC_CALL_KEY - the matching key argument(s) forwarded to the map's get_w_default().
// The wrapper returns whatever get_w_default(FUNC_CALL_KEY, def_val) returns
// (presumably def_val when the key is absent -- confirm against the map implementation).
#define MMAP_MAP_GET_W_DEFAULT(SUFFIX, KEY, FUNC_CALL_KEY) \
uint64_t mmap_hashmap_get_w_default_ ## SUFFIX (void* map_ptr, KEY, uint64_t def_val) { \
return static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->get_w_default(FUNC_CALL_KEY, def_val); }
MMAP_MAP_GET_W_DEFAULT(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_GET_W_DEFAULT(fixed_len_str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_GET_W_DEFAULT(asin_str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_GET_W_DEFAULT(fixed_len_10_str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_GET_W_DEFAULT(int2int, uint64_t key, key)

// Batch get with default
// Generates the C-linkage wrapper mmap_hashmap_batch_get_w_default_<SUFFIX>.
//   SUFFIX        - map type suffix; selects the mmap_hashmap_<SUFFIX> type that map_ptr is cast to.
//   KEY           - parameter declaration(s) for the batch of keys in the wrapper's signature.
//   FUNC_CALL_KEY - the matching key argument(s) forwarded to the map's batch_get_w_default().
// The wrapper returns nothing; n_key, def_val, vals and threads are passed through unchanged.
// NOTE(review): vals looks like a caller-provided output buffer of n_key entries and threads
// like a worker-thread count -- confirm against batch_get_w_default() in mmap_hashmap.hpp.
#define MMAP_MAP_BATCH_GET_W_DEFAULT(SUFFIX, KEY, FUNC_CALL_KEY) \
void mmap_hashmap_batch_get_w_default_ ## SUFFIX (void* map_ptr, const uint32_t n_key, KEY, uint64_t def_val, uint64_t* vals, const int threads) { \
static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->batch_get_w_default(n_key, FUNC_CALL_KEY, def_val, vals, threads); }
MMAP_MAP_BATCH_GET_W_DEFAULT(str2int, KEY_SINGLE_ARG(const char* const* keys, const uint32_t* keys_lens), KEY_SINGLE_ARG(keys, keys_lens))
MMAP_MAP_BATCH_GET_W_DEFAULT(fixed_len_str2int, KEY_SINGLE_ARG(const char* const* keys, const uint32_t* keys_lens), KEY_SINGLE_ARG(keys, keys_lens))
MMAP_MAP_BATCH_GET_W_DEFAULT(asin_str2int, KEY_SINGLE_ARG(const char* const* keys, const uint32_t* keys_lens), KEY_SINGLE_ARG(keys, keys_lens))
MMAP_MAP_BATCH_GET_W_DEFAULT(fixed_len_10_str2int, KEY_SINGLE_ARG(const char* const* keys, const uint32_t* keys_lens), KEY_SINGLE_ARG(keys, keys_lens))
MMAP_MAP_BATCH_GET_W_DEFAULT(int2int, const uint64_t* key, key)

// Contains
Expand All @@ -754,7 +754,7 @@ extern "C" {
return static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->contains(FUNC_CALL_KEY); }
MMAP_MAP_CONTAINS(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_CONTAINS(fixed_len_str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_CONTAINS(asin_str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_CONTAINS(fixed_len_10_str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_CONTAINS(int2int, uint64_t key, key)


Expand Down
43 changes: 25 additions & 18 deletions pecos/core/utils/mmap_hashmap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,15 +285,20 @@ class AnkerlFixedLenStr2IntMmapableVector {
std::forward_as_tuple(size_),
std::forward< std::tuple<Args...> >(args));


size_type key_length = k.size();

// Length of new key should be the same as previous keys
if (fixed_str_len_ != -1 && fixed_str_len_ != k.size()) {
throw std::runtime_error("String length differs from previous keys.");
if (key_length == 0) {
throw std::runtime_error("Key length should be greater than 0.");
} else if (fixed_str_len_ != 0 && fixed_str_len_ != key_length) {
throw std::runtime_error("Key length differs from previous keys.");
} else {
fixed_str_len_ = k.size();
fixed_str_len_ = key_length;
}

// Append key string
str_store_.insert(str_store_.end(), k.data(), k.data() + k.size());
str_store_.insert(str_store_.end(), k.data(), k.data() + key_length);

// Update pointers
size_ = store_.size();
Expand Down Expand Up @@ -349,7 +354,7 @@ class AnkerlFixedLenStr2IntMmapableVector {
value_type* data_ = nullptr;
char* str_data_ = nullptr;

size_type fixed_str_len_ = -1;
size_type fixed_str_len_ = 0;

// Actual data storage for in-memory case
std::vector<value_type> store_;
Expand Down Expand Up @@ -435,16 +440,18 @@ class AnkerlFixedLenStr2IntMmapableVector {
// Memory-mappable vector of std::pair<StrView, uint64_t> for Ankerl
// This vector takes/gets std::string_view as the key, but emplaces it back in the special mmap format StrView
// The key must be of length 10
class AnkerlAsinStr2IntMmapableVector {
class AnkerlFixedLen10Str2IntMmapableVector {
template <bool IsConst>
class iter_t;

static constexpr std::size_t fixed_str_len = 10;

struct StrView {
char str[10];
char str[fixed_str_len];

StrView(const char* input_str = nullptr) {
if (input_str) {
std::strncpy(str, input_str, 10);
std::memcpy(str, input_str, fixed_str_len);
}
}
};
Expand All @@ -463,8 +470,8 @@ class AnkerlAsinStr2IntMmapableVector {
using iterator = iter_t<false>;
using const_iterator = iter_t<true>;

AnkerlAsinStr2IntMmapableVector() = default;
AnkerlAsinStr2IntMmapableVector(allocator_type alloc)
AnkerlFixedLen10Str2IntMmapableVector() = default;
AnkerlFixedLen10Str2IntMmapableVector(allocator_type alloc)
: store_(alloc) {}

value_type* data() { return data_; }
Expand Down Expand Up @@ -493,12 +500,12 @@ class AnkerlAsinStr2IntMmapableVector {
// Extract key
key_type key_string = std::get<0>(key);

if (key_string.size() != 10) {
if (key_string.size() != fixed_str_len) {
throw std::runtime_error("ASIN string length is not 10.");
}

char key_arr[10];
std::strncpy(key_arr, key_string.data(), key_string.size());
char key_arr[fixed_str_len];
std::memcpy(key_arr, key_string.data(), key_string.size());

// Emplace back std::pair<StrView, uint64_t>
auto eb_val = store_.emplace_back(
Expand All @@ -524,7 +531,7 @@ class AnkerlAsinStr2IntMmapableVector {

/* Get key for given member */
key_type get_key(value_type const& vt) const {
return key_type(vt.first.str, 10);
return key_type(vt.first.str, fixed_str_len);
}

/* Mmap save/load with MmapStore */
Expand Down Expand Up @@ -564,20 +571,20 @@ class AnkerlAsinStr2IntMmapableVector {
template <bool IsConst>
class iter_t {
using ptr_t = typename std::conditional_t<IsConst,
AnkerlAsinStr2IntMmapableVector::const_pointer, AnkerlAsinStr2IntMmapableVector::pointer>;
AnkerlFixedLen10Str2IntMmapableVector::const_pointer, AnkerlFixedLen10Str2IntMmapableVector::pointer>;
ptr_t iter_data_{};

template <bool B>
friend class iter_t;

public:
using iterator_category = std::forward_iterator_tag;
using difference_type = AnkerlAsinStr2IntMmapableVector::difference_type;
using value_type = AnkerlAsinStr2IntMmapableVector::value_type;
using difference_type = AnkerlFixedLen10Str2IntMmapableVector::difference_type;
using value_type = AnkerlFixedLen10Str2IntMmapableVector::value_type;
using reference = typename std::conditional_t<IsConst,
value_type const&, value_type&>;
using pointer = typename std::conditional_t<IsConst,
AnkerlAsinStr2IntMmapableVector::const_pointer, AnkerlAsinStr2IntMmapableVector::pointer>;
AnkerlFixedLen10Str2IntMmapableVector::const_pointer, AnkerlFixedLen10Str2IntMmapableVector::pointer>;

iter_t() noexcept = default;

Expand Down
12 changes: 10 additions & 2 deletions pecos/utils/mmap_hashmap_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,11 @@ def init(cls, map_type, map_dir, lazy_load):
fn_dict = clib.mmap_hashmap_init(map_type)
map_ptr = fn_dict["load"](map_dir.encode("utf-8"), lazy_load)

if map_type == "str2int" or map_type == "fixed_len_str2int" or map_type == "asin_str2int":
if (
map_type == "str2int"
or map_type == "fixed_len_str2int"
or map_type == "fixed_len_10_str2int"
):
return _MmapHashmapStr2IntReadOnly(map_ptr, fn_dict)
elif map_type == "int2int":
return _MmapHashmapInt2IntReadOnly(map_ptr, fn_dict)
Expand Down Expand Up @@ -340,7 +344,11 @@ def init(cls, map_type, map_dir):
fn_dict = clib.mmap_hashmap_init(map_type)
map_ptr = fn_dict["new"]()

if map_type == "str2int" or map_type == "fixed_len_str2int" or map_type == "asin_str2int":
if (
map_type == "str2int"
or map_type == "fixed_len_str2int"
or map_type == "fixed_len_10_str2int"
):
return _MmapHashmapStr2IntWrite(map_ptr, fn_dict, map_dir)
elif map_type == "int2int":
return _MmapHashmapInt2IntWrite(map_ptr, fn_dict, map_dir)
Expand Down
8 changes: 4 additions & 4 deletions test/pecos/utils/test_mmap_hashmap_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,14 @@ def test_fixed_len_str2int_mmap_hashmap(tmpdir):
assert r_map_batch_getter.get(ks, 10).tolist() == vs


def test_asin_str2int_mmap_hashmap(tmpdir):
def test_fixed_len_10_str2int_mmap_hashmap(tmpdir):
from pecos.utils.mmap_hashmap_util import MmapHashmap, MmapHashmapBatchGetter

map_dir = tmpdir.join("asin_str2int").realpath().strpath
map_dir = tmpdir.join("fixed_len_10_str2int").realpath().strpath
kv_dict = {"aaaaaaaaaa".encode("utf-8"): 2, "bbbbbbbbbb".encode("utf-8"): 3}

# Write-only Mode
w_map = MmapHashmap("asin_str2int")
w_map = MmapHashmap("fixed_len_10_str2int")
w_map.open("w", map_dir)
# Insert
w_map.map.insert("aaaaaaaaaa".encode("utf-8"), 1) # Test for overwrite later
Expand All @@ -139,7 +139,7 @@ def test_asin_str2int_mmap_hashmap(tmpdir):
w_map.close()

# Read-only Mode
r_map = MmapHashmap("asin_str2int")
r_map = MmapHashmap("fixed_len_10_str2int")
r_map.open("r", map_dir)
# Get
for k, v in kv_dict.items():
Expand Down

0 comments on commit cc1fd3f

Please sign in to comment.