Skip to content

Commit

Permalink
feat: on bitcask open read hint files if present
Browse files Browse the repository at this point in the history
  • Loading branch information
eiri committed Mar 27, 2024
1 parent e6e170e commit 46cf451
Showing 1 changed file with 43 additions and 17 deletions.
60 changes: 43 additions & 17 deletions src/py_bitcask/bitcask.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class Bitcask:
HEADER_FORMAT = ">I16sLL"
header_size = 28 # struct.calcsize(HEADER_FORMAT)
HINT_FORMAT = ">16sLLL"
hint_size = 32
hint_size = 28

def __init__(self, threshold: Optional[int] = DEFAULT_THRESHOLD) -> None:
"""
Expand Down Expand Up @@ -159,24 +159,14 @@ def _read_hints(self) -> Optional[Dict[str, List[Hint]]]:
Returns None if DataDir been set to ":memory".
"""
if self.__dirname == ":memory":
return
KeyState = namedtuple("KeyState", "tstamp deleted file_id hint")
keys = {}
files = os.listdir(self.__dirname)
for file in files:
file_id, ext = os.path.splitext(file)
# TODO: check if hint file is here and read it instead
if ext != ".db":
continue
file_name = os.path.join(self.__dirname, file)
if (
os.path.isfile(file_name)
and os.path.getsize(file_name) >= self.header_size
):
current = open(file_name, "rb")
while current.tell() < os.path.getsize(file_name):

def read_data_file(file_name, keys):
with open(file_name, "rb") as current:
while True:
data = current.read(self.header_size)
if not data:
break
_, ts_bytes, key_sz, value_sz = unpack(
self.HEADER_FORMAT, data
)
Expand All @@ -188,6 +178,42 @@ def _read_hints(self) -> Optional[Dict[str, List[Hint]]]:
deleted = value_sz == 0
keys[key] = KeyState(tstamp, deleted, file_id, hint)
current.seek(value_sz, 1)

def read_hint_file(file_name, keys):
with open(file_name, "rb") as current:
while True:
data = current.read(self.hint_size)
if not data:
break
ts_bytes, key_sz, value_sz, value_pos = unpack(
self.HINT_FORMAT, data
)
tstamp = uuid.UUID(int=int.from_bytes(ts_bytes, "big"))
key = current.read(key_sz)
hint = Hint(tstamp, key_sz, value_sz, value_pos, key)
keys[key] = KeyState(tstamp, False, file_id, hint)

if self.__dirname == ":memory":
return
keys = {}
files = os.listdir(self.__dirname)
for file in files:
file_id, ext = os.path.splitext(file)
if ext != ".db":
continue
file_name = os.path.join(self.__dirname, file_id + ".hint")
if (
os.path.isfile(file_name)
and os.path.getsize(file_name) >= self.hint_size
):
read_hint_file(file_name, keys)
continue
file_name = os.path.join(self.__dirname, file)
if (
os.path.isfile(file_name)
and os.path.getsize(file_name) >= self.header_size
):
read_data_file(file_name, keys)
hint_files = {}
for key_state in keys.values():
if key_state.deleted:
Expand Down

0 comments on commit 46cf451

Please sign in to comment.