Commit

Address comments.

fchirica committed Nov 14, 2024
1 parent 3745e52 commit c308207
Showing 3 changed files with 57 additions and 26 deletions.
14 changes: 8 additions & 6 deletions chia/_tests/core/data_layer/test_data_store.py
@@ -961,27 +961,29 @@ async def test_kv_diff(data_store: DataStore, store_id: bytes32) -> None:
     insertions = 0
     expected_diff: set[DiffData] = set()
     root_start = None
-    keys: list[bytes] = []
 
     for i in range(500):
         key = (i + 100).to_bytes(4, byteorder="big")
         value = (i + 200).to_bytes(4, byteorder="big")
+        seed = leaf_hash(key=key, value=value)
+        node = await data_store.get_terminal_node_for_seed(seed, store_id)
+        side_seed = bytes(seed)[0]
+        side = None if node is None else (Side.LEFT if side_seed < 128 else Side.RIGHT)
 
         if random.randint(0, 4) > 0 or insertions < 10:
             insertions += 1
-            await data_store.autoinsert(
+            reference_node_hash = node.hash if node is not None else None
+            await data_store.insert(
                 key=key,
                 value=value,
                 store_id=store_id,
                 status=Status.COMMITTED,
+                reference_node_hash=reference_node_hash,
+                side=side,
             )
-            keys.append(key)
             if i > 200:
                 expected_diff.add(DiffData(OperationType.INSERT, key, value))
         else:
-            key = random.choice(keys)
-            keys.remove(key)
-            node = await data_store.get_node_by_key(key, store_id)
             assert isinstance(node, TerminalNode)
             await data_store.delete(key=node.key, store_id=store_id, status=Status.COMMITTED)
             if i > 200:
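The rewritten test derives both the reference node and the insertion side deterministically from the leaf hash of each key/value pair, instead of relying on autoinsert. A minimal illustrative sketch of the side-selection rule used above (sha256 is only a stand-in here; the data layer derives the seed with leaf_hash from chia.data_layer.data_layer_util):

    from hashlib import sha256

    # Stand-in for seed = leaf_hash(key=key, value=value).
    seed = sha256(b"example-key" + b"example-value").digest()

    # The first byte of the seed picks the side: < 128 means Side.LEFT, else Side.RIGHT.
    side = "LEFT" if seed[0] < 128 else "RIGHT"
    print(side)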
23 changes: 19 additions & 4 deletions chia/data_layer/data_store.py
@@ -1062,14 +1062,26 @@ async def get_keys(

         return keys
 
-    def get_reference_kid_side(self, merkle_blob: MerkleBlob, key: bytes, value: bytes) -> tuple[KVId, Side]:
-        seed = leaf_hash(key=key, value=value)
+    def get_reference_kid_side(self, merkle_blob: MerkleBlob, seed: bytes32) -> tuple[KVId, Side]:
         side_seed = bytes(seed)[0]
         side = Side.LEFT if side_seed < 128 else Side.RIGHT
         reference_node = merkle_blob.get_random_leaf_node(seed)
         kid = reference_node.key
         return (kid, side)
 
+    async def get_terminal_node_for_seed(self, seed: bytes32, store_id: bytes32) -> Optional[TerminalNode]:
+        root = await self.get_tree_root(store_id=store_id)
+        if root is None or root.node_hash is None:
+            return None
+
+        merkle_blob = await self.get_merkle_blob(root.node_hash)
+        assert not merkle_blob.empty()
+        kid, _ = self.get_reference_kid_side(merkle_blob, seed)
+        key = await self.get_blob_from_kvid(kid, store_id)
+        assert key is not None
+        node = await self.get_node_by_key(key, store_id)
+        return node
+
     async def insert(
         self,
         key: bytes,
@@ -1095,7 +1107,9 @@ async def insert(
         if not was_empty and reference_kid is None:
             if side is not None:
                 raise Exception("Side specified without reference node hash")
-            reference_kid, side = self.get_reference_kid_side(merkle_blob, key, value)
+
+            seed = leaf_hash(key=key, value=value)
+            reference_kid, side = self.get_reference_kid_side(merkle_blob, seed)
 
         try:
             merkle_blob.insert(kid, vid, hash, reference_kid, side)
@@ -1210,7 +1224,8 @@ async def insert_batch(
                     batch_hashes.append(hash)
                     continue
                 if not merkle_blob.empty():
-                    reference_kid, side = self.get_reference_kid_side(merkle_blob, key, value)
+                    seed = leaf_hash(key=key, value=value)
+                    reference_kid, side = self.get_reference_kid_side(merkle_blob, seed)
 
                 merkle_blob.insert(kid, vid, hash, reference_kid, side)
             elif change["action"] == "delete":
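Taken together, these hunks move seed computation out of get_reference_kid_side and expose get_terminal_node_for_seed, so callers can deterministically pick a reference leaf before inserting. A sketch of the resulting calling pattern, mirroring the updated test above (data_store, store_id, key, and value are assumed to be in scope, inside an async function):

    seed = leaf_hash(key=key, value=value)
    node = await data_store.get_terminal_node_for_seed(seed, store_id)
    # Same side rule as get_reference_kid_side: first seed byte < 128 -> LEFT.
    side = None if node is None else (Side.LEFT if bytes(seed)[0] < 128 else Side.RIGHT)
    await data_store.insert(
        key=key,
        value=value,
        store_id=store_id,
        status=Status.COMMITTED,
        reference_node_hash=None if node is None else node.hash,
        side=side,
    )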
46 changes: 30 additions & 16 deletions chia/data_layer/util/benchmark.py
@@ -6,17 +6,13 @@
 import tempfile
 import time
 from pathlib import Path
-from random import Random
 
-from chia.data_layer.data_layer_util import Status
+from chia.data_layer.data_layer_util import Side, Status, leaf_hash
 from chia.data_layer.data_store import DataStore
 from chia.types.blockchain_format.sized_bytes import bytes32
 
 
-async def generate_datastore(num_nodes: int, seed: int = 101) -> None:
-    random = Random()
-    random.seed(seed, version=2)
-
+async def generate_datastore(num_nodes: int) -> None:
     with tempfile.TemporaryDirectory() as temp_directory:
         temp_directory_path = Path(temp_directory)
         db_path = temp_directory_path.joinpath("dl_benchmark.sqlite")
@@ -31,40 +27,58 @@ async def generate_datastore(num_nodes: int, seed: int = 101) -> None:

             insert_time = 0.0
             insert_count = 0
+            autoinsert_time = 0.0
+            autoinsert_count = 0
             delete_time = 0.0
             delete_count = 0
-            keys: list[bytes] = []
 
             for i in range(num_nodes):
-                if i % 3 == 0 or i % 3 == 1:
-                    key = i.to_bytes(4, byteorder="big")
-                    value = (2 * i).to_bytes(4, byteorder="big")
-                    keys.append(key)
+                key = i.to_bytes(4, byteorder="big")
+                value = (2 * i).to_bytes(4, byteorder="big")
+                seed = leaf_hash(key, value)
+                node = await data_store.get_terminal_node_for_seed(seed, store_id)
+
+                if i % 3 == 0:
                     t1 = time.time()
                     await data_store.autoinsert(
                         key=key,
                         value=value,
                         store_id=store_id,
                         status=Status.COMMITTED,
                     )
                     t2 = time.time()
+                    autoinsert_time += t2 - t1
+                    autoinsert_count += 1
+                elif i % 3 == 1:
+                    assert node is not None
+                    reference_node_hash = node.hash
+                    side_seed = bytes(seed)[0]
+                    side = Side.LEFT if side_seed < 128 else Side.RIGHT
+                    t1 = time.time()
+                    await data_store.insert(
+                        key=key,
+                        value=value,
+                        store_id=store_id,
+                        reference_node_hash=reference_node_hash,
+                        side=side,
+                        status=Status.COMMITTED,
+                    )
+                    t2 = time.time()
                     insert_time += t2 - t1
                     insert_count += 1
                 else:
-                    key = random.choice(keys)
-                    keys.remove(key)
                     t1 = time.time()
-                    await data_store.delete(key=key, store_id=store_id, status=Status.COMMITTED)
+                    await data_store.delete(key=node.key, store_id=store_id, status=Status.COMMITTED)
                     t2 = time.time()
                     delete_time += t2 - t1
                     delete_count += 1

print(f"Average insert time: {insert_time / insert_count}")
print(f"Average autoinsert time: {autoinsert_time / autoinsert_count}")
print(f"Average delete time: {delete_time / delete_count}")
print(f"Total time for {num_nodes} operations: {insert_time + delete_time}")
print(f"Total time for {num_nodes} operations: {insert_time + delete_time + autoinsert_time}")
root = await data_store.get_tree_root(store_id=store_id)
print(f"Root hash: {root.node_hash}")


if __name__ == "__main__":
asyncio.run(generate_datastore(int(sys.argv[1]), int(sys.argv[2])))
asyncio.run(generate_datastore(int(sys.argv[1])))
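With the seed parameter gone, the benchmark takes a single command-line argument: the number of operations, which the loop splits roughly into thirds of autoinserts, inserts at a seed-derived reference node, and deletes. Example invocation from the repository root (timings will vary by machine):

    python chia/data_layer/util/benchmark.py 1000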
