From 2e20dc19106cff88dbbe217d4f9faf27c39ddae0 Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Thu, 30 Jan 2025 19:45:24 -0800 Subject: [PATCH 1/3] hack mm hash Signed-off-by: Chen Zhang --- tests/v1/core/test_kv_cache_utils.py | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py index f4081766e39a2..dc820705f3575 100644 --- a/tests/v1/core/test_kv_cache_utils.py +++ b/tests/v1/core/test_kv_cache_utils.py @@ -227,6 +227,40 @@ def test_hash_request_tokens(): assert block_hashes[1].extra_keys == ("hash2", ) +def test_hash_tokens_with_mm_input(): + request1 = make_request( + request_id=0, + prompt_token_ids=[_ for _ in range(6)], + mm_positions=[{ + "offset": 0, + "length": 3 + }, { + "offset": 3, + "length": 3 + }], + mm_hashes=["hash1", "hash2"], + ) + request2 = make_request( + request_id=1, + prompt_token_ids=[_ for _ in range(6)], + mm_positions=[{ + "offset": 0, + "length": 3 + }, { + "offset": 3, + "length": 3 + }], + mm_hashes=["hash3", "hash2"], + ) + block_size = 3 + block_hashes1 = hash_request_tokens(block_size, request1) + block_hashes2 = hash_request_tokens(block_size, request2) + print(block_hashes1) + print(block_hashes2) + assert block_hashes1[0] != block_hashes2[0] + assert block_hashes1[1] != block_hashes2[1] + + def test_hash_request_tokens_no_mm_inputs(): request = make_request( request_id=0, From 57bd6e2753650869f6982e9bcf125cf2bfa4b0a9 Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Thu, 30 Jan 2025 21:50:59 -0800 Subject: [PATCH 2/3] add extra key to hash Signed-off-by: Chen Zhang --- tests/v1/core/test_kv_cache_utils.py | 6 ++---- vllm/v1/core/kv_cache_utils.py | 5 +++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py index dc820705f3575..0a5ba1f98221f 100644 --- a/tests/v1/core/test_kv_cache_utils.py +++ b/tests/v1/core/test_kv_cache_utils.py @@ -192,7 +192,7 @@ def test_hash_block_tokens(): extra_keys) assert isinstance(block_hash, BlockHashType) assert block_hash.hash_value == hash( - (parent_block_hash, *curr_block_token_ids)) + (parent_block_hash, curr_block_token_ids, extra_keys)) assert block_hash.token_ids == curr_block_token_ids assert block_hash.extra_keys == extra_keys @@ -227,7 +227,7 @@ def test_hash_request_tokens(): assert block_hashes[1].extra_keys == ("hash2", ) -def test_hash_tokens_with_mm_input(): +def test_hash_tokens_different_mm_input(): request1 = make_request( request_id=0, prompt_token_ids=[_ for _ in range(6)], @@ -255,8 +255,6 @@ def test_hash_tokens_with_mm_input(): block_size = 3 block_hashes1 = hash_request_tokens(block_size, request1) block_hashes2 = hash_request_tokens(block_size, request2) - print(block_hashes1) - print(block_hashes2) assert block_hashes1[0] != block_hashes2[0] assert block_hashes1[1] != block_hashes2[1] diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py index dbdda51aedaa0..12d6681ea6535 100644 --- a/vllm/v1/core/kv_cache_utils.py +++ b/vllm/v1/core/kv_cache_utils.py @@ -262,8 +262,9 @@ def hash_block_tokens( The hash value of the block and the token ids in the block. The entire tuple is used as the hash key of the block. """ - return BlockHashType(hash((parent_block_hash, *curr_block_token_ids)), - tuple(curr_block_token_ids), extra_keys) + return BlockHashType( + hash((parent_block_hash, tuple(curr_block_token_ids), extra_keys)), + tuple(curr_block_token_ids), extra_keys) def hash_request_tokens(block_size: int, From 03686c57bed96547576cc4e0189b094573627d50 Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Thu, 30 Jan 2025 21:53:04 -0800 Subject: [PATCH 3/3] small fix Signed-off-by: Chen Zhang --- vllm/v1/core/kv_cache_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py index 12d6681ea6535..2b6557ad3ce66 100644 --- a/vllm/v1/core/kv_cache_utils.py +++ b/vllm/v1/core/kv_cache_utils.py @@ -262,9 +262,10 @@ def hash_block_tokens( The hash value of the block and the token ids in the block. The entire tuple is used as the hash key of the block. """ + curr_block_token_ids_tuple = tuple(curr_block_token_ids) return BlockHashType( - hash((parent_block_hash, tuple(curr_block_token_ids), extra_keys)), - tuple(curr_block_token_ids), extra_keys) + hash((parent_block_hash, curr_block_token_ids_tuple, extra_keys)), + curr_block_token_ids_tuple, extra_keys) def hash_request_tokens(block_size: int,