diff --git a/src/cpp/src/block_manager.hpp b/src/cpp/src/block_manager.hpp index 3e80217f14..7f12bfd8d6 100644 --- a/src/cpp/src/block_manager.hpp +++ b/src/cpp/src/block_manager.hpp @@ -251,52 +251,17 @@ class BlockManager { return m_block_table[seq_id]; } - const size_t free_rightest_blocks(SequenceGroup::Ptr sequence_group) { - size_t blocks_released = 0; + const size_t free_group_partially(SequenceGroup::Ptr sequence_group, size_t num_required_blocks) { + size_t blocks_num = std::ceil(num_required_blocks / sequence_group->get_not_finished_sequences().size()); auto running_sequences = sequence_group->get_not_finished_sequences(); std::set blocks_released_indices; for (size_t idx = 0; idx < running_sequences.size(); ++idx) { auto seq_id = running_sequences[idx]->get_id(); OPENVINO_ASSERT(m_block_table.count(seq_id) > 0, "Invalid sequence group."); auto block_table = m_block_table[seq_id]; - if (free_last_block(seq_id)) { - blocks_released++; - } - } - return blocks_released; - } - - const bool free_group_partially_multiple_runnning_sequence(SequenceGroup::Ptr sequence_group, size_t num_required_blocks, size_t& phisical_blocks_released, size_t& logical_blocks_released) { - phisical_blocks_released = 0; - logical_blocks_released = 0; - while (num_required_blocks > phisical_blocks_released) { - size_t released_count = free_rightest_blocks(sequence_group); - logical_blocks_released += 1; - if (get_number_of_blocks_occupied_by_sequence(sequence_group) == 0) { - break; - } - phisical_blocks_released += released_count; + free_sequence_partially(seq_id, blocks_num); } - return num_required_blocks <= phisical_blocks_released; - } - - const bool free_group_partially_single_runnning_sequence(SequenceGroup::Ptr sequence_group, size_t num_required_blocks, size_t& phisical_blocks_released) { - auto sequences = sequence_group->get_not_finished_sequences(); - OPENVINO_ASSERT(sequences.size() == 1); - auto running_sequence = sequences[0]; - auto seq_id = running_sequence->get_id(); - if (!has_block_table(seq_id)) { - // no blocks are allocated for this sequence, so it can't be preempted - return false; - } - auto block_table = get_block_table(seq_id); - auto prev_blocks_count = num_free_blocks(); - free_sequence_partially_single_runnning_sequence(seq_id, num_required_blocks); - - // calculate the number of released blocks - phisical_blocks_released = num_free_blocks() - prev_blocks_count; - - return num_required_blocks <= phisical_blocks_released; + return blocks_num; } const size_t get_number_of_blocks_occupied_by_sequence(SequenceGroup::Ptr sequence_group) { @@ -399,15 +364,13 @@ class BlockManager { return block_table[block_idx]->is_free(); } - void free_sequence_partially_single_runnning_sequence(size_t seq_id, size_t block_num) { - // this method is applicable only for groups with single sequences + void free_sequence_partially(size_t seq_id, size_t block_num) { auto block_table = m_block_table[seq_id]; OPENVINO_ASSERT(block_table.size() >= block_num); for (size_t idx = 0; idx < block_num; idx++) { size_t block_idx = m_block_table[seq_id].size() - idx - 1; m_allocator.free(block_table[block_idx]); - OPENVINO_ASSERT(block_table[block_idx]->is_free()); } m_block_table[seq_id].resize(m_block_table[seq_id].size() - block_num); diff --git a/src/cpp/src/scheduler.hpp b/src/cpp/src/scheduler.hpp index c52ed8d7a6..b0a0fd512d 100644 --- a/src/cpp/src/scheduler.hpp +++ b/src/cpp/src/scheduler.hpp @@ -117,31 +117,14 @@ class Scheduler { return m_block_manager.num_free_blocks() > prev_blocks_count; } - if (num_running_sequences > 1) { - size_t phisycal_blocks_released; - size_t logical_blocks_released; - m_block_manager.free_group_partially_multiple_runnning_sequence(sequence_group, blocks_needed, phisycal_blocks_released, logical_blocks_released); - - // calculate the number of preempted tokens - auto tokens_in_last_block = processed_tokens % block_size; - if (tokens_in_last_block == 0) { - tokens_in_last_block = block_size; - } - preempted_tokens = tokens_in_last_block + std::max((int)logical_blocks_released - 1, 0) * block_size; + size_t logical_blocks_released = m_block_manager.free_group_partially(sequence_group, blocks_needed); + // calculate the number of preempted tokens + auto tokens_in_last_block = processed_tokens % block_size; + if (tokens_in_last_block == 0) { + tokens_in_last_block = block_size; } - else { - OPENVINO_ASSERT(num_running_sequences == 1); - size_t phisycal_blocks_released; - m_block_manager.free_group_partially_single_runnning_sequence(sequence_group, blocks_needed, phisycal_blocks_released); - - // calculate the number of preempted tokens - auto tokens_in_last_block = processed_tokens % block_size; - if (tokens_in_last_block == 0) { - tokens_in_last_block = block_size; - } - preempted_tokens = tokens_in_last_block + std::max((int)phisycal_blocks_released - 1, 0) * block_size; - } + preempted_tokens = tokens_in_last_block + std::max((int)logical_blocks_released - 1, 0) * block_size; // case when preemption requires preempt prompt tokens if (!m_config.dynamic_split_fuse && processed_tokens - preempted_tokens < sequence_group->get_prompt_len()) { diff --git a/tests/cpp/block_manager.cpp b/tests/cpp/block_manager.cpp index 4621c184f5..40e2783807 100644 --- a/tests/cpp/block_manager.cpp +++ b/tests/cpp/block_manager.cpp @@ -26,7 +26,7 @@ TEST(TestBlockManager, general_test) { EXPECT_EQ(bm.get_block_table(seq_id).size(), 6); EXPECT_EQ(bm.num_free_blocks(), 0); - bm.free_sequence_partially_single_runnning_sequence(seq_id, 4); + bm.free_sequence_partially(seq_id, 4); EXPECT_EQ(bm.get_block_table(seq_id).size(), 2); EXPECT_EQ(bm.num_free_blocks(), 4);