From 19fea86c3a0c287c1345610ce683750b3472010d Mon Sep 17 00:00:00 2001 From: Lyu Han Date: Mon, 9 Oct 2023 11:04:52 +0800 Subject: [PATCH] Change `shared_instance` type from `weakptr` to `shared_ptr` (#507) * change shared_instances_ from weakptr to sharedptr * update --- src/turbomind/triton_backend/llama/LlamaTritonModel.cc | 4 ++-- src/turbomind/triton_backend/llama/LlamaTritonModel.h | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/turbomind/triton_backend/llama/LlamaTritonModel.cc b/src/turbomind/triton_backend/llama/LlamaTritonModel.cc index e670753701..8a7674a2ab 100644 --- a/src/turbomind/triton_backend/llama/LlamaTritonModel.cc +++ b/src/turbomind/triton_backend/llama/LlamaTritonModel.cc @@ -273,7 +273,7 @@ LlamaTritonModel::createModelInstance(int std::shared_ptr<LlamaTritonSharedModelInstance<T>> instance; { std::lock_guard lock(shared_mutexes_[device_id]); - instance = shared_instances_[device_id].lock(); + instance = shared_instances_[device_id]; if (!instance) { instance = createSharedModelInstance(device_id, rank, nccl_params, custom_all_reduce_comm); instance->llm->setFfiLock(ffi_lock_); @@ -347,7 +347,7 @@ LlamaTritonModel::createNcclParams(const int node_id, const int device_id_sta // create nccl group when there are non-occupied devices for (int i = 0; i < device_count; ++i) { std::lock_guard lock(shared_mutexes_[i]); - if (shared_instances_[i].expired()) { + if (shared_instances_[i] == nullptr) { need_nccl_params = true; break; } diff --git a/src/turbomind/triton_backend/llama/LlamaTritonModel.h b/src/turbomind/triton_backend/llama/LlamaTritonModel.h index 332000ce62..b7d8f439ca 100644 --- a/src/turbomind/triton_backend/llama/LlamaTritonModel.h +++ b/src/turbomind/triton_backend/llama/LlamaTritonModel.h @@ -108,9 +108,8 @@ struct LlamaTritonModel: public AbstractTransformerModel { std::shared_ptr<typename ft::LlamaV2<T>::SharedState> shared_state_; - // weak_ptr is used so that the instances get released when all strong references are gone - std::vector<std::weak_ptr<LlamaTritonSharedModelInstance<T>>> shared_instances_; - 
std::deque<std::mutex> shared_mutexes_; // is locking really needed? + std::vector<std::shared_ptr<LlamaTritonSharedModelInstance<T>>> shared_instances_; + std::deque<std::mutex> shared_mutexes_; // is locking really needed? bool is_fp16_; int enable_custom_all_reduce_ = 0;