diff --git a/src/turbomind/triton_backend/llama/LlamaTritonModel.cc b/src/turbomind/triton_backend/llama/LlamaTritonModel.cc index e670753701..8a7674a2ab 100644 --- a/src/turbomind/triton_backend/llama/LlamaTritonModel.cc +++ b/src/turbomind/triton_backend/llama/LlamaTritonModel.cc @@ -273,7 +273,7 @@ LlamaTritonModel::createModelInstance(int std::shared_ptr> instance; { std::lock_guard lock(shared_mutexes_[device_id]); - instance = shared_instances_[device_id].lock(); + instance = shared_instances_[device_id]; if (!instance) { instance = createSharedModelInstance(device_id, rank, nccl_params, custom_all_reduce_comm); instance->llm->setFfiLock(ffi_lock_); @@ -347,7 +347,7 @@ LlamaTritonModel::createNcclParams(const int node_id, const int device_id_sta // create nccl group when there are non-occupied devices for (int i = 0; i < device_count; ++i) { std::lock_guard lock(shared_mutexes_[i]); - if (shared_instances_[i].expired()) { + if (shared_instances_[i] == nullptr) { need_nccl_params = true; break; } diff --git a/src/turbomind/triton_backend/llama/LlamaTritonModel.h b/src/turbomind/triton_backend/llama/LlamaTritonModel.h index 332000ce62..b7d8f439ca 100644 --- a/src/turbomind/triton_backend/llama/LlamaTritonModel.h +++ b/src/turbomind/triton_backend/llama/LlamaTritonModel.h @@ -108,9 +108,8 @@ struct LlamaTritonModel: public AbstractTransformerModel { std::shared_ptr::SharedState> shared_state_; - // weak_ptr is used so that the instances get released when all strong references are gone - std::vector>> shared_instances_; - std::deque shared_mutexes_; // is locking really needed? + std::vector>> shared_instances_; + std::deque shared_mutexes_; // is locking really needed? bool is_fp16_; int enable_custom_all_reduce_ = 0;