Skip to content

Commit

Permalink
Change shared_instances_ type from weak_ptr to shared_ptr (#507)
Browse files Browse the repository at this point in the history
* change shared_instances_ from weak_ptr to shared_ptr

* update
  • Loading branch information
lvhan028 authored Oct 9, 2023
1 parent 0268414 commit 19fea86
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 5 deletions.
4 changes: 2 additions & 2 deletions src/turbomind/triton_backend/llama/LlamaTritonModel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ LlamaTritonModel<T>::createModelInstance(int
std::shared_ptr<LlamaTritonSharedModelInstance<T>> instance;
{
std::lock_guard<std::mutex> lock(shared_mutexes_[device_id]);
instance = shared_instances_[device_id].lock();
instance = shared_instances_[device_id];
if (!instance) {
instance = createSharedModelInstance(device_id, rank, nccl_params, custom_all_reduce_comm);
instance->llm->setFfiLock(ffi_lock_);
Expand Down Expand Up @@ -347,7 +347,7 @@ LlamaTritonModel<T>::createNcclParams(const int node_id, const int device_id_sta
// create nccl group when there are non-occupied devices
for (int i = 0; i < device_count; ++i) {
std::lock_guard<std::mutex> lock(shared_mutexes_[i]);
if (shared_instances_[i].expired()) {
if (shared_instances_[i] == nullptr) {
need_nccl_params = true;
break;
}
Expand Down
5 changes: 2 additions & 3 deletions src/turbomind/triton_backend/llama/LlamaTritonModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,8 @@ struct LlamaTritonModel: public AbstractTransformerModel {

std::shared_ptr<typename ft::LlamaV2<T>::SharedState> shared_state_;

// weak_ptr is used so that the instances get released when all strong references are gone
std::vector<std::weak_ptr<LlamaTritonSharedModelInstance<T>>> shared_instances_;
std::deque<std::mutex> shared_mutexes_; // is locking really needed?
std::vector<std::shared_ptr<LlamaTritonSharedModelInstance<T>>> shared_instances_;
std::deque<std::mutex> shared_mutexes_; // is locking really needed?

bool is_fp16_;
int enable_custom_all_reduce_ = 0;
Expand Down

0 comments on commit 19fea86

Please sign in to comment.