diff --git a/alua.c b/alua.c
index 2b032066..9b36e9f9 100644
--- a/alua.c
+++ b/alua.c
@@ -440,6 +440,7 @@ static void *alua_lock_thread_fn(void *arg)
 int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
 {
 	struct tcmur_device *rdev = tcmu_get_daemon_dev_private(dev);
+	pthread_attr_t attr;
 	int ret = SAM_STAT_GOOD;
 
 	pthread_mutex_lock(&rdev->state_lock);
@@ -453,11 +454,19 @@ int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd)
 	}
 
 	rdev->lock_state = TCMUR_DEV_LOCK_LOCKING;
+
+	/*
+	 * Create the lock thread detached: it is no longer joined anywhere,
+	 * so detaching lets its resources be reclaimed on exit (fixes a
+	 * thread resource leak).
+	 */
+	pthread_attr_init(&attr);
+	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
 	/*
 	 * The initiator is going to be queueing commands, so do this
 	 * in the background to avoid command timeouts.
 	 */
-	if (pthread_create(&rdev->lock_thread, NULL, alua_lock_thread_fn,
+	if (pthread_create(&rdev->lock_thread, &attr, alua_lock_thread_fn,
 	    dev)) {
 		tcmu_dev_err(dev, "Could not start implicit transition thread:%s\n",
 			     strerror(errno));
diff --git a/main.c b/main.c
index 609b2391..f5480fda 100644
--- a/main.c
+++ b/main.c
@@ -799,6 +799,10 @@ static int dev_added(struct tcmu_device *dev)
 
 	rdev->flags |= TCMUR_DEV_FLAG_IS_OPEN;
 
+	ret = pthread_cond_init(&rdev->lock_cond, NULL);
+	if (ret)
+		goto close_dev;
+
 	/*
 	 * Set the optimal unmap granularity to max xfer len. Optimal unmap
 	 * alignment starts at the begining of the device.
@@ -816,6 +820,8 @@ static int dev_added(struct tcmu_device *dev)
 
 close_dev:
 	rhandler->close(dev);
+cleanup_lock_cond:
+	pthread_cond_destroy(&rdev->lock_cond);
 cleanup_aio_tracking:
 	cleanup_aio_tracking(rdev);
 cleanup_io_work_queue:
@@ -881,6 +887,10 @@ static void dev_removed(struct tcmu_device *dev)
 	cleanup_io_work_queue(dev, false);
 	cleanup_aio_tracking(rdev);
 
+	ret = pthread_cond_destroy(&rdev->lock_cond);
+	if (ret != 0)
+		tcmu_err("could not cleanup lock cond %d\n", ret);
+
 	ret = pthread_mutex_destroy(&rdev->state_lock);
 	if (ret != 0)
 		tcmu_err("could not cleanup state lock %d\n", ret);
diff --git a/target.c b/target.c
index e81ac218..e41f8f7a 100644
--- a/target.c
+++ b/target.c
@@ -258,7 +258,7 @@ static void *tgt_port_grp_recovery_thread_fn(void *arg)
 	 * cmdproc thread to reopen all these in parallel.
 	 */
 	list_for_each_safe(&tpg->devs, rdev, tmp_rdev, recovery_entry) {
-		ret = __tcmu_reopen_dev(rdev->dev);
+		ret = __tcmu_reopen_dev(rdev->dev, false);
 		if (ret) {
 			tcmu_dev_err(rdev->dev,
 				     "Could not reinitialize device. (err %d).\n", ret);
diff --git a/tcmu-runner.h b/tcmu-runner.h
index 51535d1d..a098b2a4 100644
--- a/tcmu-runner.h
+++ b/tcmu-runner.h
@@ -46,7 +46,6 @@ struct tcmulib_cfg_info;
 enum {
 	TCMUR_LOCK_SUCCESS,
 	TCMUR_LOCK_FAILED,
-	TCMUR_LOCK_BUSY,
 	TCMUR_LOCK_NOTCONN,
 };
 
diff --git a/tcmur_device.c b/tcmur_device.c
index 37ab4b33..5175924f 100644
--- a/tcmur_device.c
+++ b/tcmur_device.c
@@ -43,7 +43,7 @@ bool tcmu_dev_in_recovery(struct tcmu_device *dev)
 /*
  * TCMUR_DEV_FLAG_IN_RECOVERY must be set before calling
  */
-int __tcmu_reopen_dev(struct tcmu_device *dev)
+int __tcmu_reopen_dev(struct tcmu_device *dev, bool in_lock_thread)
 {
 	struct tcmur_device *rdev = tcmu_get_daemon_dev_private(dev);
 	struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev);
@@ -67,7 +68,8 @@ int __tcmu_reopen_dev(struct tcmu_device *dev)
 	 * async lock requests in progress that might be accessing
 	 * the device.
 	 */
-	tcmu_cancel_lock_thread(dev);
+	if (!in_lock_thread)
+		tcmu_cancel_lock_thread(dev);
 
 	/*
 	 * Force a reacquisition of the lock when we have reopend the
@@ -120,7 +121,7 @@ int tcmu_reopen_dev(struct tcmu_device *dev)
 	rdev->flags |= TCMUR_DEV_FLAG_IN_RECOVERY;
 	pthread_mutex_unlock(&rdev->state_lock);
 
-	return __tcmu_reopen_dev(dev);
+	return __tcmu_reopen_dev(dev, true);
 }
 
 void tcmu_cancel_recovery(struct tcmu_device *dev)
@@ -208,7 +209,6 @@ void tcmu_notify_lock_lost(struct tcmu_device *dev)
 int tcmu_cancel_lock_thread(struct tcmu_device *dev)
 {
 	struct tcmur_device *rdev = tcmu_get_daemon_dev_private(dev);
-	void *join_retval;
 	int ret;
 
 	pthread_mutex_lock(&rdev->state_lock);
@@ -216,18 +216,17 @@ int tcmu_cancel_lock_thread(struct tcmu_device *dev)
 		pthread_mutex_unlock(&rdev->state_lock);
 		return 0;
 	}
-	pthread_mutex_unlock(&rdev->state_lock);
 
 	/*
 	 * It looks like lock calls are not cancelable, so
 	 * we wait here to avoid crashes.
	 */
 	tcmu_dev_dbg(dev, "Waiting on lock thread\n");
-	ret = pthread_join(rdev->lock_thread, &join_retval);
-	/*
-	 * We may be called from the lock thread to reopen the device.
-	 */
-	if (ret != EDEADLK)
-		tcmu_dev_err(dev, "pthread_join failed with value %d\n", ret);
+
+	/* Re-check the state in a loop: pthread_cond_wait may wake spuriously. */
+	while (rdev->lock_state == TCMUR_DEV_LOCK_LOCKING)
+		pthread_cond_wait(&rdev->lock_cond, &rdev->state_lock);
+	pthread_mutex_unlock(&rdev->state_lock);
+	ret = 0;
 	return ret;
 }
@@ -284,9 +282,6 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev)
 
 	ret = rhandler->lock(dev);
 	switch (ret) {
-	case TCMUR_LOCK_BUSY:
-		new_state = TCMUR_DEV_LOCK_LOCKING;
-		break;
 	case TCMUR_LOCK_FAILED:
 		new_state = TCMUR_DEV_LOCK_UNLOCKED;
 		break;
@@ -316,6 +311,7 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev)
 	pthread_mutex_lock(&rdev->state_lock);
 	rdev->lock_state = new_state;
 	tcmu_dev_dbg(dev, "lock call done. lock state %d\n", rdev->lock_state);
+	pthread_cond_signal(&rdev->lock_cond);
 	pthread_mutex_unlock(&rdev->state_lock);
 
 	tcmu_unblock_device(dev);
diff --git a/tcmur_device.h b/tcmur_device.h
index 40a71429..f831a61c 100644
--- a/tcmur_device.h
+++ b/tcmur_device.h
@@ -57,6 +57,7 @@ struct tcmur_device {
 	uint8_t lock_state;
 	pthread_t lock_thread;
+	pthread_cond_t lock_cond;
 
 	/* General lock for lock state, thread, dev state, etc */
 	pthread_mutex_t state_lock;
@@ -84,7 +85,7 @@ int tcmu_cancel_lock_thread(struct tcmu_device *dev);
 void tcmu_notify_conn_lost(struct tcmu_device *dev);
 void tcmu_notify_lock_lost(struct tcmu_device *dev);
 
-int __tcmu_reopen_dev(struct tcmu_device *dev);
+int __tcmu_reopen_dev(struct tcmu_device *dev, bool in_lock_thread);
 int tcmu_reopen_dev(struct tcmu_device *dev);
 
 int tcmu_acquire_dev_lock(struct tcmu_device *dev);