From 8e2f3ac60f62f44f80be26234d238f410bd18610 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 18 Jan 2018 16:19:26 +0800 Subject: [PATCH] alua: fix memory leakage when doing the alua implicit transition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error log is: "Could not start implicit transition thread:Cannot allocate memory" When a joinable thread terminates, its memory resources (thread descriptor and stack) are not deallocated until another thread performs pthread_join on it. Therefore, pthread_join must be called once for each joinable thread created to avoid memory leaks. Here we make the lock thread detached to avoid this. Signed-off-by: Xiubo Li Signed-off-by: Zhang Zhuoyu --- alua.c | 10 +++++++++- main.c | 10 ++++++++++ target.c | 2 +- tcmu-runner.h | 1 - tcmur_device.c | 24 ++++++++++-------------- tcmur_device.h | 3 ++- 6 files changed, 32 insertions(+), 18 deletions(-) diff --git a/alua.c b/alua.c index 2b032066..9b36e9f9 100644 --- a/alua.c +++ b/alua.c @@ -440,6 +440,7 @@ static void *alua_lock_thread_fn(void *arg) int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd) { struct tcmur_device *rdev = tcmu_get_daemon_dev_private(dev); + pthread_attr_t attr; int ret = SAM_STAT_GOOD; pthread_mutex_lock(&rdev->state_lock); @@ -453,11 +454,18 @@ int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd) } rdev->lock_state = TCMUR_DEV_LOCK_LOCKING; + + /* + * Make the lock_thread detached to fix the memory leak. + */ + pthread_attr_init (&attr); + pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); + /* * The initiator is going to be queueing commands, so do this * in the background to avoid command timeouts. 
*/ - if (pthread_create(&rdev->lock_thread, NULL, alua_lock_thread_fn, + if (pthread_create(&rdev->lock_thread, &attr, alua_lock_thread_fn, dev)) { tcmu_dev_err(dev, "Could not start implicit transition thread:%s\n", strerror(errno)); diff --git a/main.c b/main.c index 258cb8ff..d6cf4d2a 100644 --- a/main.c +++ b/main.c @@ -805,6 +805,10 @@ static int dev_added(struct tcmu_device *dev) rdev->flags |= TCMUR_DEV_FLAG_IS_OPEN; + ret = pthread_cond_init(&rdev->lock_cond, NULL); + if (ret < 0) + goto cleanup_lock_cond; + /* * Set the optimal unmap granularity to max xfer len. Optimal unmap * alignment starts at the begining of the device. @@ -821,6 +825,8 @@ static int dev_added(struct tcmu_device *dev) close_dev: rhandler->close(dev); +cleanup_lock_cond: + pthread_cond_destroy(&rdev->lock_cond); cleanup_aio_tracking: cleanup_aio_tracking(rdev); cleanup_io_work_queue: @@ -864,6 +870,10 @@ static void dev_removed(struct tcmu_device *dev) cleanup_io_work_queue(dev, false); cleanup_aio_tracking(rdev); + ret = pthread_cond_destroy(&rdev->lock_cond); + if (ret != 0) + tcmu_err("could not cleanup lock cond %d\n", ret); + ret = pthread_mutex_destroy(&rdev->state_lock); if (ret != 0) tcmu_err("could not cleanup state lock %d\n", ret); diff --git a/target.c b/target.c index e81ac218..e41f8f7a 100644 --- a/target.c +++ b/target.c @@ -258,7 +258,7 @@ static void *tgt_port_grp_recovery_thread_fn(void *arg) * cmdproc thread to reopen all these in parallel. */ list_for_each_safe(&tpg->devs, rdev, tmp_rdev, recovery_entry) { - ret = __tcmu_reopen_dev(rdev->dev); + ret = __tcmu_reopen_dev(rdev->dev, false); if (ret) { tcmu_dev_err(rdev->dev, "Could not reinitialize device. 
(err %d).\n", ret); diff --git a/tcmu-runner.h b/tcmu-runner.h index 7b018779..465a03f7 100644 --- a/tcmu-runner.h +++ b/tcmu-runner.h @@ -46,7 +46,6 @@ struct tcmulib_cfg_info; enum { TCMUR_LOCK_SUCCESS, TCMUR_LOCK_FAILED, - TCMUR_LOCK_BUSY, TCMUR_LOCK_NOTCONN, }; diff --git a/tcmur_device.c b/tcmur_device.c index 4fe7145d..0069015b 100644 --- a/tcmur_device.c +++ b/tcmur_device.c @@ -43,7 +43,7 @@ bool tcmu_dev_in_recovery(struct tcmu_device *dev) /* * TCMUR_DEV_FLAG_IN_RECOVERY must be set before calling */ -int __tcmu_reopen_dev(struct tcmu_device *dev) +int __tcmu_reopen_dev(struct tcmu_device *dev, bool in_lock_thread) { struct tcmur_device *rdev = tcmu_get_daemon_dev_private(dev); struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev); @@ -67,7 +67,8 @@ int __tcmu_reopen_dev(struct tcmu_device *dev) * async lock requests in progress that might be accessing * the device. */ - tcmu_cancel_lock_thread(dev); + if (!in_lock_thread) + tcmu_cancel_lock_thread(dev); /* * Force a reacquisition of the lock when we have reopend the @@ -120,7 +121,7 @@ int tcmu_reopen_dev(struct tcmu_device *dev) rdev->flags |= TCMUR_DEV_FLAG_IN_RECOVERY; pthread_mutex_unlock(&rdev->state_lock); - return __tcmu_reopen_dev(dev); + return __tcmu_reopen_dev(dev, true); } void tcmu_cancel_recovery(struct tcmu_device *dev) @@ -207,7 +208,6 @@ void tcmu_notify_lock_lost(struct tcmu_device *dev) int tcmu_cancel_lock_thread(struct tcmu_device *dev) { struct tcmur_device *rdev = tcmu_get_daemon_dev_private(dev); - void *join_retval; int ret; pthread_mutex_lock(&rdev->state_lock); @@ -215,18 +215,16 @@ int tcmu_cancel_lock_thread(struct tcmu_device *dev) pthread_mutex_unlock(&rdev->state_lock); return 0; } - pthread_mutex_unlock(&rdev->state_lock); /* * It looks like lock calls are not cancelable, so * we wait here to avoid crashes. 
*/ tcmu_dev_dbg(dev, "Waiting on lock thread\n"); - ret = pthread_join(rdev->lock_thread, &join_retval); - /* - * We may be called from the lock thread to reopen the device. - */ - if (ret != EDEADLK) - tcmu_dev_err(dev, "pthread_join failed with value %d\n", ret); + + tcmu_dev_dbg(rdev->dev, "waiting for lock thread to exit\n"); + ret = pthread_cond_wait(&rdev->lock_cond, &rdev->state_lock); + pthread_mutex_unlock(&rdev->state_lock); + return ret; } @@ -274,9 +272,6 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev) ret = rhandler->lock(dev); switch (ret) { - case TCMUR_LOCK_BUSY: - new_state = TCMUR_DEV_LOCK_LOCKING; - break; case TCMUR_LOCK_FAILED: new_state = TCMUR_DEV_LOCK_UNLOCKED; break; @@ -306,6 +301,7 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev) pthread_mutex_lock(&rdev->state_lock); rdev->lock_state = new_state; tcmu_dev_dbg(dev, "lock call done. lock state %d\n", rdev->lock_state); + pthread_cond_signal(&rdev->lock_cond); pthread_mutex_unlock(&rdev->state_lock); return ret; diff --git a/tcmur_device.h b/tcmur_device.h index 5bf030ce..69dd3346 100644 --- a/tcmur_device.h +++ b/tcmur_device.h @@ -55,6 +55,7 @@ struct tcmur_device { uint8_t lock_state; pthread_t lock_thread; + pthread_cond_t lock_cond; /* General lock for lock state, thread, dev state, etc */ pthread_mutex_t state_lock; @@ -82,7 +83,7 @@ int tcmu_cancel_lock_thread(struct tcmu_device *dev); void tcmu_notify_conn_lost(struct tcmu_device *dev); void tcmu_notify_lock_lost(struct tcmu_device *dev); -int __tcmu_reopen_dev(struct tcmu_device *dev); +int __tcmu_reopen_dev(struct tcmu_device *dev, bool in_lock_thread); int tcmu_reopen_dev(struct tcmu_device *dev); int tcmu_acquire_dev_lock(struct tcmu_device *dev);