From 8e2f3ac60f62f44f80be26234d238f410bd18610 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 18 Jan 2018 16:19:26 +0800 Subject: [PATCH] alua: fix memory leakage when doing the alua implicit transition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error log is: "Could not start implicit transition thread:Cannot allocate memory" When a joinable thread terminates, its memory resources (thread descriptor and stack) are not deallocated until another thread performs pthread_join on it. Therefore, pthread_join must be called once for each joinable thread created to avoid memory leaks. Here we make the lock thread detached to avoid this. Signed-off-by: Xiubo Li Signed-off-by: Zhang Zhuoyu --- alua.c | 10 +++++++++- main.c | 10 ++++++++++ target.c | 2 +- tcmu-runner.h | 1 - tcmur_device.c | 24 ++++++++++-------------- tcmur_device.h | 3 ++- 6 files changed, 32 insertions(+), 18 deletions(-) diff --git a/alua.c b/alua.c index 2b032066..9b36e9f9 100644 --- a/alua.c +++ b/alua.c @@ -440,6 +440,7 @@ static void *alua_lock_thread_fn(void *arg) int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd) { struct tcmur_device *rdev = tcmu_get_daemon_dev_private(dev); + pthread_attr_t attr; int ret = SAM_STAT_GOOD; pthread_mutex_lock(&rdev->state_lock); @@ -453,11 +454,18 @@ int alua_implicit_transition(struct tcmu_device *dev, struct tcmulib_cmd *cmd) } rdev->lock_state = TCMUR_DEV_LOCK_LOCKING; + + /* + * Make the lock_thread detached to fix the memory leak. + */ + pthread_attr_init (&attr); + pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); + /* * The initiator is going to be queueing commands, so do this * in the background to avoid command timeouts. 
*/ - if (pthread_create(&rdev->lock_thread, NULL, alua_lock_thread_fn, + if (pthread_create(&rdev->lock_thread, &attr, alua_lock_thread_fn, dev)) { tcmu_dev_err(dev, "Could not start implicit transition thread:%s\n", strerror(errno)); diff --git a/main.c b/main.c index 258cb8ff..d6cf4d2a 100644 --- a/main.c +++ b/main.c @@ -805,6 +805,10 @@ static int dev_added(struct tcmu_device *dev) rdev->flags |= TCMUR_DEV_FLAG_IS_OPEN; + ret = pthread_cond_init(&rdev->lock_cond, NULL); + if (ret < 0) + goto cleanup_lock_cond; + /* * Set the optimal unmap granularity to max xfer len. Optimal unmap * alignment starts at the begining of the device. @@ -821,6 +825,8 @@ static int dev_added(struct tcmu_device *dev) close_dev: rhandler->close(dev); +cleanup_lock_cond: + pthread_cond_destroy(&rdev->lock_cond); cleanup_aio_tracking: cleanup_aio_tracking(rdev); cleanup_io_work_queue: @@ -864,6 +870,10 @@ static void dev_removed(struct tcmu_device *dev) cleanup_io_work_queue(dev, false); cleanup_aio_tracking(rdev); + ret = pthread_cond_destroy(&rdev->lock_cond); + if (ret != 0) + tcmu_err("could not cleanup lock cond %d\n", ret); + ret = pthread_mutex_destroy(&rdev->state_lock); if (ret != 0) tcmu_err("could not cleanup state lock %d\n", ret); diff --git a/target.c b/target.c index e81ac218..e41f8f7a 100644 --- a/target.c +++ b/target.c @@ -258,7 +258,7 @@ static void *tgt_port_grp_recovery_thread_fn(void *arg) * cmdproc thread to reopen all these in parallel. */ list_for_each_safe(&tpg->devs, rdev, tmp_rdev, recovery_entry) { - ret = __tcmu_reopen_dev(rdev->dev); + ret = __tcmu_reopen_dev(rdev->dev, false); if (ret) { tcmu_dev_err(rdev->dev, "Could not reinitialize device. 
(err %d).\n", ret); diff --git a/tcmu-runner.h b/tcmu-runner.h index 7b018779..465a03f7 100644 --- a/tcmu-runner.h +++ b/tcmu-runner.h @@ -46,7 +46,6 @@ struct tcmulib_cfg_info; enum { TCMUR_LOCK_SUCCESS, TCMUR_LOCK_FAILED, - TCMUR_LOCK_BUSY, TCMUR_LOCK_NOTCONN, }; diff --git a/tcmur_device.c b/tcmur_device.c index 4fe7145d..0069015b 100644 --- a/tcmur_device.c +++ b/tcmur_device.c @@ -43,7 +43,7 @@ bool tcmu_dev_in_recovery(struct tcmu_device *dev) /* * TCMUR_DEV_FLAG_IN_RECOVERY must be set before calling */ -int __tcmu_reopen_dev(struct tcmu_device *dev) +int __tcmu_reopen_dev(struct tcmu_device *dev, bool in_lock_thread) { struct tcmur_device *rdev = tcmu_get_daemon_dev_private(dev); struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev); @@ -67,7 +67,8 @@ int __tcmu_reopen_dev(struct tcmu_device *dev) * async lock requests in progress that might be accessing * the device. */ - tcmu_cancel_lock_thread(dev); + if (!in_lock_thread) + tcmu_cancel_lock_thread(dev); /* * Force a reacquisition of the lock when we have reopend the @@ -120,7 +121,7 @@ int tcmu_reopen_dev(struct tcmu_device *dev) rdev->flags |= TCMUR_DEV_FLAG_IN_RECOVERY; pthread_mutex_unlock(&rdev->state_lock); - return __tcmu_reopen_dev(dev); + return __tcmu_reopen_dev(dev, true); } void tcmu_cancel_recovery(struct tcmu_device *dev) @@ -207,7 +208,6 @@ void tcmu_notify_lock_lost(struct tcmu_device *dev) int tcmu_cancel_lock_thread(struct tcmu_device *dev) { struct tcmur_device *rdev = tcmu_get_daemon_dev_private(dev); - void *join_retval; int ret; pthread_mutex_lock(&rdev->state_lock); @@ -215,18 +215,16 @@ int tcmu_cancel_lock_thread(struct tcmu_device *dev) pthread_mutex_unlock(&rdev->state_lock); return 0; } - pthread_mutex_unlock(&rdev->state_lock); /* * It looks like lock calls are not cancelable, so * we wait here to avoid crashes. 
*/ tcmu_dev_dbg(dev, "Waiting on lock thread\n"); - ret = pthread_join(rdev->lock_thread, &join_retval); - /* - * We may be called from the lock thread to reopen the device. - */ - if (ret != EDEADLK) - tcmu_dev_err(dev, "pthread_join failed with value %d\n", ret); + + tcmu_dev_dbg(rdev->dev, "waiting for lock thread to exit\n"); + ret = pthread_cond_wait(&rdev->lock_cond, &rdev->state_lock); + pthread_mutex_unlock(&rdev->state_lock); + return ret; } @@ -274,9 +272,6 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev) ret = rhandler->lock(dev); switch (ret) { - case TCMUR_LOCK_BUSY: - new_state = TCMUR_DEV_LOCK_LOCKING; - break; case TCMUR_LOCK_FAILED: new_state = TCMUR_DEV_LOCK_UNLOCKED; break; @@ -306,6 +301,7 @@ int tcmu_acquire_dev_lock(struct tcmu_device *dev) pthread_mutex_lock(&rdev->state_lock); rdev->lock_state = new_state; tcmu_dev_dbg(dev, "lock call done. lock state %d\n", rdev->lock_state); + pthread_cond_signal(&rdev->lock_cond); pthread_mutex_unlock(&rdev->state_lock); return ret; diff --git a/tcmur_device.h b/tcmur_device.h index 5bf030ce..69dd3346 100644 --- a/tcmur_device.h +++ b/tcmur_device.h @@ -55,6 +55,7 @@ struct tcmur_device { uint8_t lock_state; pthread_t lock_thread; + pthread_cond_t lock_cond; /* General lock for lock state, thread, dev state, etc */ pthread_mutex_t state_lock; @@ -82,7 +83,7 @@ int tcmu_cancel_lock_thread(struct tcmu_device *dev); void tcmu_notify_conn_lost(struct tcmu_device *dev); void tcmu_notify_lock_lost(struct tcmu_device *dev); -int __tcmu_reopen_dev(struct tcmu_device *dev); +int __tcmu_reopen_dev(struct tcmu_device *dev, bool in_lock_thread); int tcmu_reopen_dev(struct tcmu_device *dev); int tcmu_acquire_dev_lock(struct tcmu_device *dev);