diff --git a/main.c b/main.c index 70b42335..1ad82d06 100644 --- a/main.c +++ b/main.c @@ -1044,9 +1044,15 @@ static int dev_added(struct tcmu_device *dev) goto cleanup_format_lock; } + ret = pthread_cond_init(&rdev->report_event_cond, NULL); + if (ret) { + ret = -ret; + goto cleanup_rdev_lock; + } + ret = setup_io_work_queue(dev); if (ret < 0) - goto cleanup_rdev_lock; + goto cleanup_event_cond; ret = setup_aio_tracking(rdev); if (ret < 0) @@ -1088,6 +1094,8 @@ static int dev_added(struct tcmu_device *dev) cleanup_aio_tracking(rdev); cleanup_io_work_queue: cleanup_io_work_queue(dev, true); +cleanup_event_cond: + pthread_cond_destroy(&rdev->report_event_cond); cleanup_rdev_lock: pthread_mutex_destroy(&rdev->rdev_lock); cleanup_format_lock: @@ -1130,6 +1138,10 @@ static void dev_removed(struct tcmu_device *dev) tcmur_destroy_work(rdev->event_work); + ret = pthread_cond_destroy(&rdev->report_event_cond); + if (ret != 0) + tcmu_err("could not cleanup report event cond %d\n", ret); + ret = pthread_mutex_destroy(&rdev->rdev_lock); if (ret != 0) tcmu_err("could not cleanup state lock %d\n", ret); diff --git a/rbd.c b/rbd.c index b3ea9d6b..4d151a5d 100644 --- a/rbd.c +++ b/rbd.c @@ -146,17 +146,14 @@ static int tcmu_rbd_service_status_update(struct tcmu_device *dev, #endif /* RBD_LOCK_ACQUIRE_SUPPORT */ -static int tcmu_rbd_report_event(struct tcmu_device *dev) +static int tcmu_rbd_report_event(struct tcmu_device *dev, bool has_lock) { - struct tcmur_device *rdev = tcmu_dev_get_private(dev); - /* * We ignore the specific event and report all the current counter * values, because tools like gwcli/dashboard may not see every * update, and we do not want one event to overwrite the info. */ - return tcmu_rbd_service_status_update(dev, - rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED ? true : false); + return tcmu_rbd_service_status_update(dev, has_lock); } static int tcmu_rbd_service_register(struct tcmu_device *dev) @@ -215,7 +212,7 @@ static int tcmu_rbd_service_register(struct tcmu_device *dev) goto free_meta_buf; } - ret = tcmu_rbd_report_event(dev); + ret = tcmu_rbd_report_event(dev, false); free_meta_buf: free(metadata_buf); diff --git a/target.c b/target.c index 51a0d97c..91cc2d8e 100644 --- a/target.c +++ b/target.c @@ -252,6 +252,12 @@ static void tgt_port_grp_recovery_work_fn(void *arg) */ list_for_each_safe(&tpg->devs, rdev, tmp_rdev, recovery_entry) { list_del(&rdev->recovery_entry); + + pthread_mutex_lock(&rdev->rdev_lock); + if (rdev->flags & TCMUR_DEV_FLAG_REPORTING_EVENT) + pthread_cond_wait(&rdev->report_event_cond, &rdev->rdev_lock); + pthread_mutex_unlock(&rdev->rdev_lock); + ret = __tcmu_reopen_dev(rdev->dev, -1); if (ret) { tcmu_dev_err(rdev->dev, "Could not reinitialize device. (err %d).\n", diff --git a/tcmu-runner.h b/tcmu-runner.h index 844904c6..6157d9e9 100644 --- a/tcmu-runner.h +++ b/tcmu-runner.h @@ -161,7 +161,7 @@ struct tcmur_handler { * * Return 0 on success and a -Exyz error code on error. */ - int (*report_event)(struct tcmu_device *dev); + int (*report_event)(struct tcmu_device *dev, bool has_lock); /* * If the lock is acquired and the tag is not TCMU_INVALID_LOCK_TAG, diff --git a/tcmur_device.c b/tcmur_device.c index 09b29ba9..9ee62ae6 100644 --- a/tcmur_device.c +++ b/tcmur_device.c @@ -86,8 +86,8 @@ int __tcmu_reopen_dev(struct tcmu_device *dev, int retries) rhandler->close(dev); } - pthread_mutex_lock(&rdev->rdev_lock); ret = -EIO; + pthread_mutex_lock(&rdev->rdev_lock); while (ret != 0 && !(rdev->flags & TCMUR_DEV_FLAG_STOPPING) && (retries < 0 || attempt <= retries)) { pthread_mutex_unlock(&rdev->rdev_lock); @@ -128,7 +128,10 @@ int tcmu_reopen_dev(struct tcmu_device *dev, int retries) pthread_mutex_unlock(&rdev->rdev_lock); return -EBUSY; } + rdev->flags |= TCMUR_DEV_FLAG_IN_RECOVERY; + if (rdev->flags & TCMUR_DEV_FLAG_REPORTING_EVENT) + pthread_cond_wait(&rdev->report_event_cond, &rdev->rdev_lock); pthread_mutex_unlock(&rdev->rdev_lock); return __tcmu_reopen_dev(dev, retries); @@ -159,6 +162,7 @@ static void __tcmu_report_event(void *data) struct tcmu_device *dev = data; struct tcmur_device *rdev = tcmu_dev_get_private(dev); struct tcmur_handler *rhandler = tcmu_get_runner_handler(dev); + bool has_lock; int ret; /* @@ -168,9 +172,29 @@ static void __tcmu_report_event(void *data) sleep(1); pthread_mutex_lock(&rdev->rdev_lock); - ret = rhandler->report_event(dev); + /* + * If the device is in recovery, will skip reporting the event + * this time because the event should be report when the device + * is reopening. + */ + if (rdev->flags & (TCMUR_DEV_FLAG_IN_RECOVERY | + TCMUR_DEV_FLAG_STOPPING | + TCMUR_DEV_FLAG_STOPPED)) { + pthread_mutex_unlock(&rdev->rdev_lock); + return; + } + + rdev->flags |= TCMUR_DEV_FLAG_REPORTING_EVENT; + has_lock = rdev->lock_state == TCMUR_DEV_LOCK_WRITE_LOCKED; + pthread_mutex_unlock(&rdev->rdev_lock); + + ret = rhandler->report_event(dev, has_lock); if (ret) tcmu_dev_err(dev, "Could not report events. Error %d.\n", ret); + + pthread_mutex_lock(&rdev->rdev_lock); + rdev->flags &= ~TCMUR_DEV_FLAG_REPORTING_EVENT; + pthread_cond_signal(&rdev->report_event_cond); pthread_mutex_unlock(&rdev->rdev_lock); } diff --git a/tcmur_device.h b/tcmur_device.h index cd37aabe..3c7a449e 100644 --- a/tcmur_device.h +++ b/tcmur_device.h @@ -22,6 +22,7 @@ #define TCMUR_DEV_FLAG_IS_OPEN (1 << 2) #define TCMUR_DEV_FLAG_STOPPING (1 << 3) #define TCMUR_DEV_FLAG_STOPPED (1 << 4) +#define TCMUR_DEV_FLAG_REPORTING_EVENT (1 << 5) #define TCMUR_UA_DEV_SIZE_CHANGED 0 @@ -83,6 +84,8 @@ struct tcmur_device { int cmd_time_out; + pthread_cond_t report_event_cond; + pthread_spinlock_t cmds_list_lock; /* protects cmds_list */ struct list_head cmds_list; };