From a49613d30069e32ff4467d5cdb3df0d455a9e318 Mon Sep 17 00:00:00 2001 From: Don Brady Date: Thu, 28 Sep 2023 17:03:27 +0000 Subject: [PATCH] Add ZED tunable for disk label wait during auto-replace During an auto-replace of a VDEV device, the device is labeled asynchronously in ZED and it waits until the partition link appears before proceeding with the zpool_vdev_attach(). Added a new ZED configuration tunable, ZED_DISK_LABEL_WAIT_TIME which can override the previously hard-coded value of 3000 ms. Sponsored-By: OpenDrives Inc. Sponsored-By: Klara Inc. Signed-off-by: Don Brady --- cmd/zed/agents/zfs_agents.c | 4 +- cmd/zed/agents/zfs_agents.h | 4 +- cmd/zed/agents/zfs_mod.c | 69 ++++++++++++++++++++++--- cmd/zed/zed.d/zed.rc | 8 +++ cmd/zed/zed_event.c | 2 +- include/libzutil.h | 2 +- lib/libzutil/os/linux/zutil_import_os.c | 5 +- 7 files changed, 78 insertions(+), 16 deletions(-) diff --git a/cmd/zed/agents/zfs_agents.c b/cmd/zed/agents/zfs_agents.c index 8fabb8d081a5..f9bdf287e7f2 100644 --- a/cmd/zed/agents/zfs_agents.c +++ b/cmd/zed/agents/zfs_agents.c @@ -388,13 +388,13 @@ zfs_agent_consumer_thread(void *arg) } void -zfs_agent_init(libzfs_handle_t *zfs_hdl) +zfs_agent_init(libzfs_handle_t *zfs_hdl, const char *zedlet_dir) { fmd_hdl_t *hdl; g_zfs_hdl = zfs_hdl; - if (zfs_slm_init() != 0) + if (zfs_slm_init(zedlet_dir) != 0) zed_log_die("Failed to initialize zfs slm"); zed_log_msg(LOG_INFO, "Add Agent: init"); diff --git a/cmd/zed/agents/zfs_agents.h b/cmd/zed/agents/zfs_agents.h index d1a459139b1e..e5acf1a00868 100644 --- a/cmd/zed/agents/zfs_agents.h +++ b/cmd/zed/agents/zfs_agents.h @@ -28,14 +28,14 @@ extern "C" { /* * Agent abstraction presented to ZED */ -extern void zfs_agent_init(libzfs_handle_t *); +extern void zfs_agent_init(libzfs_handle_t *, const char *); extern void zfs_agent_fini(void); extern void zfs_agent_post_event(const char *, const char *, nvlist_t *); /* * ZFS Sysevent Linkable Module (SLM) */ -extern int zfs_slm_init(void); +extern int 
zfs_slm_init(const char *); extern void zfs_slm_fini(void); extern void zfs_slm_event(const char *, const char *, nvlist_t *); diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index b2c008ad1d0e..daae1922a5a3 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -24,6 +24,7 @@ * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2016, 2017, Intel Corporation. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. + * Copyright (c) 2023, Klara Inc. */ /* @@ -91,6 +92,9 @@ #define DEV_BYPATH_PATH "/dev/disk/by-path/" #define DEV_BYVDEV_PATH "/dev/disk/by-vdev/" +#define ZED_CONFIG_FILE "zed.rc" +#define ZED_DISK_LABEL_WAIT "ZED_DISK_LABEL_WAIT_TIME" + typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t); libzfs_handle_t *g_zfshdl; @@ -100,6 +104,12 @@ tpool_t *g_tpool; boolean_t g_enumeration_done; pthread_t g_zfs_tid; /* zfs_enum_pools() thread */ +/* + * The maximum time to wait in milliseconds for the udev layer to set up + * the device link after an auto-replaced disk has been partitioned.
+ */ +uint_t zed_label_wait_max = 3000; + typedef struct unavailpool { zpool_handle_t *uap_zhp; list_node_t uap_node; @@ -436,7 +446,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) */ if (zpool_prepare_and_label_disk(g_zfshdl, zhp, leafname, vdev, "autoreplace", &lines, &lines_cnt) != 0) { - zed_log_msg(LOG_INFO, + zed_log_msg(LOG_WARNING, " zpool_prepare_and_label_disk: could not " "label '%s' (%s)", leafname, libzfs_error_description(g_zfshdl)); @@ -468,7 +478,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) sizeof (device->pd_physpath)); list_insert_tail(&g_device_list, device); - zed_log_msg(LOG_INFO, " zpool_label_disk: async '%s' (%llu)", + zed_log_msg(LOG_NOTICE, " zpool_label_disk: async '%s' (%llu)", leafname, (u_longlong_t)guid); return; /* resumes at EC_DEV_ADD.ESC_DISK for partition */ @@ -545,9 +555,10 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) * Wait for udev to verify the links exist, then auto-replace * the leaf disk at same physical location. */ - if (zpool_label_disk_wait(path, 3000) != 0) { - zed_log_msg(LOG_WARNING, "zfs_mod: expected replacement " - "disk %s is missing", path); + if (zpool_label_disk_wait(path, zed_label_wait_max) != 0) { + zed_log_msg(LOG_WARNING, "zfs_mod: after labeling replacement " + "disk, the expected disk partition link '%s' is missing " + "after waiting %u ms", path, zed_label_wait_max); nvlist_free(nvroot); return; } @@ -562,7 +573,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled) B_TRUE, B_FALSE); } - zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)", + zed_log_msg(LOG_WARNING, " zpool_vdev_replace: %s with %s (%s)", fullpath, path, (ret == 0) ? 
"no errors" :
 	    libzfs_error_description(g_zfshdl));
 
@@ -1279,6 +1290,58 @@ zfs_enum_pools(void *arg)
 	return (NULL);
 }
 
+#define	CONFIG_LINE_MAX		100
+
+/*
+ * Look up an unsigned integer tunable in the zed.rc file found in
+ * zedlet_dir.
+ *
+ * Lines starting with '#' and empty lines are skipped; the first remaining
+ * line of the form NAME=<unsigned> whose NAME equals 'tuneable' supplies
+ * the value.  Returns 0 and stores the value in *wait_time on success, or
+ * ENOENT if the file cannot be opened or holds no matching setting (in
+ * which case *wait_time is left untouched).
+ */
+static int
+zed_lookup_tunable(const char *zedlet_dir, const char *tuneable,
+    unsigned int *wait_time)
+{
+	char path[MAXPATHLEN];
+	char linebuf[CONFIG_LINE_MAX];
+	char name[50];
+	unsigned int value;
+	int error = ENOENT;
+
+	/* bounded copy: zedlet_dir comes from the caller */
+	(void) snprintf(path, sizeof (path), "%s/%s", zedlet_dir,
+	    ZED_CONFIG_FILE);
+	FILE *file = fopen(path, "r");
+
+	if (file == NULL) {
+		zed_log_msg(LOG_WARNING, "can't open config file '%s'", path);
+		return (ENOENT);
+	}
+
+	/* parse zed.rc for wait time */
+	while (fgets(linebuf, sizeof (linebuf), file) != NULL) {
+		if (linebuf[0] == '#' || linebuf[0] == '\n')
+			continue;
+
+		if (sscanf(linebuf, "%49[A-Z0-9_] = %u ", name, &value) == 2) {
+			if (strcmp(tuneable, name) == 0) {
+				*wait_time = value;
+				error = 0;
+				break;
+			}
+		}
+	}
+
+	/* release the stream on both the found and not-found paths */
+	(void) fclose(file);
+
+	return (error);
+}
+
 /*
  * called from zed daemon at startup
  *
@@ -1287,7 +1350,7 @@ zfs_enum_pools(void *arg)
  * For now, each agent has its own libzfs instance
  */
 int
-zfs_slm_init(void)
+zfs_slm_init(const char *zedlet_dir)
 {
 	if ((g_zfshdl = libzfs_init()) == NULL)
 		return (-1);
@@ -1309,6 +1372,15 @@ zfs_slm_init(void)
 	list_create(&g_device_list, sizeof (struct pendingdev),
 	    offsetof(struct pendingdev, pd_node));
 
+	unsigned int wait_time;
+
+	if (zed_lookup_tunable(zedlet_dir, ZED_DISK_LABEL_WAIT,
+	    &wait_time) == 0) {
+		zed_log_msg(LOG_NOTICE, "setting zed_label_wait_max to %u ms",
+		    wait_time);
+		zed_label_wait_max = wait_time;
+	}
+
 	return (0);
 }
 
diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc
index 78dc1afc7b15..6f1f90f26df3 100644
--- a/cmd/zed/zed.d/zed.rc
+++ b/cmd/zed/zed.d/zed.rc
@@ -147,3 +147,11 @@ ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event"
 # help silence misbehaving drives. This assumes your drive enclosure fully
 # supports slot power control via sysfs.
 #ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT=1
+
+# During auto-replace of vdev device, the device is labeled
+# asynchronously on Linux.
Use ZED_DISK_LABEL_WAIT_TIME to +# change how long to wait for the partitioned device symlinks +# to show up before giving up. The time is in milliseconds and +# defaults to 3000 if not specified here. +# +#ZED_DISK_LABEL_WAIT_TIME=3000 diff --git a/cmd/zed/zed_event.c b/cmd/zed/zed_event.c index c60d5a4bc22e..bf0fbfd04f0f 100644 --- a/cmd/zed/zed_event.c +++ b/cmd/zed/zed_event.c @@ -64,7 +64,7 @@ zed_event_init(struct zed_conf *zcp) ZFS_DEV, strerror(errno)); } - zfs_agent_init(zcp->zfs_hdl); + zfs_agent_init(zcp->zfs_hdl, zcp->zedlet_dir); if (zed_disk_event_init() != 0) { if (zcp->do_idle) diff --git a/include/libzutil.h b/include/libzutil.h index 237ff976ba62..053b1ed4b52a 100644 --- a/include/libzutil.h +++ b/include/libzutil.h @@ -34,7 +34,7 @@ extern "C" { #endif /* - * Default wait time for a device name to be created. + * Default wait time in milliseconds for a device name to be created. */ #define DISK_LABEL_WAIT (30 * 1000) /* 30 seconds */ diff --git a/lib/libzutil/os/linux/zutil_import_os.c b/lib/libzutil/os/linux/zutil_import_os.c index 8b64369dc29f..44ed697dd490 100644 --- a/lib/libzutil/os/linux/zutil_import_os.c +++ b/lib/libzutil/os/linux/zutil_import_os.c @@ -582,9 +582,8 @@ zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen) * Wait up to timeout_ms for udev to set up the device node. The device is * considered ready when libudev determines it has been initialized, all of * the device links have been verified to exist, and it has been allowed to - * settle. At this point the device the device can be accessed reliably. - * Depending on the complexity of the udev rules this process could take - * several seconds. + * settle. At this point the device can be accessed reliably. Depending on + * the complexity of the udev rules this process could take several seconds. */ int zpool_label_disk_wait(const char *path, int timeout_ms)