Skip to content

Commit

Permalink
Add ZED tunable for disk label wait during auto-replace
Browse files Browse the repository at this point in the history
During an auto-replace of a VDEV device, the device is labeled
asynchronously, and ZED waits until the partition link appears
before proceeding with the zpool_vdev_attach().

Added a new ZED configuration tunable, ZED_DISK_LABEL_WAIT_TIME
which can override the previously hard-coded value of 3000 ms.

Sponsored-By: OpenDrives Inc.
Sponsored-By: Klara Inc.

Signed-off-by: Don Brady <[email protected]>
  • Loading branch information
don-brady committed Sep 28, 2023
1 parent ec99448 commit a49613d
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 16 deletions.
4 changes: 2 additions & 2 deletions cmd/zed/agents/zfs_agents.c
Original file line number Diff line number Diff line change
Expand Up @@ -388,13 +388,13 @@ zfs_agent_consumer_thread(void *arg)
}

void
zfs_agent_init(libzfs_handle_t *zfs_hdl)
zfs_agent_init(libzfs_handle_t *zfs_hdl, const char *zedlet_dir)
{
fmd_hdl_t *hdl;

g_zfs_hdl = zfs_hdl;

if (zfs_slm_init() != 0)
if (zfs_slm_init(zedlet_dir) != 0)
zed_log_die("Failed to initialize zfs slm");
zed_log_msg(LOG_INFO, "Add Agent: init");

Expand Down
4 changes: 2 additions & 2 deletions cmd/zed/agents/zfs_agents.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ extern "C" {
/*
* Agent abstraction presented to ZED
*/
extern void zfs_agent_init(libzfs_handle_t *);
extern void zfs_agent_init(libzfs_handle_t *, const char *);
extern void zfs_agent_fini(void);
extern void zfs_agent_post_event(const char *, const char *, nvlist_t *);

/*
* ZFS Sysevent Linkable Module (SLM)
*/
extern int zfs_slm_init(void);
extern int zfs_slm_init(const char *);
extern void zfs_slm_fini(void);
extern void zfs_slm_event(const char *, const char *, nvlist_t *);

Expand Down
69 changes: 62 additions & 7 deletions cmd/zed/agents/zfs_mod.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2016, 2017, Intel Corporation.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2023, Klara Inc.
*/

/*
Expand Down Expand Up @@ -91,6 +92,9 @@
#define DEV_BYPATH_PATH "/dev/disk/by-path/"
#define DEV_BYVDEV_PATH "/dev/disk/by-vdev/"

#define ZED_CONFIG_FILE "zed.rc"
#define ZED_DISK_LABEL_WAIT "ZED_DISK_LABEL_WAIT_TIME"

typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);

libzfs_handle_t *g_zfshdl;
Expand All @@ -100,6 +104,12 @@ tpool_t *g_tpool;
boolean_t g_enumeration_done;
pthread_t g_zfs_tid; /* zfs_enum_pools() thread */

/*
* The maximum time to wait in milliseconds for the udev layer to set up
* the device link after an auto-replaced disk has been partitioned.
*/
uint_t zed_label_wait_max = 3000;

typedef struct unavailpool {
zpool_handle_t *uap_zhp;
list_node_t uap_node;
Expand Down Expand Up @@ -436,7 +446,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
*/
if (zpool_prepare_and_label_disk(g_zfshdl, zhp, leafname,
vdev, "autoreplace", &lines, &lines_cnt) != 0) {
zed_log_msg(LOG_INFO,
zed_log_msg(LOG_WARNING,
" zpool_prepare_and_label_disk: could not "
"label '%s' (%s)", leafname,
libzfs_error_description(g_zfshdl));
Expand Down Expand Up @@ -468,7 +478,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
sizeof (device->pd_physpath));
list_insert_tail(&g_device_list, device);

zed_log_msg(LOG_INFO, " zpool_label_disk: async '%s' (%llu)",
zed_log_msg(LOG_NOTICE, " zpool_label_disk: async '%s' (%llu)",
leafname, (u_longlong_t)guid);

return; /* resumes at EC_DEV_ADD.ESC_DISK for partition */
Expand Down Expand Up @@ -545,9 +555,10 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
* Wait for udev to verify the links exist, then auto-replace
* the leaf disk at same physical location.
*/
if (zpool_label_disk_wait(path, 3000) != 0) {
zed_log_msg(LOG_WARNING, "zfs_mod: expected replacement "
"disk %s is missing", path);
if (zpool_label_disk_wait(path, zed_label_wait_max) != 0) {
zed_log_msg(LOG_WARNING, "zfs_mod: after labeling replacement "
"disk, the expected disk partition link '%s' is missing "
"after waiting %u ms", path, zed_label_wait_max);
nvlist_free(nvroot);
return;
}
Expand All @@ -562,7 +573,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
B_TRUE, B_FALSE);
}

zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)",
zed_log_msg(LOG_WARNING, " zpool_vdev_replace: %s with %s (%s)",
fullpath, path, (ret == 0) ? "no errors" :
libzfs_error_description(g_zfshdl));

Expand Down Expand Up @@ -1279,6 +1290,41 @@ zfs_enum_pools(void *arg)
return (NULL);
}

#define CONFIG_LINE_MAX 100

/*
 * Look up an unsigned integer tunable in the ZED config file (zed.rc)
 * located in zedlet_dir.
 *
 * The file is read line by line. Lines whose first character is '#' or a
 * newline are skipped. A line matches when it has the form
 * "NAME = <unsigned>" (whitespace around '=' is optional), where NAME is
 * made of the characters A-Z, 0-9 and '_' (at most 49 of them) and is equal
 * to 'tuneable'. The first matching line wins.
 *
 * Returns 0 and stores the value in *wait_time on success. Returns ENOENT
 * if the config file cannot be opened or does not set the tunable, and
 * ENAMETOOLONG if the config file path does not fit in MAXPATHLEN.
 * *wait_time is left untouched on failure.
 */
static int
zed_lookup_tunable(const char *zedlet_dir, const char *tuneable,
    unsigned int *wait_time)
{
	char path[MAXPATHLEN];
	char linebuf[CONFIG_LINE_MAX];
	char name[50];
	unsigned int value;
	int error = ENOENT;
	int len;

	/* Bounded formatting: never write past the end of path[]. */
	len = snprintf(path, sizeof (path), "%s/%s", zedlet_dir,
	    ZED_CONFIG_FILE);
	if (len < 0 || (size_t)len >= sizeof (path)) {
		zed_log_msg(LOG_WARNING, "config file path in '%s' is too "
		    "long", zedlet_dir);
		return (ENAMETOOLONG);
	}

	FILE *file = fopen(path, "r");

	if (file == NULL) {
		zed_log_msg(LOG_WARNING, "can't open config file '%s'", path);
		return (ENOENT);
	}

	/* parse zed.rc for the requested tunable */
	while (fgets(linebuf, sizeof (linebuf), file) != NULL) {
		if (linebuf[0] == '#' || linebuf[0] == '\n')
			continue;

		/* name[] holds 49 characters plus the terminating NUL */
		if (sscanf(linebuf, "%49[A-Z0-9_] = %u ", name, &value) == 2 &&
		    strcmp(tuneable, name) == 0) {
			*wait_time = value;
			error = 0;
			break;
		}
	}

	/* Single exit so the stream is closed on both hit and miss. */
	(void) fclose(file);

	return (error);
}

/*
* called from zed daemon at startup
*
Expand All @@ -1287,7 +1333,7 @@ zfs_enum_pools(void *arg)
* For now, each agent has its own libzfs instance
*/
int
zfs_slm_init(void)
zfs_slm_init(const char *zedlet_dir)
{
if ((g_zfshdl = libzfs_init()) == NULL)
return (-1);
Expand All @@ -1309,6 +1355,15 @@ zfs_slm_init(void)
list_create(&g_device_list, sizeof (struct pendingdev),
offsetof(struct pendingdev, pd_node));

unsigned int wait_time;

if (zed_lookup_tunable(zedlet_dir, ZED_DISK_LABEL_WAIT,
&wait_time) == 0) {
zed_log_msg(LOG_NOTICE, "setting zed_label_wait_max to %u ms",
wait_time);
zed_label_wait_max = wait_time;
}

return (0);
}

Expand Down
8 changes: 8 additions & 0 deletions cmd/zed/zed.d/zed.rc
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,11 @@ ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event"
# help silence misbehaving drives. This assumes your drive enclosure fully
# supports slot power control via sysfs.
#ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT=1

# During auto-replace of a vdev device, the device is labeled
# asynchronously on Linux. Use ZED_DISK_LABEL_WAIT_TIME to
# change how long to wait for the partitioned device symlinks
# to show up before giving up. The time is in milliseconds and
# defaults to 3000 if not specified here.
#
#ZED_DISK_LABEL_WAIT_TIME=3000
2 changes: 1 addition & 1 deletion cmd/zed/zed_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ zed_event_init(struct zed_conf *zcp)
ZFS_DEV, strerror(errno));
}

zfs_agent_init(zcp->zfs_hdl);
zfs_agent_init(zcp->zfs_hdl, zcp->zedlet_dir);

if (zed_disk_event_init() != 0) {
if (zcp->do_idle)
Expand Down
2 changes: 1 addition & 1 deletion include/libzutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ extern "C" {
#endif

/*
* Default wait time for a device name to be created.
* Default wait time in milliseconds for a device name to be created.
*/
#define DISK_LABEL_WAIT (30 * 1000) /* 30 seconds */

Expand Down
5 changes: 2 additions & 3 deletions lib/libzutil/os/linux/zutil_import_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -582,9 +582,8 @@ zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
* Wait up to timeout_ms for udev to set up the device node. The device is
* considered ready when libudev determines it has been initialized, all of
* the device links have been verified to exist, and it has been allowed to
* settle. At this point the device the device can be accessed reliably.
* Depending on the complexity of the udev rules this process could take
* several seconds.
* settle. At this point the device can be accessed reliably. Depending on
* the complexity of the udev rules this process could take several seconds.
*/
int
zpool_label_disk_wait(const char *path, int timeout_ms)
Expand Down

0 comments on commit a49613d

Please sign in to comment.