From 3bd1e76265e7655e601372e51bc2413198378122 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Wed, 24 Oct 2018 12:27:04 -0400 Subject: [PATCH] OS-4718 ZFS actively hostile to 512e drive replacements --- usr/src/uts/common/fs/zfs/spa.c | 10 +++++++--- usr/src/uts/common/fs/zfs/sys/vdev_impl.h | 1 + usr/src/uts/common/fs/zfs/vdev_disk.c | 22 +++++++++++++++++++++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 599ae9faded9..ffc1363cedab 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -5733,11 +5733,15 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); /* - * The new device cannot have a higher alignment requirement - * than the top-level vdev. + * The new device cannot have a higher alignment requirement than the + * top-level vdev. If this is an Advanced Format (e.g. 512e) disk, we + * also need to check the fallback logical ashift value. */ - if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) + if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift && + (newvd->vdev_ashift_af == 0 || + newvd->vdev_ashift_af > oldvd->vdev_top->vdev_ashift)) { return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); + } /* * If this is an in-place replacement, update oldvd's path and devid diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index 71753cf24f7d..76acba5b3211 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -201,6 +201,7 @@ struct vdev { uint64_t vdev_min_asize; /* min acceptable asize */ uint64_t vdev_max_asize; /* max acceptable asize */ uint64_t vdev_ashift; /* block alignment shift */ + uint64_t vdev_ashift_af; /* adv. format fallback shift */ uint64_t vdev_state; /* see VDEV_STATE_* #defines */ uint64_t vdev_prevstate; /* used when reopening a vdev */ vdev_ops_t *vdev_ops; /* vdev operations */ diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c index e4b86b419b9d..b7bbc48aff40 100644 --- a/usr/src/uts/common/fs/zfs/vdev_disk.c +++ b/usr/src/uts/common/fs/zfs/vdev_disk.c @@ -530,11 +530,31 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, VDEV_DEBUG("vdev_disk_open(\"%s\"): " "both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n", vd->vdev_path, error); - pbsize = DEV_BSIZE; + blksz = pbsize = DEV_BSIZE; } *ashift = highbit64(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1; + /* + * Advanced Format (512e) disks have a 4KB physical sector size, but + * also report a 512 byte logical sector size (through emulation in the + * firmware) to better support legacy operating systems. While we + * generally wish to create new pools with a 4KB block size, we also + * need to allow people to use AF disks in their existing 512 byte + * pools, even if not completely optimal. + */ + if (blksz != 0 && blksz < pbsize) { + /* + * The logical block size is smaller than the reported physical + * block size. Record the logical ashift so that + * spa_vdev_attach() can use it as a fallback. + */ + vd->vdev_ashift_af = highbit64(MAX(blksz, + SPA_MINBLOCKSIZE)) - 1; + } else { + vd->vdev_ashift_af = 0; + } + if (vd->vdev_wholedisk == 1) { int wce = 1;