From 9bb6c76ad1b9f1de9595b820cac657d32edfcb2d Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Thu, 2 Jun 2016 10:15:47 +0900 Subject: [PATCH] 8727 Native data and metadata encryption for zfs This change incorporates three major pieces: The first change is a keystore that manages wrapping and encryption keys for encrypted datasets. These commands mostly involve manipulating the new DSL Crypto Key ZAP Objects that live in the MOS. Each encrypted dataset has its own DSL Crypto Key that is protected with a user's key. This level of indirection allows users to change their keys without re-encrypting their entire datasets. The change implements the new subcommands "zfs load-key", "zfs unload-key" and "zfs change-key" which allow the user to manage their encryption keys and settings. In addition, several new flags and properties have been added to allow dataset creation and to make mounting and unmounting more convenient. The second piece of this patch provides the ability to encrypt, decrypt, and authenticate protected datasets. Each object set maintains a Merkle tree of Message Authentication Codes that protect the lower layers, similarly to how checksums are maintained. This part impacts the zio layer, which handles the actual encryption and generation of MACs, as well as the ARC and DMU, which need to be able to handle encrypted buffers and protected data. The last addition is the ability to do raw, encrypted sends and receives. The idea here is to send raw encrypted and compressed data and receive it exactly as is on a backup system. This means that the dataset on the receiving system is protected using the same user key that is in use on the sending side. By doing so, datasets can be efficiently backed up to an untrusted system without fear of data being compromised. 
Reviewed by: Matthew Ahrens Reviewed-by: Brian Behlendorf Reviewed-by: Jorgen Lundman Signed-off-by: Tom Caputi Send / Recv Fixes following b52563 This patch fixes several issues discovered after the encryption patch was merged: Fixed a bug where encrypted datasets could attempt to receive embedded data records. Fixed a bug where dirty records created by the recv code weren't properly setting the dr_raw flag. Fixed a typo where a dmu_tx_commit() was changed to dmu_tx_abort(). Fixed a few error handling bugs unrelated to the encryption patch in dmu_recv_stream(). Signed-off-by: Tom Caputi Encryption patch follow-up * HKDF implementation moved to its own file and tests added to ensure correctness. * Ztest can now create and test encrypted datasets. This is currently disabled until issue ZOL #6526 is resolved, but otherwise functions as advertised. * Several small bug fixes discovered after enabling ztest to run on encrypted datasets. * Fixed Coverity defects added by the encryption patch. * Updated man pages for encrypted send / receive behavior. * Fixed a bug where encrypted datasets could receive DRR_WRITE_EMBEDDED records. * Minor code cleanups / consolidation. Disable crypto tests in ztest * Includes fix in dmu_free_long_object_impl Unless permission is given to compile the crypto framework in userland the crypto tests in ztest are disabled on illumos. 
--- usr/src/cmd/mdb/common/modules/zfs/zfs.c | 21 +- usr/src/cmd/mdb/intel/amd64/libzpool/Makefile | 3 +- usr/src/cmd/mdb/intel/amd64/zfs/Makefile | 1 + usr/src/cmd/mdb/intel/ia32/libzpool/Makefile | 3 +- usr/src/cmd/mdb/intel/ia32/zfs/Makefile | 1 + usr/src/cmd/mdb/sparc/v7/libzpool/Makefile | 3 +- usr/src/cmd/mdb/sparc/v9/libzpool/Makefile | 3 +- usr/src/cmd/zdb/Makefile.com | 3 + usr/src/cmd/zdb/zdb.c | 74 +- usr/src/cmd/zdb/zdb_il.c | 11 +- usr/src/cmd/zfs/zfs_main.c | 281 +- usr/src/cmd/zinject/translate.c | 8 +- usr/src/cmd/zoneadm/Makefile | 2 + usr/src/cmd/zpool/zpool_main.c | 64 +- usr/src/cmd/zstreamdump/zstreamdump.c | 114 +- usr/src/cmd/ztest/ztest.c | 175 +- usr/src/common/zfs/zfeature_common.c | 10 + usr/src/common/zfs/zfeature_common.h | 1 + usr/src/common/zfs/zfs_deleg.c | 2 + usr/src/common/zfs/zfs_deleg.h | 2 + usr/src/common/zfs/zfs_prop.c | 97 +- usr/src/common/zfs/zfs_prop.h | 7 +- usr/src/lib/libuutil/common/libuutil.h | 4 +- usr/src/lib/libzfs/Makefile.com | 10 +- usr/src/lib/libzfs/common/libzfs.h | 22 +- usr/src/lib/libzfs/common/libzfs_changelist.c | 6 +- usr/src/lib/libzfs/common/libzfs_crypto.c | 1533 ++++++++++ usr/src/lib/libzfs/common/libzfs_dataset.c | 134 +- usr/src/lib/libzfs/common/libzfs_diff.c | 5 + usr/src/lib/libzfs/common/libzfs_mount.c | 50 + usr/src/lib/libzfs/common/libzfs_pool.c | 28 +- usr/src/lib/libzfs/common/libzfs_sendrecv.c | 528 +++- usr/src/lib/libzfs/common/libzfs_util.c | 2 + usr/src/lib/libzfs/common/mapfile-vers | 5 + usr/src/lib/libzfs_core/common/libzfs_core.c | 115 +- usr/src/lib/libzfs_core/common/libzfs_core.h | 16 +- usr/src/lib/libzfs_core/common/mapfile-vers | 3 + usr/src/lib/libzpool/Makefile.com | 6 + usr/src/lib/libzpool/common/kernel.c | 89 + usr/src/man/man1m/zfs.1m | 380 ++- usr/src/man/man1m/zpool.1m | 40 +- usr/src/man/man5/zpool-features.5 | 22 + usr/src/pkg/manifests/system-test-zfstest.mf | 131 + usr/src/test/zfs-tests/runfiles/delphix.run | 27 +- usr/src/test/zfs-tests/runfiles/omnios.run | 
42 +- .../cli_root/zfs_change-key/Makefile | 21 + .../cli_root/zfs_change-key/cleanup.ksh | 30 + .../cli_root/zfs_change-key/setup.ksh | 32 + .../zfs_change-key/zfs_change-key.ksh | 62 + .../zfs_change-key/zfs_change-key_child.ksh | 86 + .../zfs_change-key/zfs_change-key_format.ksh | 71 + .../zfs_change-key/zfs_change-key_inherit.ksh | 78 + .../zfs_change-key/zfs_change-key_load.ksh | 58 + .../zfs_change-key_location.ksh | 65 + .../zfs_change-key_pbkdf2iters.ksh | 75 + .../zfs_clone/zfs_clone_encrypted.ksh | 83 + .../zfs_create/zfs_create_014_pos.ksh | 59 + .../zfs_create/zfs_create_crypt_combos.ksh | 98 + .../zfs_create/zfs_create_encrypted.ksh | 134 + .../functional/cli_root/zfs_load-key/Makefile | 21 + .../cli_root/zfs_load-key/cleanup.ksh | 30 + .../cli_root/zfs_load-key/setup.ksh | 32 + .../cli_root/zfs_load-key/zfs_load-key.cfg | 26 + .../cli_root/zfs_load-key/zfs_load-key.ksh | 85 + .../zfs_load-key/zfs_load-key_all.ksh | 77 + .../zfs_load-key/zfs_load-key_common.kshlib | 102 + .../zfs_load-key/zfs_load-key_file.ksh | 58 + .../zfs_load-key/zfs_load-key_location.ksh | 73 + .../zfs_load-key/zfs_load-key_noop.ksh | 54 + .../zfs_load-key/zfs_load-key_recursive.ksh | 66 + .../zfs_mount/zfs_mount_encrypted.ksh | 62 + .../zfs_promote_encryptionroot.ksh | 80 + .../functional/cli_root/zfs_receive/setup.ksh | 1 + .../zfs_receive_from_encrypted.ksh | 83 + .../cli_root/zfs_receive/zfs_receive_raw.ksh | 93 + .../zfs_receive_raw_incremental.ksh | 75 + .../zfs_receive/zfs_receive_to_encrypted.ksh | 75 + .../zfs_rename/zfs_rename_encrypted_child.ksh | 78 + .../zfs_rename/zfs_rename_to_encrypted.ksh | 51 + .../cli_root/zfs_send/zfs_send_encrypted.ksh | 76 + .../zfs_send/zfs_send_encrypted_unloaded.ksh | 59 + .../cli_root/zfs_send/zfs_send_raw.ksh | 79 + .../functional/cli_root/zfs_set/setup.ksh | 1 + .../cli_root/zfs_set/zfs_set_keylocation.ksh | 93 + .../cli_root/zfs_unload-key/Makefile | 21 + .../cli_root/zfs_unload-key/cleanup.ksh | 30 + 
.../cli_root/zfs_unload-key/setup.ksh | 32 + .../zfs_unload-key/zfs_unload-key.ksh | 69 + .../zfs_unload-key/zfs_unload-key_all.ksh | 76 + .../zfs_unload-key_recursive.ksh | 72 + .../zpool_create/zpool_create_005_pos.ksh | 0 .../zpool_create/zpool_create_024_pos.ksh | 111 + .../zpool_create_crypt_combos.ksh | 89 + .../zpool_create/zpool_create_encrypted.ksh | 95 + .../cli_root/zpool_get/zpool_get.cfg | 1 + .../zpool_import/zpool_import_encrypted.ksh | 59 + .../zpool_import_encrypted_load.ksh | 59 + .../zpool_scrub_encrypted_unloaded.ksh | 71 + .../tests/functional/rsend/rsend.kshlib | 2 +- .../rsend/send_encrypted_heirarchy.ksh | 96 + usr/src/uts/common/Makefile.files | 3 + usr/src/uts/common/crypto/core/kcf_prov_lib.c | 4 +- usr/src/uts/common/fs/zfs/abd.c | 24 +- usr/src/uts/common/fs/zfs/arc.c | 1715 +++++++++-- usr/src/uts/common/fs/zfs/bptree.c | 3 +- usr/src/uts/common/fs/zfs/dbuf.c | 241 +- usr/src/uts/common/fs/zfs/ddt.c | 23 +- usr/src/uts/common/fs/zfs/dmu.c | 371 ++- usr/src/uts/common/fs/zfs/dmu_objset.c | 303 +- usr/src/uts/common/fs/zfs/dmu_send.c | 914 ++++-- usr/src/uts/common/fs/zfs/dmu_traverse.c | 46 +- usr/src/uts/common/fs/zfs/dnode.c | 111 +- usr/src/uts/common/fs/zfs/dnode_sync.c | 15 +- usr/src/uts/common/fs/zfs/dsl_crypt.c | 2640 +++++++++++++++++ usr/src/uts/common/fs/zfs/dsl_dataset.c | 131 +- usr/src/uts/common/fs/zfs/dsl_destroy.c | 14 +- usr/src/uts/common/fs/zfs/dsl_dir.c | 55 +- usr/src/uts/common/fs/zfs/dsl_pool.c | 19 +- usr/src/uts/common/fs/zfs/dsl_prop.c | 3 +- usr/src/uts/common/fs/zfs/dsl_scan.c | 17 +- usr/src/uts/common/fs/zfs/hkdf.c | 173 ++ usr/src/uts/common/fs/zfs/metaslab.c | 3 +- usr/src/uts/common/fs/zfs/spa.c | 82 +- usr/src/uts/common/fs/zfs/spa_config.c | 2 +- usr/src/uts/common/fs/zfs/spa_errlog.c | 3 +- usr/src/uts/common/fs/zfs/spa_history.c | 7 +- usr/src/uts/common/fs/zfs/spa_misc.c | 12 +- usr/src/uts/common/fs/zfs/sys/abd.h | 1 + usr/src/uts/common/fs/zfs/sys/arc.h | 69 +- 
usr/src/uts/common/fs/zfs/sys/dbuf.h | 2 + usr/src/uts/common/fs/zfs/sys/ddt.h | 15 +- usr/src/uts/common/fs/zfs/sys/dmu.h | 93 +- usr/src/uts/common/fs/zfs/sys/dmu_objset.h | 29 +- usr/src/uts/common/fs/zfs/sys/dmu_send.h | 8 +- usr/src/uts/common/fs/zfs/sys/dmu_traverse.h | 9 + usr/src/uts/common/fs/zfs/sys/dnode.h | 27 +- usr/src/uts/common/fs/zfs/sys/dsl_crypt.h | 218 ++ usr/src/uts/common/fs/zfs/sys/dsl_dataset.h | 27 +- usr/src/uts/common/fs/zfs/sys/dsl_deleg.h | 2 + usr/src/uts/common/fs/zfs/sys/dsl_dir.h | 3 + usr/src/uts/common/fs/zfs/sys/dsl_pool.h | 4 +- usr/src/uts/common/fs/zfs/sys/hkdf.h | 29 + usr/src/uts/common/fs/zfs/sys/spa.h | 162 +- usr/src/uts/common/fs/zfs/sys/spa_impl.h | 2 + usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h | 57 +- usr/src/uts/common/fs/zfs/sys/zil.h | 4 +- usr/src/uts/common/fs/zfs/sys/zio.h | 61 +- usr/src/uts/common/fs/zfs/sys/zio_checksum.h | 2 +- usr/src/uts/common/fs/zfs/sys/zio_crypt.h | 146 + usr/src/uts/common/fs/zfs/sys/zio_impl.h | 52 +- usr/src/uts/common/fs/zfs/vdev.c | 5 +- usr/src/uts/common/fs/zfs/vdev_raidz.c | 178 +- usr/src/uts/common/fs/zfs/zfeature.c | 4 +- usr/src/uts/common/fs/zfs/zfs_fm.c | 79 +- usr/src/uts/common/fs/zfs/zfs_ioctl.c | 271 +- usr/src/uts/common/fs/zfs/zfs_vfsops.c | 11 +- usr/src/uts/common/fs/zfs/zfs_vnops.c | 4 +- usr/src/uts/common/fs/zfs/zil.c | 99 +- usr/src/uts/common/fs/zfs/zio.c | 384 ++- usr/src/uts/common/fs/zfs/zio_checksum.c | 146 +- usr/src/uts/common/fs/zfs/zio_crypt.c | 1889 ++++++++++++ usr/src/uts/common/fs/zfs/zvol.c | 27 +- usr/src/uts/common/sys/fm/fs/zfs.h | 1 + usr/src/uts/common/sys/fs/zfs.h | 44 + usr/src/uts/common/sys/mount.h | 6 + 165 files changed, 17172 insertions(+), 1381 deletions(-) create mode 100644 usr/src/lib/libzfs/common/libzfs_crypto.c create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh create mode 100644 
usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib create mode 100644 
usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh create mode 100644 
usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh mode change 100755 => 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_024_pos.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_heirarchy.ksh create mode 100644 usr/src/uts/common/fs/zfs/dsl_crypt.c create mode 100644 usr/src/uts/common/fs/zfs/hkdf.c create mode 100644 usr/src/uts/common/fs/zfs/sys/dsl_crypt.h create mode 100644 usr/src/uts/common/fs/zfs/sys/hkdf.h create mode 100644 usr/src/uts/common/fs/zfs/sys/zio_crypt.h create mode 100644 usr/src/uts/common/fs/zfs/zio_crypt.c diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c index 10a2f5a4f7c1..6497cfd7b3e0 100644 --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c @@ -66,6 +66,10 @@ enum spa_flags { SPA_FLAG_HISTOGRAMS = 1 << 5 }; + +const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {0}; + + /* * If any of these flags are set, call spa_vdevs in spa_print */ @@ -424,7 +428,7 @@ zfs_params(uintptr_t addr, 
uint_t flags, int argc, const mdb_arg_t *argv) static int blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { - char type[80], checksum[80], compress[80]; + char type[80], checksum[80], compress[80], *crypt_type; blkptr_t blk, *bp = &blk; char buf[BP_SPRINTF_LEN]; @@ -443,8 +447,19 @@ blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) return (DCMD_ERR); } + if (BP_IS_ENCRYPTED(bp)) { + crypt_type = "encrypted"; + /* LINTED E_SUSPICIOUS_COMPARISON */ + } else if (BP_IS_AUTHENTICATED(bp)) { + crypt_type = "authenticated"; + } else if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) { + crypt_type = "indirect-MAC"; + } else { + crypt_type = "unencrypted"; + } + SNPRINTF_BLKPTR(mdb_snprintf, '\n', buf, sizeof (buf), bp, type, - checksum, compress); + checksum, crypt_type, compress); mdb_printf("%s\n", buf); @@ -2672,8 +2687,6 @@ zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) dmu_object_type_t t; zfs_blkstat_t *tzb; uint64_t ditto; - dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES + 10]; - /* +10 in case it grew */ if (mdb_readvar(&dmu_ot, "dmu_ot") == -1) { mdb_warn("failed to read 'dmu_ot'"); diff --git a/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile b/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile index 8947c877c511..62c467c646a7 100644 --- a/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile +++ b/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile @@ -39,7 +39,8 @@ MODSRCS_DIR = ../../../common/modules/zfs GENUNIX_DIR = ../../../common/modules/genunix CPPFLAGS += -I../../../../../lib/libzpool/common \ - -I../../../../../uts/common/fs/zfs + -I../../../../../uts/common/fs/zfs \ + -I../../../../../common/zfs C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/intel/amd64/zfs/Makefile b/usr/src/cmd/mdb/intel/amd64/zfs/Makefile index 870f91ad99e3..a3bad1603a56 100644 --- a/usr/src/cmd/mdb/intel/amd64/zfs/Makefile +++ b/usr/src/cmd/mdb/intel/amd64/zfs/Makefile @@ -39,6 +39,7 @@ include 
../../../common/modules/zfs/Makefile.zfs CPPFLAGS += -I../../../../../uts/common/fs/zfs CPPFLAGS += -I../../../../../uts/common/fs/zfs/lua +CPPFLAGS += -I../../../../../common/zfs C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile b/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile index 68416f725b1f..6f8968329ee0 100644 --- a/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile +++ b/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile @@ -41,7 +41,8 @@ GENUNIX_DIR = ../../../common/modules/genunix CPPFLAGS += -I../../../../../lib/libzpool/common \ -I../../../../../uts/common/fs/zfs \ - -I../../../../../uts/common/fs/zfs/lua + -I../../../../../uts/common/fs/zfs/lua \ + -I../../../../../common/zfs C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/intel/ia32/zfs/Makefile b/usr/src/cmd/mdb/intel/ia32/zfs/Makefile index d1f1efa74913..6cd144a50c91 100644 --- a/usr/src/cmd/mdb/intel/ia32/zfs/Makefile +++ b/usr/src/cmd/mdb/intel/ia32/zfs/Makefile @@ -38,6 +38,7 @@ include ../../../common/modules/zfs/Makefile.zfs CPPFLAGS += -I../../../../../uts/common/fs/zfs CPPFLAGS += -I../../../../../uts/common/fs/zfs/lua +CPPFLAGS += -I../../../../../common/zfs C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile b/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile index b00ff3c0bc51..c1f3f34a84ce 100644 --- a/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile +++ b/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile @@ -41,7 +41,8 @@ GENUNIX_DIR = ../../../common/modules/genunix CPPFLAGS += -I../../../../../lib/libzpool/common \ -I../../../../../uts/common/fs/zfs \ - -I../../../../../uts/common/fs/zfs/lua + -I../../../../../uts/common/fs/zfs/lua \ + -I../../../../../common/zfs C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile b/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile index 5c3b38b76007..309da41472c5 100644 --- a/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile +++ 
b/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile @@ -42,7 +42,8 @@ GENUNIX_DIR = ../../../common/modules/genunix CPPFLAGS += -I../../../../../lib/libzpool/common \ -I../../../../../uts/common/fs/zfs \ - -I../../../../../uts/common/fs/zfs/lua + -I../../../../../uts/common/fs/zfs/lua \ + -I../../../../../common/zfs C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/zdb/Makefile.com b/usr/src/cmd/zdb/Makefile.com index 8459f5203224..be54d5f5aa42 100644 --- a/usr/src/cmd/zdb/Makefile.com +++ b/usr/src/cmd/zdb/Makefile.com @@ -62,6 +62,9 @@ LINTFLAGS64 += -xerroff=E_NAME_DEF_NOT_USED2 LINTFLAGS += -erroff=E_STATIC_UNUSED LINTFLAGS64 += -erroff=E_STATIC_UNUSED +LINTFLAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTFLAGS64 += -erroff=E_BAD_PTR_CAST_ALIGN + .KEEP_STATE: all: $(PROG) diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index ffd6d4298fcd..8a344d9ca3a3 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #undef verify @@ -1200,7 +1201,7 @@ static void snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) { const dva_t *dva = bp->blk_dva; - int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1; + unsigned int ndvas = dump_opt['d'] > 5 ? 
BP_GET_NDVAS(bp) : 1; if (dump_opt['b'] >= 6) { snprintf_blkptr(blkbuf, buflen, bp); @@ -1218,7 +1219,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) } blkbuf[0] = '\0'; - for (int i = 0; i < ndvas; i++) + for (unsigned int i = 0; i < ndvas; i++) (void) snprintf(blkbuf + strlen(blkbuf), buflen - strlen(blkbuf), "%llu:%llx:%llx ", (u_longlong_t)DVA_GET_VDEV(&dva[i]), @@ -1641,14 +1642,14 @@ open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp) uint64_t version = 0; VERIFY3P(sa_os, ==, NULL); - err = dmu_objset_own(path, type, B_TRUE, tag, osp); + err = dmu_objset_own(path, type, B_TRUE, B_FALSE, tag, osp); if (err != 0) { (void) fprintf(stderr, "failed to own dataset '%s': %s\n", path, strerror(err)); return (err); } - if (dmu_objset_type(*osp) == DMU_OST_ZFS) { + if (dmu_objset_type(*osp) == DMU_OST_ZFS && !(*osp)->os_encrypted) { (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &version); if (version >= ZPL_VERSION_SA) { @@ -1660,7 +1661,7 @@ open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp) if (err != 0) { (void) fprintf(stderr, "sa_setup failed: %s\n", strerror(err)); - dmu_objset_disown(*osp, tag); + dmu_objset_disown(*osp, B_FALSE, tag); *osp = NULL; } } @@ -1675,7 +1676,7 @@ close_objset(objset_t *os, void *tag) VERIFY3P(os, ==, sa_os); if (os->os_sa != NULL) sa_tear_down(os); - dmu_objset_disown(os, tag); + dmu_objset_disown(os, B_FALSE, tag); sa_attr_table = NULL; sa_os = NULL; } @@ -1828,6 +1829,7 @@ dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) { } + static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = { dump_none, /* unallocated */ dump_zap, /* object directory */ @@ -1892,6 +1894,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) dmu_buf_t *db = NULL; dmu_object_info_t doi; dnode_t *dn; + boolean_t dnode_held = B_FALSE; void *bonus = NULL; size_t bsize = 0; char iblk[32], dblk[32], 
lsize[32], asize[32], fill[32]; @@ -1915,16 +1918,33 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) if (object == 0) { dn = DMU_META_DNODE(os); + dmu_object_info_from_dnode(dn, &doi); } else { - error = dmu_bonus_hold(os, object, FTAG, &db); + /* + * Encrypted datasets will have sensitive bonus buffers + * encrypted. Therefore we cannot hold the bonus buffer and + * must hold the dnode itself instead. + */ + error = dmu_object_info(os, object, &doi); if (error) - fatal("dmu_bonus_hold(%llu) failed, errno %u", - object, error); - bonus = db->db_data; - bsize = db->db_size; - dn = DB_DNODE((dmu_buf_impl_t *)db); + fatal("dmu_object_info() failed, errno %u", error); + + if (os->os_encrypted && + DMU_OT_IS_ENCRYPTED(doi.doi_bonus_type)) { + error = dnode_hold(os, object, FTAG, &dn); + if (error) + fatal("dnode_hold() failed, errno %u", error); + dnode_held = B_TRUE; + } else { + error = dmu_bonus_hold(os, object, FTAG, &db); + if (error) + fatal("dmu_bonus_hold(%llu) failed, errno %u", + object, error); + bonus = db->db_data; + bsize = db->db_size; + dn = DB_DNODE((dmu_buf_impl_t *)db); + } } - dmu_object_info_from_dnode(dn, &doi); zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk)); zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk)); @@ -1968,9 +1988,20 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) (void) printf("\tdnode maxblkid: %llu\n", (longlong_t)dn->dn_phys->dn_maxblkid); - object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object, - bonus, bsize); - object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0); + if (!dnode_held) { + object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, + object, bonus, bsize); + } else { + (void) printf("\t\t(bonus encrypted)\n"); + } + + if (!os->os_encrypted || !DMU_OT_IS_ENCRYPTED(doi.doi_type)) { + object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, + NULL, 0); + } else { + (void) printf("\t\t(object encrypted)\n"); + } + *print_header = 1; } 
@@ -2014,6 +2045,8 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) if (db != NULL) dmu_buf_rele(db, FTAG); + if (dnode_held) + dnode_rele(dn, FTAG); } static const char *objset_types[DMU_OST_NUMTYPES] = { @@ -2321,7 +2354,7 @@ dump_path(char *ds, char *path) if (err != 0) { (void) fprintf(stderr, "can't lookup root znode: %s\n", strerror(err)); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (EINVAL); } @@ -2898,7 +2931,8 @@ dump_block_stats(spa_t *spa) zdb_cb_t zcb; zdb_blkstats_t *zb, *tzb; uint64_t norm_alloc, norm_space, total_alloc, total_found; - int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; + int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | + TRAVERSE_NO_DECRYPT | TRAVERSE_HARD; boolean_t leaks = B_FALSE; bzero(&zcb, sizeof (zcb)); @@ -3213,8 +3247,8 @@ dump_simulated_ddt(spa_t *spa) spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, - zdb_ddt_add_cb, &t); + (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | + TRAVERSE_NO_DECRYPT, zdb_ddt_add_cb, &t); spa_config_exit(spa, SCL_CONFIG, FTAG); diff --git a/usr/src/cmd/zdb/zdb_il.c b/usr/src/cmd/zdb/zdb_il.c index 680179fc3b7b..87b25854c643 100644 --- a/usr/src/cmd/zdb/zdb_il.c +++ b/usr/src/cmd/zdb/zdb_il.c @@ -322,8 +322,13 @@ print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg) (u_longlong_t)lr->lrc_txg, (u_longlong_t)lr->lrc_seq); - if (txtype && verbose >= 3) - zil_rec_info[txtype].zri_print(zilog, txtype, lr); + if (txtype && verbose >= 3) { + if (!zilog->zl_os->os_encrypted) { + zil_rec_info[txtype].zri_print(zilog, txtype, lr); + } else { + (void) printf("%s(encrypted)\n", tab_prefix); + } + } zil_rec_info[txtype].zri_count++; zil_rec_info[0].zri_count++; @@ -410,7 +415,7 @@ dump_intent_log(zilog_t *zilog) if (verbose >= 2) { (void) printf("\n"); (void) zil_parse(zilog, print_log_block, 
print_log_record, NULL, - zh->zh_claim_txg); + zh->zh_claim_txg, B_FALSE); print_log_stats(verbose); } } diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c index 4f52949fba82..dbcad4995a8a 100644 --- a/usr/src/cmd/zfs/zfs_main.c +++ b/usr/src/cmd/zfs/zfs_main.c @@ -108,6 +108,9 @@ static int zfs_do_release(int argc, char **argv); static int zfs_do_diff(int argc, char **argv); static int zfs_do_bookmark(int argc, char **argv); static int zfs_do_channel_program(int argc, char **argv); +static int zfs_do_load_key(int argc, char **argv); +static int zfs_do_unload_key(int argc, char **argv); +static int zfs_do_change_key(int argc, char **argv); /* * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. @@ -156,6 +159,9 @@ typedef enum { HELP_DIFF, HELP_BOOKMARK, HELP_CHANNEL_PROGRAM, + HELP_LOAD_KEY, + HELP_UNLOAD_KEY, + HELP_CHANGE_KEY, } zfs_help_t; typedef struct zfs_command { @@ -210,6 +216,9 @@ static zfs_command_t command_table[] = { { "holds", zfs_do_holds, HELP_HOLDS }, { "release", zfs_do_release, HELP_RELEASE }, { "diff", zfs_do_diff, HELP_DIFF }, + { "load-key", zfs_do_load_key, HELP_LOAD_KEY }, + { "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY }, + { "change-key", zfs_do_change_key, HELP_CHANGE_KEY }, }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) @@ -251,7 +260,7 @@ get_usage(zfs_help_t idx) "[filesystem|volume|snapshot] ...\n")); case HELP_MOUNT: return (gettext("\tmount\n" - "\tmount [-vO] [-o opts] <-a | filesystem>\n")); + "\tmount [-lvO] [-o opts] <-a | filesystem>\n")); case HELP_PROMOTE: return (gettext("\tpromote \n")); case HELP_RECEIVE: @@ -268,16 +277,16 @@ get_usage(zfs_help_t idx) case HELP_ROLLBACK: return (gettext("\trollback [-rRf] \n")); case HELP_SEND: - return (gettext("\tsend [-DnPpRvLec] [-[iI] snapshot] " + return (gettext("\tsend [-DnPpRvLecw] [-[iI] snapshot] " "\n" - "\tsend [-Le] [-i snapshot|bookmark] " + "\tsend [-Lecw] [-i snapshot|bookmark] " "\n" "\tsend 
[-nvPe] -t \n")); case HELP_SET: return (gettext("\tset ... " " ...\n")); case HELP_SHARE: - return (gettext("\tshare <-a | filesystem>\n")); + return (gettext("\tshare [-l] <-a | filesystem>\n")); case HELP_SNAPSHOT: return (gettext("\tsnapshot [-r] [-o property=value] ... " "@ ...\n")); @@ -332,6 +341,17 @@ get_usage(zfs_help_t idx) return (gettext("\tprogram [-t ] " "[-m ] " "[lua args...]\n")); + case HELP_LOAD_KEY: + return (gettext("\tload-key [-rn] [-L ] " + "<-a | filesystem|volume>\n")); + case HELP_UNLOAD_KEY: + return (gettext("\tunload-key [-r] " + "<-a | filesystem|volume>\n")); + case HELP_CHANGE_KEY: + return (gettext("\tchange-key [-l] [-o keyformat=]" + "\t [-o keylocation=] [-o pbkdf2iters=]" + "\t \n" + "\tchange-key -i [-l] \n")); } abort(); @@ -865,7 +885,7 @@ zfs_do_create(int argc, char **argv) (void) snprintf(msg, sizeof (msg), gettext("cannot create '%s'"), argv[0]); if (props && (real_props = zfs_valid_proplist(g_zfs, type, - props, 0, NULL, zpool_handle, msg)) == NULL) { + props, 0, NULL, zpool_handle, B_TRUE, msg)) == NULL) { zpool_close(zpool_handle); goto error; } @@ -3737,11 +3757,12 @@ zfs_do_send(int argc, char **argv) {"embed", no_argument, NULL, 'e'}, {"resume", required_argument, NULL, 't'}, {"compressed", no_argument, NULL, 'c'}, + {"raw", no_argument, NULL, 'w'}, {0, 0, 0, 0} }; /* check options */ - while ((c = getopt_long(argc, argv, ":i:I:RbDpvnPLet:c", long_options, + while ((c = getopt_long(argc, argv, ":i:I:RbDpvnPLet:cw", long_options, NULL)) != -1) { switch (c) { case 'i': @@ -3789,6 +3810,12 @@ zfs_do_send(int argc, char **argv) case 'c': flags.compress = B_TRUE; break; + case 'w': + flags.raw = B_TRUE; + flags.compress = B_TRUE; + flags.embed_data = B_TRUE; + flags.largeblock = B_TRUE; + break; case ':': /* * If a parameter was not passed, optopt contains the @@ -3896,6 +3923,8 @@ zfs_do_send(int argc, char **argv) lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; if (flags.compress) lzc_flags |= LZC_SEND_FLAG_COMPRESS; + if 
(flags.raw) + lzc_flags |= LZC_SEND_FLAG_RAW; if (fromname != NULL && (fromname[0] == '#' || fromname[0] == '@')) { @@ -4131,6 +4160,8 @@ zfs_do_receive(int argc, char **argv) #define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_DELEG_PERM_BOOKMARK "bookmark" +#define ZFS_DELEG_PERM_LOAD_KEY "load-key" +#define ZFS_DELEG_PERM_CHANGE_KEY "change-key" #define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE @@ -4151,6 +4182,8 @@ static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, { ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK }, + { ZFS_DELEG_PERM_LOAD_KEY, ZFS_DELEG_NOTE_LOAD_KEY }, + { ZFS_DELEG_PERM_CHANGE_KEY, ZFS_DELEG_NOTE_CHANGE_KEY }, { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, @@ -4718,6 +4751,12 @@ deleg_perm_comment(zfs_deleg_note_t note) case ZFS_DELEG_NOTE_SNAPSHOT: str = gettext(""); break; + case ZFS_DELEG_NOTE_LOAD_KEY: + str = gettext("Allows loading or unloading an encryption key"); + break; + case ZFS_DELEG_NOTE_CHANGE_KEY: + str = gettext("Allows changing or adding an encryption key"); + break; /* * case ZFS_DELEG_NOTE_VSCAN: * str = gettext(""); @@ -5981,7 +6020,7 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, } if (!zfs_is_mounted(zhp, NULL) && - zfs_mount(zhp, NULL, 0) != 0) + zfs_mount(zhp, NULL, flags) != 0) return (1); if (protocol == NULL) { @@ -6088,7 +6127,7 @@ share_mount(int op, int argc, char **argv) int flags = 0; /* check options */ - while ((c = getopt(argc, argv, op == OP_MOUNT ? ":avo:O" : "a")) + while ((c = getopt(argc, argv, op == OP_MOUNT ? 
":alvo:O" : "al")) != -1) { switch (c) { case 'a': @@ -6097,6 +6136,9 @@ share_mount(int op, int argc, char **argv) case 'v': verbose = B_TRUE; break; + case 'l': + flags |= MS_CRYPT; + break; case 'o': if (*optarg == '\0') { (void) fprintf(stderr, gettext("empty mount " @@ -7183,6 +7225,229 @@ zfs_do_channel_program(int argc, char **argv) return (-1); } +typedef struct loadkey_cbdata { + boolean_t cb_loadkey; + boolean_t cb_recursive; + boolean_t cb_noop; + char *cb_keylocation; + uint64_t cb_numfailed; + uint64_t cb_numattempted; +} loadkey_cbdata_t; + +static int +load_key_callback(zfs_handle_t *zhp, void *data) +{ + int ret; + boolean_t is_encroot; + loadkey_cbdata_t *cb = data; + uint64_t keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + + /* + * If we are working recursively, we want to skip loading / unloading + * keys for non-encryption roots and datasets whose keys are already + * in the desired end-state. + */ + if (cb->cb_recursive) { + ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); + if (ret != 0) + return (ret); + if (!is_encroot) + return (0); + + if ((cb->cb_loadkey && keystatus == ZFS_KEYSTATUS_AVAILABLE) || + (!cb->cb_loadkey && keystatus == ZFS_KEYSTATUS_UNAVAILABLE)) + return (0); + } + + cb->cb_numattempted++; + + if (cb->cb_loadkey) + ret = zfs_crypto_load_key(zhp, cb->cb_noop, cb->cb_keylocation); + else + ret = zfs_crypto_unload_key(zhp); + + if (ret != 0) { + cb->cb_numfailed++; + return (ret); + } + + return (0); +} + +static int +load_unload_keys(int argc, char **argv, boolean_t loadkey) +{ + int c, ret = 0, flags = 0; + boolean_t do_all = B_FALSE; + loadkey_cbdata_t cb = { 0 }; + + cb.cb_loadkey = loadkey; + + while ((c = getopt(argc, argv, "anrL:")) != -1) { + /* noop and alternate keylocations only apply to zfs load-key */ + if (loadkey) { + switch (c) { + case 'n': + cb.cb_noop = B_TRUE; + continue; + case 'L': + cb.cb_keylocation = optarg; + continue; + default: + break; + } + } + + switch (c) { + case 'a': + 
do_all = B_TRUE; + cb.cb_recursive = B_TRUE; + break; + case 'r': + flags |= ZFS_ITER_RECURSE; + cb.cb_recursive = B_TRUE; + break; + default: + (void) fprintf(stderr, + gettext("invalid option '%c'\n"), optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + if (!do_all && argc == 0) { + (void) fprintf(stderr, + gettext("Missing dataset argument or -a option\n")); + usage(B_FALSE); + } + + if (do_all && argc != 0) { + (void) fprintf(stderr, + gettext("Cannot specify dataset with -a option\n")); + usage(B_FALSE); + } + + if (cb.cb_recursive && cb.cb_keylocation != NULL && + strcmp(cb.cb_keylocation, "prompt") != 0) { + (void) fprintf(stderr, gettext("alternate keylocation may only " + "be 'prompt' with -r or -a\n")); + usage(B_FALSE); + } + + ret = zfs_for_each(argc, argv, flags, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, NULL, NULL, 0, + load_key_callback, &cb); + + if (cb.cb_noop || (cb.cb_recursive && cb.cb_numattempted != 0)) { + (void) printf(gettext("%llu / %llu key(s) successfully %s\n"), + (u_longlong_t)(cb.cb_numattempted - cb.cb_numfailed), + (u_longlong_t)cb.cb_numattempted, + loadkey ? (cb.cb_noop ? 
"verified" : "loaded") : + "unloaded"); + } + + if (cb.cb_numfailed != 0) + ret = -1; + + return (ret); +} + +static int +zfs_do_load_key(int argc, char **argv) +{ + return (load_unload_keys(argc, argv, B_TRUE)); +} + + +static int +zfs_do_unload_key(int argc, char **argv) +{ + return (load_unload_keys(argc, argv, B_FALSE)); +} + +static int +zfs_do_change_key(int argc, char **argv) +{ + int c, ret; + uint64_t keystatus; + boolean_t loadkey = B_FALSE, inheritkey = B_FALSE; + zfs_handle_t *zhp = NULL; + nvlist_t *props = fnvlist_alloc(); + + while ((c = getopt(argc, argv, "lio:")) != -1) { + switch (c) { + case 'l': + loadkey = B_TRUE; + break; + case 'i': + inheritkey = B_TRUE; + break; + case 'o': + if (parseprop(props, optarg) != 0) { + nvlist_free(props); + return (1); + } + break; + default: + (void) fprintf(stderr, + gettext("invalid option '%c'\n"), optopt); + usage(B_FALSE); + } + } + + if (inheritkey && !nvlist_empty(props)) { + (void) fprintf(stderr, + gettext("Properties not allowed for inheriting\n")); + usage(B_FALSE); + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("Missing dataset argument\n")); + usage(B_FALSE); + } + + if (argc > 1) { + (void) fprintf(stderr, gettext("Too many arguments\n")); + usage(B_FALSE); + } + + zhp = zfs_open(g_zfs, argv[argc - 1], + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + usage(B_FALSE); + + if (loadkey) { + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + if (keystatus != ZFS_KEYSTATUS_AVAILABLE) { + ret = zfs_crypto_load_key(zhp, B_FALSE, NULL); + if (ret != 0) { + nvlist_free(props); + zfs_close(zhp); + return (-1); + } + } + + /* refresh the properties so the new keystatus is visible */ + zfs_refresh_properties(zhp); + } + + ret = zfs_crypto_rewrap(zhp, props, inheritkey); + if (ret != 0) { + nvlist_free(props); + zfs_close(zhp); + return (-1); + } + + nvlist_free(props); + zfs_close(zhp); + return (0); +} + int main(int argc, char **argv) { diff 
--git a/usr/src/cmd/zinject/translate.c b/usr/src/cmd/zinject/translate.c index 53a38e1ea853..090f2448b06e 100644 --- a/usr/src/cmd/zinject/translate.c +++ b/usr/src/cmd/zinject/translate.c @@ -175,7 +175,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf, */ sync(); - err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os); + err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, B_FALSE, FTAG, &os); if (err != 0) { (void) fprintf(stderr, "cannot open dataset '%s': %s\n", dataset, strerror(err)); @@ -185,7 +185,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf, record->zi_objset = dmu_objset_id(os); record->zi_object = statbuf->st_ino; - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (0); } @@ -261,7 +261,7 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range, * size. */ if ((err = dmu_objset_own(dataset, DMU_OST_ANY, - B_TRUE, FTAG, &os)) != 0) { + B_TRUE, B_FALSE, FTAG, &os)) != 0) { (void) fprintf(stderr, "cannot open dataset '%s': %s\n", dataset, strerror(err)); goto out; @@ -323,7 +323,7 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range, dnode_rele(dn, FTAG); } if (os) - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (ret); } diff --git a/usr/src/cmd/zoneadm/Makefile b/usr/src/cmd/zoneadm/Makefile index 2b01078aec17..23584bbb5bfd 100644 --- a/usr/src/cmd/zoneadm/Makefile +++ b/usr/src/cmd/zoneadm/Makefile @@ -38,6 +38,8 @@ POFILES= $(OBJS:%.o=%.po) LDLIBS += -lzonecfg -lsocket -lgen -lpool -lzfs -luuid -lnvpair -lbrand -ldladm -lsecdb +INCS += -I../../common/zfs + CERRWARN += -_gcc=-Wno-uninitialized .KEEP_STATE: diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c index d3af687dc4f1..b36105d4e13d 100644 --- a/usr/src/cmd/zpool/zpool_main.c +++ b/usr/src/cmd/zpool/zpool_main.c @@ -223,12 +223,13 @@ get_usage(zpool_help_t idx) return (gettext("\thistory 
[-il] [] ...\n")); case HELP_IMPORT: return (gettext("\timport [-d dir] [-D]\n" - "\timport [-d dir | -c cachefile] [-F [-n]] \n" + "\timport [-d dir | -c cachefile] [-F [-n]] [-l] " + "\n" "\timport [-o mntopts] [-o property=value] ... \n" - "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " + "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] " "[-R root] [-F [-n]] -a\n" "\timport [-o mntopts] [-o property=value] ... \n" - "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " + "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] " "[-R root] [-F [-n]]\n" "\t [newpool]\n")); case HELP_IOSTAT: @@ -265,7 +266,7 @@ get_usage(zpool_help_t idx) case HELP_SET: return (gettext("\tset \n")); case HELP_SPLIT: - return (gettext("\tsplit [-n] [-R altroot] [-o mntopts]\n" + return (gettext("\tsplit [-nl] [-R altroot] [-o mntopts]\n" "\t [-o property=value] " "[ ...]\n")); case HELP_REGUID: @@ -1902,6 +1903,7 @@ static int do_import(nvlist_t *config, const char *newname, const char *mntopts, nvlist_t *props, int flags) { + int ret = 0; zpool_handle_t *zhp; char *name; uint64_t state; @@ -1962,6 +1964,16 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL) return (1); + /* + * Loading keys is best effort. We don't want to return immediately + * if it fails but we do want to give the error to the caller. + */ + if (flags & ZFS_IMPORT_LOAD_KEYS) { + ret = zfs_crypto_attempt_load_keys(g_zfs, name); + if (ret != 0) + ret = 1; + } + if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && !(flags & ZFS_IMPORT_ONLY) && zpool_enable_datasets(zhp, mntopts, 0) != 0) { @@ -1970,14 +1982,14 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, } zpool_close(zhp); - return (0); + return (ret); } /* * zpool import [-d dir] [-D] - * import [-o mntopts] [-o prop=value] ... [-R root] [-D] + * import [-o mntopts] [-o prop=value] ... 
[-R root] [-D] [-l] * [-d dir | -c cachefile] [-f] -a - * import [-o mntopts] [-o prop=value] ... [-R root] [-D] + * import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l] * [-d dir | -c cachefile] [-f] [-n] [-F] [newpool] * * -c Read pool information from a cachefile instead of searching @@ -2012,6 +2024,8 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, * * -a Import all pools found. * + * -l Load encryption keys while importing. + * * -o Set property=value and/or temporary mount options (without '='). * * The import command scans for pools to import, and import pools based on pool @@ -2048,7 +2062,7 @@ zpool_do_import(int argc, char **argv) char *endptr; /* check options */ - while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:rR:T:VX")) != -1) { + while ((c = getopt(argc, argv, ":aCc:d:DEfFlmnNo:rR:T:VX")) != -1) { switch (c) { case 'a': do_all = B_TRUE; @@ -2078,6 +2092,9 @@ zpool_do_import(int argc, char **argv) case 'F': do_rewind = B_TRUE; break; + case 'l': + flags |= ZFS_IMPORT_LOAD_KEYS; + break; case 'm': flags |= ZFS_IMPORT_MISSING_LOG; break; @@ -2146,6 +2163,17 @@ zpool_do_import(int argc, char **argv) usage(B_FALSE); } + if ((flags & ZFS_IMPORT_LOAD_KEYS) && (flags & ZFS_IMPORT_ONLY)) { + (void) fprintf(stderr, gettext("-l is incompatible with -N\n")); + usage(B_FALSE); + } + + if ((flags & ZFS_IMPORT_LOAD_KEYS) && !do_all && argc == 0) { + (void) fprintf(stderr, gettext("-l is only meaningful during " + "an import\n")); + usage(B_FALSE); + } + if ((dryrun || xtreme_rewind) && !do_rewind) { (void) fprintf(stderr, gettext("-n or -X only meaningful with -F\n")); @@ -3439,6 +3467,7 @@ zpool_do_detach(int argc, char **argv) * it were to be split. * -o Set property=value, or set mount options. * -R Mount the split-off pool under an alternate root. + * -l Load encryption keys while importing. * * Splits the named pool and gives it the new pool name. 
Devices to be split * off may be listed, provided that no more than one device is specified @@ -3456,6 +3485,7 @@ zpool_do_split(int argc, char **argv) char *mntopts = NULL; splitflags_t flags; int c, ret = 0; + boolean_t loadkeys = B_FALSE; zpool_handle_t *zhp; nvlist_t *config, *props = NULL; @@ -3463,7 +3493,7 @@ zpool_do_split(int argc, char **argv) flags.import = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, ":R:no:")) != -1) { + while ((c = getopt(argc, argv, ":R:lno:")) != -1) { switch (c) { case 'R': flags.import = B_TRUE; @@ -3474,6 +3504,9 @@ zpool_do_split(int argc, char **argv) usage(B_FALSE); } break; + case 'l': + loadkeys = B_TRUE; + break; case 'n': flags.dryrun = B_TRUE; break; @@ -3509,6 +3542,12 @@ zpool_do_split(int argc, char **argv) usage(B_FALSE); } + if (!flags.import && loadkeys) { + (void) fprintf(stderr, gettext("loading keys is only " + "valid when importing the pool\n")); + usage(B_FALSE); + } + argc -= optind; argv += optind; @@ -3553,6 +3592,13 @@ zpool_do_split(int argc, char **argv) */ if ((zhp = zpool_open_canfail(g_zfs, newpool)) == NULL) return (1); + + if (loadkeys) { + ret = zfs_crypto_attempt_load_keys(g_zfs, newpool); + if (ret != 0) + ret = 1; + } + if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && zpool_enable_datasets(zhp, mntopts, 0) != 0) { ret = 1; diff --git a/usr/src/cmd/zstreamdump/zstreamdump.c b/usr/src/cmd/zstreamdump/zstreamdump.c index 17adbecd7953..49da0045931b 100644 --- a/usr/src/cmd/zstreamdump/zstreamdump.c +++ b/usr/src/cmd/zstreamdump/zstreamdump.c @@ -196,12 +196,33 @@ print_block(char *buf, int length) } } +/* + * Format an array of bytes into str as hexadecimal characters. str must + * have buf_len * 2 + 1 bytes of space. 
+ */ +static void +sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len) +{ + int i, n; + + for (i = 0; i < buf_len; i++) { + n = sprintf(str, "%02x", buf[i] & 0xff); + str += n; + } + + str[0] = '\0'; +} + int main(int argc, char *argv[]) { char *buf = safe_malloc(SPA_MAXBLOCKSIZE); uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; + char salt[ZIO_DATA_SALT_LEN * 2 + 1]; + char iv[ZIO_DATA_IV_LEN * 2 + 1]; + char mac[ZIO_DATA_MAC_LEN * 2 + 1]; uint64_t total_records = 0; + uint64_t payload_size; dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; struct drr_begin *drrb = &thedrr.drr_u.drr_begin; @@ -213,6 +234,7 @@ main(int argc, char *argv[]) struct drr_free *drrf = &thedrr.drr_u.drr_free; struct drr_spill *drrs = &thedrr.drr_u.drr_spill; struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; + struct drr_object_range *drror = &thedrr.drr_u.drr_object_range; struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum; char c; boolean_t verbose = B_FALSE; @@ -412,24 +434,34 @@ main(int argc, char *argv[]) drro->drr_blksz = BSWAP_32(drro->drr_blksz); drro->drr_bonuslen = BSWAP_32(drro->drr_bonuslen); + drro->drr_raw_bonuslen = + BSWAP_32(drro->drr_raw_bonuslen); drro->drr_toguid = BSWAP_64(drro->drr_toguid); } + + payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); + if (verbose) { (void) printf("OBJECT object = %llu type = %u " - "bonustype = %u blksz = %u bonuslen = %u\n", + "bonustype = %u blksz = %u bonuslen = %u " + "raw_bonuslen = %u flags = %u " + "indblkshift = %u nlevels = %u " + "nblkptr = %u\n", (u_longlong_t)drro->drr_object, drro->drr_type, drro->drr_bonustype, drro->drr_blksz, - drro->drr_bonuslen); + drro->drr_bonuslen, + drro->drr_raw_bonuslen, + drro->drr_flags, + drro->drr_indblkshift, + drro->drr_nlevels, + drro->drr_nblkptr); } if (drro->drr_bonuslen > 0) { - (void) ssread(buf, - P2ROUNDUP(drro->drr_bonuslen, 8), &zc); - if (dump) { - print_block(buf, - P2ROUNDUP(drro->drr_bonuslen, 8)); - } + (void) ssread(buf, payload_size, 
&zc); + if (dump) + print_block(buf, payload_size); } break; @@ -463,28 +495,40 @@ main(int argc, char *argv[]) BSWAP_64(drrw->drr_compressed_size); } - uint64_t payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); + payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); /* * If this is verbose and/or dump output, * print info on the modified block */ if (verbose) { + sprintf_bytes(salt, drrw->drr_salt, + ZIO_DATA_SALT_LEN); + sprintf_bytes(iv, drrw->drr_iv, + ZIO_DATA_IV_LEN); + sprintf_bytes(mac, drrw->drr_mac, + ZIO_DATA_MAC_LEN); + (void) printf("WRITE object = %llu type = %u " "checksum type = %u compression type = %u\n" - " offset = %llu logical_size = %llu " + " flags = %u offset = %llu " + "logical_size = %llu " "compressed_size = %llu " - "payload_size = %llu " - "props = %llx\n", + "payload_size = %llu props = %llx " + "salt = %s iv = %s mac = %s\n", (u_longlong_t)drrw->drr_object, drrw->drr_type, drrw->drr_checksumtype, drrw->drr_compressiontype, + drrw->drr_flags, (u_longlong_t)drrw->drr_offset, (u_longlong_t)drrw->drr_logical_size, (u_longlong_t)drrw->drr_compressed_size, (u_longlong_t)payload_size, - (u_longlong_t)drrw->drr_key.ddk_prop); + (u_longlong_t)drrw->drr_key.ddk_prop, + salt, + iv, + mac); } /* @@ -555,11 +599,31 @@ main(int argc, char *argv[]) if (do_byteswap) { drrs->drr_object = BSWAP_64(drrs->drr_object); drrs->drr_length = BSWAP_64(drrs->drr_length); + drrs->drr_compressed_size = + BSWAP_64(drrs->drr_compressed_size); + drrs->drr_type = BSWAP_32(drrs->drr_type); } if (verbose) { + sprintf_bytes(salt, drrs->drr_salt, + ZIO_DATA_SALT_LEN); + sprintf_bytes(iv, drrs->drr_iv, + ZIO_DATA_IV_LEN); + sprintf_bytes(mac, drrs->drr_mac, + ZIO_DATA_MAC_LEN); + (void) printf("SPILL block for object = %llu " - "length = %llu\n", drrs->drr_object, - drrs->drr_length); + "length = %llu flags = %u " + "compression type = %u " + "compressed_size = %llu " + "salt = %s iv = %s mac = %s\n", + (u_longlong_t)drrs->drr_object, + (u_longlong_t)drrs->drr_length, + 
drrs->drr_flags, + drrs->drr_compressiontype, + (u_longlong_t)drrs->drr_compressed_size, + salt, + iv, + mac); } (void) ssread(buf, drrs->drr_length, &zc); if (dump) { @@ -598,6 +662,33 @@ main(int argc, char *argv[]) (void) ssread(buf, P2ROUNDUP(drrwe->drr_psize, 8), &zc); break; + case DRR_OBJECT_RANGE: + if (do_byteswap) { + drror->drr_firstobj = + BSWAP_64(drror->drr_firstobj); + drror->drr_numslots = + BSWAP_64(drror->drr_numslots); + drror->drr_toguid = BSWAP_64(drror->drr_toguid); + } + if (verbose) { + sprintf_bytes(salt, drror->drr_salt, + ZIO_DATA_SALT_LEN); + sprintf_bytes(iv, drror->drr_iv, + ZIO_DATA_IV_LEN); + sprintf_bytes(mac, drror->drr_mac, + ZIO_DATA_MAC_LEN); + + (void) printf("OBJECT_RANGE firstobj = %llu " + "numslots = %llu flags = %u " + "salt = %s iv = %s mac = %s\n", + (u_longlong_t)drror->drr_firstobj, + (u_longlong_t)drror->drr_numslots, + drror->drr_flags, + salt, + iv, + mac); + } + break; } if (drr->drr_type != DRR_BEGIN && very_verbose) { (void) printf(" checksum = %llx/%llx/%llx/%llx\n", diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index d338ee4cab5e..5b99c0c83e3d 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -195,6 +195,7 @@ extern boolean_t zfs_abd_scatter_enabled; static ztest_shared_opts_t *ztest_shared_opts; static ztest_shared_opts_t ztest_opts; +static char *ztest_wkeydata = "abcdefghijklmnopqrstuvwxyz012345"; typedef struct ztest_shared_ds { uint64_t zd_seq; @@ -1085,6 +1086,42 @@ ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) return (error); } +static int +ztest_dmu_objset_own(const char *name, dmu_objset_type_t type, + boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp) +{ + int err; + + err = dmu_objset_own(name, type, readonly, decrypt, tag, osp); + if (decrypt && err == EACCES) { + char ddname[ZFS_MAX_DATASET_NAME_LEN]; + dsl_crypto_params_t *dcp; + nvlist_t *crypto_args = fnvlist_alloc(); + char *cp = NULL; + + /* spa_keystore_load_wkey() expects a dsl dir name */ + (void) strcpy(ddname, name); + cp = strchr(ddname, '@'); + if (cp != NULL) + *cp = '\0'; + + fnvlist_add_uint8_array(crypto_args, "wkeydata", + 
(uint8_t *)ztest_wkeydata, WRAPPING_KEY_LEN); + VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL, + crypto_args, &dcp)); + err = spa_keystore_load_wkey(ddname, dcp, B_FALSE); + dsl_crypto_params_free(dcp, B_FALSE); + fnvlist_free(crypto_args); + + if (err != 0) + return (err); + + err = dmu_objset_own(name, type, readonly, decrypt, tag, osp); + } + + return (err); +} + static void ztest_rll_init(rll_t *rll) { @@ -1673,7 +1710,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) dmu_write(os, lr->lr_foid, offset, length, data, tx); } else { bcopy(data, abuf->b_data, length); - dmu_assign_arcbuf(db, offset, abuf, tx); + dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx); } (void) ztest_log_write(zd, tx, lr); @@ -2373,7 +2410,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_file", nvroot, NULL, NULL)); + spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); /* @@ -2381,7 +2418,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1); VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_mirror", nvroot, NULL, NULL)); + spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); /* @@ -2390,7 +2427,8 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ (void) rw_rdlock(&ztest_name_lock); nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); - VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); + VERIFY3U(EEXIST, ==, + spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); @@ -2448,7 +2486,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) props = fnvlist_alloc(); fnvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), version); - VERIFY0(spa_create(name, 
nvroot, props, NULL)); + VERIFY0(spa_create(name, nvroot, props, NULL, NULL)); fnvlist_free(nvroot); fnvlist_free(props); @@ -3198,11 +3236,65 @@ ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) static int ztest_dataset_create(char *dsname) { - uint64_t zilset = ztest_random(100); - int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, + int err; + uint64_t rand; + dsl_crypto_params_t *dcp = NULL; + + /* + * 50% of the time, we create encrypted datasets + * using a random cipher suite and a hard-coded + * wrapping key. + */ +#ifdef WITHCRYPTO + /* + * Until the crypto framework is compiled in userland, the ztest using + * crypto will not work. + */ + rand = ztest_random(2); +#else + rand = 0; +#endif + if (rand != 0) { + nvlist_t *crypto_args = fnvlist_alloc(); + nvlist_t *props = fnvlist_alloc(); + + /* slight bias towards the default cipher suite */ + rand = ztest_random(ZIO_CRYPT_FUNCTIONS); + if (rand < ZIO_CRYPT_AES_128_CCM) + rand = ZIO_CRYPT_ON; + + fnvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), rand); + fnvlist_add_uint8_array(crypto_args, "wkeydata", + (uint8_t *)ztest_wkeydata, WRAPPING_KEY_LEN); + + /* + * These parameters aren't really used by the kernel. They + * are simply stored so that userspace knows how to load + * the wrapping key. 
+ */ + fnvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), ZFS_KEYFORMAT_RAW); + fnvlist_add_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), "prompt"); + fnvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 0ULL); + fnvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 0ULL); + + VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE, props, + crypto_args, &dcp)); + + fnvlist_free(crypto_args); + fnvlist_free(props); + } + + err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, dcp, ztest_objset_create_cb, NULL); + dsl_crypto_params_free(dcp, !!err); - if (err || zilset < 80) + rand = ztest_random(100); + if (err || rand < 80) return (err); if (ztest_opts.zo_verbose >= 6) @@ -3222,7 +3314,8 @@ ztest_objset_destroy_cb(const char *name, void *arg) /* * Verify that the dataset contains a directory object. */ - VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os)); + VERIFY0(ztest_dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, + B_TRUE, FTAG, &os)); error = dmu_object_info(os, ZTEST_DIROBJ, &doi); if (error != ENOENT) { /* We could have crashed in the middle of destroying it */ @@ -3230,7 +3323,7 @@ ztest_objset_destroy_cb(const char *name, void *arg) ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); ASSERT3S(doi.doi_physical_blocks_512, >=, 0); } - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); /* * Destroy the dataset. @@ -3303,11 +3396,13 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) * (invoked from ztest_objset_destroy_cb()) should just throw it away. 
*/ if (ztest_random(2) == 0 && - dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { + ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, + B_TRUE, FTAG, &os) == 0) { ztest_zd_init(&zdtmp, NULL, os); zil_replay(os, &zdtmp, ztest_replay_vector); ztest_zd_fini(&zdtmp); - dmu_objset_disown(os, FTAG); + txg_wait_synced(dmu_objset_pool(os), 0); + dmu_objset_disown(os, B_TRUE, FTAG); } /* @@ -3321,8 +3416,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) /* * Verify that the destroyed dataset is no longer in the namespace. */ - VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, - FTAG, &os)); + VERIFY3U(ENOENT, ==, ztest_dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, + B_TRUE, FTAG, &os)); /* * Verify that we can create a new dataset. @@ -3337,7 +3432,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_create(%s) = %d", name, error); } - VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); + VERIFY0(ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, + FTAG, &os)); ztest_zd_init(&zdtmp, NULL, os); @@ -3361,7 +3457,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) * Verify that we cannot create an existing dataset. */ VERIFY3U(EEXIST, ==, - dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); + dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL, NULL)); /* * Verify that we can hold an objset that is also owned. @@ -3372,11 +3468,12 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) /* * Verify that we cannot own an objset that is already owned. 
*/ - VERIFY3U(EBUSY, ==, - dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); + VERIFY3U(EBUSY, ==, ztest_dmu_objset_own(name, DMU_OST_OTHER, + B_FALSE, B_TRUE, FTAG, &os2)); zil_close(zilog); - dmu_objset_disown(os, FTAG); + txg_wait_synced(spa_get_dsl(os->os_spa), 0); + dmu_objset_disown(os, B_TRUE, FTAG); ztest_zd_fini(&zdtmp); (void) rw_unlock(&ztest_name_lock); @@ -3510,19 +3607,20 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); } - error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os); + error = ztest_dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, B_TRUE, + FTAG, &os); if (error) fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); error = dsl_dataset_promote(clone2name, NULL); if (error == ENOSPC) { - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); ztest_record_enospc(FTAG); goto out; } if (error != EBUSY) fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, error); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); out: ztest_dsl_dataset_cleanup(osname, id); @@ -3864,7 +3962,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) * bigobj, at the tail of the nth chunk * * The chunk size is set equal to bigobj block size so that - * dmu_assign_arcbuf() can be tested for object updates. + * dmu_assign_arcbuf_by_dbuf() can be tested for object updates. */ /* @@ -3922,7 +4020,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) /* * In iteration 5 (i == 5) use arcbufs * that don't match bigobj blksz to test - * dmu_assign_arcbuf() when it can't directly + * dmu_assign_arcbuf_by_dbuf() when it can't directly * assign an arcbuf to a dbuf. 
*/ for (j = 0; j < s; j++) { @@ -3967,8 +4065,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) /* * 50% of the time don't read objects in the 1st iteration to - * test dmu_assign_arcbuf() for the case when there're no - * existing dbufs for the specified offsets. + * test dmu_assign_arcbuf_by_dbuf() for the case when there are + * no existing dbufs for the specified offsets. */ if (i != 0 || ztest_random(2) != 0) { error = dmu_read(os, packobj, packoff, @@ -4013,12 +4111,12 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); } if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) { - dmu_assign_arcbuf(bonus_db, off, + dmu_assign_arcbuf_by_dbuf(bonus_db, off, bigbuf_arcbufs[j], tx); } else { - dmu_assign_arcbuf(bonus_db, off, + dmu_assign_arcbuf_by_dbuf(bonus_db, off, bigbuf_arcbufs[2 * j], tx); - dmu_assign_arcbuf(bonus_db, + dmu_assign_arcbuf_by_dbuf(bonus_db, off + chunksize / 2, bigbuf_arcbufs[2 * j + 1], tx); } @@ -5616,7 +5714,8 @@ ztest_dataset_open(int d) } ASSERT(error == 0 || error == EEXIST); - VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os)); + VERIFY0(ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, + B_TRUE, zd, &os)); (void) rw_unlock(&ztest_name_lock); ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); @@ -5657,7 +5756,8 @@ ztest_dataset_close(int d) ztest_ds_t *zd = &ztest_ds[d]; zil_close(zd->zd_zilog); - dmu_objset_disown(zd->zd_os, zd); + txg_wait_synced(spa_get_dsl(zd->zd_os->os_spa), 0); + dmu_objset_disown(zd->zd_os, B_TRUE, zd); ztest_zd_fini(zd); } @@ -5707,13 +5807,13 @@ ztest_run(ztest_shared_t *zs) ztest_spa = spa; dmu_objset_stats_t dds; - VERIFY0(dmu_objset_own(ztest_opts.zo_pool, - DMU_OST_ANY, B_TRUE, FTAG, &os)); + VERIFY0(ztest_dmu_objset_own(ztest_opts.zo_pool, + DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os)); dsl_pool_config_enter(dmu_objset_pool(os), FTAG); dmu_objset_fast_stat(os, &dds); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); zs->zs_guid = dds.dds_guid; - 
dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; @@ -5927,11 +6027,10 @@ ztest_freeze(void) VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); ASSERT(spa_freeze_txg(spa) == UINT64_MAX); VERIFY3U(0, ==, ztest_dataset_open(0)); - ztest_dataset_close(0); - spa->spa_debug = B_TRUE; ztest_spa = spa; txg_wait_synced(spa_get_dsl(spa), 0); + ztest_dataset_close(0); ztest_reguid(NULL, 0); spa_close(spa, FTAG); @@ -6007,7 +6106,8 @@ ztest_init(ztest_shared_t *zs) spa_feature_table[i].fi_uname); VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); } - VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); + VERIFY3U(0, ==, + spa_create(ztest_opts.zo_pool, nvroot, props, NULL, NULL)); nvlist_free(nvroot); nvlist_free(props); @@ -6039,7 +6139,6 @@ setup_data_fd(void) (void) unlink(ztest_name_data); } - static int shared_data_size(ztest_shared_hdr_t *hdr) { diff --git a/usr/src/common/zfs/zfeature_common.c b/usr/src/common/zfs/zfeature_common.c index 2ca2120f4b38..c8adf5077f4e 100644 --- a/usr/src/common/zfs/zfeature_common.c +++ b/usr/src/common/zfs/zfeature_common.c @@ -275,4 +275,14 @@ zpool_feature_init(void) "org.illumos:edonr", "edonr", "Edon-R hash algorithm.", ZFEATURE_FLAG_PER_DATASET, edonr_deps); + + static const spa_feature_t encryption_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_ENCRYPTION, + "com.datto:encryption", "encryption", + "Support for dataset level encryption", + ZFEATURE_FLAG_PER_DATASET, encryption_deps); + } diff --git a/usr/src/common/zfs/zfeature_common.h b/usr/src/common/zfs/zfeature_common.h index 528ff42bb051..9fe415b47561 100644 --- a/usr/src/common/zfs/zfeature_common.h +++ b/usr/src/common/zfs/zfeature_common.h @@ -56,6 +56,7 @@ typedef enum spa_feature { SPA_FEATURE_SHA512, SPA_FEATURE_SKEIN, SPA_FEATURE_EDONR, + SPA_FEATURE_ENCRYPTION, SPA_FEATURES } spa_feature_t; diff --git 
a/usr/src/common/zfs/zfs_deleg.c b/usr/src/common/zfs/zfs_deleg.c index b66fac804fb3..c5fab0e413bb 100644 --- a/usr/src/common/zfs/zfs_deleg.c +++ b/usr/src/common/zfs/zfs_deleg.c @@ -65,6 +65,8 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = { {ZFS_DELEG_PERM_GROUPUSED}, {ZFS_DELEG_PERM_HOLD}, {ZFS_DELEG_PERM_RELEASE}, + {ZFS_DELEG_PERM_LOAD_KEY}, + {ZFS_DELEG_PERM_CHANGE_KEY}, {NULL} }; diff --git a/usr/src/common/zfs/zfs_deleg.h b/usr/src/common/zfs/zfs_deleg.h index 16133c59f33f..3d9ec0221387 100644 --- a/usr/src/common/zfs/zfs_deleg.h +++ b/usr/src/common/zfs/zfs_deleg.h @@ -67,6 +67,8 @@ typedef enum { ZFS_DELEG_NOTE_RELEASE, ZFS_DELEG_NOTE_DIFF, ZFS_DELEG_NOTE_BOOKMARK, + ZFS_DELEG_NOTE_LOAD_KEY, + ZFS_DELEG_NOTE_CHANGE_KEY, ZFS_DELEG_NOTE_NONE } zfs_deleg_note_t; diff --git a/usr/src/common/zfs/zfs_prop.c b/usr/src/common/zfs/zfs_prop.c index 22fbb54fc45a..26950bac3dd5 100644 --- a/usr/src/common/zfs/zfs_prop.c +++ b/usr/src/common/zfs/zfs_prop.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "zfs_prop.h" #include "zfs_deleg.h" @@ -115,6 +116,26 @@ zfs_prop_init(void) { NULL } }; + static zprop_index_t crypto_table[] = { + { "on", ZIO_CRYPT_ON }, + { "off", ZIO_CRYPT_OFF }, + { "aes-128-ccm", ZIO_CRYPT_AES_128_CCM }, + { "aes-192-ccm", ZIO_CRYPT_AES_192_CCM }, + { "aes-256-ccm", ZIO_CRYPT_AES_256_CCM }, + { "aes-128-gcm", ZIO_CRYPT_AES_128_GCM }, + { "aes-192-gcm", ZIO_CRYPT_AES_192_GCM }, + { "aes-256-gcm", ZIO_CRYPT_AES_256_GCM }, + { NULL } + }; + + static zprop_index_t keyformat_table[] = { + { "none", ZFS_KEYFORMAT_NONE }, + { "raw", ZFS_KEYFORMAT_RAW }, + { "hex", ZFS_KEYFORMAT_HEX }, + { "passphrase", ZFS_KEYFORMAT_PASSPHRASE }, + { NULL } + }; + static zprop_index_t snapdir_table[] = { { "hidden", ZFS_SNAPDIR_HIDDEN }, { "visible", ZFS_SNAPDIR_VISIBLE }, @@ -183,6 +204,13 @@ zfs_prop_init(void) { NULL } }; + static zprop_index_t keystatus_table[] = { + { "none", ZFS_KEYSTATUS_NONE }, + { "unavailable", ZFS_KEYSTATUS_UNAVAILABLE }, + 
{ "available", ZFS_KEYSTATUS_AVAILABLE }, + { NULL } + }; + static zprop_index_t logbias_table[] = { { "latency", ZFS_LOGBIAS_LATENCY }, { "throughput", ZFS_LOGBIAS_THROUGHPUT }, @@ -301,12 +329,16 @@ zfs_prop_init(void) PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto", "CANMOUNT", canmount_table); - /* readonly index (boolean) properties */ + /* readonly index properties */ zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table); zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY", boolean_table); + zprop_register_index(ZFS_PROP_KEYSTATUS, "keystatus", + ZFS_KEYSTATUS_NONE, PROP_READONLY, ZFS_TYPE_DATASET, + "none | unavailable | available", + "KEYSTATUS", keystatus_table); /* set once index properties */ zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0, @@ -317,6 +349,15 @@ zfs_prop_init(void) ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "sensitive | insensitive | mixed", "CASE", case_table); + zprop_register_index(ZFS_PROP_KEYFORMAT, "keyformat", + ZFS_KEYFORMAT_NONE, PROP_ONETIME_DEFAULT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "none | raw | hex | passphrase", "KEYFORMAT", keyformat_table); + zprop_register_index(ZFS_PROP_ENCRYPTION, "encryption", + ZIO_CRYPT_DEFAULT, PROP_ONETIME, ZFS_TYPE_DATASET, + "on | off | aes-128-ccm | aes-192-ccm | aes-256-ccm | " + "aes-128-gcm | aes-192-gcm | aes-256-gcm", "ENCRYPTION", + crypto_table); /* set once index (boolean) properties */ zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME, @@ -347,6 +388,12 @@ zfs_prop_init(void) "receive_resume_token", NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "RESUMETOK"); + zprop_register_string(ZFS_PROP_ENCRYPTION_ROOT, "encryptionroot", NULL, + PROP_READONLY, ZFS_TYPE_DATASET, "", + "ENCROOT"); + zprop_register_string(ZFS_PROP_KEYLOCATION, "keylocation", + 
"none", PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "prompt | ", "KEYLOCATION"); /* readonly number properties */ zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, @@ -391,6 +438,9 @@ zfs_prop_init(void) zprop_register_number(ZFS_PROP_SNAPSHOT_COUNT, "snapshot_count", UINT64_MAX, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "SSCOUNT"); + zprop_register_number(ZFS_PROP_PBKDF2_ITERS, "pbkdf2iters", + 0, PROP_ONETIME_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "", "PBKDF2ITERS"); /* default number properties */ zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, @@ -442,6 +492,11 @@ zfs_prop_init(void) PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "INCONSISTENT"); zprop_register_hidden(ZFS_PROP_PREV_SNAP, "prevsnap", PROP_TYPE_STRING, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PREVSNAP"); + zprop_register_hidden(ZFS_PROP_PBKDF2_SALT, "pbkdf2salt", + PROP_TYPE_NUMBER, PROP_ONETIME_DEFAULT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PBKDF2SALT"); + zprop_register_hidden(ZFS_PROP_KEY_GUID, "keyguid", PROP_TYPE_NUMBER, + PROP_READONLY, ZFS_TYPE_DATASET, "KEYGUID"); /* oddball properties */ zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, @@ -582,7 +637,8 @@ boolean_t zfs_prop_readonly(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_attr == PROP_READONLY || - zfs_prop_table[prop].pd_attr == PROP_ONETIME); + zfs_prop_table[prop].pd_attr == PROP_ONETIME || + zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT); } /* @@ -600,7 +656,8 @@ zfs_prop_visible(zfs_prop_t prop) boolean_t zfs_prop_setonce(zfs_prop_t prop) { - return (zfs_prop_table[prop].pd_attr == PROP_ONETIME); + return (zfs_prop_table[prop].pd_attr == PROP_ONETIME || + zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT); } const char * @@ -635,6 +692,40 @@ zfs_prop_inheritable(zfs_prop_t prop) zfs_prop_table[prop].pd_attr == PROP_ONETIME); } +/* + * Returns TRUE if property is one of the encryption properties that 
requires + * a loaded encryption key to modify. + */ +boolean_t +zfs_prop_encryption_key_param(zfs_prop_t prop) +{ + /* + * keylocation does not count as an encryption property. It can be + * changed at will without needing the master keys. + */ + return (prop == ZFS_PROP_PBKDF2_SALT || prop == ZFS_PROP_PBKDF2_ITERS || + prop == ZFS_PROP_KEYFORMAT); +} + +/* + * Helper function used by both kernelspace and userspace to check the + * keylocation property. If encrypted is set, the keylocation must be valid + * for an encrypted dataset. + */ +boolean_t +zfs_prop_valid_keylocation(const char *str, boolean_t encrypted) +{ + if (strcmp("none", str) == 0) + return (!encrypted); + else if (strcmp("prompt", str) == 0) + return (B_TRUE); + else if (strlen(str) > 8 && strncmp("file:///", str, 8) == 0) + return (B_TRUE); + + return (B_FALSE); +} + + #ifndef _KERNEL /* diff --git a/usr/src/common/zfs/zfs_prop.h b/usr/src/common/zfs/zfs_prop.h index a63262311b3d..45423cc72f75 100644 --- a/usr/src/common/zfs/zfs_prop.h +++ b/usr/src/common/zfs/zfs_prop.h @@ -51,9 +51,12 @@ typedef enum { * ONETIME properties are a sort of conglomeration of READONLY * and INHERIT. They can be set only during object creation, * after that they are READONLY. If not explicitly set during - * creation, they can be inherited. + * creation, they can be inherited. ONETIME_DEFAULT properties + * work the same way, but they will default instead of + * inheriting a value. */ - PROP_ONETIME + PROP_ONETIME, + PROP_ONETIME_DEFAULT } zprop_attr_t; typedef struct zfs_index { diff --git a/usr/src/lib/libuutil/common/libuutil.h b/usr/src/lib/libuutil/common/libuutil.h index ec1bf907c1ab..a6e11ff05441 100644 --- a/usr/src/lib/libuutil/common/libuutil.h +++ b/usr/src/lib/libuutil/common/libuutil.h @@ -245,7 +245,7 @@ void uu_list_pool_destroy(uu_list_pool_t *); * usage: * * foo_t *a; - * a = malloc(sizeof(*a)); + * a = malloc(sizeof (*a)); * uu_list_node_init(a, &a->foo_list, pool); * ... 
* uu_list_node_fini(a, &a->foo_list, pool); @@ -348,7 +348,7 @@ void uu_avl_pool_destroy(uu_avl_pool_t *); * usage: * * foo_t *a; - * a = malloc(sizeof(*a)); + * a = malloc(sizeof (*a)); * uu_avl_node_init(a, &a->foo_avl, pool); * ... * uu_avl_node_fini(a, &a->foo_avl, pool); diff --git a/usr/src/lib/libzfs/Makefile.com b/usr/src/lib/libzfs/Makefile.com index 6eaf4332f213..8bcd799e107b 100644 --- a/usr/src/lib/libzfs/Makefile.com +++ b/usr/src/lib/libzfs/Makefile.com @@ -40,6 +40,7 @@ OBJS_SHARED= \ OBJS_COMMON= \ libzfs_changelist.o \ libzfs_config.o \ + libzfs_crypto.o \ libzfs_dataset.o \ libzfs_diff.o \ libzfs_fru.o \ @@ -70,7 +71,8 @@ INCS += -I../../libc/inc C99MODE= -xc99=%all C99LMODE= -Xc99=%all LDLIBS += -lc -lm -ldevid -lgen -lnvpair -luutil -lavl -lefi \ - -ladm -lidmap -ltsol -lmd -lumem -lzfs_core -lcmdutils + -ladm -lidmap -ltsol -lcryptoutil -lpkcs11 -lmd -lumem -lzfs_core \ + -lcmdutils CPPFLAGS += $(INCS) -D_LARGEFILE64_SOURCE=1 -D_REENTRANT $(NOT_RELEASE_BUILD)CPPFLAGS += -DDEBUG @@ -84,6 +86,12 @@ SRCS= $(OBJS_COMMON:%.o=$(SRCDIR)/%.c) \ $(OBJS_SHARED:%.o=$(SRC)/common/zfs/%.c) $(LINTLIB) := SRCS= $(SRCDIR)/$(LINTSRC) +# lint complains about unused inline functions, even though +# they are "inline", not "static inline", with "extern inline" +# implementations and usage in libzpool. 
+LINTFLAGS += -erroff=E_STATIC_UNUSED +LINTFLAGS64 += -erroff=E_STATIC_UNUSED + .KEEP_STATE: all: $(LIBS) diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h index d62c4489da5a..d22910a37782 100644 --- a/usr/src/lib/libzfs/common/libzfs.h +++ b/usr/src/lib/libzfs/common/libzfs.h @@ -130,6 +130,7 @@ typedef enum zfs_error { EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_SCRUB_PAUSED, /* scrub currently paused */ + EZFS_CRYPTOFAILED, /* failed to setup encryption */ EZFS_UNKNOWN } zfs_error_t; @@ -439,8 +440,8 @@ extern uint64_t zfs_prop_default_numeric(zfs_prop_t); extern const char *zfs_prop_column_name(zfs_prop_t); extern boolean_t zfs_prop_align_right(zfs_prop_t); -extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, - nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *); +extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *, + uint64_t, zfs_handle_t *, zpool_handle_t *, boolean_t, const char *); extern const char *zfs_prop_to_name(zfs_prop_t); extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); @@ -470,6 +471,19 @@ extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); +/* + * zfs encryption management + */ +extern int zfs_crypto_get_encryption_root(zfs_handle_t *, boolean_t *, char *); +extern int zfs_crypto_create(libzfs_handle_t *, char *, nvlist_t *, nvlist_t *, + uint8_t **, uint_t *); +extern int zfs_crypto_clone_check(libzfs_handle_t *, zfs_handle_t *, char *, + nvlist_t *); +extern int zfs_crypto_attempt_load_keys(libzfs_handle_t *, char *); +extern int zfs_crypto_load_key(zfs_handle_t *, boolean_t, char *); +extern int zfs_crypto_unload_key(zfs_handle_t *); +extern int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t); + typedef struct zprop_list { int pl_prop; char *pl_user_prop; @@ -619,6 +633,9 @@ typedef struct sendflags { /* compressed 
WRITE records are permitted */ boolean_t compress; + + /* raw encrypted records are permitted */ + boolean_t raw; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); @@ -702,6 +719,7 @@ extern const char *zfs_type_to_name(zfs_type_t); extern void zfs_refresh_properties(zfs_handle_t *); extern int zfs_name_valid(const char *, zfs_type_t); extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t); +extern int zfs_parent_name(zfs_handle_t *, char *, size_t); extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_spa_version(zfs_handle_t *, int *); diff --git a/usr/src/lib/libzfs/common/libzfs_changelist.c b/usr/src/lib/libzfs/common/libzfs_changelist.c index af5cb35f9d92..99d226019f9b 100644 --- a/usr/src/lib/libzfs/common/libzfs_changelist.c +++ b/usr/src/lib/libzfs/common/libzfs_changelist.c @@ -225,6 +225,7 @@ changelist_postfix(prop_changelist_t *clp) boolean_t sharenfs; boolean_t sharesmb; boolean_t mounted; + boolean_t needs_key; /* * If we are in the global zone, but this dataset is exported @@ -253,9 +254,12 @@ changelist_postfix(prop_changelist_t *clp) shareopts, sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0) && (strcmp(shareopts, "off") != 0)); + needs_key = (zfs_prop_get_int(cn->cn_handle, + ZFS_PROP_KEYSTATUS) == ZFS_KEYSTATUS_UNAVAILABLE); + mounted = zfs_is_mounted(cn->cn_handle, NULL); - if (!mounted && (cn->cn_mounted || + if (!mounted && !needs_key && (cn->cn_mounted || ((sharenfs || sharesmb || clp->cl_waslegacy) && (zfs_prop_get_int(cn->cn_handle, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON)))) { diff --git a/usr/src/lib/libzfs/common/libzfs_crypto.c b/usr/src/lib/libzfs/common/libzfs_crypto.c new file mode 100644 index 000000000000..30a7d6913efa --- /dev/null +++ b/usr/src/lib/libzfs/common/libzfs_crypto.c @@ -0,0 +1,1533 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution 
License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#ifdef sun +#include +#include +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#include "libzfs_impl.h" +#include "zfeature_common.h" + +/* + * User keys are used to decrypt the master encryption keys of a dataset. This + * indirection allows a user to change his / her access key without having to + * re-encrypt the entire dataset. User keys can be provided in one of several + * ways. Raw keys are simply given to the kernel as is. Similarly, hex keys + * are converted to binary and passed into the kernel. Password based keys are + * a bit more complicated. Passwords alone do not provide suitable entropy for + * encryption and may be too short or too long to be used. In order to derive + * a more appropriate key we use a PBKDF2 function. This function is designed + * to take a (relatively) long time to calculate in order to discourage + * attackers from guessing from a list of common passwords. PBKDF2 requires + * 2 additional parameters. The first is the number of iterations to run, which + * will ultimately determine how long it takes to derive the resulting key from + * the password. The second parameter is a salt that is randomly generated for + * each dataset. The salt is used to "tweak" PBKDF2 such that a group of + * attackers cannot reasonably generate a table of commonly known passwords to + * their output keys and have it work for all past and future PBKDF2 users.
+ * We store the salt as a hidden property of the dataset (although it is + * technically ok if the salt is known to the attacker). + */ + +typedef enum key_locator { + KEY_LOCATOR_NONE, + KEY_LOCATOR_PROMPT, + KEY_LOCATOR_URI +} key_locator_t; + +#define MIN_PASSPHRASE_LEN 8 +#define MAX_PASSPHRASE_LEN 512 +#define MAX_KEY_PROMPT_ATTEMPTS 3 + +static int caught_interrupt; + +static zfs_keylocation_t +zfs_prop_parse_keylocation(const char *str) +{ + if (strcmp("prompt", str) == 0) + return (ZFS_KEYLOCATION_PROMPT); + else if (strlen(str) > 8 && strncmp("file:///", str, 8) == 0) + return (ZFS_KEYLOCATION_URI); + + return (ZFS_KEYLOCATION_NONE); +} + +static int +hex_key_to_raw(char *hex, int hexlen, uint8_t *out) +{ + int ret, i; + unsigned int c; + + for (i = 0; i < hexlen; i += 2) { + if (!isxdigit(hex[i]) || !isxdigit(hex[i + 1])) { + ret = EINVAL; + goto error; + } + + ret = sscanf(&hex[i], "%02x", &c); + if (ret != 1) { + ret = EINVAL; + goto error; + } + + out[i / 2] = c; + } + + return (0); + +error: + return (ret); +} + + +static void +catch_signal(int sig) +{ + caught_interrupt = sig; +} + +static char * +get_format_prompt_string(zfs_keyformat_t format) +{ + switch (format) { + case ZFS_KEYFORMAT_RAW: + return ("raw key"); + case ZFS_KEYFORMAT_HEX: + return ("hex key"); + case ZFS_KEYFORMAT_PASSPHRASE: + return ("passphrase"); + default: + /* shouldn't happen */ + return (NULL); + } +} + +static int +get_key_material_raw(FILE *fd, const char *fsname, zfs_keyformat_t keyformat, + boolean_t again, boolean_t newkey, uint8_t **buf, size_t *len_out) +{ + int ret = 0, bytes; + size_t buflen = 0; + struct termios old_term, new_term; + struct sigaction act, osigint, osigtstp; + + *len_out = 0; + + if (isatty(fileno(fd))) { + /* + * handle SIGINT and ignore SIGTSTP. This is necessary to + * restore the state of the terminal.
+ */ + caught_interrupt = 0; + act.sa_flags = 0; + (void) sigemptyset(&act.sa_mask); + act.sa_handler = catch_signal; + + (void) sigaction(SIGINT, &act, &osigint); + act.sa_handler = SIG_IGN; + (void) sigaction(SIGTSTP, &act, &osigtstp); + + /* prompt for the key */ + if (fsname != NULL) { + (void) printf("%s %s%s for '%s': ", + (again) ? "Re-enter" : "Enter", + (newkey) ? "new " : "", + get_format_prompt_string( + (zfs_keyformat_t)keyformat), + fsname); + } else { + (void) printf("%s %s%s: ", + (again) ? "Re-enter" : "Enter", + (newkey) ? "new " : "", + get_format_prompt_string( + (zfs_keyformat_t)keyformat)); + + } + (void) fflush(stdout); + + /* disable the terminal echo for key input */ + (void) tcgetattr(fileno(fd), &old_term); + + new_term = old_term; + new_term.c_lflag &= ~(ECHO | ECHOE | ECHOK | ECHONL); + + ret = tcsetattr(fileno(fd), TCSAFLUSH, &new_term); + if (ret != 0) { + ret = errno; + errno = 0; + goto out; + } + } + + /* read the key material */ + if (keyformat != ZFS_KEYFORMAT_RAW) { + bytes = getline((char **)buf, &buflen, fd); + if (bytes < 0) { + ret = errno; + errno = 0; + goto out; + } + + /* trim the ending newline if it exists */ + if ((*buf)[bytes - 1] == '\n') { + (*buf)[bytes - 1] = '\0'; + bytes--; + } + } else { + /* + * Raw keys may have newline characters in them and so can't + * use getline(). Here we attempt to read 33 bytes so that we + * can properly check the key length (the file should only have + * 32 bytes). 
+ */ + *buf = malloc((WRAPPING_KEY_LEN + 1) * sizeof (char)); + if (*buf == NULL) { + ret = ENOMEM; + goto out; + } + + bytes = fread(*buf, 1, WRAPPING_KEY_LEN + 1, fd); + if (ferror(fd) != 0) { + /* read error; size errors are handled by the caller */ + free(*buf); + *buf = NULL; + ret = (errno != 0) ? errno : EIO; + errno = 0; + goto out; + } + } + + *len_out = bytes; + +out: + if (isatty(fileno(fd))) { + /* reset the terminal */ + (void) tcsetattr(fileno(fd), TCSAFLUSH, &old_term); + (void) sigaction(SIGINT, &osigint, NULL); + (void) sigaction(SIGTSTP, &osigtstp, NULL); + + /* if we caught a signal, re-throw it now */ + if (caught_interrupt != 0) { + (void) kill(getpid(), caught_interrupt); + } + + /* print the newline that was not echo'd */ + (void) printf("\n"); + } + + return (ret); + +} + +/* + * Attempts to fetch key material, no matter where it might live. The key + * material is allocated and returned in km_out. *can_retry_out will be set + * to B_TRUE if the user is providing the key material interactively, allowing + * for re-entry attempts.
+ */ +static int +get_key_material(libzfs_handle_t *hdl, boolean_t do_verify, boolean_t newkey, + zfs_keyformat_t keyformat, char *keylocation, const char *fsname, + uint8_t **km_out, size_t *kmlen_out, boolean_t *can_retry_out) +{ + int ret, i; + zfs_keylocation_t keyloc = ZFS_KEYLOCATION_NONE; + FILE *fd = NULL; + uint8_t *km = NULL, *km2 = NULL; + size_t kmlen, kmlen2; + boolean_t can_retry = B_FALSE; + + /* verify and parse the keylocation */ + keyloc = zfs_prop_parse_keylocation(keylocation); + + /* open the appropriate file descriptor */ + switch (keyloc) { + case ZFS_KEYLOCATION_PROMPT: + fd = stdin; + if (isatty(fileno(fd))) { + can_retry = B_TRUE; + + /* raw keys cannot be entered on the terminal */ + if (keyformat == ZFS_KEYFORMAT_RAW) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Cannot enter raw keys on the terminal")); + goto error; + } + } + break; + case ZFS_KEYLOCATION_URI: + fd = fopen(&keylocation[7], "r"); + if (!fd) { + ret = errno; + errno = 0; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to open key material file")); + goto error; + } + break; + default: + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid keylocation.")); + goto error; + } + + /* fetch the key material into the buffer */ + ret = get_key_material_raw(fd, fsname, keyformat, B_FALSE, newkey, + &km, &kmlen); + if (ret != 0) + goto error; + + /* do basic validation of the key material */ + switch (keyformat) { + case ZFS_KEYFORMAT_RAW: + /* verify the key length is correct */ + if (kmlen < WRAPPING_KEY_LEN) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Raw key too short (expected %u)."), + WRAPPING_KEY_LEN); + goto error; + } + + if (kmlen > WRAPPING_KEY_LEN) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Raw key too long (expected %u)."), + WRAPPING_KEY_LEN); + goto error; + } + break; + case ZFS_KEYFORMAT_HEX: + /* verify the key length is correct */ + if (kmlen < WRAPPING_KEY_LEN * 2) { + ret = 
EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Hex key too short (expected %u)."), + WRAPPING_KEY_LEN * 2); + goto error; + } + + if (kmlen > WRAPPING_KEY_LEN * 2) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Hex key too long (expected %u)."), + WRAPPING_KEY_LEN * 2); + goto error; + } + + /* check for invalid hex digits */ + for (i = 0; i < WRAPPING_KEY_LEN * 2; i++) { + if (!isxdigit((char)km[i])) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid hex character detected.")); + goto error; + } + } + break; + case ZFS_KEYFORMAT_PASSPHRASE: + /* verify the length is within bounds */ + if (kmlen > MAX_PASSPHRASE_LEN) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Passphrase too long (max %u)."), + MAX_PASSPHRASE_LEN); + goto error; + } + + if (kmlen < MIN_PASSPHRASE_LEN) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Passphrase too short (min %u)."), + MIN_PASSPHRASE_LEN); + goto error; + } + break; + default: + /* can't happen, checked above */ + break; + } + + if (do_verify && isatty(fileno(fd))) { + ret = get_key_material_raw(fd, fsname, keyformat, B_TRUE, + newkey, &km2, &kmlen2); + if (ret != 0) + goto error; + + if (kmlen2 != kmlen || + (memcmp((char *)km, (char *)km2, kmlen) != 0)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Provided keys do not match.")); + goto error; + } + } + + if (fd != stdin) + (void) fclose(fd); + + if (km2 != NULL) + free(km2); + + *km_out = km; + *kmlen_out = kmlen; + if (can_retry_out != NULL) + *can_retry_out = can_retry; + + return (0); + +error: + if (km != NULL) + free(km); + + if (km2 != NULL) + free(km2); + + if (fd != NULL && fd != stdin) + (void) fclose(fd); + + *km_out = NULL; + *kmlen_out = 0; + + if (can_retry_out != NULL) + *can_retry_out = can_retry; + + return (ret); +} + +/* This needs to be fixed to be compatible with other platforms */ + +static int +pbkdf2(uint8_t *passphrase, size_t passphraselen, uint8_t *salt, + 
size_t saltlen, uint64_t iterations, uint8_t *output, + size_t outputlen) +{ + int ret = 0; + CK_SESSION_HANDLE session; + char *tmpkeydata = NULL; + size_t tmpkeydatalen = 0; + CK_OBJECT_HANDLE obj; + + /* initialize output */ + (void) memset(output, 0, outputlen); + + ret = SUNW_C_GetMechSession(CKM_PKCS5_PBKD2, &session); + if (ret) { + (void) fprintf(stderr, "failed to connect to pkcs5: %s\r\n", + pkcs11_strerror(ret)); + return (ret); + } + + ret = pkcs11_PasswdToPBKD2Object(session, (char *)passphrase, + passphraselen, salt, saltlen, iterations, CKK_AES, outputlen, 0, + &obj); + + if (ret == CKR_OK) + ret = pkcs11_ObjectToKey(session, obj, (void **)&tmpkeydata, + &tmpkeydatalen, B_TRUE); + + (void) C_CloseSession(session); + if (ret) { + (void) fprintf(stderr, "unable to generate key: %s\r\n", + pkcs11_strerror(ret)); + return (ret); + } + + /* + * Because it allocates an area for the passphrase, we copy it out + * then zero the original + */ + (void) memcpy(output, tmpkeydata, tmpkeydatalen); + (void) memset(tmpkeydata, 0, tmpkeydatalen); + free(tmpkeydata); + + return (ret); +} + +/* ARGSUSED */ +static int +derive_key(libzfs_handle_t *hdl, zfs_keyformat_t format, uint64_t iters, + uint8_t *key_material, size_t key_material_len, uint64_t salt, + uint8_t **key_out) +{ + int ret; + uint8_t *key; + + *key_out = NULL; + + key = zfs_alloc(hdl, WRAPPING_KEY_LEN); + if (!key) + return (ENOMEM); + + switch (format) { + case ZFS_KEYFORMAT_RAW: + bcopy(key_material, key, WRAPPING_KEY_LEN); + break; + case ZFS_KEYFORMAT_HEX: + ret = hex_key_to_raw((char *)key_material, + WRAPPING_KEY_LEN * 2, key); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid hex key provided.")); + goto error; + } + break; + case ZFS_KEYFORMAT_PASSPHRASE: + salt = LE_64(salt); + ret = pbkdf2(key_material, strlen((char *)key_material), + ((uint8_t *)&salt), sizeof (uint64_t), iters, + key, WRAPPING_KEY_LEN); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + 
"Failed to generate key from passphrase.")); + goto error; + } + break; + default: + ret = EINVAL; + goto error; + } + + *key_out = key; + return (0); + +error: + free(key); + + *key_out = NULL; + return (ret); +} + +static boolean_t +encryption_feature_is_enabled(zpool_handle_t *zph) +{ + nvlist_t *features; + uint64_t feat_refcount; + + /* check that features can be enabled */ + if (zpool_get_prop_int(zph, ZPOOL_PROP_VERSION, NULL) + < SPA_VERSION_FEATURES) + return (B_FALSE); + + /* check for crypto feature */ + features = zpool_get_features(zph); + if (!features || nvlist_lookup_uint64(features, + spa_feature_table[SPA_FEATURE_ENCRYPTION].fi_guid, + &feat_refcount) != 0) + return (B_FALSE); + + return (B_TRUE); +} + +static int +populate_create_encryption_params_nvlists(libzfs_handle_t *hdl, + zfs_handle_t *zhp, boolean_t newkey, zfs_keyformat_t keyformat, + char *keylocation, nvlist_t *props, uint8_t **wkeydata, uint_t *wkeylen) +{ + int ret; + uint64_t iters = 0, salt = 0; + uint8_t *key_material = NULL; + size_t key_material_len = 0; + uint8_t *key_data = NULL; + const char *fsname = (zhp) ? 
zfs_get_name(zhp) : NULL; + + /* get key material from keyformat and keylocation */ + ret = get_key_material(hdl, B_TRUE, newkey, keyformat, keylocation, + fsname, &key_material, &key_material_len, NULL); + if (ret != 0) + goto error; + + /* passphrase formats require a salt and pbkdf2 iters property */ + if (keyformat == ZFS_KEYFORMAT_PASSPHRASE) { +#ifdef sun + /* always generate a new salt */ + ret = pkcs11_get_random(&salt, sizeof (uint64_t)); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to generate salt.")); + goto error; + } +#else + random_init(); + + ret = random_get_bytes((uint8_t *)&salt, sizeof (uint64_t)); + random_fini(); /* pair with random_init() even on failure */ + + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to generate salt.")); + goto error; + } +#endif + + ret = nvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), salt); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to add salt to properties.")); + goto error; + } + + /* + * If not otherwise specified, use the default number of + * pbkdf2 iterations. If specified, we have already checked + * that the given value is greater than MIN_PBKDF2_ITERATIONS + * during zfs_valid_proplist().
+ */ + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters); + if (ret == ENOENT) { + iters = DEFAULT_PBKDF2_ITERATIONS; + ret = nvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), iters); + if (ret != 0) + goto error; + } else if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to get pbkdf2 iterations.")); + goto error; + } + } else { + /* check that pbkdf2iters was not specified by the user */ + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters); + if (ret == 0) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Cannot specify pbkdf2iters with a non-passphrase " + "keyformat.")); + goto error; + } + } + + /* derive a key from the key material */ + ret = derive_key(hdl, (zfs_keyformat_t)keyformat, iters, key_material, + key_material_len, salt, &key_data); + if (ret != 0) + goto error; + + free(key_material); + + *wkeydata = key_data; + *wkeylen = WRAPPING_KEY_LEN; + return (0); + +error: + if (key_material != NULL) + free(key_material); + if (key_data != NULL) + free(key_data); + + *wkeydata = NULL; + *wkeylen = 0; + return (ret); +} + +static boolean_t +proplist_has_encryption_props(nvlist_t *props) +{ + int ret; + uint64_t intval; + char *strval; + + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &intval); + if (ret == 0 && intval != ZIO_CRYPT_OFF) + return (B_TRUE); + + ret = nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &strval); + if (ret == 0 && strcmp(strval, "none") != 0) + return (B_TRUE); + + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &intval); + if (ret == 0) + return (B_TRUE); + + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &intval); + if (ret == 0) + return (B_TRUE); + + return (B_FALSE); +} + +int +zfs_crypto_get_encryption_root(zfs_handle_t *zhp, boolean_t *is_encroot, + char *buf) +{ + int ret; + char 
prop_encroot[MAXNAMELEN]; + + /* if the dataset isn't encrypted, just return */ + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) == ZIO_CRYPT_OFF) { + *is_encroot = B_FALSE; + if (buf != NULL) + buf[0] = '\0'; + return (0); + } + + ret = zfs_prop_get(zhp, ZFS_PROP_ENCRYPTION_ROOT, prop_encroot, + sizeof (prop_encroot), NULL, NULL, 0, B_TRUE); + if (ret != 0) { + *is_encroot = B_FALSE; + if (buf != NULL) + buf[0] = '\0'; + return (ret); + } + + *is_encroot = strcmp(prop_encroot, zfs_get_name(zhp)) == 0; + if (buf != NULL) + (void) strcpy(buf, prop_encroot); + + return (0); +} + +int +zfs_crypto_create(libzfs_handle_t *hdl, char *parent_name, nvlist_t *props, + nvlist_t *pool_props, uint8_t **wkeydata_out, uint_t *wkeylen_out) +{ + int ret; + char errbuf[1024]; + uint64_t crypt = ZIO_CRYPT_INHERIT, pcrypt = ZIO_CRYPT_INHERIT; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + char *keylocation = NULL; + zfs_handle_t *pzhp = NULL; + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; + boolean_t local_crypt = B_TRUE; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Encryption create error")); + + /* lookup crypt from props */ + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt); + if (ret != 0) + local_crypt = B_FALSE; + + /* lookup key location and format from props */ + (void) nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat); + (void) nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); + + if (parent_name != NULL) { + /* get a reference to parent dataset */ + pzhp = make_dataset_handle(hdl, parent_name); + if (pzhp == NULL) { + ret = ENOENT; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to lookup parent.")); + goto out; + } + + /* Lookup parent's crypt */ + pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION); + + /* Params require the encryption feature */ + if (!encryption_feature_is_enabled(pzhp->zpool_hdl)) { + if 
(proplist_has_encryption_props(props)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + goto out; + } + + ret = 0; + goto out; + } + } else { + /* + * special case for root dataset where the encryption + * feature won't be on disk yet + */ + if (!nvlist_exists(pool_props, "feature@encryption")) { + if (proplist_has_encryption_props(props)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + goto out; + } + + ret = 0; + goto out; + } + + pcrypt = ZIO_CRYPT_OFF; + } + + /* Check for encryption being explicitly turned off */ + if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid encryption value. Dataset must be encrypted.")); + goto out; + } + + /* Get the inherited encryption property if we don't have it locally */ + if (!local_crypt) + crypt = pcrypt; + + /* + * At this point crypt should be the actual encryption value. If + * encryption is off just verify that no encryption properties have + * been specified and return. + */ + if (crypt == ZIO_CRYPT_OFF) { + if (proplist_has_encryption_props(props)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption must be turned on to set encryption " + "properties.")); + goto out; + } + + ret = 0; + goto out; + } + + /* + * If we have a parent crypt it is valid to specify encryption alone. + * This will result in a child that is encrypted with the chosen + * encryption suite that will also inherit the parent's key. If + * the parent is not encrypted we need an encryption suite provided. + */ + if (pcrypt == ZIO_CRYPT_OFF && keylocation == NULL && + keyformat == ZFS_KEYFORMAT_NONE) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Keyformat required for new encryption root.")); + goto out; + } + + /* + * Specifying a keylocation implies this will be a new encryption root.
+ * Check that a keyformat is also specified. + */ + if (keylocation != NULL && keyformat == ZFS_KEYFORMAT_NONE) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Keyformat required for new encryption root.")); + goto out; + } + + /* default to prompt if no keylocation is specified */ + if (keyformat != ZFS_KEYFORMAT_NONE && keylocation == NULL) { + keylocation = "prompt"; + ret = nvlist_add_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), keylocation); + if (ret != 0) + goto out; + } + + /* + * If a local key is provided, this dataset will be a new + * encryption root. Populate the encryption params. + */ + if (keylocation != NULL) { + ret = populate_create_encryption_params_nvlists(hdl, NULL, + B_FALSE, keyformat, keylocation, props, &wkeydata, + &wkeylen); + if (ret != 0) + goto out; + } + + if (pzhp != NULL) + zfs_close(pzhp); + + *wkeydata_out = wkeydata; + *wkeylen_out = wkeylen; + return (0); + +out: + if (pzhp != NULL) + zfs_close(pzhp); + if (wkeydata != NULL) + free(wkeydata); + + *wkeydata_out = NULL; + *wkeylen_out = 0; + return (ret); +} + +int +zfs_crypto_clone_check(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp, + char *parent_name, nvlist_t *props) +{ + int ret; + char errbuf[1024]; + zfs_handle_t *pzhp = NULL; + uint64_t pcrypt, ocrypt; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Encryption clone error")); + + /* + * No encryption properties should be specified. They will all be + * inherited from the origin dataset. 
+ */ + if (nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_KEYFORMAT)) || + nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION)) || + nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION)) || + nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS))) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption properties must inherit from origin dataset.")); + goto out; + } + + /* get a reference to parent dataset, should never be NULL */ + pzhp = make_dataset_handle(hdl, parent_name); + if (pzhp == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to lookup parent.")); + return (ENOENT); + } + + /* Lookup parent's crypt */ + pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION); + ocrypt = zfs_prop_get_int(origin_zhp, ZFS_PROP_ENCRYPTION); + + /* all children of encrypted parents must be encrypted */ + if (pcrypt != ZIO_CRYPT_OFF && ocrypt == ZIO_CRYPT_OFF) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Cannot create unencrypted clone as a child " + "of encrypted parent.")); + goto out; + } + + zfs_close(pzhp); + return (0); + +out: + if (pzhp != NULL) + zfs_close(pzhp); + return (ret); +} + +typedef struct loadkeys_cbdata { + uint64_t cb_numfailed; + uint64_t cb_numattempted; +} loadkey_cbdata_t; + +static int +load_keys_cb(zfs_handle_t *zhp, void *arg) +{ + int ret; + boolean_t is_encroot; + loadkey_cbdata_t *cb = arg; + uint64_t keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + + /* only attempt to load keys for encryption roots */ + ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); + if (ret != 0 || !is_encroot) + goto out; + + /* don't attempt to load already loaded keys */ + if (keystatus == ZFS_KEYSTATUS_AVAILABLE) + goto out; + + /* Attempt to load the key. Record status in cb. 
*/ + cb->cb_numattempted++; + + ret = zfs_crypto_load_key(zhp, B_FALSE, NULL); + if (ret) + cb->cb_numfailed++; + +out: + (void) zfs_iter_filesystems(zhp, load_keys_cb, cb); + zfs_close(zhp); + + /* always return 0, since this function is best effort */ + return (0); +} + +/* + * This function is best effort. It attempts to load all the keys for the given + * filesystem and all of its children. + */ +int +zfs_crypto_attempt_load_keys(libzfs_handle_t *hdl, char *fsname) +{ + int ret; + zfs_handle_t *zhp = NULL; + loadkey_cbdata_t cb = { 0 }; + + zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) { + ret = ENOENT; + goto error; + } + + ret = load_keys_cb(zfs_handle_dup(zhp), &cb); + if (ret) + goto error; + + (void) printf(gettext("%llu / %llu keys successfully loaded\n"), + (u_longlong_t)(cb.cb_numattempted - cb.cb_numfailed), + (u_longlong_t)cb.cb_numattempted); + + if (cb.cb_numfailed != 0) { + ret = -1; + goto error; + } + + zfs_close(zhp); + return (0); + +error: + if (zhp != NULL) + zfs_close(zhp); + return (ret); +} + +int +zfs_crypto_load_key(zfs_handle_t *zhp, boolean_t noop, char *alt_keylocation) +{ + int ret, attempts = 0; + char errbuf[1024]; + uint64_t keystatus, iters = 0, salt = 0; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + char prop_keylocation[MAXNAMELEN]; + char prop_encroot[MAXNAMELEN]; + char *keylocation = NULL; + uint8_t *key_material = NULL, *key_data = NULL; + size_t key_material_len; + boolean_t is_encroot, can_retry = B_FALSE, correctible = B_FALSE; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Key load error")); + + /* check that encryption is enabled for the pool */ + if (!encryption_feature_is_enabled(zhp->zpool_hdl)) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + ret = EINVAL; + goto error; + } + + /* Fetch the keyformat. Check that the dataset is encrypted. 
*/ + keyformat = zfs_prop_get_int(zhp, ZFS_PROP_KEYFORMAT); + if (keyformat == ZFS_KEYFORMAT_NONE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "'%s' is not encrypted."), zfs_get_name(zhp)); + ret = EINVAL; + goto error; + } + + /* + * Fetch the key location. Check that we are working with an + * encryption root. + */ + ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, prop_encroot); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to get encryption root for '%s'."), + zfs_get_name(zhp)); + goto error; + } else if (!is_encroot) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Keys must be loaded for encryption root of '%s' (%s)."), + zfs_get_name(zhp), prop_encroot); + ret = EINVAL; + goto error; + } + + /* + * if the caller has elected to override the keylocation property + * use that instead + */ + if (alt_keylocation != NULL) { + keylocation = alt_keylocation; + } else { + ret = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION, prop_keylocation, + sizeof (prop_keylocation), NULL, NULL, 0, B_TRUE); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to get keylocation for '%s'."), + zfs_get_name(zhp)); + goto error; + } + + keylocation = prop_keylocation; + } + + /* check that the key is unloaded unless this is a noop */ + if (!noop) { + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + if (keystatus == ZFS_KEYSTATUS_AVAILABLE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already loaded for '%s'."), zfs_get_name(zhp)); + ret = EEXIST; + goto error; + } + } + + /* passphrase formats require a salt and pbkdf2_iters property */ + if (keyformat == ZFS_KEYFORMAT_PASSPHRASE) { + salt = zfs_prop_get_int(zhp, ZFS_PROP_PBKDF2_SALT); + iters = zfs_prop_get_int(zhp, ZFS_PROP_PBKDF2_ITERS); + } + +try_again: + /* fetching and deriving the key are correctible errors. 
set the flag */ + correctible = B_TRUE; + + /* get key material from key format and location */ + ret = get_key_material(zhp->zfs_hdl, B_FALSE, B_FALSE, keyformat, + keylocation, zfs_get_name(zhp), &key_material, &key_material_len, + &can_retry); + if (ret != 0) + goto error; + + /* derive a key from the key material */ + ret = derive_key(zhp->zfs_hdl, keyformat, iters, key_material, + key_material_len, salt, &key_data); + if (ret != 0) + goto error; + + correctible = B_FALSE; + + /* pass the wrapping key and noop flag to the ioctl */ + ret = lzc_load_key(zhp->zfs_name, noop, key_data, WRAPPING_KEY_LEN); + if (ret != 0) { + switch (ret) { + case EINVAL: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Invalid parameters provided for %s."), + zfs_get_name(zhp)); + break; + case EEXIST: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already loaded for '%s'."), zfs_get_name(zhp)); + break; + case EBUSY: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "'%s' is busy."), zfs_get_name(zhp)); + break; + case EACCES: + correctible = B_TRUE; + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Incorrect key provided for '%s'."), + zfs_get_name(zhp)); + break; + } + goto error; + } + + free(key_material); + free(key_data); + + return (0); + +error: + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + free(key_material); + key_material = NULL; /* retry may reach error: again */ + free(key_data); + key_data = NULL; /* avoid double free on retry */ + + /* + * Here we decide if it is ok to allow the user to retry entering their + * key. The can_retry flag will be set if the user is entering their + * key from an interactive prompt. The correctible flag will only be + * set if an error that occurred could be corrected by retrying.
Both + * flags are needed to allow the user to attempt key entry again + */ + if (can_retry && correctible && attempts <= MAX_KEY_PROMPT_ATTEMPTS) { + attempts++; + goto try_again; + } + + return (ret); +} + +int +zfs_crypto_unload_key(zfs_handle_t *zhp) +{ + int ret; + char errbuf[1024]; + char prop_encroot[MAXNAMELEN]; + uint64_t keystatus, keyformat; + boolean_t is_encroot; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Key unload error")); + + /* check that encryption is enabled for the pool */ + if (!encryption_feature_is_enabled(zhp->zpool_hdl)) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + ret = EINVAL; + goto error; + } + + /* Fetch the keyformat. Check that the dataset is encrypted. */ + keyformat = zfs_prop_get_int(zhp, ZFS_PROP_KEYFORMAT); + if (keyformat == ZFS_KEYFORMAT_NONE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "'%s' is not encrypted."), zfs_get_name(zhp)); + ret = EINVAL; + goto error; + } + + /* + * Fetch the key location. Check that we are working with an + * encryption root. 
+ */ + ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, prop_encroot); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to get encryption root for '%s'."), + zfs_get_name(zhp)); + goto error; + } else if (!is_encroot) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Keys must be unloaded for encryption root of '%s' (%s)."), + zfs_get_name(zhp), prop_encroot); + ret = EINVAL; + goto error; + } + + /* check that the key is loaded */ + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already unloaded for '%s'."), zfs_get_name(zhp)); + ret = ENOENT; + goto error; + } + + /* call the ioctl */ + ret = lzc_unload_key(zhp->zfs_name); + + if (ret != 0) { + switch (ret) { + case ENOENT: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already unloaded for '%s'."), + zfs_get_name(zhp)); + break; + case EBUSY: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "'%s' is busy."), zfs_get_name(zhp)); + break; + } + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + } + + return (ret); + +error: + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + return (ret); +} + +static int +zfs_crypto_verify_rewrap_nvlist(zfs_handle_t *zhp, nvlist_t *props, + nvlist_t **props_out, char *errbuf) +{ + int ret; + nvpair_t *elem = NULL; + zfs_prop_t prop; + nvlist_t *new_props = NULL; + + new_props = fnvlist_alloc(); + + /* + * loop through all provided properties, we should only have + * keyformat, keylocation and pbkdf2iters. The actual validation of + * values is done by zfs_valid_proplist(). 
+ */ + while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + const char *propname = nvpair_name(elem); + prop = zfs_name_to_prop(propname); + + switch (prop) { + case ZFS_PROP_PBKDF2_ITERS: + case ZFS_PROP_KEYFORMAT: + case ZFS_PROP_KEYLOCATION: + break; + default: + ret = EINVAL; + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Only keyformat, keylocation and pbkdf2iters may " + "be set with this command.")); + goto error; + } + } + nvlist_free(new_props); /* drop placeholder list before overwrite */ + new_props = zfs_valid_proplist(zhp->zfs_hdl, zhp->zfs_type, props, + zfs_prop_get_int(zhp, ZFS_PROP_ZONED), NULL, zhp->zpool_hdl, + B_TRUE, errbuf); + ret = (new_props == NULL) ? EINVAL : 0; + if (ret != 0) + goto error; + *props_out = new_props; + return (0); + +error: + nvlist_free(new_props); + *props_out = NULL; + return (ret); +} + +int +zfs_crypto_rewrap(zfs_handle_t *zhp, nvlist_t *raw_props, boolean_t inheritkey) +{ + int ret; + char errbuf[1024]; + boolean_t is_encroot; + nvlist_t *props = NULL; + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; + dcp_cmd_t cmd = (inheritkey) ?
DCP_CMD_INHERIT : DCP_CMD_NEW_KEY; + uint64_t crypt, pcrypt, keystatus, pkeystatus; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + zfs_handle_t *pzhp = NULL; + char *keylocation = NULL; + char origin_name[MAXNAMELEN]; + char prop_keylocation[MAXNAMELEN]; + char parent_name[ZFS_MAX_DATASET_NAME_LEN]; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Key change error")); + + /* check that encryption is enabled for the pool */ + if (!encryption_feature_is_enabled(zhp->zpool_hdl)) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + ret = EINVAL; + goto error; + } + + /* get crypt from dataset */ + crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + if (crypt == ZIO_CRYPT_OFF) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Dataset not encrypted.")); + ret = EINVAL; + goto error; + } + + /* get the encryption root of the dataset */ + ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to get encryption root for '%s'."), + zfs_get_name(zhp)); + goto error; + } + + /* Clones use their origin's key and cannot rewrap it */ + ret = zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin_name, + sizeof (origin_name), NULL, NULL, 0, B_TRUE); + if (ret == 0 && strcmp(origin_name, "") != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Keys cannot be changed on clones.")); + ret = EINVAL; + goto error; + } + + /* + * If the user wants to use the inheritkey variant of this function + * we don't need to collect any crypto arguments. + */ + if (!inheritkey) { + /* validate the provided properties */ + ret = zfs_crypto_verify_rewrap_nvlist(zhp, raw_props, &props, + errbuf); + if (ret != 0) + goto error; + + /* + * Load keyformat and keylocation from the nvlist. Fetch from + * the dataset properties if not specified. 
+ */ + (void) nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat); + (void) nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); + + if (is_encroot) { + /* + * If this is already an ecryption root, just keep + * any properties not set by the user. + */ + if (keyformat == ZFS_KEYFORMAT_NONE) { + keyformat = zfs_prop_get_int(zhp, + ZFS_PROP_KEYFORMAT); + ret = nvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), + keyformat); + } + + if (keylocation == NULL) { + ret = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION, + prop_keylocation, sizeof (prop_keylocation), + NULL, NULL, 0, B_TRUE); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, + dgettext(TEXT_DOMAIN, "Failed to " + "get existing keylocation " + "property.")); + goto error; + } + + keylocation = prop_keylocation; + } + } else { + /* need a new key for non-encryption roots */ + if (keyformat == ZFS_KEYFORMAT_NONE) { + ret = EINVAL; + zfs_error_aux(zhp->zfs_hdl, + dgettext(TEXT_DOMAIN, "Keyformat required " + "for new encryption root.")); + goto error; + } + + /* default to prompt if no keylocation is specified */ + if (keylocation == NULL) { + keylocation = "prompt"; + ret = nvlist_add_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + keylocation); + if (ret != 0) + goto error; + } + } + + /* fetch the new wrapping key and associated properties */ + ret = populate_create_encryption_params_nvlists(zhp->zfs_hdl, + zhp, B_TRUE, keyformat, keylocation, props, &wkeydata, + &wkeylen); + if (ret != 0) + goto error; + } else { + /* check that zhp is an encryption root */ + if (!is_encroot) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key inheritting can only be performed on " + "encryption roots.")); + ret = EINVAL; + goto error; + } + + /* get the parent's name */ + ret = zfs_parent_name(zhp, parent_name, sizeof (parent_name)); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Root dataset cannot inherit key.")); 
+ ret = EINVAL; + goto error; + } + + /* get a handle to the parent */ + pzhp = make_dataset_handle(zhp->zfs_hdl, parent_name); + if (pzhp == NULL) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to lookup parent.")); + ret = ENOENT; + goto error; + } + + /* parent must be encrypted */ + pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION); + if (pcrypt == ZIO_CRYPT_OFF) { + zfs_error_aux(pzhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Parent must be encrypted.")); + ret = EINVAL; + goto error; + } + + /* check that the parent's key is loaded */ + pkeystatus = zfs_prop_get_int(pzhp, ZFS_PROP_KEYSTATUS); + if (pkeystatus == ZFS_KEYSTATUS_UNAVAILABLE) { + zfs_error_aux(pzhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Parent key must be loaded.")); + ret = EACCES; + goto error; + } + } + + /* check that the key is loaded */ + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key must be loaded.")); + ret = EACCES; + goto error; + } + + /* call the ioctl */ + ret = lzc_change_key(zhp->zfs_name, cmd, props, wkeydata, wkeylen); + if (ret != 0) { + switch (ret) { + case EINVAL: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Invalid properties for key change.")); + break; + case EACCES: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key is not currently loaded.")); + break; + } + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + } + + if (pzhp != NULL) + zfs_close(pzhp); + if (props != NULL) + nvlist_free(props); + if (wkeydata != NULL) + free(wkeydata); + + return (ret); + +error: + if (pzhp != NULL) + zfs_close(pzhp); + if (props != NULL) + nvlist_free(props); + if (wkeydata != NULL) + free(wkeydata); + + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + return (ret); +} diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c index 9f55741bc931..37d0c47a5775 100644 --- 
a/usr/src/lib/libzfs/common/libzfs_dataset.c +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include "zfs_namecheck.h" @@ -940,7 +941,7 @@ zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop) nvlist_t * zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, uint64_t zoned, zfs_handle_t *zhp, zpool_handle_t *zpool_hdl, - const char *errbuf) + boolean_t key_params_ok, const char *errbuf) { nvpair_t *elem; uint64_t intval; @@ -1097,7 +1098,8 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, } if (zfs_prop_readonly(prop) && - (!zfs_prop_setonce(prop) || zhp != NULL)) { + !(zfs_prop_setonce(prop) && zhp == NULL) && + !(zfs_prop_encryption_key_param(prop) && key_params_ok)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); @@ -1362,6 +1364,48 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, break; + case ZFS_PROP_KEYLOCATION: + if (!zfs_prop_valid_keylocation(strval, B_FALSE)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid keylocation")); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + if (zhp != NULL) { + uint64_t crypt = + zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + + if (crypt == ZIO_CRYPT_OFF && + strcmp(strval, "none") != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "keylocation must be 'none' " + "for unencrypted datasets")); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } else if (crypt != ZIO_CRYPT_OFF && + strcmp(strval, "none") == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "keylocation must not be 'none' " + "for encrypted datasets")); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } + } + break; + + case ZFS_PROP_PBKDF2_ITERS: + if (intval < MIN_PBKDF2_ITERATIONS) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "minimum pbkdf2 iterations is %u"), + MIN_PBKDF2_ITERATIONS); + (void) zfs_error(hdl, EZFS_BADPROP,
errbuf); + goto error; + } + break; + case ZFS_PROP_UTF8ONLY: chosen_utf = (int)intval; break; @@ -1425,6 +1469,27 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, break; } } + + /* check encryption properties */ + if (zhp != NULL) { + int64_t crypt = zfs_prop_get_int(zhp, + ZFS_PROP_ENCRYPTION); + + switch (prop) { + case ZFS_PROP_COPIES: + if (crypt != ZIO_CRYPT_OFF && intval > 2) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encrypted datasets cannot have " + "3 copies")); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } + break; + default: + break; + } + } } /* @@ -1580,6 +1645,16 @@ zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err, } break; + case EACCES: + if (prop == ZFS_PROP_KEYLOCATION) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "keylocation may only be set on encryption roots")); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + } else { + (void) zfs_standard_error(hdl, err, errbuf); + } + break; + case EOVERFLOW: /* * This platform can't address a volume this big. @@ -1649,7 +1724,7 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props) if ((nvl = zfs_valid_proplist(hdl, zhp->zfs_type, props, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, zhp->zpool_hdl, - errbuf)) == NULL) + B_FALSE, errbuf)) == NULL) goto error; /* @@ -3134,6 +3209,12 @@ parent_name(const char *path, char *buf, size_t buflen) return (0); } +int +zfs_parent_name(zfs_handle_t *zhp, char *buf, size_t buflen) +{ + return (parent_name(zfs_get_name(zhp), buf, buflen)); +} + /* * If accept_ancestor is false, then check to make sure that the given path has * a parent, and that it exists. 
If accept_ancestor is true, then find the @@ -3352,7 +3433,10 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, int ret; uint64_t size = 0; uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; char errbuf[1024]; + char parent[MAXNAMELEN]; uint64_t zoned; enum lzc_dataset_type ost; zpool_handle_t *zpool_handle; @@ -3399,7 +3483,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, return (-1); if (props && (props = zfs_valid_proplist(hdl, type, props, - zoned, NULL, zpool_handle, errbuf)) == 0) { + zoned, NULL, zpool_handle, B_TRUE, errbuf)) == 0) { zpool_close(zpool_handle); return (-1); } @@ -3451,15 +3535,21 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, } } + (void) parent_name(path, parent, sizeof (parent)); + if (zfs_crypto_create(hdl, parent, props, NULL, &wkeydata, + &wkeylen) != 0) { + nvlist_free(props); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + } + /* create the dataset */ - ret = lzc_create(path, ost, props); + ret = lzc_create(path, ost, props, wkeydata, wkeylen); nvlist_free(props); + if (wkeydata != NULL) + free(wkeydata); /* check for failure */ if (ret != 0) { - char parent[ZFS_MAX_DATASET_NAME_LEN]; - (void) parent_name(path, parent, sizeof (parent)); - switch (errno) { case ENOENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -3476,6 +3566,13 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, "pool must be upgraded to set this " "property or value")); return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); + + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption root's key is not loaded " + "or provided")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + #ifdef _ILP32 case EOVERFLOW: /* @@ -3668,10 +3765,15 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) type = ZFS_TYPE_FILESYSTEM; } if ((props = zfs_valid_proplist(hdl, type, props, zoned, - zhp, 
zhp->zpool_hdl, errbuf)) == NULL) + zhp, zhp->zpool_hdl, B_TRUE, errbuf)) == NULL) return (-1); } + if (zfs_crypto_clone_check(hdl, zhp, parent, props) != 0) { + nvlist_free(props); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + } + ret = lzc_clone(target, zhp->zfs_name, props); nvlist_free(props); @@ -3817,7 +3919,7 @@ zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props) if (props != NULL && (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, - props, B_FALSE, NULL, zpool_hdl, errbuf)) == NULL) { + props, B_FALSE, NULL, zpool_hdl, B_FALSE, errbuf)) == NULL) { zpool_close(zpool_hdl); return (-1); } @@ -4189,6 +4291,18 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, "a child dataset already has a snapshot " "with the new name")); (void) zfs_error(hdl, EZFS_EXISTS, errbuf); + } else if (errno == EACCES) { + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) == + ZIO_CRYPT_OFF) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot rename an unencrypted dataset to " + "be a decendent of an encrypted one")); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot move encryption child outside of " + "its encryption root")); + } + (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf); } else { (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); } diff --git a/usr/src/lib/libzfs/common/libzfs_diff.c b/usr/src/lib/libzfs/common/libzfs_diff.c index d6cf32714d1f..80b2e2bbb89e 100644 --- a/usr/src/lib/libzfs/common/libzfs_diff.c +++ b/usr/src/lib/libzfs/common/libzfs_diff.c @@ -109,6 +109,11 @@ get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj, "The sys_config privilege or diff delegated permission " "is needed\nto discover path names")); return (-1); + } else if (di->zerr == EACCES) { + (void) snprintf(di->errbuf, sizeof (di->errbuf), + dgettext(TEXT_DOMAIN, + "Key must be loaded to discover path names")); + return (-1); } else { (void) snprintf(di->errbuf, sizeof (di->errbuf), 
dgettext(TEXT_DOMAIN, diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c index 9fd37825a31c..50b67c75ea34 100644 --- a/usr/src/lib/libzfs/common/libzfs_mount.c +++ b/usr/src/lib/libzfs/common/libzfs_mount.c @@ -79,6 +79,7 @@ #include #include #include +#include #include @@ -334,6 +335,8 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) char mountpoint[ZFS_MAXPROPLEN]; char mntopts[MNT_LINE_MAX]; libzfs_handle_t *hdl = zhp->zfs_hdl; + uint64_t keystatus; + int rc; if (options == NULL) mntopts[0] = '\0'; @@ -349,6 +352,39 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL)) return (0); + /* + * If the filesystem is encrypted the key must be loaded in order to + * mount. If the key isn't loaded, the MS_CRYPT flag decides whether + * or not we attempt to load the keys. Note: we must call + * zfs_refresh_properties() here since some callers of this function + * (most notably zpool_enable_datasets()) may implicitly load our key + * by loading the parent's key first. + */ + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) { + zfs_refresh_properties(zhp); + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + + /* + * If the key is unavailable and MS_CRYPT is set give the + * user a chance to enter the key. Otherwise just fail + * immediately. 
+ */ + if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) { + if (flags & MS_CRYPT) { + rc = zfs_crypto_load_key(zhp, B_FALSE, NULL); + if (rc != 0) + return (rc); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption key not loaded")); + return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED, + dgettext(TEXT_DOMAIN, "cannot mount '%s'"), + mountpoint)); + } + } + + } + /* Create the directory if it doesn't already exist */ if (lstat(mountpoint, &buf) != 0) { if (mkdirp(mountpoint, 0755) != 0) { @@ -1109,6 +1145,12 @@ mount_cb(zfs_handle_t *zhp, void *data) return (0); } + if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) == + ZFS_KEYSTATUS_UNAVAILABLE) { + zfs_close(zhp); + return (0); + } + /* * If this filesystem is inconsistent and has a receive resume * token, we can not mount it. @@ -1198,6 +1240,14 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) ret = 0; for (i = 0; i < cb.cb_used; i++) { + /* + * don't attempt to mount encrypted datasets with + * unloaded keys + */ + if (zfs_prop_get_int(cb.cb_handles[i], ZFS_PROP_KEYSTATUS) == + ZFS_KEYSTATUS_UNAVAILABLE) + continue; + if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0) ret = -1; else diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c index bf5d5dc081e9..4b9d57d4f0e4 100644 --- a/usr/src/lib/libzfs/common/libzfs_pool.c +++ b/usr/src/lib/libzfs/common/libzfs_pool.c @@ -1114,6 +1114,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, zfs_cmd_t zc = { 0 }; nvlist_t *zc_fsprops = NULL; nvlist_t *zc_props = NULL; + nvlist_t *hidden_args = NULL; + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; char msg[1024]; int ret = -1; @@ -1144,17 +1147,34 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, strcmp(zonestr, "on") == 0); if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM, - fsprops, zoned, NULL, NULL, msg)) == NULL) { + fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) { goto 
create_failed; } if (!zc_props && (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) { goto create_failed; } + if (zfs_crypto_create(hdl, NULL, zc_fsprops, props, + &wkeydata, &wkeylen) != 0) { + (void) zfs_error(hdl, EZFS_CRYPTOFAILED, msg); + goto create_failed; + } if (nvlist_add_nvlist(zc_props, ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) { goto create_failed; } + if (wkeydata != NULL) { + if (nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0) != 0) + goto create_failed; + + if (nvlist_add_uint8_array(hidden_args, "wkeydata", + wkeydata, wkeylen) != 0) + goto create_failed; + + if (nvlist_add_nvlist(zc_props, ZPOOL_HIDDEN_ARGS, + hidden_args) != 0) + goto create_failed; + } } if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) @@ -1167,6 +1187,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, zcmd_free_nvlists(&zc); nvlist_free(zc_props); nvlist_free(zc_fsprops); + nvlist_free(hidden_args); + if (wkeydata != NULL) + free(wkeydata); switch (errno) { case EBUSY: @@ -1232,6 +1255,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, zcmd_free_nvlists(&zc); nvlist_free(zc_props); nvlist_free(zc_fsprops); + nvlist_free(hidden_args); + if (wkeydata != NULL) + free(wkeydata); return (ret); } diff --git a/usr/src/lib/libzfs/common/libzfs_sendrecv.c b/usr/src/lib/libzfs/common/libzfs_sendrecv.c index 4e89dc053d1d..c9931be623e8 100644 --- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c +++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c @@ -55,6 +55,7 @@ #include #include #include +#include #include /* in libzfs_dataset.c */ @@ -324,11 +325,9 @@ cksummer(void *arg) struct drr_object *drro = &drr->drr_u.drr_object; if (drro->drr_bonuslen > 0) { (void) ssread(buf, - P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), - ofp); + DRR_OBJECT_PAYLOAD_SIZE(drro), ofp); } - if (dump_record(drr, buf, - P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), + if (dump_record(drr, buf, DRR_OBJECT_PAYLOAD_SIZE(drro), &stream_cksum, outfd) != 0) goto out; 
break; @@ -337,8 +336,8 @@ cksummer(void *arg) case DRR_SPILL: { struct drr_spill *drrs = &drr->drr_u.drr_spill; - (void) ssread(buf, drrs->drr_length, ofp); - if (dump_record(drr, buf, drrs->drr_length, + (void) ssread(buf, DRR_SPILL_PAYLOAD_SIZE(drrs), ofp); + if (dump_record(drr, buf, DRR_SPILL_PAYLOAD_SIZE(drrs), &stream_cksum, outfd) != 0) goto out; break; @@ -368,7 +367,7 @@ cksummer(void *arg) if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, zero_cksum) || - !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { + !DRR_IS_DEDUP_CAPABLE(drrw->drr_flags)) { SHA256_CTX ctx; zio_cksum_t tmpsha256; @@ -384,7 +383,7 @@ cksummer(void *arg) drrw->drr_key.ddk_cksum.zc_word[3] = BE_64(tmpsha256.zc_word[3]); drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256; - drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP; + drrw->drr_flags |= DRR_CHECKSUM_DEDUP; } dataref.ref_guid = drrw->drr_toguid; @@ -413,8 +412,7 @@ cksummer(void *arg) wbr_drrr->drr_checksumtype = drrw->drr_checksumtype; - wbr_drrr->drr_checksumflags = - drrw->drr_checksumtype; + wbr_drrr->drr_flags = drrw->drr_flags; wbr_drrr->drr_key.ddk_cksum = drrw->drr_key.ddk_cksum; wbr_drrr->drr_key.ddk_prop = @@ -453,6 +451,14 @@ cksummer(void *arg) break; } + case DRR_OBJECT_RANGE: + { + if (dump_record(drr, NULL, 0, &stream_cksum, + outfd) != 0) + goto out; + break; + } + default: (void) fprintf(stderr, "INVALID record type 0x%x\n", drr->drr_type); @@ -606,6 +612,7 @@ typedef struct send_data { const char *fsname; const char *fromsnap; const char *tosnap; + boolean_t raw; boolean_t recursive; boolean_t verbose; @@ -625,6 +632,7 @@ typedef struct send_data { * "snapprops" -> { name (lastname) -> { name -> value } } * * "origin" -> number (guid) (if clone) + * "is_encroot" -> boolean * "sent" -> boolean (not on-disk) * } * } @@ -783,7 +791,7 @@ static int send_iterate_fs(zfs_handle_t *zhp, void *arg) { send_data_t *sd = arg; - nvlist_t *nvfs, *nv; + nvlist_t *nvfs = NULL, *nv = NULL; int rv = 0; uint64_t parent_fromsnap_guid_save = 
sd->parent_fromsnap_guid; uint64_t fromsnap_txg_save = sd->fromsnap_txg; @@ -847,8 +855,37 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) /* iterate over props */ VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); send_iterate_prop(zhp, nv); + + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) { + boolean_t encroot; + + /* determine if this dataset is an encryption root */ + if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0) { + rv = -1; + goto out; + } + + if (encroot) + VERIFY(0 == nvlist_add_boolean(nvfs, "is_encroot")); + + /* + * Encrypted datasets can only be sent with properties if + * the raw flag is specified because the receive side doesn't + * currently have a mechanism for recursively asking the user + * for new encryption parameters. + */ + if (!sd->raw) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "cannot send %s@%s: encrypted dataset %s may not " + "be sent with properties without the raw flag\n"), + sd->fsname, sd->tosnap, zhp->zfs_name); + rv = -1; + goto out; + } + + } + VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv)); - nvlist_free(nv); /* iterate over snaps, and set sd->parent_fromsnap_guid */ sd->parent_fromsnap_guid = 0; @@ -864,7 +901,6 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) (void) snprintf(guidstring, sizeof (guidstring), "0x%llx", (longlong_t)guid); VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs)); - nvlist_free(nvfs); /* iterate over children */ if (sd->recursive) @@ -874,6 +910,8 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) sd->parent_fromsnap_guid = parent_fromsnap_guid_save; sd->fromsnap_txg = fromsnap_txg_save; sd->tosnap_txg = tosnap_txg_save; + nvlist_free(nv); + nvlist_free(nvfs); zfs_close(zhp); return (rv); @@ -881,7 +919,7 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) static int gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, - const char *tosnap, boolean_t recursive, boolean_t verbose, + const char *tosnap, boolean_t recursive, boolean_t 
raw, boolean_t verbose, nvlist_t **nvlp, avl_tree_t **avlp) { zfs_handle_t *zhp; @@ -897,6 +935,7 @@ gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, sd.fromsnap = fromsnap; sd.tosnap = tosnap; sd.recursive = recursive; + sd.raw = raw; sd.verbose = verbose; if ((error = send_iterate_fs(zhp, &sd)) != 0) { @@ -928,7 +967,7 @@ typedef struct send_dump_data { uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t verbose, dryrun, parsable, progress, embed_data, std_out; - boolean_t large_block, compress; + boolean_t large_block, compress, raw; int outfd; boolean_t err; nvlist_t *fss; @@ -970,6 +1009,11 @@ estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj, "not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "source key must be loaded")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + case ENOENT: if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { @@ -1050,6 +1094,11 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, "not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "source key must be loaded")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + case ENOENT: if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { @@ -1231,6 +1280,8 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) flags |= LZC_SEND_FLAG_EMBED_DATA; if (sdd->compress) flags |= LZC_SEND_FLAG_COMPRESS; + if (sdd->raw) + flags |= LZC_SEND_FLAG_RAW; if (!sdd->doall && !isfromsnap && !istosnap) { if (sdd->replicate) { @@ -1614,6 +1665,8 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; if (flags->compress || nvlist_exists(resume_nvl, "compressok")) lzc_flags |= LZC_SEND_FLAG_COMPRESS; + if (flags->raw || 
nvlist_exists(resume_nvl, "rawok")) + lzc_flags |= LZC_SEND_FLAG_RAW; if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) { if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) { @@ -1691,6 +1744,11 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, switch (error) { case 0: return (0); + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "source key must be loaded")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + case EXDEV: case ENOENT: case EDQUOT: @@ -1769,7 +1827,14 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, } } - if (flags->dedup && !flags->dryrun) { + /* + * Start the dedup thread if this is a dedup stream. We do not bother + * doing this if this is a raw send of an encrypted dataset with dedup off + * because normal encrypted blocks won't dedup. + */ + if (flags->dedup && !flags->dryrun && !(flags->raw && + zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF && + zfs_prop_get_int(zhp, ZFS_PROP_DEDUP) == ZIO_CHECKSUM_OFF)) { featureflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); if ((err = pipe(pipefd)) != 0) { @@ -1808,10 +1873,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, VERIFY(0 == nvlist_add_boolean(hdrnv, "not_recursive")); } + if (flags->raw) { + VERIFY(0 == nvlist_add_boolean(hdrnv, "raw")); + } err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, - fromsnap, tosnap, flags->replicate, flags->verbose, - &fss, &fsavl); + fromsnap, tosnap, flags->replicate, flags->raw, + flags->verbose, &fss, &fsavl); if (err) goto err_out; VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); @@ -1876,6 +1944,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sdd.large_block = flags->largeblock; sdd.embed_data = flags->embed_data; sdd.compress = flags->compress; + sdd.raw = flags->raw; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; if (debugnvp) @@ -2037,6 +2106,11 @@ zfs_send_one(zfs_handle_t *zhp, const char
*from, int fd, } return (zfs_error(hdl, EZFS_NOENT, errbuf)); + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset key must be loaded")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "target is busy; if a filesystem, " @@ -2127,6 +2201,63 @@ recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp, return (0); } +/* + * Returns the grand origin (origin of origin of origin...) of a given handle. + * If this dataset is not a clone, it simply returns a copy of the original + * handle. + */ +static zfs_handle_t * +recv_open_grand_origin(zfs_handle_t *zhp) +{ + char origin[ZFS_MAX_DATASET_NAME_LEN]; + zprop_source_t src; + zfs_handle_t *ozhp = zfs_handle_dup(zhp); + + while (ozhp != NULL) { + if (zfs_prop_get(ozhp, ZFS_PROP_ORIGIN, origin, + sizeof (origin), &src, NULL, 0, B_FALSE) != 0) + break; + + (void) zfs_close(ozhp); + ozhp = zfs_open(zhp->zfs_hdl, origin, ZFS_TYPE_FILESYSTEM); + } + + return (ozhp); +} + +static int +recv_rename_impl(zfs_handle_t *zhp, zfs_cmd_t *zc) +{ + int err; + zfs_handle_t *ozhp = NULL; + + /* + * Attempt to rename the dataset. If it fails with EACCES we have + * attempted to rename the dataset outside of its encryption root. + * Force the dataset to become an encryption root and try again. 
+ */ + err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); + if (err == EACCES) { + ozhp = recv_open_grand_origin(zhp); + if (ozhp == NULL) { + err = ENOENT; + goto out; + } + + err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY, + NULL, NULL, 0); + if (err != 0) + goto out; + + err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); + } + +out: + if (ozhp != NULL) + zfs_close(ozhp); + return (err); +} + static int recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, int baselen, char *newname, recvflags_t *flags) @@ -2134,20 +2265,23 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, static int seq; zfs_cmd_t zc = { 0 }; int err; - prop_changelist_t *clp; - zfs_handle_t *zhp; + prop_changelist_t *clp = NULL; + zfs_handle_t *zhp = NULL; zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); - if (zhp == NULL) - return (-1); + if (zhp == NULL) { + err = -1; + goto out; + } clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, flags->force ? MS_FORCE : 0); - zfs_close(zhp); - if (clp == NULL) - return (-1); + if (clp == NULL) { + err = -1; + goto out; + } err = changelist_prefix(clp); if (err) - return (err); + goto out; zc.zc_objset_type = DMU_OST_ZFS; (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); @@ -2161,7 +2295,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, (void) printf("attempting rename %s to %s\n", zc.zc_name, zc.zc_value); } - err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); + err = recv_rename_impl(zhp, &zc); if (err == 0) changelist_rename(clp, name, tryname); } else { @@ -2179,7 +2313,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, (void) printf("failed - trying rename %s to %s\n", zc.zc_name, zc.zc_value); } - err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); + err = recv_rename_impl(zhp, &zc); if (err == 0) changelist_rename(clp, name, newname); if (err && flags->verbose) { @@ -2195,7 +2329,62 @@ recv_rename(libzfs_handle_t *hdl, const 
char *name, const char *tryname, } (void) changelist_postfix(clp); - changelist_free(clp); + +out: + if (clp != NULL) + changelist_free(clp); + if (zhp != NULL) + zfs_close(zhp); + + return (err); +} + +static int +recv_promote(libzfs_handle_t *hdl, const char *fsname, + const char *origin_fsname, recvflags_t *flags) +{ + int err; + zfs_cmd_t zc = {"\0"}; + zfs_handle_t *zhp = NULL, *ozhp = NULL; + + if (flags->verbose) + (void) printf("promoting %s\n", fsname); + + (void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value)); + (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); + + /* + * Attempt to promote the dataset. If it fails with EACCES the + * promotion would cause this dataset to leave its encryption root. + * Force the origin to become an encryption root and try again. + */ + err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); + if (err == EACCES) { + zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET); + if (zhp == NULL) { + err = -1; + goto out; + } + + ozhp = recv_open_grand_origin(zhp); + if (ozhp == NULL) { + err = -1; + goto out; + } + + err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY, + NULL, NULL, 0); + if (err != 0) + goto out; + + err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); + } + +out: + if (zhp != NULL) + zfs_close(zhp); + if (ozhp != NULL) + zfs_close(ozhp); return (err); } @@ -2397,6 +2586,150 @@ created_before(libzfs_handle_t *hdl, avl_tree_t *avl, return (rv); } +/* + * This function reestablishes the hierarchy of encryption roots after a + * recursive incremental receive has completed. This must be done after the + * second call to recv_incremental_replication() has renamed and promoted all + * sent datasets to their final locations in the dataset hierarchy.
+ */ +/* ARGSUSED */ +static int +recv_fix_encryption_heirarchy(libzfs_handle_t *hdl, const char *destname, + nvlist_t *stream_nv, avl_tree_t *stream_avl) +{ + int err; + nvpair_t *fselem = NULL; + nvlist_t *stream_fss; + char *cp; + char top_zfs[ZFS_MAX_DATASET_NAME_LEN]; + + (void) strcpy(top_zfs, destname); + cp = strrchr(top_zfs, '@'); + if (cp != NULL) + *cp = '\0'; + + VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", &stream_fss)); + + while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) { + zfs_handle_t *zhp = NULL; + uint64_t crypt; + nvlist_t *snaps, *props, *stream_nvfs = NULL; + nvpair_t *snapel = NULL; + boolean_t is_encroot, is_clone, stream_encroot; + char *cp; + char *stream_keylocation = NULL; + char keylocation[MAXNAMELEN]; + char fsname[ZFS_MAX_DATASET_NAME_LEN]; + + keylocation[0] = '\0'; + VERIFY(0 == nvpair_value_nvlist(fselem, &stream_nvfs)); + VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "snaps", &snaps)); + VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "props", &props)); + stream_encroot = nvlist_exists(stream_nvfs, "is_encroot"); + + /* find a snapshot from the stream that exists locally */ + err = ENOENT; + while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) { + uint64_t guid; + + VERIFY(0 == nvpair_value_uint64(snapel, &guid)); + err = guid_to_name(hdl, destname, guid, B_FALSE, + fsname); + if (err == 0) + break; + } + + if (err != 0) + continue; + + cp = strchr(fsname, '@'); + if (cp != NULL) + *cp = '\0'; + + zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET); + if (zhp == NULL) { + err = ENOENT; + goto error; + } + + crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0'; + (void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); + + /* we don't need to do anything for unencrypted filesystems */ + if (crypt == ZIO_CRYPT_OFF) { + zfs_close(zhp); + continue; + } + + /* + * If the dataset is flagged as an encryption root, was not + * received as a clone and is not 
currently an encryption root, + * force it to become one. Fixup the keylocation if necessary. + */ + if (stream_encroot) { + if (!is_clone && !is_encroot) { + err = lzc_change_key(fsname, + DCP_CMD_FORCE_NEW_KEY, NULL, NULL, 0); + if (err != 0) { + zfs_close(zhp); + goto error; + } + } + + VERIFY(0 == nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + &stream_keylocation)); + + /* + * Refresh the properties in case the call to + * lzc_change_key() changed the value. + */ + zfs_refresh_properties(zhp); + err = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION, + keylocation, sizeof (keylocation), NULL, NULL, + 0, B_TRUE); + if (err != 0) { + zfs_close(zhp); + goto error; + } + + if (strcmp(keylocation, stream_keylocation) != 0) { + err = zfs_prop_set(zhp, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + stream_keylocation); + if (err != 0) { + zfs_close(zhp); + goto error; + } + } + } + + /* + * If the dataset is not flagged as an encryption root and is + * currently an encryption root, force it to inherit from its + * parent. The root of a raw send should never be + * force-inherited. + */ + if (!stream_encroot && is_encroot && + strcmp(top_zfs, fsname) != 0) { + err = lzc_change_key(fsname, DCP_CMD_FORCE_INHERIT, + NULL, NULL, 0); + if (err != 0) { + zfs_close(zhp); + goto error; + } + } + + zfs_close(zhp); + } + + return (0); + +error: + return (err); +} + static int recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, @@ -2423,7 +2756,7 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, needagain = progress = B_FALSE; if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL, - recursive, B_FALSE, &local_nv, &local_avl)) != 0) + recursive, B_TRUE, B_FALSE, &local_nv, &local_avl)) != 0) return (error); /* @@ -2472,22 +2805,15 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, stream_originguid, originguid)) { case 1: { /* promote it! 
*/ - zfs_cmd_t zc = { 0 }; nvlist_t *origin_nvfs; char *origin_fsname; - if (flags->verbose) - (void) printf("promoting %s\n", fsname); - origin_nvfs = fsavl_find(local_avl, originguid, NULL); VERIFY(0 == nvlist_lookup_string(origin_nvfs, "name", &origin_fsname)); - (void) strlcpy(zc.zc_value, origin_fsname, - sizeof (zc.zc_value)); - (void) strlcpy(zc.zc_name, fsname, - sizeof (zc.zc_name)); - error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); + error = recv_promote(hdl, fsname, origin_fsname, + flags); if (error == 0) progress = B_TRUE; break; @@ -2676,7 +3002,7 @@ recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, goto again; } - return (needagain); + return (needagain || error != 0); } static int @@ -2696,7 +3022,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, int error; boolean_t anyerr = B_FALSE; boolean_t softerr = B_FALSE; - boolean_t recursive; + boolean_t recursive, raw; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); @@ -2720,6 +3046,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); + raw = (nvlist_lookup_boolean(stream_nv, "raw") == 0); if (recursive && strchr(destname, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -2875,6 +3202,11 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, stream_nv, stream_avl, NULL); } + if (raw && softerr == 0) { + softerr = recv_fix_encryption_heirarchy(hdl, destname, + stream_nv, stream_avl); + } + out: fsavl_destroy(stream_avl); nvlist_free(stream_nv); @@ -3041,14 +3373,18 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, const char *chopprefix; boolean_t newfs = B_FALSE; boolean_t stream_wantsnewfs; + boolean_t newprops = B_FALSE; uint64_t parent_snapguid = 0; prop_changelist_t *clp = NULL; nvlist_t *snapprops_nvlist = NULL; zprop_errflags_t prop_errflags; boolean_t recursive; char *snapname = 
NULL; + nvlist_t *props = NULL; + char tmp_keylocation[MAXNAMELEN]; begin_time = time(NULL); + bzero(tmp_keylocation, MAXNAMELEN); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); @@ -3057,24 +3393,39 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, ENOENT); if (stream_avl != NULL) { + char *keylocation = NULL; nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, &snapname); - nvlist_t *props; int ret; (void) nvlist_lookup_uint64(fs, "parentfromsnap", &parent_snapguid); err = nvlist_lookup_nvlist(fs, "props", &props); - if (err) + if (err) { VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); + newprops = B_TRUE; + } + /* + * The keylocation property may only be set on encryption roots, + * but this dataset might not become an encryption root until + * recv_fix_encryption_heirarchy() is called. That function + * will fixup the keylocation anyway, so we temporarily unset + * the keylocation for now to avoid any errors from the receive + * ioctl. 
+ */ + err = nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); + if (err == 0) { + (void) strcpy(tmp_keylocation, keylocation); + (void) nvlist_remove_all(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION)); + } if (flags->canmountoff) { VERIFY(0 == nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0)); } ret = zcmd_write_src_nvlist(hdl, &zc, props); - if (err) - nvlist_free(props); if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) { VERIFY(0 == nvlist_lookup_nvlist(props, @@ -3193,6 +3544,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & DMU_BACKUP_FEATURE_RESUMING; + boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_RAW; + boolean_t embedded = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_EMBED_DATA; stream_wantsnewfs = (drrb->drr_fromguid == NULL || (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming; @@ -3298,6 +3653,27 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } + /* + * zfs recv -F cannot be used to blow away an existing + * encrypted filesystem. This is because it would require + * the dsl dir to point to the new key (or lack of a + * key) and the old key at the same time. The -F flag may + * still be used for deleting intermediate snapshots that + * would otherwise prevent the receive from working.
+ */ + + if (stream_wantsnewfs && flags->force && + zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != + ZIO_CRYPT_OFF) { + zfs_close(zhp); + zcmd_free_nvlists(&zc); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "zfs receive -F cannot be used to " + "destroy an encrypted filesystem")); + err = zfs_error(hdl, EZFS_BADRESTORE, errbuf); + return (-1); + } + if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && stream_wantsnewfs) { /* We can't do online recv in this case */ @@ -3328,6 +3704,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zfs_close(zhp); } else { + zfs_handle_t *zhp; + /* * Destination filesystem does not exist. Therefore we better * be creating a new filesystem (either from a full backup, or @@ -3355,7 +3733,39 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); } + /* + * It is invalid to receive a properties stream that was + * unencrypted on the send side as a child of an encrypted + * parent. Technically there is nothing preventing this, but + * it would mean that the encryption=off property which is + * locally set on the send side would not be received correctly. + * We can infer encryption=off if the stream is not raw and + * properties were included since the send side will only ever + * send the encryption property in a raw nvlist header. 
+ */ + if (!raw && props != NULL) { + uint64_t crypt; + + zhp = zfs_open(hdl, zc.zc_name, ZFS_TYPE_DATASET); + if (zhp == NULL) { + return (zfs_error(hdl, EZFS_BADRESTORE, + errbuf)); + } + + crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + zfs_close(zhp); + + if (crypt != ZIO_CRYPT_OFF) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "parent '%s' must not be encrypted to " + "receive unenecrypted property"), + zc.zc_name); + return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + } + } + newfs = B_TRUE; + *cp = '/'; } zc.zc_begin_record = *drr_noswap; @@ -3459,7 +3869,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * get a strange "does not exist" error message. */ *cp = '\0'; - if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, + if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, B_TRUE, B_FALSE, &local_nv, &local_avl) == 0) { *cp = '@'; fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); @@ -3495,6 +3905,20 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, "since most recent snapshot"), zc.zc_name); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); break; + case EACCES: + if (raw && stream_wantsnewfs) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "failed to create encryption key")); + } else if (raw && !stream_wantsnewfs) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption key does not match " + "existing key")); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "inherited key must be loaded")); + } + (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf); + break; case EEXIST: cp = strchr(zc.zc_value, '@'); if (newfs) { @@ -3509,6 +3933,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, *cp = '@'; break; case EINVAL: + if (embedded && !raw) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incompatible embedded data stream " + "feature with encrypted receive.")); (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ECKSUM: @@ -3596,6 +4024,14 @@ zfs_receive_one(libzfs_handle_t 
*hdl, int infd, const char *tosnap, buf1, delta, buf2); } + if (tmp_keylocation[0] != '\0') { + VERIFY(0 == nvlist_add_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation)); + } + + if (newprops) + nvlist_free(props); + return (0); } diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c index aba044add5e0..9c5df7235918 100644 --- a/usr/src/lib/libzfs/common/libzfs_util.c +++ b/usr/src/lib/libzfs/common/libzfs_util.c @@ -235,6 +235,8 @@ libzfs_error_description(libzfs_handle_t *hdl) return (dgettext(TEXT_DOMAIN, "invalid diff data")); case EZFS_POOLREADONLY: return (dgettext(TEXT_DOMAIN, "pool is read-only")); + case EZFS_CRYPTOFAILED: + return (dgettext(TEXT_DOMAIN, "encryption failure")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: diff --git a/usr/src/lib/libzfs/common/mapfile-vers b/usr/src/lib/libzfs/common/mapfile-vers index 7fa722a532e9..891a1fec876e 100644 --- a/usr/src/lib/libzfs/common/mapfile-vers +++ b/usr/src/lib/libzfs/common/mapfile-vers @@ -71,6 +71,11 @@ SYMBOL_VERSION SUNWprivate_1.1 { zfs_close; zfs_create; zfs_create_ancestors; + zfs_crypto_attempt_load_keys; + zfs_crypto_get_encryption_root; + zfs_crypto_load_key; + zfs_crypto_rewrap; + zfs_crypto_unload_key; zfs_dataset_exists; zfs_deleg_share_nfs; zfs_destroy; diff --git a/usr/src/lib/libzfs_core/common/libzfs_core.c b/usr/src/lib/libzfs_core/common/libzfs_core.c index d3e92151f76f..631b6d447017 100644 --- a/usr/src/lib/libzfs_core/common/libzfs_core.c +++ b/usr/src/lib/libzfs_core/common/libzfs_core.c @@ -128,7 +128,7 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name, { zfs_cmd_t zc = { 0 }; int error = 0; - char *packed; + char *packed = NULL; size_t size; ASSERT3S(g_refcount, >, 0); @@ -136,9 +136,11 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name, (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); - packed = fnvlist_pack(source, &size); - zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; - 
zc.zc_nvlist_src_size = size; + if (source != NULL) { + packed = fnvlist_pack(source, &size); + zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; + zc.zc_nvlist_src_size = size; + } if (resultp != NULL) { *resultp = NULL; @@ -185,34 +187,49 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name, } out: - fnvlist_pack_free(packed, size); + if (packed != NULL) + fnvlist_pack_free(packed, size); free((void *)(uintptr_t)zc.zc_nvlist_dst); return (error); } int -lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props) +lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props, + uint8_t *wkeydata, uint_t wkeylen) { int error; + nvlist_t *hidden_args = NULL; nvlist_t *args = fnvlist_alloc(); + fnvlist_add_int32(args, "type", (dmu_objset_type_t)type); if (props != NULL) fnvlist_add_nvlist(args, "props", props); + + if (wkeydata != NULL) { + hidden_args = fnvlist_alloc(); + fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, + wkeylen); + fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args); + } + error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); + nvlist_free(hidden_args); nvlist_free(args); return (error); } int -lzc_clone(const char *fsname, const char *origin, - nvlist_t *props) +lzc_clone(const char *fsname, const char *origin, nvlist_t *props) { int error; + nvlist_t *hidden_args = NULL; nvlist_t *args = fnvlist_alloc(); + fnvlist_add_string(args, "origin", origin); if (props != NULL) fnvlist_add_nvlist(args, "props", props); error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); + nvlist_free(hidden_args); nvlist_free(args); return (error); } @@ -534,6 +551,8 @@ lzc_send_resume(const char *snapname, const char *from, int fd, fnvlist_add_boolean(args, "embedok"); if (flags & LZC_SEND_FLAG_COMPRESS) fnvlist_add_boolean(args, "compressok"); + if (flags & LZC_SEND_FLAG_RAW) + fnvlist_add_boolean(args, "rawok"); if (resumeobj != 0 || resumeoff != 0) { fnvlist_add_uint64(args, "resume_object", resumeobj); fnvlist_add_uint64(args, 
"resume_offset", resumeoff); @@ -604,7 +623,7 @@ recv_read(int fd, void *buf, int ilen) static int recv_impl(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, boolean_t resumable, int fd, + boolean_t force, boolean_t resumable, boolean_t raw, int fd, const dmu_replay_record_t *begin_record) { /* @@ -697,9 +716,9 @@ recv_impl(const char *snapname, nvlist_t *props, const char *origin, */ int lzc_receive(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, int fd) + boolean_t raw, boolean_t force, int fd) { - return (recv_impl(snapname, props, origin, force, B_FALSE, fd, NULL)); + return (recv_impl(snapname, props, origin, force, B_FALSE, raw, fd, NULL)); } /* @@ -710,9 +729,9 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin, */ int lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, int fd) + boolean_t force, boolean_t raw, int fd) { - return (recv_impl(snapname, props, origin, force, B_TRUE, fd, NULL)); + return (recv_impl(snapname, props, origin, force, B_TRUE, raw, fd, NULL)); } /* @@ -728,12 +747,12 @@ lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, */ int lzc_receive_with_header(const char *snapname, nvlist_t *props, - const char *origin, boolean_t force, boolean_t resumable, int fd, - const dmu_replay_record_t *begin_record) + const char *origin, boolean_t force, boolean_t resumable, boolean_t raw, + int fd, const dmu_replay_record_t *begin_record) { if (begin_record == NULL) return (EINVAL); - return (recv_impl(snapname, props, origin, force, resumable, fd, + return (recv_impl(snapname, props, origin, force, resumable, raw, fd, begin_record)); } @@ -931,3 +950,67 @@ lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit, return (error); } + + +/* + * Performs key management functions + * + * crypto_cmd should be a value from zfs_ioc_crypto_cmd_t. 
If the command + * specifies to load or change a wrapping key, the key should be specified in + * the hidden_args nvlist so that it is not logged + */ +int +lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata, + uint_t wkeylen) +{ + int error; + nvlist_t *ioc_args; + nvlist_t *hidden_args; + + if (wkeydata == NULL) + return (EINVAL); + + ioc_args = fnvlist_alloc(); + hidden_args = fnvlist_alloc(); + fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen); + fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); + if (noop) + fnvlist_add_boolean(ioc_args, "noop"); + error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL); + nvlist_free(hidden_args); + nvlist_free(ioc_args); + + return (error); +} + +int +lzc_unload_key(const char *fsname) +{ + return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL)); +} + +int +lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props, + uint8_t *wkeydata, uint_t wkeylen) +{ + int error; + nvlist_t *ioc_args = fnvlist_alloc(); + nvlist_t *hidden_args = NULL; + + fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd); + + if (wkeydata != NULL) { + hidden_args = fnvlist_alloc(); + fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, + wkeylen); + fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); + } + + if (props != NULL) + fnvlist_add_nvlist(ioc_args, "props", props); + + error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL); + nvlist_free(hidden_args); + nvlist_free(ioc_args); + return (error); +} diff --git a/usr/src/lib/libzfs_core/common/libzfs_core.h b/usr/src/lib/libzfs_core/common/libzfs_core.h index 2dcb1f639d5a..79d3cdae484b 100644 --- a/usr/src/lib/libzfs_core/common/libzfs_core.h +++ b/usr/src/lib/libzfs_core/common/libzfs_core.h @@ -48,13 +48,17 @@ enum lzc_dataset_type { }; int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **); -int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *); +int lzc_create(const char *, enum lzc_dataset_type, 
nvlist_t *, uint8_t *, + uint_t); int lzc_clone(const char *, const char *, nvlist_t *); int lzc_promote(const char *, char *, int); int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **); int lzc_bookmark(nvlist_t *, nvlist_t **); int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **); int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **); +int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t); +int lzc_unload_key(const char *); +int lzc_change_key(const char *, uint64_t, nvlist_t *, uint8_t *, uint_t); int lzc_snaprange_space(const char *, const char *, uint64_t *); @@ -65,7 +69,8 @@ int lzc_get_holds(const char *, nvlist_t **); enum lzc_send_flags { LZC_SEND_FLAG_EMBED_DATA = 1 << 0, LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1, - LZC_SEND_FLAG_COMPRESS = 1 << 2 + LZC_SEND_FLAG_COMPRESS = 1 << 2, + LZC_SEND_FLAG_RAW = 1 << 3 }; int lzc_send(const char *, const char *, int, enum lzc_send_flags); @@ -75,11 +80,12 @@ int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *); struct dmu_replay_record; -int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int); -int lzc_receive_resumable(const char *, nvlist_t *, const char *, +int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, boolean_t, int); +int lzc_receive_resumable(const char *, nvlist_t *, const char *, + boolean_t, boolean_t, int); int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t, - boolean_t, int, const struct dmu_replay_record *); + boolean_t, boolean_t, int, const struct dmu_replay_record *); boolean_t lzc_exists(const char *); diff --git a/usr/src/lib/libzfs_core/common/mapfile-vers b/usr/src/lib/libzfs_core/common/mapfile-vers index 9361062ba8e6..951a9dccd5d2 100644 --- a/usr/src/lib/libzfs_core/common/mapfile-vers +++ b/usr/src/lib/libzfs_core/common/mapfile-vers @@ -47,6 +47,9 @@ SYMBOL_VERSION ILLUMOS_0.1 { lzc_clone; lzc_promote; lzc_create; + lzc_load_key; + lzc_unload_key; + lzc_change_key; lzc_destroy_bookmarks; 
lzc_destroy_snaps; lzc_exists; diff --git a/usr/src/lib/libzpool/Makefile.com b/usr/src/lib/libzpool/Makefile.com index 13aa1858c602..8cbe24c5239f 100644 --- a/usr/src/lib/libzpool/Makefile.com +++ b/usr/src/lib/libzpool/Makefile.com @@ -78,6 +78,12 @@ CERRWARN += -_gcc=-Wno-empty-body CERRWARN += -_gcc=-Wno-unused-function CERRWARN += -_gcc=-Wno-unused-label +# lint complains about unused inline functions, even though +# they are "inline", not "static inline", with "extern inline" +# implementations and usage in libzpool. +LINTFLAGS += -erroff=E_STATIC_UNUSED +LINTFLAGS64 += -erroff=E_STATIC_UNUSED + .KEEP_STATE: all: $(LIBS) diff --git a/usr/src/lib/libzpool/common/kernel.c b/usr/src/lib/libzpool/common/kernel.c index 590398d11820..41d8e26ac827 100644 --- a/usr/src/lib/libzpool/common/kernel.c +++ b/usr/src/lib/libzpool/common/kernel.c @@ -40,6 +40,11 @@ #include #include #include +#include +#include +#include +#include +#include /* * Emulation of kernel services in userland. @@ -1160,3 +1165,87 @@ geterror(struct buf *bp) } return (error); } + +int +crypto_create_ctx_template(crypto_mechanism_t *mech, + crypto_key_t *key, crypto_ctx_template_t *tmpl, int kmflag) +{ + return (0); +} + +crypto_mech_type_t +crypto_mech2id(crypto_mech_name_t name) +{ + return (CRYPTO_MECH_INVALID); +} + +int +crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data, + crypto_key_t *key, crypto_ctx_template_t impl, + crypto_data_t *mac, crypto_call_req_t *cr) +{ + return (0); +} + +int +crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext, + crypto_key_t *key, crypto_ctx_template_t tmpl, + crypto_data_t *ciphertext, crypto_call_req_t *cr) +{ + return (0); +} + +/* This could probably be a weak reference */ +int +crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext, + crypto_key_t *key, crypto_ctx_template_t tmpl, + crypto_data_t *ciphertext, crypto_call_req_t *cr) +{ + return (0); +} + + +int +crypto_digest_final(crypto_context_t context, crypto_data_t
*digest, + crypto_call_req_t *cr) +{ + return (0); +} + +int +crypto_digest_update(crypto_context_t context, crypto_data_t *data, + crypto_call_req_t *cr) +{ + return (0); +} + +int +crypto_digest_init(crypto_mechanism_t *mech, crypto_context_t *ctxp, + crypto_call_req_t *crq) +{ + return (0); +} + +void +crypto_destroy_ctx_template(crypto_ctx_template_t tmpl) +{ +} + +extern int crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key, + crypto_ctx_template_t tmpl, crypto_context_t *ctxp, + crypto_call_req_t *cr) +{ + return (0); +} + +extern int crypto_mac_update(crypto_context_t ctx, crypto_data_t *data, + crypto_call_req_t *cr) +{ + return (0); +} + +extern int crypto_mac_final(crypto_context_t ctx, crypto_data_t *data, + crypto_call_req_t *cr) +{ + return (0); +} diff --git a/usr/src/man/man1m/zfs.1m b/usr/src/man/man1m/zfs.1m index 8f3643995b3d..4de426756e26 100644 --- a/usr/src/man/man1m/zfs.1m +++ b/usr/src/man/man1m/zfs.1m @@ -147,7 +147,7 @@ .Cm mount .Nm .Cm mount -.Op Fl Ov +.Op Fl Olv .Op Fl o Ar options .Fl a | Ar filesystem .Nm @@ -165,7 +165,7 @@ .Ar snapshot bookmark .Nm .Cm send -.Op Fl DLPRcenpv +.Op Fl DLPRcenpvw .Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot .Ar snapshot .Nm @@ -271,6 +271,21 @@ .Op Fl m Ar memory_limit .Ar pool script .Op Ar arg1 No ... +.Cm load-key +.Op Fl rn +.Op Fl L Ar keylocation +.Op Fl a Ns | Ns Ar filesystem +.Nm +.Cm unload-key +.Op Fl r +.Op Fl a Ns | Ns Ar filesystem +.Nm +.Cm change-key +.Op Fl l +.Op Fl o Sy keylocation Ns = Ns Ar value +.Op Fl o Sy keyformat Ns = Ns Ar value +.Op Fl o Sy pbkdf2iters Ns = Ns Ar value +.Ar filesystem .Sh DESCRIPTION The .Nm @@ -569,12 +584,35 @@ if the snapshot has been marked for deferred destroy by using the command. Otherwise, the property is .Sy off . +.It Sy encryptionroot +For encrypted datasets, indicates where the dataset is currently inheriting its +encryption key from. 
+Loading or unloading a key for the +.Sy encryptionroot +will implicitly load / unload the key for any inheriting datasets (see +.Nm zfs Cm load-key +and +.Nm zfs Cm unload-key ) . +Clones will always share an encryption key with their origin. +See the +.Sx Encryption +section for details. .It Sy filesystem_count The total number of filesystems and volumes that exist under this location in the dataset tree. This value is only available when a .Sy filesystem_limit has been set somewhere in the tree under which the dataset resides. +.It Sy keystatus +Indicates if an encryption key is currently loaded into ZFS. +The possible values are +.Sy none , available , +and +.Sy unavailable . +See +.Nm Cm load-key +and +.Nm Cm unload-key . .It Sy logicalreferenced The amount of space that is .Qq logically @@ -1090,6 +1128,76 @@ option. Controls whether device nodes can be opened on this file system. The default value is .Sy on . +.It Xo +.Sy encryption Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy aes-128-ccm Ns | Ns +.Sy aes-192-ccm Ns | Ns Sy aes-256-ccm Ns | Ns Sy aes-128-gcm Ns | Ns +.Sy aes-192-gcm Ns | Ns Sy aes-256-gcm +.Xc +Controls the encryption cipher suite +.Pq block cipher, key length, and mode +used for this dataset. +Requires the encryption feature to be enabled on the pool. +Requires a +.Sy keyformat +to be set at dataset creation time. +.Pp +Selecting +.Sy encryption Ns = Ns Sy on +when creating a dataset indicates that the default encryption suite will be +selected, which is currently +.Sy aes-256-ccm . +In order to provide consistent data protection, encryption must be specified at +dataset creation time and it cannot be changed afterwards. +.Pp +For more details and caveats about encryption see the +.Sx Encryption +section. +.It Sy keyformat Ns = Ns Sy raw Ns | Ns Sy hex Ns | Ns Sy passphrase +Controls what format the user's encryption key will be provided as. +This property is only set for encrypted datasets which are encryption roots.
+.Pp +Raw keys and hex keys must be 32 bytes long +.Pq regardless of the chosen encryption suite +and must be randomly generated. +A raw key can be generated with the following command: +.Bd -literal +# dd if=/dev/urandom of=/path/to/output/key bs=32 count=1 +.Ed +.Pp +Passphrases must be between 8 and 512 bytes long and will be processed through +PBKDF2 before being used +.Po see the +.Nm pbkdf2iters +property +.Pc . +Even though the encryption suite cannot be changed after dataset creation, the +keyformat can be with +.Nm Cm change-key . +.It Sy keylocation Ns = Ns Sy prompt Ns | Ns Ar file:// +Controls where the user's encryption key will be loaded from by default for +commands such as +.Nm Cm load-key +and +.Nm Cm mount Fl l . +This property is only set for encrypted datasets which are encryption roots. +If unspecified, the default is +.Sy prompt . +.Pp +Even though the encryption suite cannot be changed after dataset creation, the +keylocation can be with either +.Nm Cm set +or +.Nm Cm change-key . +If +.Sy prompt +is selected ZFS will ask for the key at the command prompt when +it is required to access the encrypted data (see +.Nm Cm load-key +). This setting will also allow the key to be passed in via STDIN, but users +should be careful not to place keys which should be kept secret on the +command line. +If a file URI is selected, the key will be loaded from the specified absolute +file path. .It Sy exec Ns = Ns Sy on Ns | Ns Sy off Controls whether processes can be executed from within this file system. The default value is @@ -1144,6 +1252,24 @@ See for more information on .Sy nbmand mounts. +.It Sy pbkdf2iters Ns = Ns Ar iterations +Controls the number of PBKDF2 iterations that a +.Sy passphrase +encryption key should be run through when processing it into an encryption key. +This property is only defined when encryption is enabled and a keyformat of +.Sy passphrase +is selected. 
+The goal of PBKDF2 is to significantly increase the computational difficulty +needed to brute force a user's passphrase. +This is accomplished by forcing the attacker to run each passphrase through a +computationally expensive hashing function many times before they arrive at the +resulting key. +A user who actually knows the passphrase will only have to pay this cost once. +As CPUs become better at processing, this number should be raised to ensure that +a brute force attack is still not possible. +The current default is 350000 and the minimum is 100000. +This property may be changed with +.Nm Cm change-key . .It Sy primarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata Controls what is cached in the primary cache .Pq ARC . @@ -1789,6 +1915,83 @@ installed or upgraded, use the and .Xr dumpadm 1M commands. +.Ss "Encryption" +Enabling the +.Sy encryption +feature allows for the creation of encrypted filesystems and volumes. +ZFS will encrypt all user data including file and zvol data, file attributes, +ACLs, permission bits, directory listings, FUID mappings, and userused/groupused +data. +ZFS +will not encrypt metadata related to the pool structure, including dataset +names, dataset hierarchy, file size, file holes, and dedup tables. +Key rotation is managed internally by the ZFS kernel module and changing the +user's key does not require re-encrypting the entire dataset. +Datasets can be scrubbed, resilvered, renamed, and deleted without the +encryption keys being loaded +.Po see the +.Nm Cm load-key +subcommand for more info on key loading +.Pc . +.Pp +Creating an encrypted dataset requires specifying the +.Sy encryption +and +.Sy keyformat +properties at creation time, along with an optional +.Sy keylocation +and +.Sy pbkdf2iters. +After entering an encryption key, the created +dataset will become an encryption root. 
+Any descendant datasets will inherit their encryption key from the encryption +root by default, meaning that loading, unloading, or changing the key for the +encryption root will implicitly do the same for all inheriting datasets. +If this inheritance is not desired, simply supply a +.Sy keyformat +when creating the child dataset or use +.Nm Cm change-key +to break an existing relationship, creating a new encryption root on the child. +Note that the child's +.Sy keyformat +may match that of the parent while still creating a new encryption root, and +that changing the +.Sy encryption +property alone does not create a new encryption root; this would simply use a +different cipher suite with the same key as its encryption root. +The one exception is that clones will always use their origin's encryption key. +As a result of this exception, some encryption-related properties (namely +.Sy keystatus , +.Sy keyformat , +.Sy keylocation , +and +.Sy pbkdf2iters ) +do not inherit like other ZFS properties and instead use the value determined +by their encryption root. +Encryption root inheritance can be tracked via the read-only +.Sy encryptionroot +property. +.Pp +Encryption changes the behavior of a few ZFS operations. +Encryption is applied after compression so compression ratios are preserved. +Normally checksums in ZFS are 256 bits long, but for encrypted data the checksum +is 128 bits of the user-chosen checksum and 128 bits of MAC from the encryption +suite, which provides additional protection against maliciously altered data. +Deduplication is still possible with encryption enabled but for security, +datasets will only dedup against themselves, their snapshots, and their clones. +.Pp +There are a few limitations on encrypted datasets. +Encrypted data cannot be embedded via the +.Sy embedded_data +feature. +Encrypted datasets may not have +.Sy copies Ns = Ns Sy 3 +since the implementation stores some encryption metadata where the third copy +would normally be.
+Since compression is applied before encryption datasets may be vulnerable to a +CRIME-like attack if applications accessing the data allow for it. +Deduplication with encryption will leak information about which blocks are +equivalent in a dataset and will incur an extra CPU cost per block written. .Sh SUBCOMMANDS All subcommands that modify state are logged persistently to the pool in their original form. @@ -2543,7 +2746,7 @@ Displays all ZFS file systems currently mounted. .It Xo .Nm .Cm mount -.Op Fl Ov +.Op Fl Olv .Op Fl o Ar options .Fl a | Ar filesystem .Xc @@ -2557,6 +2760,16 @@ for more information. .It Fl a Mount all available ZFS file systems. Invoked automatically as part of the boot process. +.It Fl l +Load keys for encrypted filesystems as they are being mounted. +This is equivalent to executing +.Nm Cm load-key +on each encryption root before mounting it. +Note that if a filesystem has a +.Sy keylocation +of +.Sy prompt +this will cause the terminal to interactively block after asking for the key. .It Ar filesystem Mount the specified filesystem. .It Fl o Ar options @@ -2642,7 +2855,7 @@ feature. .It Xo .Nm .Cm send -.Op Fl DLPRcenpv +.Op Fl DLPRcenpvw .Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot .Ar snapshot .Xc @@ -2730,6 +2943,10 @@ If the .Sy lz4_compress feature is active on the sending system, then the receiving system must have that feature enabled as well. +Datasets that are sent with this flag may not be received as an encrypted +dataset, since encrypted datasets cannot use the +.Sy embedded_data +feature. See .Xr zpool-features 5 for details on ZFS feature flags and the @@ -2796,6 +3013,28 @@ This flag is implicit when .Fl R is specified. The receiving system must also support this feature. +Sends of encrypted datasets must use +.Fl w +when using this flag. +.It Fl w, -raw +For encrypted datasets, send data exactly as it exists on disk. +This allows backups to be taken even if encryption keys are not currently +loaded. 
+The backup may then be received on an untrusted machine since that machine will +not have the encryption keys to read the protected data or alter it without +being detected. +Upon being received, the dataset will have the same encryption keys as it did +on the send side, although the +.Sy keylocation +property will be defaulted to +.Sy prompt +if not otherwise provided. +For unencrypted datasets, this flag will be equivalent to +.Fl Lec . +Note that if you do not use this flag for sending encrypted datasets, +data will be sent unencrypted and may be re-encrypted with a different +encryption key on the receiving system, which will disable the ability +to do a raw send to that system for incrementals. .It Fl v, -verbose Print verbose information about the stream package generated. This information includes a per-second report of how much data has been sent. @@ -2868,6 +3107,10 @@ If the .Sy lz4_compress feature is active on the sending system, then the receiving system must have that feature enabled as well. +Datasets that are sent with this flag may not be received as an encrypted +dataset, since encrypted datasets cannot use the +.Sy embedded_data +feature. See .Xr zpool-features 5 for details on ZFS feature flags and the @@ -2888,6 +3131,25 @@ character and following If the incremental target is a clone, the incremental source can be the origin snapshot, or an earlier snapshot in the origin's filesystem, or the origin's origin, etc. +.It Fl w, -raw +For encrypted datasets, send data exactly as it exists on disk. +This allows backups to be taken even if encryption keys are not currently +loaded. +The backup may then be received on an untrusted machine since that machine will +not have the encryption keys to read the protected data or alter it without +being detected. +Upon being received, the dataset will have the same encryption keys as it did +on the send side, although the +.Sy keylocation +property will be defaulted to +.Sy prompt +if not otherwise provided.
+For unencrypted datasets, this flag will be equivalent to +.Fl Lec . +Note that if you do not use this flag for sending encrypted datasets, +data will be sent unencrypted and may be re-encrypted with a different +encryption key on the receiving system, which will disable the ability +to do a raw send to that system for incrementals. .El .It Xo .Nm @@ -2947,6 +3209,15 @@ destroyed by using the .Nm zfs Cm destroy Fl d command. .Pp +Raw encrypted send streams (created with +.Nm zfs Cm send Fl w +) may only be received as is, and cannot be re-encrypted, decrypted, or +recompressed by the receive process. +Unencrypted streams can be received as encrypted datasets, either through +inheritance or by specifying encryption parameters with the +.Fl o +options. +.Pp The name of the snapshot .Pq and file system, if a full stream is received that this subcommand creates depends on the argument type and the use of the @@ -3157,6 +3428,10 @@ diff subcommand Allows lookup of paths within a dataset given an object number, and the ability to create snapshots necessary to 'zfs diff'. +load-key subcommand Allows loading and unloading of encryption key + (see 'zfs load-key' and 'zfs unload-key'). +change-key subcommand Allows changing an encryption key via + 'zfs change-key'. mount subcommand Allows mount/umount of ZFS datasets promote subcommand Must also have the 'mount' and 'promote' ability in the origin file system @@ -3445,6 +3720,103 @@ arguments. See .Xr zfs-program 1M for more information. +.Cm load-key +.Op Fl nr +.Op Fl L Ar keylocation +.Fl a Ns | Ns filesystem +Use +.Ar keylocation +instead of the +.Sy keylocation +property. +This will not change the value of the property on the dataset. +Note that if used with either +.Fl r +or +.Fl a +.Ar keylocation +may only be given as +.Sy prompt . +.Bl -tag -width Ds +.It Fl a +Loads the keys for all encryption roots in all imported pools. +.It Fl n +Do a dry-run +.Cm load-key . 
+This will cause zfs to simply check that the provided key is correct. +This command may be run even if the key is already loaded. +.It Fl r +Recursively loads the keys for the specified filesystem and all descendent +encryption roots. +.El +.It Xo +.Nm +.Cm unload-key +.Op Fl r +.Fl a Ns | Ns Ar filesystem +.Xc +Unloads a key from ZFS, removing the ability to access the dataset and all of +its children that inherit the +.Sy encryption +property. +This requires that the dataset is not currently open or mounted. +Once the key is unloaded the +.Sy keystatus +property will be set to +.Sy unavailable . +.Bl -tag -width Ds +.It Fl a +Unloads the keys for all encryption roots in all imported pools. +.It Fl r +Recursively unloads the keys for the specified filesystem and all descendent +encryption roots. +.El +.It Xo +.Nm +.Cm change-key +.Op Fl il +.Op Fl o Sy keylocation Ns = Ns Ar value +.Op Fl o Sy keyformat Ns = Ns Ar value +.Op Fl o Sy pbkdf2iters Ns = Ns Ar value +.Ar filesystem +.Xc +Allows a user to change the encryption key used to access a dataset. +This command requires that the existing key for the dataset is already loaded +into ZFS. +This command may also be used to change the +.Sy keylocation , keyformat , +and +.Sy pbkdf2iters +properties as needed. +If the dataset was not previously an encryption root it will become one. +Alternatively, the +.Fl i +flag may be provided to cause an encryption root to inherit the +parent's key instead. +.Bl -tag -width Ds +.It Fl i +Indicates that ZFS should make +.Ar filesystem +inherit the key of its parent. +Note that this command can only be run on an encryption root that has an +encrypted parent. +.It Fl l +Ensures the key is loaded before attempting to change the key. +This is effectively equivalent to +.Qq Nm Cm load-key Ar filesystem ; Nm Cm change-key Ar filesystem . 
+.It Fl o Sy property Ns = Ns Ar value +Allows the user to set encryption key properties +.Pq +.Sy keyformat , keylocation , +and +.Sy pbkdf2iters +while changing the key. +This is the only way to alter +.Sy keyformat +and +.Sy pbkdf2iters +after the dataset has been created. +.El .El .El .Sh EXIT STATUS diff --git a/usr/src/man/man1m/zpool.1m b/usr/src/man/man1m/zpool.1m index baa534a31e22..d4d1a8d48b86 100644 --- a/usr/src/man/man1m/zpool.1m +++ b/usr/src/man/man1m/zpool.1m @@ -83,7 +83,7 @@ .Nm .Cm import .Fl a -.Op Fl DfmN +.Op Fl DflmN .Op Fl F Op Fl n .Op Fl c Ar cachefile Ns | Ns Fl d Ar dir .Op Fl o Ar mntopts @@ -147,7 +147,7 @@ .Ar pool .Nm .Cm split -.Op Fl n +.Op Fl ln .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Op Fl R Ar root .Ar pool newpool @@ -1058,7 +1058,7 @@ Lists destroyed pools only. .Nm .Cm import .Fl a -.Op Fl DfmN +.Op Fl DflmN .Op Fl F Op Fl n .Op Fl c Ar cachefile Ns | Ns Fl d Ar dir .Op Fl o Ar mntopts @@ -1108,6 +1108,16 @@ transactions. Not all damaged pools can be recovered by using this option. If successful, the data from the discarded transactions is irretrievably lost. This option is ignored if the pool is importable or already imported. +.It Fl l +Indicates that this command will request encryption keys for all encrypted +datasets it attempts to mount as it is bringing the pool online. +Note that if any datasets have a +.Sy keylocation +of +.Sy prompt +this command will block waiting for the keys to be entered. +Without this flag encrypted datasets will be left unavailable until the keys are +loaded. .It Fl m Allows a pool to import when there is a missing log device. Recent transactions can be lost because the log device will be discarded. @@ -1204,6 +1214,18 @@ transactions. Not all damaged pools can be recovered by using this option. If successful, the data from the discarded transactions is irretrievably lost. This option is ignored if the pool is importable or already imported. 
+.It Fl l +Indicates that the zpool command will request encryption keys for all +encrypted datasets it attempts to mount as it is bringing the pool +online. +This is equivalent to running +.Nm zfs Cm mount Fl l +on each encrypted dataset immediately after the pool is imported. +If any datasets have a +.Sy prompt +keylocation this command will block waiting for the key to be entered. +Otherwise, encrypted datasets will be left unavailable until the keys are +loaded. .It Fl m Allows a pool to import when there is a missing log device. Recent transactions can be lost because the log device will be discarded. @@ -1510,7 +1532,7 @@ values. .It Xo .Nm .Cm split -.Op Fl n +.Op Fl ln .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Op Fl R Ar root .Ar pool newpool @@ -1527,6 +1549,16 @@ At the time of the split, will be a replica of .Ar pool . .Bl -tag -width Ds +.It Fl l +Indicates that this command will request encryption keys for all encrypted +datasets it attempts to mount as it is bringing the new pool online. +Note that if any datasets have a +.Sy keylocation +of +.Sy prompt +this command will block waiting for the keys to be entered. +Without this flag encrypted datasets will be left unavailable and unmounted +until the keys are loaded. .It Fl n Do dry run, do not actually perform the split. Print out the expected configuration of diff --git a/usr/src/man/man5/zpool-features.5 b/usr/src/man/man5/zpool-features.5 index de811d6d3d89..ee1a5e2fd27b 100644 --- a/usr/src/man/man5/zpool-features.5 +++ b/usr/src/man/man5/zpool-features.5 @@ -554,5 +554,27 @@ Booting off of pools using \fBedonr\fR is \fBNOT\fR supported -- any attempt to enable \fBedonr\fR on a root pool will fail with an error. +.RE +.sp +.ne 2 +.na +\fB\fBencryption\fR\fR +.ad +.RS 4n +.TS +l l . +GUID com.datto:encryption +READ\-ONLY COMPATIBLE no +DEPENDENCIES extensible_dataset +.TE + +This feature enables the creation and management of natively encrypted datasets.
+ +This feature becomes \fBactive\fR when an encrypted dataset is created +and will be returned to the \fBenabled\fR state when all datasets that +use this feature are destroyed. + +.RE + .SH "SEE ALSO" \fBzfs\fR(1M), \fBzpool\fR(1M) diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index b955be4edb5f..126f9ddddbba 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -45,12 +45,14 @@ dir path=opt/zfs-tests/tests/functional/clean_mirror dir path=opt/zfs-tests/tests/functional/cli_root dir path=opt/zfs-tests/tests/functional/cli_root/zdb dir path=opt/zfs-tests/tests/functional/cli_root/zfs +dir path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key dir path=opt/zfs-tests/tests/functional/cli_root/zfs_clone dir path=opt/zfs-tests/tests/functional/cli_root/zfs_copies dir path=opt/zfs-tests/tests/functional/cli_root/zfs_create dir path=opt/zfs-tests/tests/functional/cli_root/zfs_destroy dir path=opt/zfs-tests/tests/functional/cli_root/zfs_get dir path=opt/zfs-tests/tests/functional/cli_root/zfs_inherit +dir path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key dir path=opt/zfs-tests/tests/functional/cli_root/zfs_mount dir path=opt/zfs-tests/tests/functional/cli_root/zfs_promote dir path=opt/zfs-tests/tests/functional/cli_root/zfs_property @@ -62,6 +64,7 @@ dir path=opt/zfs-tests/tests/functional/cli_root/zfs_send dir path=opt/zfs-tests/tests/functional/cli_root/zfs_set dir path=opt/zfs-tests/tests/functional/cli_root/zfs_share dir path=opt/zfs-tests/tests/functional/cli_root/zfs_snapshot +dir path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key dir path=opt/zfs-tests/tests/functional/cli_root/zfs_unmount dir path=opt/zfs-tests/tests/functional/cli_root/zfs_unshare dir path=opt/zfs-tests/tests/functional/cli_root/zfs_upgrade @@ -640,6 +643,31 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs/setup mode=0555 file 
path=opt/zfs-tests/tests/functional/cli_root/zfs/zfs_001_neg mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs/zfs_003_neg mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup \ + mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/setup \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_001_neg \ @@ -662,6 +690,9 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_009_neg \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_copies/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_copies/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.cfg \ @@ -731,9 +762,18 @@ file \ file \ 
path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_013_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos \ + mode=0555 file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib \ mode=0444 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_destroy/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_destroy/setup mode=0555 @@ -830,6 +870,32 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos \ mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup \ + mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/setup mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg \ + mode=0444 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib \ + mode=0444 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.cfg \ @@ -863,6 +929,9 @@ 
file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_neg \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_001_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_promote/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_promote/setup mode=0555 @@ -892,6 +961,9 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_008_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_property/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_property/setup mode=0555 @@ -940,6 +1012,17 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted \ + mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename.cfg \ @@ -985,6 +1068,12 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_013_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted \ + mode=0555 file 
path=opt/zfs-tests/tests/functional/cli_root/zfs_reservation/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_reservation/setup \ @@ -1034,6 +1123,13 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos \ mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded \ + mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/cache_001_pos \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg \ @@ -1096,6 +1192,8 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_003_neg \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib \ mode=0444 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_share/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_share/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share.cfg \ @@ -1155,6 +1253,19 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos \ mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup \ + mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_unmount/cleanup \ 
mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_unmount/setup mode=0555 @@ -1365,6 +1476,15 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_023_neg \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_024_pos \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted \ + mode=0555 file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_001_pos \ mode=0555 @@ -1498,6 +1618,12 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_all_001_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load \ + mode=0555 file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_features_001_pos \ mode=0555 @@ -1589,6 +1715,9 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_005_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_001_pos \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zpool_set/zpool_set_002_neg \ @@ -2313,6 +2442,8 @@ file path=opt/zfs-tests/tests/functional/rsend/send-c_volume mode=0555 file path=opt/zfs-tests/tests/functional/rsend/send-c_zstreamdump mode=0555 file path=opt/zfs-tests/tests/functional/rsend/send-cpL_varied_recsize \ mode=0555 +file path=opt/zfs-tests/tests/functional/rsend/send_encrypted_heirarchy \ + mode=0555 file path=opt/zfs-tests/tests/functional/rsend/setup mode=0555 file path=opt/zfs-tests/tests/functional/scrub_mirror/cleanup mode=0555 file 
path=opt/zfs-tests/tests/functional/scrub_mirror/default.cfg mode=0444 diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run index 444add0b9655..1d87a2ee0ba2 100644 --- a/usr/src/test/zfs-tests/runfiles/delphix.run +++ b/usr/src/test/zfs-tests/runfiles/delphix.run @@ -106,11 +106,16 @@ post = [/opt/zfs-tests/tests/functional/cli_root/zfs] tests = ['zfs_001_neg', 'zfs_002_pos', 'zfs_003_neg'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_change-key] +tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format', + 'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location', + 'zfs_change-key_pbkdf2iters'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_clone] tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos', 'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos', 'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg', - 'zfs_clone_010_pos'] + 'zfs_clone_010_pos', 'zfs_clone_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_copies] tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_003_pos', @@ -121,7 +126,8 @@ tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos', 'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos', 'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg', 'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos', - 'zfs_create_013_pos'] + 'zfs_create_013_pos', 'zfs_create_encrypted', + 'zfs_create_crypt_combos'] [/opt/zfs-tests/tests/functional/cli_root/zfs_destroy] tests = ['zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos', @@ -139,17 +145,21 @@ tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_inherit] tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_load-key] +tests = ['zfs_load-key', 'zfs_load-key_all', 
'zfs_load-key_file', + 'zfs_load-key_location', 'zfs_load-key_noop', 'zfs_load-key_recursive'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_mount] tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_006_pos', 'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg', - 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg', - 'zfs_mount_all_001_pos'] + 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_all_001_pos', + 'zfs_mount_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_promote] tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos', 'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg', - 'zfs_promote_007_neg', 'zfs_promote_008_pos'] + 'zfs_promote_007_neg', 'zfs_promote_008_pos', 'zfs_promote_encryptionroot'] [/opt/zfs-tests/tests/functional/cli_root/zfs_property] tests = ['zfs_written_property_001_pos'] @@ -159,7 +169,8 @@ tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', 'zfs_receive_005_neg', 'zfs_receive_006_pos', 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg', 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos', - 'zfs_receive_013_pos'] + 'zfs_receive_from_encrypted', 'zfs_receive_raw', + 'zfs_receive_raw_incremental', 'zfs_receive_to_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_rename] tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', @@ -178,7 +189,7 @@ tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_send] tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', - 'zfs_send_007_pos'] + 'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw'] [/opt/zfs-tests/tests/functional/cli_root/zfs_set] tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', @@ -358,7 +369,7 @@ tests = 
['ctime_001_pos' ] tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos', 'zfs_allow_007_pos', 'zfs_allow_008_pos', 'zfs_allow_009_neg', - 'zfs_allow_010_pos', 'zfs_allow_011_neg', 'zfs_allow_012_neg', + 'zfs_allow_011_neg', 'zfs_allow_012_neg', 'zfs_unallow_001_pos', 'zfs_unallow_002_pos', 'zfs_unallow_003_pos', 'zfs_unallow_004_pos', 'zfs_unallow_005_pos', 'zfs_unallow_006_pos', 'zfs_unallow_007_neg', 'zfs_unallow_008_neg'] diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run index df7dbb2d0ee3..a0e1c45f4d04 100644 --- a/usr/src/test/zfs-tests/runfiles/omnios.run +++ b/usr/src/test/zfs-tests/runfiles/omnios.run @@ -100,11 +100,16 @@ post = [/opt/zfs-tests/tests/functional/cli_root/zfs] tests = ['zfs_001_neg', 'zfs_002_pos', 'zfs_003_neg'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_change-key] +tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format', + 'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location', + 'zfs_change-key_pbkdf2iters'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_clone] tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos', 'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos', 'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg', - 'zfs_clone_010_pos'] + 'zfs_clone_010_pos', 'zfs_clone_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_copies] tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_003_pos', @@ -115,7 +120,8 @@ tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos', 'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos', 'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg', 'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos', - 'zfs_create_013_pos'] + 'zfs_create_013_pos', 'zfs_create_014_pos', 'zfs_create_encrypted', + 'zfs_create_crypt_combos'] 
[/opt/zfs-tests/tests/functional/cli_root/zfs_destroy] tests = ['zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos', @@ -133,16 +139,21 @@ tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_inherit] tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_load-key] +tests = ['zfs_load-key', 'zfs_load-key_all', 'zfs_load-key_file', + 'zfs_load-key_location', 'zfs_load-key_noop', 'zfs_load-key_recursive'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_mount] tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_006_pos', 'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg', - 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_all_001_pos'] + 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_all_001_pos', + 'zfs_mount_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_promote] tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos', 'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg', - 'zfs_promote_007_neg', 'zfs_promote_008_pos'] + 'zfs_promote_007_neg', 'zfs_promote_008_pos', 'zfs_promote_encryptionroot'] [/opt/zfs-tests/tests/functional/cli_root/zfs_property] tests = ['zfs_written_property_001_pos'] @@ -151,7 +162,9 @@ tests = ['zfs_written_property_001_pos'] tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', 'zfs_receive_005_neg', 'zfs_receive_006_pos', 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg', - 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos'] + 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos', + 'zfs_receive_from_encrypted', 'zfs_receive_raw', + 'zfs_receive_raw_incremental', 'zfs_receive_to_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_rename] tests = ['zfs_rename_001_pos', 
'zfs_rename_002_pos', 'zfs_rename_003_pos', @@ -169,7 +182,8 @@ tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_send] tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', - 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos'] + 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', + 'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw'] [/opt/zfs-tests/tests/functional/cli_root/zfs_set] tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', @@ -180,7 +194,7 @@ tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', 'ro_props_001_pos', 'share_mount_001_neg', 'snapdir_001_pos', 'user_property_001_pos', 'user_property_002_pos', 'user_property_003_neg', 'user_property_004_pos', 'version_001_neg', 'zfs_set_001_neg', - 'zfs_set_002_neg', 'zfs_set_003_neg'] + 'zfs_set_002_neg', 'zfs_set_003_neg', 'zfs_set_keylocation'] [/opt/zfs-tests/tests/functional/cli_root/zfs_share] tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos', @@ -194,6 +208,9 @@ tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg', 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_008_neg', 'zfs_snapshot_009_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_unload-key] +tests = ['zfs_unload-key', 'zfs_unload-key_all', 'zfs_unload-key_recursive'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_unmount] tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos', 'zfs_unmount_004_pos', 'zfs_unmount_005_pos', 'zfs_unmount_006_pos', @@ -232,7 +249,8 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_015_neg', 'zpool_create_016_pos', 'zpool_create_017_neg', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg', - 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', + 'zpool_create_encrypted', 'zpool_create_crypt_combos', + 
'zpool_create_features_001_pos', 'zpool_create_features_002_pos', 'zpool_create_features_003_pos', 'zpool_create_features_004_neg'] [/opt/zfs-tests/tests/functional/cli_root/zpool_destroy] @@ -268,7 +286,8 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos', 'zpool_import_features_001_pos', 'zpool_import_features_002_neg', 'zpool_import_features_003_pos', 'zpool_import_missing_001_pos', 'zpool_import_missing_002_pos', 'zpool_import_missing_003_pos', - 'zpool_import_rename_001_pos'] + 'zpool_import_rename_001_pos', 'zpool_import_encrypted', + 'zpool_import_encrypted_load'] [/opt/zfs-tests/tests/functional/cli_root/zpool_labelclear] tests = ['zpool_labelclear_active', 'zpool_labelclear_exported'] @@ -290,7 +309,8 @@ tests = ['zpool_replace_001_neg'] [/opt/zfs-tests/tests/functional/cli_root/zpool_scrub] tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', - 'zpool_scrub_004_pos', 'zpool_scrub_005_pos'] + 'zpool_scrub_004_pos', 'zpool_scrub_005_pos', + 'zpool_scrub_encrypted_unloaded'] [/opt/zfs-tests/tests/functional/cli_root/zpool_set] tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg'] @@ -487,7 +507,7 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'send-c_lz4_disabled', 'send-c_recv_lz4_disabled', 'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD', 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', - 'send-c_recv_dedup'] + 'send-c_recv_dedup', 'send_encrypted_heirarchy'] [/opt/zfs-tests/tests/functional/scrub_mirror] tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile new file mode 100644 index 000000000000..5be730f3fac4 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under 
the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/cli_root/zfs_change-key + +include $(SRC)/test/zfs-tests/Makefile.com diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh new file mode 100644 index 000000000000..79cd6e9f908e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. 
$STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh new file mode 100644 index 000000000000..6a9af3bc28c3 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh new file mode 100644 index 000000000000..781caae5b5c0 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh @@ -0,0 +1,62 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key' should change the key material. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Attempt to change the key +# 3. Unmount the dataset and unload its key +# 4. Attempt to load the old key +# 5. Verify the key is not loaded +# 6. Attempt to load the new key +# 7. Verify the key is loaded +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key' should change the key material" + +log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must eval "echo $PASSPHRASE2 | zfs change-key $TESTPOOL/$TESTFS1" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_mustnot eval "echo $PASSPHRASE1 | zfs load-key $TESTPOOL/$TESTFS1" +log_must key_unavailable $TESTPOOL/$TESTFS1 + +log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key' changes the key material" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh new file mode 100644 index 000000000000..dda7c1df433c --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh @@ -0,0 +1,86 @@ 
+#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key' should promote an encrypted child to an encryption root. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create an encrypted child dataset +# 3. Attempt to change the key without any flags +# 4. Attempt to change the key specifying keylocation +# 5. Attempt to change the key specifying keyformat +# 6. Verify the new encryption root can unload and load its key +# 7. Recreate the child dataset +# 8. Attempt to change the key specifying both the keylocation and keyformat +# 9. 
Verify the new encryption root can unload and load its key +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "'zfs change-key' should promote an encrypted child to an" \ + "encryption root" + +log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must zfs create $TESTPOOL/$TESTFS1/child + +log_mustnot eval "echo $PASSPHRASE2 | zfs change-key" \ + "$TESTPOOL/$TESTFS1/child" + +log_mustnot eval "echo $PASSPHRASE2 | zfs change-key -o keylocation=prompt" \ + "$TESTPOOL/$TESTFS1/child" + +log_must eval "echo $PASSPHRASE2 | zfs change-key -o keyformat=passphrase" \ + "$TESTPOOL/$TESTFS1/child" + +log_must zfs unmount $TESTPOOL/$TESTFS1/child +log_must zfs unload-key $TESTPOOL/$TESTFS1/child +log_must key_unavailable $TESTPOOL/$TESTFS1/child + +log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1/child" +log_must key_available $TESTPOOL/$TESTFS1/child + +log_must zfs destroy $TESTPOOL/$TESTFS1/child +log_must zfs create $TESTPOOL/$TESTFS1/child + +log_must eval "echo $PASSPHRASE2 | zfs change-key -o keyformat=passphrase" \ + "-o keylocation=prompt $TESTPOOL/$TESTFS1/child" + +log_must zfs unmount $TESTPOOL/$TESTFS1/child +log_must zfs unload-key $TESTPOOL/$TESTFS1/child +log_must key_unavailable $TESTPOOL/$TESTFS1/child + +log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1/child" +log_must key_available $TESTPOOL/$TESTFS1/child + +log_pass "'zfs change-key' promotes an encrypted child to an encryption root" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh new file mode 100644 index 000000000000..9ad8be204f21 --- /dev/null +++ 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh @@ -0,0 +1,71 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -o' should change the key format. +# +# STRATEGY: +# 1. Create an encrypted dataset with a passphrase key format +# 2. Unmount the dataset +# 3. Verify the key format is passphrase +# 4. Change the key format to hex +# 5. Verify the key format is hex +# 6. Attempt to reload the dataset's key +# 7. Change the key format to raw +# 8. Verify the key format is raw +# 9. 
Attempt to reload the dataset's key +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -o' should change the key format" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must zfs unmount $TESTPOOL/$TESTFS1 + +log_must verify_keyformat $TESTPOOL/$TESTFS1 "passphrase" + +log_must eval "echo $HEXKEY | zfs change-key -o keyformat=hex" \ + "$TESTPOOL/$TESTFS1" +log_must verify_keyformat $TESTPOOL/$TESTFS1 "hex" + +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must eval "echo $HEXKEY | zfs load-key $TESTPOOL/$TESTFS1" + +log_must eval "echo $RAWKEY | tr -d '\n' | zfs change-key -o keyformat=raw" \ + "$TESTPOOL/$TESTFS1" +log_must verify_keyformat $TESTPOOL/$TESTFS1 "raw" + +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must eval "echo $RAWKEY | tr -d '\n' | zfs load-key $TESTPOOL/$TESTFS1" + +log_pass "'zfs change-key -o' changes the key format" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh new file mode 100644 index 000000000000..94820c37ecc0 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. 
+# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -i' should cause a dataset to inherit its parent key +# +# STRATEGY: +# 1. Create a parent encrypted dataset +# 2. Create a child dataset as an encryption root +# 3. Attempt to inherit the parent key +# 4. Verify the key is inherited +# 5. Unmount the parent and unload its key +# 6. Verify the key is unavailable for parent and child +# 7. Load the parent key +# 8. Verify the key is available for parent and child +# 9. Attempt to mount the datasets +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -i' should cause a dataset to inherit its" \ + "parent key" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt" \ + "$TESTPOOL/$TESTFS1/child" + +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child \ + "$TESTPOOL/$TESTFS1/child" + +log_must zfs change-key -i $TESTPOOL/$TESTFS1/child +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child "$TESTPOOL/$TESTFS1" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must key_unavailable $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1/child + +log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1" + +log_must key_available $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1/child + +log_must zfs mount $TESTPOOL/$TESTFS1 +log_must zfs mount $TESTPOOL/$TESTFS1/child + +log_pass "'zfs change-key -i' causes a dataset to inherit its parent key" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh new file mode 100644 index 000000000000..4ed4aadfe0fa --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh @@ -0,0 +1,58 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -l' should load a dataset's key to change it. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Unmount dataset and unload its key +# 3. Attempt to change the key +# 4. Verify the dataset key is loaded +# 5. 
Attempt to change the key +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -l' should load a dataset's key to change it" + +log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must zfs change-key -l $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1 + +log_must zfs change-key -l $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key -l' loads a dataset's key to change it" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh new file mode 100644 index 000000000000..5cbe34b269ba --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh @@ -0,0 +1,65 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -o' should change the keylocation. +# +# STRATEGY: +# 1. Create an encrypted dataset with a file key location +# 2. Change the key location to 'prompt' +# 3. Verify the key location +# 4. 
Unmount the dataset and unload its key +# 5. Attempt to load the dataset's key +# 6. Attempt to change the key location to 'none' +# 7. Attempt to change the key location to an invalid value +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -o' should change the keylocation" + +log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "file:///$TESTPOOL/pkey" + +log_must eval "echo $PASSPHRASE1 | zfs change-key -o keylocation=prompt" \ + "$TESTPOOL/$TESTFS1" +log_must verify_keylocation $TESTPOOL/$TESTFS1 "prompt" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must eval "echo $PASSPHRASE1 | zfs load-key $TESTPOOL/$TESTFS1" + +log_mustnot zfs change-key -o keylocation=none $TESTPOOL/$TESTFS1 +log_mustnot zfs change-key -o keylocation=foobar $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key -o' changes the keylocation" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh new file mode 100644 index 000000000000..b1672248be12 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh @@ -0,0 +1,75 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -o' should change the pbkdf2 iterations. +# +# STRATEGY: +# 1. Create an encrypted dataset with 200k PBKDF2 iterations +# 2. Unmount the dataset +# 3. Change the PBKDF2 iterations to 150k +# 4. Verify the PBKDF2 iterations +# 5. Unload the dataset's key +# 6. Attempt to load the dataset's key +# + +verify_runnable "both" + +function verify_pbkdf2iters +{ + typeset ds=$1 + typeset iterations=$2 + typeset iters=$(get_prop pbkdf2iters $ds) + + if [[ "$iters" != "$iterations" ]]; then + log_fail "Expected $iterations iterations, got $iters" + fi + + return 0 +} + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -o' should change the pbkdf2 iterations" + +log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey -o pbkdf2iters=200000 \ + $TESTPOOL/$TESTFS1 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must verify_pbkdf2iters $TESTPOOL/$TESTFS1 "200000" + +log_must zfs change-key -o pbkdf2iters=150000 $TESTPOOL/$TESTFS1 +log_must verify_pbkdf2iters $TESTPOOL/$TESTFS1 "150000" + +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must zfs load-key $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key -o' changes the pbkdf2 iterations" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh new file mode 100644 index 000000000000..86f335bde2a0 --- /dev/null +++ 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh @@ -0,0 +1,83 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs clone' should create encrypted clones of encrypted datasets +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create a snapshot of the dataset +# 3. Attempt to clone the snapshot as an unencrypted dataset +# 4. Attempt to clone the snapshot with a new key +# 5. Attempt to clone the snapshot as a child of an unencrypted dataset +# 6. Attempt to clone the snapshot as a child of an encrypted dataset +# 7. Verify the encryption root of the datasets +# 8. Unmount all datasets and unload their keys +# 9. Attempt to load the encryption root's key +# 10. Verify each dataset's key is loaded +# 11. 
Attempt to mount each dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS2 + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs clone' should create encrypted clones of encrypted datasets" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must zfs snapshot $TESTPOOL/$TESTFS1@now + +log_mustnot zfs clone -o encryption=off $TESTPOOL/$TESTFS1@now \ + $TESTPOOL/$TESTFS2 +log_mustnot eval "echo $PASSPHRASE1 | zfs clone -o keyformat=passphrase" \ + "$TESTPOOL/$TESTFS1@now $TESTPOOL/$TESTFS2" +log_must zfs clone $TESTPOOL/$TESTFS1@now $TESTPOOL/$TESTFS2 +log_must zfs clone $TESTPOOL/$TESTFS1@now $TESTPOOL/$TESTFS1/child + +log_must verify_encryption_root $TESTPOOL/$TESTFS2 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child $TESTPOOL/$TESTFS1 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unmount $TESTPOOL/$TESTFS2 +log_must zfs unload-key -a + +log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1" + +log_must key_available $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1/child +log_must key_available $TESTPOOL/$TESTFS2 + +log_must zfs mount $TESTPOOL/$TESTFS1 +log_must zfs mount $TESTPOOL/$TESTFS1/child +log_must zfs mount $TESTPOOL/$TESTFS2 + +log_pass "'zfs clone' creates encrypted clones of encrypted datasets" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos.ksh new file mode 100644 index 000000000000..d1a8153d60e5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_014_pos.ksh @@ -0,0 +1,59 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms 
of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# check 'zfs create ' works at the name length boundary +# +# STRATEGY: +# 1. Verify creating filesystem with name length 255 would succeed +# 2. Verify creating filesystem with name length 256 would fail +# 3. Verify the pool can be re-imported + +verify_runnable "both" + +# namelen 255 and 256 +TESTFS1=$(for i in $(seq $((254 - ${#TESTPOOL}))); do echo z ; done | tr -d '\n') +TESTFS2=$(for i in $(seq $((255 - ${#TESTPOOL}))); do echo z ; done | tr -d '\n') + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && + log_must zfs destroy $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "'zfs create ' can create a ZFS filesystem with name length 255." + +log_must zfs create $TESTPOOL/$TESTFS1 +log_mustnot zfs create $TESTPOOL/$TESTFS2 +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL + +log_pass "'zfs create ' works as expected." 
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh new file mode 100644 index 000000000000..c6df694502d7 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh @@ -0,0 +1,98 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib + +# +# DESCRIPTION: +# 'zfs create' should create an encrypted dataset with a valid encryption +# algorithm, key format, key location, and key. +# +# STRATEGY: +# 1. Create a filesystem for each combination of encryption type and key format +# 2. 
Verify that each filesystem has the correct properties set +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +set -A ENCRYPTION_ALGS \ + "encryption=on" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A ENCRYPTION_PROPS \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A KEYFORMATS "keyformat=raw" \ + "keyformat=hex" \ + "keyformat=passphrase" + +set -A USER_KEYS "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \ + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \ + "abcdefgh" + +log_assert "'zfs create' should create encrypted datasets using all" \ + "combinations of supported properties" + +typeset -i i=0 +while (( i < ${#ENCRYPTION_ALGS[*]} )); do + typeset -i j=0 + while (( j < ${#KEYFORMATS[*]} )); do + log_must eval "echo ${USER_KEYS[j]} | tr -d '\n' | zfs create" \ + "-o ${ENCRYPTION_ALGS[i]} -o ${KEYFORMATS[j]}" \ + "$TESTPOOL/$TESTFS1" + + datasetexists $TESTPOOL/$TESTFS1 || \ + log_fail "Failed to create dataset using" \ + "${ENCRYPTION_ALGS[i]} and ${KEYFORMATS[j]}" + + propertycheck $TESTPOOL/$TESTFS1 ${ENCRYPTION_PROPS[i]} || \ + log_fail "failed to set ${ENCRYPTION_ALGS[i]}" + propertycheck $TESTPOOL/$TESTFS1 ${KEYFORMATS[j]} || \ + log_fail "failed to set ${KEYFORMATS[j]}" + + log_must zfs destroy -f $TESTPOOL/$TESTFS1 + (( j = j + 1 )) + done + (( i = i + 1 )) +done + +log_pass "'zfs create' creates encrypted datasets using all combinations of" \ + "supported properties" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh new file mode 100644 index 000000000000..9d5ecab0dfee --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh @@ -0,0 +1,134 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# ZFS should create datasets only if they have a valid combination of +# encryption properties set. 
+# +# penc = parent encrypted +# enc = encryption +# loc = keylocation provided +# fmt = keyformat provided +# +# penc enc fmt loc valid notes +# ------------------------------------------- +# no unspec 0 0 yes inherit no encryption (not tested here) +# no unspec 0 1 no no crypt specified +# no unspec 1 0 no no crypt specified +# no unspec 1 1 no no crypt specified +# no off 0 0 yes explicit no encryption +# no off 0 1 no keylocation given, but crypt off +# no off 1 0 no keyformat given, but crypt off +# no off 1 1 no keyformat given, but crypt off +# no on 0 0 no no keyformat specified for new key +# no on 0 1 no no keyformat specified for new key +# no on 1 0 yes new encryption root +# no on 1 1 yes new encryption root +# yes unspec 0 0 yes inherit encryption +# yes unspec 0 1 no no keyformat specified +# yes unspec 1 0 yes new encryption root, crypt inherited +# yes unspec 1 1 yes new encryption root, crypt inherited +# yes off 0 0 no unencrypted child of encrypted parent +# yes off 0 1 no unencrypted child of encrypted parent +# yes off 1 0 no unencrypted child of encrypted parent +# yes off 1 1 no unencrypted child of encrypted parent +# yes on 0 0 yes inherited encryption, local crypt +# yes on 0 1 no no keyformat specified for new key +# yes on 1 0 yes new encryption root +# yes on 1 1 yes new encryption root +# +# STRATEGY: +# 1. Attempt to create a dataset using all combinations of encryption +# properties +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 +} +log_onexit cleanup + +log_assert "ZFS should create datasets only if they have a valid" \ + "combination of encryption properties set." 
+ +# Unencrypted parent +log_must zfs create $TESTPOOL/$TESTFS1 +log_mustnot zfs create -o keyformat=passphrase $TESTPOOL/$TESTFS1/c1 +log_mustnot zfs create -o keylocation=prompt $TESTPOOL/$TESTFS1/c1 +log_mustnot zfs create -o keyformat=passphrase -o keylocation=prompt \ + $TESTPOOL/$TESTFS1/c1 + +log_must zfs create -o encryption=off $TESTPOOL/$TESTFS1/c1 +log_mustnot zfs create -o encryption=off -o keylocation=prompt \ + $TESTPOOL/$TESTFS1/c2 +log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ + $TESTPOOL/$TESTFS1/c2 +log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ + -o keylocation=prompt $TESTPOOL/$TESTFS1/c2 + +log_mustnot zfs create -o encryption=on $TESTPOOL/$TESTFS1/c2 +log_mustnot zfs create -o encryption=on -o keylocation=prompt \ + $TESTPOOL/$TESTFS1/c2 +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1/c3" +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1/c4" + +# Encrypted parent +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS2" + +log_must zfs create $TESTPOOL/$TESTFS2/c1 +log_mustnot zfs create -o keylocation=prompt $TESTPOOL/$TESTFS2/c2 +log_must eval "echo $PASSPHRASE | zfs create -o keyformat=passphrase" \ + "$TESTPOOL/$TESTFS2/c3" +log_must eval "echo $PASSPHRASE | zfs create -o keyformat=passphrase" \ + "-o keylocation=prompt $TESTPOOL/$TESTFS2/c4" + +log_mustnot zfs create -o encryption=off $TESTPOOL/$TESTFS2/c5 +log_mustnot zfs create -o encryption=off -o keylocation=prompt \ + $TESTPOOL/$TESTFS2/c5 +log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ + $TESTPOOL/$TESTFS2/c5 +log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ + -o keylocation=prompt $TESTPOOL/$TESTFS2/c5 + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "$TESTPOOL/$TESTFS2/c5" 
+log_mustnot zfs create -o encryption=on -o keylocation=prompt \ + $TESTPOOL/$TESTFS2/c6 +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS2/c6" +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2/c7" + +log_pass "ZFS creates datasets only if they have a valid combination of" \ + "encryption properties set." diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile new file mode 100644 index 000000000000..0751428a0d32 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/cli_root/zfs_load-key + +include $(SRC)/test/zfs-tests/Makefile.com diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh new file mode 100644 index 000000000000..79cd6e9f908e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh new file mode 100644 index 000000000000..6a9af3bc28c3 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
+# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg new file mode 100644 index 000000000000..90d9f63f1dba --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg @@ -0,0 +1,26 @@ +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +export PASSPHRASE="password" +export PASSPHRASE1="password1" +export PASSPHRASE2="password2" +export HEXKEY="000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F" +export HEXKEY1="201F1E1D1C1B1A191817161514131211100F0E0D0C0B0A090807060504030201" +export RAWKEY="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +export RAWKEY1="bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh new file mode 100644 index 000000000000..847a6aabd3c8 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh @@ -0,0 +1,85 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key' should only load a key for an unloaded encrypted dataset. +# +# STRATEGY: +# 1. Attempt to load the default dataset's key +# 2. Unmount the dataset +# 3. Attempt to load the default dataset's key +# 4. Create an encrypted dataset +# 5. Unmount the dataset and unload its key +# 6. Attempt to load the dataset's key +# 7. Verify the dataset's key is loaded +# 8. Attempt to load the dataset's key again +# 9. Create an encrypted pool +# 10. Unmount the pool and unload its key +# 11. Attempt to load the pool's key +# 12. Verify the pool's key is loaded +# 13. Attempt to load the pool's key again +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy $TESTPOOL/$TESTFS1 + poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1 +} +log_onexit cleanup + +log_assert "'zfs load-key' should only load the key for an" \ + "unloaded encrypted dataset" + +log_mustnot eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS" + +log_must zfs unmount $TESTPOOL/$TESTFS +log_mustnot eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 + +log_mustnot eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1" + +typeset DISK2="$(echo $DISKS | awk '{ print $2 }')" +log_must eval 
"echo $PASSPHRASE | zpool create -O encryption=on" \ + "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL1 $DISK2" + +log_must zfs unmount $TESTPOOL1 +log_must zfs unload-key $TESTPOOL1 + +log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL1" +log_must key_available $TESTPOOL1 + +log_mustnot eval "echo $PASSPHRASE | zfs load-key $TESTPOOL1" + +log_pass "'zfs load-key' only loads the key for an unloaded encrypted dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh new file mode 100644 index 000000000000..5e331fd1200d --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh @@ -0,0 +1,77 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key -a' should load keys for all datasets. +# +# STRATEGY: +# 1. Create an encrypted filesystem, encrypted zvol, and an encrypted pool +# 2. Unmount all datasets and unload their keys +# 3. Attempt to load all dataset keys +# 4. Verify each dataset has its key loaded +# 5. 
Attempt to mount the pool and filesystem +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/zvol && log_must zfs destroy $TESTPOOL/zvol + poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1 +} +log_onexit cleanup + +log_assert "'zfs load-key -a' should load keys for all datasets" + +log_must eval "echo $PASSPHRASE1 > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_must zfs create -V 64M -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/zvol + +typeset DISK2="$(echo $DISKS | awk '{ print $2}')" +log_must zpool create -O encryption=on -O keyformat=passphrase \ + -O keylocation=file:///$TESTPOOL/pkey $TESTPOOL1 $DISK2 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must zfs unload-key $TESTPOOL/zvol + +log_must zfs unmount $TESTPOOL1 +log_must zfs unload-key $TESTPOOL1 + +log_must zfs load-key -a + +log_must key_available $TESTPOOL1 +log_must key_available $TESTPOOL/zvol +log_must key_available $TESTPOOL/$TESTFS1 + +log_must zfs mount $TESTPOOL1 +log_must zfs mount $TESTPOOL/$TESTFS1 + +log_pass "'zfs load-key -a' loads keys for all datasets" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib new file mode 100644 index 000000000000..627b68267e13 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib @@ -0,0 +1,102 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg + +# Return 0 if a dataset key is available, 1 otherwise +# +# $1 - dataset +# +function key_available +{ + typeset ds=$1 + + datasetexists $ds || return 1 + + typeset val=$(get_prop keystatus $ds) + if [[ "$val" == "none" ]]; then + log_note "Dataset $ds is not encrypted" + elif [[ "$val" == "available" ]]; then + return 0 + fi + + return 1 +} + +function key_unavailable +{ + key_available $1 && return 1 + return 0 +} + +function verify_keyformat +{ + typeset ds=$1 + typeset format=$2 + typeset fmt=$(get_prop keyformat $ds) + + if [[ "$fmt" != "$format" ]]; then + log_fail "Expected keyformat $format, got $fmt" + fi + + return 0 +} + +function verify_keylocation +{ + typeset ds=$1 + typeset location=$2 + typeset keyloc=$(get_prop keylocation $ds) + + if [[ "$keyloc" != "$location" ]]; then + log_fail "Expected keylocation $location, got $keyloc" + fi + + return 0 +} + +function verify_encryption_root +{ + typeset ds=$1 + typeset val=$2 + typeset eroot=$(get_prop encryptionroot $ds) + + if [[ "$eroot" != "$val" ]]; then + log_note "Expected encryption root '$val', got '$eroot'" + return 1 + fi + + return 0 +} + +function verify_origin +{ + typeset ds=$1 + typeset val=$2 + typeset orig=$(get_prop origin $ds) + + if [[ "$orig" != "$val" ]]; then + log_note "Expected origin '$val', got '$orig'" + return 1 + fi + + return 0 +} diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh new 
file mode 100644 index 000000000000..7cbda43ff241 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh @@ -0,0 +1,58 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key' should load a dataset's key from a file. +# +# STRATEGY: +# 1. Create an encrypted dataset with a key file +# 2. Unmount the dataset and unload the key +# 3. Attempt to load the dataset's key +# 4. Verify the key is loaded +# 5. 
Attempt to mount the dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs load-key' should load a key from a file" + +log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must zfs load-key $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1 +log_must zfs mount $TESTPOOL/$TESTFS1 + +log_pass "'zfs load-key' loads a key from a file" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh new file mode 100644 index 000000000000..d0b1cdb20ec7 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh @@ -0,0 +1,73 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key -L' should override keylocation with provided value. +# +# STRATEGY: +# 1. Create a key file +# 2. Copy the key file to another location +# 3. Create an encrypted dataset using the keyfile +# 4. Unmount the dataset and unload its key +# 5. 
Attempt to load the dataset specifying a keylocation of file +# 6. Verify the key is loaded +# 7. Verify the keylocation is the original key file +# 8. Unload the dataset's key +# 9. Attempt to load the dataset specifying a keylocation of prompt +# 10. Verify the key is loaded +# 11. Verify the keylocation is the original key file +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs load-key -L' should override keylocation with provided value" + +typeset key_location="/$TESTPOOL/pkey1" + +log_must eval "echo $PASSPHRASE > $key_location" +log_must cp $key_location /$TESTPOOL/pkey2 + +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file://$key_location $TESTPOOL/$TESTFS1 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must zfs load-key -L file:///$TESTPOOL/pkey2 $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "file://$key_location" + +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must eval "echo $PASSPHRASE | zfs load-key -L prompt $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "file://$key_location" + +log_pass "'zfs load-key -L' overrides keylocation with provided value" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh new file mode 100644 index 000000000000..bfce786448d9 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh @@ -0,0 +1,54 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key -n' should load the key for an already loaded dataset. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Attempt to load the dataset's key +# 3. Verify the key is loaded +# 4. Attempt to load the dataset's key with an invalid key +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs load-key -n' should load the key for a loaded dataset" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" + +log_must eval "echo $PASSPHRASE | zfs load-key -n $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 + +log_mustnot eval "echo $PASSPHRASE1 | zfs load-key -n $TESTPOOL/$TESTFS1" + +log_pass "'zfs load-key -n' loads the key for a loaded dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh new file mode 100644 index 000000000000..7385b69cf5fe --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh @@ -0,0 +1,66 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key -r' should recursively load keys. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create a child dataset as an encryption root +# 3. Unmount all datasets and unload their keys +# 4. Attempt to load all dataset keys +# 5. Verify each dataset has its key loaded +# 6. Attempt to mount each dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs load-key -r' should recursively load keys" + +log_must eval "echo $PASSPHRASE1 > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_must zfs create -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1/child +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must zfs load-key -r $TESTPOOL +log_must key_available $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1/child + +log_must zfs mount $TESTPOOL/$TESTFS1 +log_must zfs mount $TESTPOOL/$TESTFS1/child + +log_pass "'zfs load-key -r' recursively loads keys" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh new file mode 100644 index 000000000000..e81d6f2a5221 --- /dev/null +++ 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh @@ -0,0 +1,62 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs mount -l' should accept a valid key as it mounts the filesystem. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Unmount and unload the dataset's key +# 3. Verify the key is unloaded +# 4. Attempt to load the key while mounting the dataset +# 5. Verify the key is loaded +# 6. 
Verify the dataset is mounted +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "'zfs mount -l' should properly load a valid wrapping key" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1 + +log_must eval "echo $PASSPHRASE | zfs mount -l $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 + +mounted $TESTPOOL/$TESTFS1 || \ + log_fail "Filesystem $TESTPOOL/$TESTFS1 is unmounted" + +log_pass "'zfs mount -l' properly loads a valid wrapping key" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh new file mode 100644 index 000000000000..336c7b2538bc --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh @@ -0,0 +1,80 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# ZFS must promote clones of an encryption root. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Clone the encryption root +# 3. Clone the clone +# 4. 
Verify the encryption root of all three datasets is the origin +# 5. Promote the clone of the clone +# 6. Verify the encryption root of all three datasets is still the origin +# 7. Promote the clone of the original encryption root +# 8. Verify the encryption root of all three datasets is the promoted dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -Rf $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/clone1 && \ + log_must zfs destroy -Rf $TESTPOOL/clone1 + datasetexists $TESTPOOL/clone2 && \ + log_must zfs destroy -Rf $TESTPOOL/clone2 +} +log_onexit cleanup + +log_assert "ZFS must promote clones of an encryption root" + +passphrase="password" +snaproot="$TESTPOOL/$TESTFS1@snap1" +snapclone="$TESTPOOL/clone1@snap2" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must zfs snap $snaproot +log_must zfs clone $snaproot $TESTPOOL/clone1 +log_must zfs snap $snapclone +log_must zfs clone $snapclone $TESTPOOL/clone2 + +log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/$TESTFS1 + +log_must zfs promote $TESTPOOL/clone2 +log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/$TESTFS1 + +log_must zfs promote $TESTPOOL/clone2 +log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/clone2 +log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/clone2 +log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/clone2 + +log_pass "ZFS promotes clones of an encryption root" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh index 
eb0023587794..486513256e3c 100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh @@ -28,6 +28,7 @@ . $STF_SUITE/include/libtest.shlib DISK=${DISKS%% *} + if is_global_zone; then default_volume_setup $DISK else diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh new file mode 100644 index 000000000000..5eee9eecf4bb --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh @@ -0,0 +1,83 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should receive an unencrypted stream from an encrypted dataset +# +# STRATEGY: +# 1. Create an unencrypted dataset +# 2. Create an encrypted dataset +# 3. Create and checksum a file on the encrypted dataset +# 4. Snapshot the encrypted dataset +# 5. Attempt to receive the snapshot into an unencrypted child +# 6. Verify encryption is not enabled +# 7. Verify the checksum of the file is the same as the original +# 8. Attempt to receive the snapshot into an encrypted child +# 9. 
Verify the checksum of the file is the same as the original +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 + + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 +} + +log_onexit cleanup + +log_assert "ZFS should receive an unencrypted stream from an encrypted dataset" + +typeset passphrase="password" +typeset snap="$TESTPOOL/$TESTFS2@snap" + +log_must zfs create $TESTPOOL/$TESTFS1 +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS2" + +log_must mkfile 1M /$TESTPOOL/$TESTFS2/$TESTFILE0 +typeset checksum=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }') + +log_must zfs snapshot $snap + +log_note "Verify ZFS can receive into an unencrypted child" +log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1" + +crypt=$(get_prop encryption $TESTPOOL/$TESTFS1/c1) +[[ "$crypt" == "off" ]] || log_fail "Received unencrypted stream as encrypted" + +typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS1/c1/$TESTFILE0 | awk '{ print $1 }') +[[ "$cksum1" == "$checksum" ]] || \ + log_fail "Checksums differ ($cksum1 != $checksum)" + +log_note "Verify ZFS can receive into an encrypted child" +log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS2/c1" + +typeset cksum2=$(md5sum /$TESTPOOL/$TESTFS2/c1/$TESTFILE0 | awk '{ print $1 }') +[[ "$cksum2" == "$checksum" ]] || \ + log_fail "Checksums differ ($cksum2 != $checksum)" + +log_pass "ZFS can receive an unencrypted stream from an encrypted dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh new file mode 100644 index 000000000000..2042b37a98f7 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh @@ -0,0 +1,93 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This 
file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should receive streams from raw sends. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create a file and get its checksum +# 3. Snapshot the dataset +# 4. Attempt to receive a raw send stream as a child of an unencrypted dataset +# 5. Verify the key is unavailable +# 6. Attempt to load the key and mount the dataset +# 7. Verify the checksum of the file is the same as the original +# 8. Attempt to receive a raw send stream as a child of an encrypted dataset +# 9. Verify the key is unavailable +# 10. Attempt to load the key and mount the dataset +# 11. 
Verify the checksum of the file is the same as the original +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 + + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 +} + +log_onexit cleanup + +log_assert "ZFS should receive streams from raw sends" + +typeset passphrase="password" +typeset snap="$TESTPOOL/$TESTFS1@snap" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must mkfile 1M /$TESTPOOL/$TESTFS1/$TESTFILE0 +typeset checksum=$(md5sum /$TESTPOOL/$TESTFS1/$TESTFILE0 | \ + awk '{ print $1 }') + +log_must zfs snapshot $snap + +log_note "Verify ZFS can receive a raw send stream from an encrypted dataset" +log_must eval "zfs send -w $snap | zfs receive $TESTPOOL/$TESTFS2" + +keystatus=$(get_prop keystatus $TESTPOOL/$TESTFS2) +[[ "$keystatus" == "unavailable" ]] || \ + log_fail "Expected keystatus unavailable, got $keystatus" + +log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS2" + +typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }') +[[ "$cksum1" == "$checksum" ]] || \ + log_fail "Checksums differ ($cksum1 != $checksum)" + +log_must eval "zfs send -w $snap | zfs receive $TESTPOOL/$TESTFS1/c1" + +keystatus=$(get_prop keystatus $TESTPOOL/$TESTFS1/c1) +[[ "$keystatus" == "unavailable" ]] || \ + log_fail "Expected keystatus unavailable, got $keystatus" + +log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS1/c1" +typeset cksum2=$(md5sum /$TESTPOOL/$TESTFS1/c1/$TESTFILE0 | \ + awk '{ print $1 }') +[[ "$cksum2" == "$checksum" ]] || \ + log_fail "Checksums differ ($cksum2 != $checksum)" + +log_pass "ZFS can receive streams from raw sends" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh new file mode 100644 index 000000000000..c813809a0b5e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh @@ -0,0 +1,75 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should receive streams from raw incremental sends. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Snapshot the dataset +# 3. Create a file and get its checksum +# 4. Snapshot the dataset +# 5. Attempt to receive a raw send stream of the first snapshot +# 6. Attempt to receive a raw incremental send stream of the second snapshot +# 7. Attempt to load the key and mount the dataset +# 8. 
Verify the checksum of the file is the same as the original +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 + + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 +} + +log_onexit cleanup + +log_assert "ZFS should receive streams from raw incremental sends" + +typeset passphrase="password" +typeset snap1="$TESTPOOL/$TESTFS1@snap1" +typeset snap2="$TESTPOOL/$TESTFS1@snap2" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must zfs snapshot $snap1 + +log_must mkfile 1M /$TESTPOOL/$TESTFS1/$TESTFILE0 +typeset checksum=$(md5sum /$TESTPOOL/$TESTFS1/$TESTFILE0 | awk '{ print $1 }') + +log_must zfs snapshot $snap2 + +log_must eval "zfs send -w $snap1 | zfs receive $TESTPOOL/$TESTFS2" +log_must eval "zfs send -w -i $snap1 $snap2 | zfs receive $TESTPOOL/$TESTFS2" +log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS2" + +typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }') +[[ "$cksum1" == "$checksum" ]] || \ + log_fail "Checksums differ ($cksum1 != $checksum)" + +log_pass "ZFS can receive streams from raw incremental sends" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh new file mode 100644 index 000000000000..57896c6fd305 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh @@ -0,0 +1,75 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should receive to an encrypted child dataset. +# +# STRATEGY: +# 1. Snapshot the default dataset +# 2. Create an encrypted dataset +# 3. Attempt to receive a stream to an encrypted child +# 4. Attempt to receive a stream with properties to an encrypted child +# 5. Attempt to receive a replication stream to an encrypted child +# 6. Unmount and unload the encrypted dataset keys +# 7. Attempt to receive a snapshot stream to an encrypted child +# + +verify_runnable "both" + +function cleanup +{ + snapexists $snap && log_must_busy zfs destroy -f $snap + + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "ZFS should receive to an encrypted child dataset" + +typeset passphrase="password" +typeset snap="$TESTPOOL/$TESTFS@snap" +typeset testfile="testfile" + +log_must zfs snapshot $snap + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_note "Verifying ZFS will receive to an encrypted child" +log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1" + +log_note "Verifying 'send -p' will not receive to an encrypted child" +log_mustnot eval "zfs send -p $snap | zfs receive $TESTPOOL/$TESTFS1/c2" + +log_note "Verifying 'send -R' will not receive to an encrypted child" +log_mustnot eval "zfs send -R $snap | zfs receive $TESTPOOL/$TESTFS1/c3" + +log_note "Verifying ZFS will not receive to an encrypted child when the" \ + "parent key is unloaded" +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_mustnot eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c4" + +log_pass "ZFS can receive to an encrypted child dataset" diff --git 
a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh new file mode 100644 index 000000000000..fa57658f185b --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs rename' should not move an encrypted child dataset outside of its +# encryption root. +# +# STRATEGY: +# 1. Create two encryption roots, and a child and grandchild of the first +# encryption root +# 2. Attempt to rename the grandchild under an unencrypted parent +# 3. Attempt to rename the grandchild under a different encrypted parent +# 4. Attempt to rename the grandchild under the current parent +# 5. Verify the encryption root of the dataset +# 6. Attempt to rename the grandchild to a child +# 7. 
Verify the encryption root of the dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 + datasetexists $TESTPOOL/$TESTFS3 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS3 +} +log_onexit cleanup + +log_assert "'zfs rename' should not move an encrypted child outside of its" \ + "encryption root" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2" +log_must zfs create $TESTPOOL/$TESTFS2/child +log_must zfs create $TESTPOOL/$TESTFS2/child/grandchild +log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS3" + +log_mustnot zfs rename $TESTPOOL/$TESTFS2/child/grandchild \ + $TESTPOOL/grandchild + +log_mustnot zfs rename $TESTPOOL/$TESTFS2/child/grandchild \ + $TESTPOOL/$TESTFS3/grandchild + +log_must zfs rename $TESTPOOL/$TESTFS2/child/grandchild \ + $TESTPOOL/$TESTFS2/child/grandchild2 +log_must verify_encryption_root $TESTPOOL/$TESTFS2/child/grandchild2 \ + $TESTPOOL/$TESTFS2 + +log_must zfs rename $TESTPOOL/$TESTFS2/child/grandchild2 \ + $TESTPOOL/$TESTFS2/grandchild2 +log_must verify_encryption_root $TESTPOOL/$TESTFS2/grandchild2 \ + $TESTPOOL/$TESTFS2 + +log_pass "'zfs rename' does not move an encrypted child outside of its" \ + "encryption root" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh new file mode 100644 index 000000000000..400592aaca2c --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh @@ -0,0 +1,51 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs rename' should not rename an unencrypted dataset to a child +# of an encrypted dataset +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Attempt to rename the default dataset to a child of the encrypted dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy $TESTPOOL/$TESTFS2 +} +log_onexit cleanup + +log_assert "'zfs rename' should not rename an unencrypted dataset to a" \ + "child of an encrypted dataset" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2" +log_mustnot zfs rename $TESTPOOL/$TESTFS $TESTPOOL/$TESTFS2/$TESTFS + +log_pass "'zfs rename' does not rename an unencrypted dataset to a child" \ + "of an encrypted dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh new file mode 100644 index 000000000000..490e146ba6f0 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh @@ -0,0 +1,76 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should perform unencrypted sends of encrypted datasets, unless the '-p' +# or '-R' options are specified. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create a child encryption root +# 3. Snapshot the dataset +# 4. Attempt a send +# 5. Attempt a send with properties +# 6. Attempt a replication send +# 7. Unmount the parent and unload its key +# 8. Attempt a send of the parent dataset +# 9. Attempt a send of the child encryption root +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "ZFS should perform unencrypted sends of encrypted datasets, " \ + "unless the '-p' or '-R' options are specified" + +typeset passphrase="password" +typeset passphrase1="password1" +typeset snap="$TESTPOOL/$TESTFS1@snap" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must eval "echo $passphrase1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1/child" + +log_must zfs snapshot -r $snap + +log_must eval "zfs send $snap > /dev/null" +log_mustnot eval "zfs send -p $snap > /dev/null" +log_mustnot eval "zfs send -R $snap > /dev/null" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_mustnot eval "zfs send $snap > /dev/null" +log_must eval "zfs send $TESTPOOL/$TESTFS1/child@snap > /dev/null" + +log_pass "ZFS performs unencrypted sends of encrypted datasets, unless the" \ + "'-p' or '-R' options are specified" diff --git 
a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh new file mode 100644 index 000000000000..112ee1143d10 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh @@ -0,0 +1,59 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should not perform unencrypted sends from encrypted datasets +# with unloaded keys. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Snapshot the dataset +# 3. Unload the dataset key +# 4. Verify sending the stream fails +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "ZFS should not perform unencrypted sends from encrypted datasets" \ + "with unloaded keys." + +typeset passphrase="password" +typeset snap="$TESTPOOL/$TESTFS1@snap" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" +log_must zfs snapshot $snap +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_mustnot eval "zfs send $snap > /dev/null" + +log_pass "ZFS does not perform unencrypted sends from encrypted datasets" \ + "with unloaded keys." 
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh new file mode 100644 index 000000000000..85cc7407e1a1 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh @@ -0,0 +1,79 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should perform raw sends of datasets. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Snapshot the default dataset and the encrypted dataset +# 3. Attempt a raw send of both datasets +# 4. Attempt a raw send with properties of both datasets +# 5. Attempt a raw replication send of both datasets +# 6. Unmount and unload the encrypted dataset key +# 7. 
Attempt a raw send of the encrypted dataset +# + +verify_runnable "both" + +function cleanup +{ + snapexists $snap && \ + log_must zfs destroy $snap + + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "ZFS should perform raw sends of datasets" + +typeset passphrase="password" +typeset snap="$TESTPOOL/$TESTFS@snap" +typeset snap1="$TESTPOOL/$TESTFS1@snap" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must zfs snapshot $snap +log_must zfs snapshot $snap1 + +log_must eval "zfs send -w $snap > /dev/null" +log_must eval "zfs send -w $snap1 > /dev/null" + +log_note "Verify ZFS can perform raw sends with properties" +log_must eval "zfs send -wp $snap > /dev/null" +log_must eval "zfs send -wp $snap1 > /dev/null" + +log_note "Verify ZFS can perform raw replication sends" +log_must eval "zfs send -wR $snap > /dev/null" +log_must eval "zfs send -wR $snap1 > /dev/null" + +log_note "Verify ZFS can perform a raw send of an encrypted datasets with" \ + "its key unloaded" +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must eval "zfs send -w $snap1 > /dev/null" + +log_pass "ZFS performs raw sends of datasets" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh index 8868747d2263..b2b5c077bf93 100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh @@ -28,4 +28,5 @@ . 
$STF_SUITE/include/libtest.shlib DISK=${DISKS%% *} + default_container_volume_setup $DISK diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh new file mode 100644 index 000000000000..313fa4e4d1fe --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh @@ -0,0 +1,93 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# Unencrypted datasets should only allow keylocation of 'none', encryption +# roots should only allow keylocation of 'prompt' and file URI, and encrypted +# child datasets should not be able to change their keylocation. +# +# STRATEGY: +# 1. Verify the key location of the default dataset is 'none' +# 2. Attempt to change the key location of the default dataset +# 3. Create an encrypted dataset using a key file +# 4. Attempt to change the key location of the encrypted dataset to 'none', +# an invalid location, its current location, and 'prompt' +# 5. Attempt to reload the encrypted dataset key using the new key location +# 6. Create a encrypted child dataset +# 7. Verify the key location of the child dataset is 'none' +# 8. Attempt to change the key location of the child dataset +# 9. 
Verify the key location of the child dataset has not changed +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "Key location can only be 'prompt' or a file path for encryption" \ + "roots, and 'none' for unencrypted volumes" + +log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey" + +log_must verify_keylocation $TESTPOOL/$TESTFS "none" +log_must zfs set keylocation=none $TESTPOOL/$TESTFS +log_mustnot zfs set keylocation=/$TESTPOOL/pkey $TESTPOOL/$TESTFS +log_mustnot zfs set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS +log_must verify_keylocation $TESTPOOL/$TESTFS "none" + +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_mustnot zfs set keylocation=none $TESTPOOL/$TESTFS1 +log_mustnot zfs set keylocation=/$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_must zfs set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "file:///$TESTPOOL/pkey" + +log_must zfs set keylocation=prompt $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "prompt" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must rm /$TESTPOOL/pkey +log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1" +log_must zfs mount $TESTPOOL/$TESTFS1 + +log_must zfs create $TESTPOOL/$TESTFS1/child +log_must verify_keylocation $TESTPOOL/$TESTFS1/child "none" + +log_mustnot zfs set keylocation=none $TESTPOOL/$TESTFS1/child +log_mustnot zfs set keylocation=prompt $TESTPOOL/$TESTFS1/child +log_mustnot zfs set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child +log_mustnot zfs set keylocation=/$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child + +log_must verify_keylocation $TESTPOOL/$TESTFS1/child "none" + +log_pass "Key location can only be 'prompt' or a file path for encryption" \ + "roots, and 
'none' for unencrypted volumes" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile new file mode 100644 index 000000000000..8fe2bf42ca6e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/cli_root/zfs_unload-key + +include $(SRC)/test/zfs-tests/Makefile.com diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh new file mode 100644 index 000000000000..79cd6e9f908e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh new file mode 100644 index 000000000000..6a9af3bc28c3 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. 
$STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh new file mode 100644 index 000000000000..9e08ac69d4de --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh @@ -0,0 +1,69 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs unload-key' should only unload the key of an unmounted dataset. +# +# STRATEGY: +# 1. Attempt to unload the default dataset's key +# 2. Unmount the dataset +# 3. Attempt to unload the default dataset's key +# 4. Create an encrypted dataset +# 5. Attempt to unload the dataset's key +# 6. Verify the key is loaded +# 7. Unmount the dataset +# 8. Attempt to unload the dataset's key +# 9. Verify the key is not loaded +# 10. 
Attempt to unload the dataset's key +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs unload-key' should unload the key for an unmounted" \ + "encrypted dataset" + +log_mustnot zfs unload-key $TESTPOOL/$TESTFS + +log_must zfs unmount $TESTPOOL/$TESTFS +log_mustnot zfs unload-key $TESTPOOL/$TESTFS + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_mustnot zfs unload-key $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1 + +log_mustnot zfs unload-key $TESTPOOL/$TESTFS1 + +log_pass "'zfs unload-key' unloads the key for an unmounted encrypted dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh new file mode 100644 index 000000000000..ecb98d189424 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh @@ -0,0 +1,76 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs unload-key -a' should unload keys for all datasets. +# +# STRATEGY: +# 1. Create an encrypted filesystem, encrypted child dataset, an encrypted +# zvol, and an encrypted pool +# 2. Unmount all datasets +# 3. Attempt to unload all dataset keys +# 4. Verify each dataset has its key unloaded +# 5. Attempt to mount each dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/zvol && log_must zfs destroy $TESTPOOL/zvol + poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1 +} +log_onexit cleanup + +log_assert "'zfs unload-key -a' should unload keys for all datasets" + +log_must eval "echo $PASSPHRASE1 > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must zfs create $TESTPOOL/$TESTFS1/child + +log_must zfs create -V 64M -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/zvol + +typeset DISK2="$(echo $DISKS | awk '{ print $2}')" +log_must zpool create -O encryption=on -O keyformat=passphrase \ + -O keylocation=file:///$TESTPOOL/pkey $TESTPOOL1 $DISK2 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unmount $TESTPOOL1 + +log_must zfs unload-key -a + +log_must key_unavailable $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1/child +log_must key_unavailable $TESTPOOL/zvol +log_must key_unavailable $TESTPOOL1 + +log_mustnot zfs mount $TESTPOOL1 +log_mustnot zfs mount $TESTPOOL/zvol +log_mustnot zfs mount $TESTPOOL/$TESTFS1 + +log_pass "'zfs unload-key -a' unloads keys for all datasets" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh new file mode 
100644 index 000000000000..9766b590587f --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh @@ -0,0 +1,74 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs unload-key -r' should recursively unload keys. +# +# STRATEGY: +# 1. Create a parent encrypted dataset +# 2. Create a sibling encrypted dataset +# 3. Create a child dataset as an encryption root +# 4. Unmount all datasets +# 5. Attempt to unload all dataset keys under parent +# 6. Verify parent and child have their keys unloaded +# 7. Verify sibling has its key loaded +# 8. 
Attempt to mount all datasets +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 +} +log_onexit cleanup + +log_assert "'zfs unload-key -r' should recursively unload keys" + +log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must zfs create -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child +log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unmount $TESTPOOL/$TESTFS2 + +log_must zfs unload-key -r $TESTPOOL/$TESTFS1 + +log_must key_unavailable $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1/child + +log_must key_available $TESTPOOL/$TESTFS2 + +log_mustnot zfs mount $TESTPOOL/$TESTFS1 +log_mustnot zfs mount $TESTPOOL/$TESTFS1/child +log_must zfs mount $TESTPOOL/$TESTFS2 + +log_pass "'zfs unload-key -r' recursively unloads keys" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh old mode 100755 new mode 100644 diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_024_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_024_pos.ksh new file mode 100644 index 000000000000..d2591f124fb5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_024_pos.ksh @@ -0,0 +1,111 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2016, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib + +# +# DESCRIPTION: +# Create an encrypted pool +# +# STRATEGY: +# 1. Create a pool for each encryption type and verify that it is properly set +# 2. 
Create a pool for each keysource type and verify that it is properly set +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_onexit cleanup + +set -A ENCRYPTION_ALGS "encryption=on" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A ENCRYPTION_PROPS "encryption=aes-256-ccm" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A KEYSOURCE_TYPES "keysource=raw,prompt" \ + "keysource=hex,prompt" \ + "keysource=passphrase,prompt" + +set -A KEYSOURCES "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \ + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \ + "abcdefgh" + +log_assert "'zpool create' can create encrypted pools" + +typeset -i i=0 +while (( i < ${#ENCRYPTION_ALGS[*]} )); do + log_must eval "$ECHO ${KEYSOURCES[0]} | $ZPOOL create \ + -O ${ENCRYPTION_ALGS[i]} -O ${KEYSOURCE_TYPES[0]} \ + $TESTPOOL $DISKS" + + propertycheck $TESTPOOL ${ENCRYPTION_PROPS[i]} || \ + log_fail "${ENCRYPTION_ALGS[i]} is failed to set." + + propertycheck $TESTPOOL ${KEYSOURCE_TYPES[0]} || \ + log_fail "${KEYSOURCE_TYPES[0]} is failed to set." + + log_must $ZPOOL destroy $TESTPOOL + (( i = i + 1 )) +done + +typeset -i j=0 +while (( j < ${#KEYSOURCE_TYPES[*]} )); do + log_must eval "$ECHO ${KEYSOURCES[j]} | $ZPOOL create \ + -O ${ENCRYPTION_ALGS[0]} -O ${KEYSOURCE_TYPES[j]} \ + $TESTPOOL $DISKS" + + propertycheck $TESTPOOL ${ENCRYPTION_PROPS[0]} || \ + log_fail "${ENCRYPTION_ALGS[0]} is failed to set." + + propertycheck $TESTPOOL ${KEYSOURCE_TYPES[j]} || \ + log_fail "${KEYSOURCE_TYPES[j]} is failed to set." + + log_must $ZPOOL destroy $TESTPOOL + (( j = j + 1 )) +done + +log_pass "Creating encrypted pools works as expected." 
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh new file mode 100644 index 000000000000..8b7ca4799c5e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh @@ -0,0 +1,89 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib + +# +# DESCRIPTION: +# 'zpool create' should create encrypted pools when using a valid encryption +# algorithm, key format, key location, and key. +# +# STRATEGY: +# 1. Create a pool for each combination of encryption type and key format +# 2. 
Verify that each filesystem has the correct properties set +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} +log_onexit cleanup + +set -A ENCRYPTION_ALGS "encryption=on" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A ENCRYPTION_PROPS "encryption=aes-256-ccm" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A KEYFORMATS "keyformat=raw" \ + "keyformat=hex" \ + "keyformat=passphrase" + +set -A USER_KEYS "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \ + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \ + "abcdefgh" + +log_assert "'zpool create' should create encrypted pools when using a valid" \ + "encryption algorithm, key format, key location, and key." + +typeset -i i=0 +while (( i < ${#ENCRYPTION_ALGS[*]} )); do + typeset -i j=0 + while (( j < ${#KEYFORMATS[*]} )); do + log_must eval "echo -n ${USER_KEYS[j]} | zpool create" \ + "-O ${ENCRYPTION_ALGS[i]} -O ${KEYFORMATS[j]}" \ + "$TESTPOOL $DISKS" + + propertycheck $TESTPOOL ${ENCRYPTION_PROPS[i]} || \ + log_fail "failed to set ${ENCRYPTION_ALGS[i]}" + propertycheck $TESTPOOL ${KEYFORMATS[j]} || \ + log_fail "failed to set ${KEYFORMATS[j]}" + + log_must zpool destroy $TESTPOOL + (( j = j + 1 )) + done + (( i = i + 1 )) +done + +log_pass "'zpool create' creates encrypted pools when using a valid" \ + "encryption algorithm, key format, key location, and key." 
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh new file mode 100644 index 000000000000..aa154d5c65cc --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh @@ -0,0 +1,95 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zpool create' should create an encrypted dataset only if it has a valid +# combination of encryption properties set. +# +# enc = encryption +# loc = keylocation provided +# fmt = keyformat provided +# +# U = unspecified +# N = off +# Y = on +# +# enc fmt loc valid notes +# ------------------------------------------- +# U 0 1 no no crypt specified +# U 1 0 no no crypt specified +# U 1 1 no no crypt specified +# N 0 0 yes explicit no encryption +# N 0 1 no keylocation given, but crypt off +# N 1 0 no keyformat given, but crypt off +# N 1 1 no keyformat given, but crypt off +# Y 0 0 no no keyformat specified for new key +# Y 0 1 no no keyformat specified for new key +# Y 1 0 yes new encryption root +# Y 1 1 yes new encryption root +# +# STRATEGY: +# 1. 
Attempt to create a dataset using all combinations of encryption +# properties +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} +log_onexit cleanup + +log_assert "'zpool create' should create an encrypted dataset only if it" \ + "has a valid combination of encryption properties set." + +log_mustnot zpool create -O keylocation=prompt $TESTPOOL $DISKS +log_mustnot zpool create -O keyformat=passphrase $TESTPOOL $DISKS +log_mustnot zpool create -O keyformat=passphrase -O keylocation=prompt \ + $TESTPOOL $DISKS + +log_must zpool create -O encryption=off $TESTPOOL $DISKS +log_must zpool destroy $TESTPOOL + +log_mustnot zpool create -O encryption=off -O keylocation=prompt \ + $TESTPOOL $DISKS +log_mustnot zpool create -O encryption=off -O keyformat=passphrase \ + $TESTPOOL $DISKS +log_mustnot zpool create -O encryption=off -O keyformat=passphrase \ + -O keylocation=prompt $TESTPOOL $DISKS + +log_mustnot zpool create -O encryption=on $TESTPOOL $DISKS +log_mustnot zpool create -O encryption=on -O keylocation=prompt \ + $TESTPOOL $DISKS + +log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \ + "-O keyformat=passphrase $TESTPOOL $DISKS" +log_must zpool destroy $TESTPOOL + +log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \ + "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL $DISKS" +log_must zpool destroy $TESTPOOL + +log_pass "'zpool create' creates an encrypted dataset only if it has a" \ + "valid combination of encryption properties set." 
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index dce99d08bb93..a90ac1d15c1d 100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -69,4 +69,5 @@ typeset -a properties=( "feature@sha512" "feature@skein" "feature@edonr" + "feature@encryption" ) diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh new file mode 100644 index 000000000000..4e9013afeb14 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh @@ -0,0 +1,59 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zpool import' should import a pool with an encrypted dataset without +# mounting it. +# +# STRATEGY: +# 1. Create an encrypted pool +# 2. Export the pool +# 3. Attempt to import the pool +# 4. 
Verify the pool exists and the key is not loaded +# + +verify_runnable "both" + +function cleanup +{ + destroy_pool $TESTPOOL1 + log_must rm $VDEV0 + log_must mkfile $FILE_SIZE $VDEV0 +} +log_onexit cleanup + +log_assert "'zpool import' should import a pool with an encrypted dataset" \ + "without mounting it" + +log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \ + "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL1 $VDEV0" +log_must zpool export $TESTPOOL1 +log_must zpool import -d $DEVICE_DIR $TESTPOOL1 +log_must poolexists $TESTPOOL1 +log_must key_unavailable $TESTPOOL1 +log_must unmounted $TESTPOOL1 + +log_pass "'zpool import' imports a pool with an encrypted dataset without" \ + "mounting it" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh new file mode 100644 index 000000000000..d060e8a798e2 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh @@ -0,0 +1,59 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zpool import -l' should import a pool with an encrypted dataset and load +# its key. +# +# STRATEGY: +# 1. 
Create an encrypted pool +# 2. Export the pool +# 3. Attempt to import the pool with the key +# 4. Verify the pool exists and the key is loaded +# + +verify_runnable "both" + +function cleanup +{ + destroy_pool $TESTPOOL1 + log_must rm $VDEV0 + log_must mkfile $FILE_SIZE $VDEV0 +} +log_onexit cleanup + +log_assert "'zpool import -l' should import a pool with an encrypted dataset" \ + "and load its key" + +log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \ + "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL1 $VDEV0" +log_must zpool export $TESTPOOL1 +log_must eval "echo $PASSPHRASE | zpool import -l -d $DEVICE_DIR $TESTPOOL1" +log_must poolexists $TESTPOOL1 +log_must key_available $TESTPOOL1 +log_must mounted $TESTPOOL1 + +log_pass "'zpool import -l' imports a pool with an encrypted dataset and" \ + "loads its key" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh new file mode 100644 index 000000000000..483a683bd571 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh @@ -0,0 +1,71 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Scrubs must work on an encrypted dataset with an unloaded key. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Generate data on the dataset +# 3. 
Unmount the encrypted dataset and unload its key +# 4. Start a scrub +# 5. Wait for the scrub to complete +# 6. Verify the scrub had no errors +# 7. Load the dataset key and mount it +# + +verify_runnable "global" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy $TESTPOOL/$TESTFS2 +} +log_onexit cleanup + +log_assert "Scrubs must work on an encrypted dataset with an unloaded key" + +log_must eval "echo 'password' | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS2" + +typeset mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS2) +log_must mkfile 10m $mntpnt/file1 + +for i in {2..10}; do + log_must mkfile 512b $mntpnt/file$i +done + +log_must zfs unmount $TESTPOOL/$TESTFS2 +log_must zfs unload-key $TESTPOOL/$TESTFS2 + +log_must zpool scrub $TESTPOOL + +while ! is_pool_scrubbed $TESTPOOL; do + sleep 1 +done + +log_must check_pool_status $TESTPOOL "scan" "with 0 errors" + +log_must eval "echo 'password' | zfs mount -l $TESTPOOL/$TESTFS2" + +log_pass "Scrubs work on an encrypted dataset with an unloaded key" diff --git a/usr/src/test/zfs-tests/tests/functional/rsend/rsend.kshlib b/usr/src/test/zfs-tests/tests/functional/rsend/rsend.kshlib index a82d3b3d59c2..2bc9e1ab9269 100644 --- a/usr/src/test/zfs-tests/tests/functional/rsend/rsend.kshlib +++ b/usr/src/test/zfs-tests/tests/functional/rsend/rsend.kshlib @@ -607,7 +607,7 @@ function parse_dump if ($1 == "OBJECT") print $1" "$4 if ($1 == "FREEOBJECTS") print $1" "$4" "$7 if ($1 == "FREE") print $1" "$7" "$10 - if ($1 == "WRITE") print $1" "$15" "$18" "$21" "$24" "$27}' + if ($1 == "WRITE") print $1" "$15" "$21" "$24" "$27" "$30}' } # diff --git a/usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_heirarchy.ksh b/usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_heirarchy.ksh new file mode 100644 index 000000000000..5e19a6b6c073 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_heirarchy.ksh @@ -0,0 +1,96 @@ 
+#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Datto Inc. All rights reserved. +# + +. $STF_SUITE/tests/functional/rsend/rsend.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# Raw recursive sends preserve filesystem structure. +# +# STRATEGY: +# 1. Create an encrypted filesystem with a clone and a child +# 2. Snapshot and send the filesystem tree +# 3. Verify that the filesystem structure was correctly received +# 4. Change the child to an encryption root and promote the clone +# 5. Snapshot and send the filesystem tree again +# 6. Verify that the new structure is received correctly +# + +verify_runnable "both" + +function cleanup +{ + log_must cleanup_pool $POOL + log_must cleanup_pool $POOL2 + log_must setup_test_model $POOL +} + +log_assert "Raw recursive sends preserve filesystem structure." 
+log_onexit cleanup + +# Create the filesystem heirarchy +log_must cleanup_pool $POOL +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase $POOL/$FS" +log_must zfs snapshot $POOL/$FS@snap +log_must zfs clone $POOL/$FS@snap $POOL/clone +log_must zfs create $POOL/$FS/child + +# Back up the tree and verify the structure +log_must zfs snapshot -r $POOL@before +log_must eval "zfs send -wR $POOL@before > $BACKDIR/fs-before-R" +log_must eval "zfs receive -d -F $POOL2 < $BACKDIR/fs-before-R" +dstds=$(get_dst_ds $POOL/$FS $POOL2) +log_must cmp_ds_subs $POOL/$FS $dstds + +log_must verify_encryption_root $POOL/$FS $POOL/$FS +log_must verify_keylocation $POOL/$FS "prompt" +log_must verify_origin $POOL/$FS "-" + +log_must verify_encryption_root $POOL/clone $POOL/$FS +log_must verify_keylocation $POOL/clone "none" +log_must verify_origin $POOL/clone "$POOL/$FS@snap" + +log_must verify_encryption_root $POOL/$FS/child $POOL/$FS +log_must verify_keylocation $POOL/$FS/child "none" + +# Alter the heirarchy and re-send +log_must eval "echo $PASSPHRASE1 | zfs change-key -o keyformat=passphrase" \ + "$POOL/$FS/child" +log_must zfs promote $POOL/clone +log_must zfs snapshot -r $POOL@after +log_must eval "zfs send -wR -i $POOL@before $POOL@after >" \ + "$BACKDIR/fs-after-R" +log_must eval "zfs receive -d -F $POOL2 < $BACKDIR/fs-after-R" +log_must cmp_ds_subs $POOL/$FS $dstds + +log_must verify_encryption_root $POOL/$FS $POOL/clone +log_must verify_keylocation $POOL/$FS "none" +log_must verify_origin $POOL/$FS "$POOL/clone@snap" + +log_must verify_encryption_root $POOL/clone $POOL/clone +log_must verify_keylocation $POOL/clone "prompt" +log_must verify_origin $POOL/clone "-" + +log_must verify_encryption_root $POOL/$FS/child $POOL/$FS/child +log_must verify_keylocation $POOL/$FS/child "prompt" + +log_pass "Raw recursive sends preserve filesystem structure." 
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index e386a974a629..f5eec35db23a 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1397,6 +1397,7 @@ ZFS_COMMON_OBJS += \ dnode_sync.o \ dsl_bookmark.o \ dsl_dir.o \ + dsl_crypt.o \ dsl_dataset.o \ dsl_deadlist.o \ dsl_destroy.o \ @@ -1409,6 +1410,7 @@ ZFS_COMMON_OBJS += \ dsl_scan.o \ zfeature.o \ gzip.o \ + hkdf.o \ lz4.o \ lzjb.o \ metaslab.o \ @@ -1457,6 +1459,7 @@ ZFS_COMMON_OBJS += \ zio.o \ zio_checksum.o \ zio_compress.o \ + zio_crypt.o \ zio_inject.o \ zle.o \ zrlock.o diff --git a/usr/src/uts/common/crypto/core/kcf_prov_lib.c b/usr/src/uts/common/crypto/core/kcf_prov_lib.c index 59f7ea1e70da..68c28cc114ed 100644 --- a/usr/src/uts/common/crypto/core/kcf_prov_lib.c +++ b/usr/src/uts/common/crypto/core/kcf_prov_lib.c @@ -63,7 +63,7 @@ crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd, offset -= uiop->uio_iov[vec_idx++].iov_len) ; - if (vec_idx == uiop->uio_iovcnt) { + if (vec_idx == uiop->uio_iovcnt && length > 0) { /* * The caller specified an offset that is larger than * the total size of the buffers it provided. @@ -444,7 +444,7 @@ crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output, offset >= uiop->uio_iov[vec_idx].iov_len; offset -= uiop->uio_iov[vec_idx++].iov_len) ; - if (vec_idx == uiop->uio_iovcnt) { + if (vec_idx == uiop->uio_iovcnt && length > 0) { /* * The caller specified an offset that is larger than the * total size of the buffers it provided. diff --git a/usr/src/uts/common/fs/zfs/abd.c b/usr/src/uts/common/fs/zfs/abd.c index 4d7b2cb56a21..1ed1ffb37460 100644 --- a/usr/src/uts/common/fs/zfs/abd.c +++ b/usr/src/uts/common/fs/zfs/abd.c @@ -427,8 +427,9 @@ abd_alloc_for_io(size_t size, boolean_t is_metadata) * buffer data with sabd. Use abd_put() to free. sabd must not be freed while * any derived ABDs exist. 
*/ -abd_t * -abd_get_offset(abd_t *sabd, size_t off) +/* ARGSUSED */ +static inline abd_t * +abd_get_offset_impl(abd_t *sabd, size_t off, size_t size) { abd_t *abd; @@ -480,6 +481,25 @@ abd_get_offset(abd_t *sabd, size_t off) return (abd); } +abd_t * +abd_get_offset(abd_t *sabd, size_t off) +{ + size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0; + + VERIFY3U(size, >, 0); + + return (abd_get_offset_impl(sabd, off, size)); +} + +abd_t * +abd_get_offset_size(abd_t *sabd, size_t off, size_t size) +{ + ASSERT3U(off + size, <=, sabd->abd_size); + + return (abd_get_offset_impl(sabd, off, size)); +} + + /* * Allocate a linear ABD structure for buf. You must free this with abd_put() * since the resulting ABD doesn't own its own buffer. diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c index e431c1f5bf5b..a6d14f27dd06 100644 --- a/usr/src/uts/common/fs/zfs/arc.c +++ b/usr/src/uts/common/fs/zfs/arc.c @@ -250,6 +250,21 @@ * ARC is disabled, then the L2ARC's block must be transformed to look * like the physical block in the main data pool before comparing the * checksum and determining its validity. + * + * The L1ARC has a slightly different system for storing encrypted data. + * Raw (encrypted + possibly compressed) data has a few subtle differences from + * data that is just compressed. The biggest difference is that it is not + * possible to decrypt encrypted data (or vice versa) if the keys aren't loaded. + * The other difference is that encryption cannot be treated as a suggestion. + * If a caller would prefer compressed data, but they actually wind up with + * uncompressed data the worst thing that could happen is there might be a + * performance hit. If the caller requests encrypted data, however, we must be + * sure they actually get it or else secret information could be leaked. Raw + * data is stored in hdr->b_crypt_hdr.b_rabd. 
An encrypted header, therefore, + * may have both an encrypted version and a decrypted version of its data at + * once. When a caller needs a raw arc_buf_t, it is allocated and the data is + * copied out of this header. To avoid complications with b_pabd, raw buffers + * cannot be shared. */ #include @@ -266,6 +281,8 @@ #include #include #include +#include +#include #ifdef _KERNEL #include #include @@ -446,7 +463,7 @@ typedef struct arc_stats { kstat_named_t arcstat_evict_skip; /* * Number of times arc_evict_state() was unable to evict enough - * buffers to reach it's target amount. + * buffers to reach its target amount. */ kstat_named_t arcstat_evict_not_enough; kstat_named_t arcstat_evict_l2_cached; @@ -806,9 +823,12 @@ typedef struct arc_callback arc_callback_t; struct arc_callback { void *acb_private; - arc_done_func_t *acb_done; + arc_read_done_func_t *acb_done; arc_buf_t *acb_buf; + boolean_t acb_encrypted; boolean_t acb_compressed; + boolean_t acb_noauth; + uint64_t acb_dsobj; zio_t *acb_zio_dummy; arc_callback_t *acb_next; }; @@ -816,12 +836,12 @@ struct arc_callback { typedef struct arc_write_callback arc_write_callback_t; struct arc_write_callback { - void *awcb_private; - arc_done_func_t *awcb_ready; - arc_done_func_t *awcb_children_ready; - arc_done_func_t *awcb_physdone; - arc_done_func_t *awcb_done; - arc_buf_t *awcb_buf; + void *awcb_private; + arc_write_done_func_t *awcb_ready; + arc_write_done_func_t *awcb_children_ready; + arc_write_done_func_t *awcb_physdone; + arc_write_done_func_t *awcb_done; + arc_buf_t *awcb_buf; }; /* @@ -887,6 +907,36 @@ typedef struct l1arc_buf_hdr { abd_t *b_pabd; } l1arc_buf_hdr_t; +/* + * Encrypted blocks will need to be stored encrypted on the L2ARC + * disk as they appear in the main pool. In order for this to work we + * need to pass around the encryption parameters so they can be used + * to write data to the L2ARC. 
This struct is only defined in the + * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED + * flag set. + */ +typedef struct arc_buf_hdr_crypt { + abd_t *b_rabd; /* raw encrypted data */ + dmu_object_type_t b_ot; /* object type */ + uint32_t b_ebufcnt; /* number of encrypted buffers */ + + /* dsobj for looking up encryption key for l2arc encryption */ + uint64_t b_dsobj; /* for looking up key */ + + /* encryption parameters */ + uint8_t b_salt[ZIO_DATA_SALT_LEN]; + uint8_t b_iv[ZIO_DATA_IV_LEN]; + + /* + * Technically this could be removed since we will always be able to + * get the mac from the bp when we need it. However, it is inconvenient + * for callers of arc code to have to pass a bp in all the time. This + * also allows us to assert that L2ARC data is properly encrypted to + * match the data in the main storage pool. + */ + uint8_t b_mac[ZIO_DATA_MAC_LEN]; +} arc_buf_hdr_crypt_t; + typedef struct l2arc_dev l2arc_dev_t; typedef struct l2arc_buf_hdr { @@ -937,6 +987,11 @@ struct arc_buf_hdr { l2arc_buf_hdr_t b_l2hdr; /* L1ARC fields. Undefined when in l2arc_only state */ l1arc_buf_hdr_t b_l1hdr; + /* + * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED + * is set and the L1 header exists. 
+ */ + arc_buf_hdr_crypt_t b_crypt_hdr; }; #define GHOST_STATE(state) \ @@ -957,6 +1012,8 @@ struct arc_buf_hdr { #define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITING) #define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_FLAG_L2_EVICTED) #define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD) +#define HDR_PROTECTED(hdr) ((hdr)->b_flags & ARC_FLAG_PROTECTED) +#define HDR_NOAUTH(hdr) ((hdr)->b_flags & ARC_FLAG_NOAUTH) #define HDR_SHARED_DATA(hdr) ((hdr)->b_flags & ARC_FLAG_SHARED_DATA) #define HDR_ISTYPE_METADATA(hdr) \ @@ -965,6 +1022,13 @@ struct arc_buf_hdr { #define HDR_HAS_L1HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L1HDR) #define HDR_HAS_L2HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR) +#define HDR_HAS_RABD(hdr) \ + (HDR_HAS_L1HDR(hdr) && HDR_PROTECTED(hdr) && \ + (hdr)->b_crypt_hdr.b_rabd != NULL) +#define HDR_ENCRYPTED(hdr) \ + (HDR_PROTECTED(hdr) && DMU_OT_IS_ENCRYPTED((hdr)->b_crypt_hdr.b_ot)) +#define HDR_AUTHENTICATED(hdr) \ + (HDR_PROTECTED(hdr) && !DMU_OT_IS_ENCRYPTED((hdr)->b_crypt_hdr.b_ot)) /* For storing compression mode in b_flags */ #define HDR_COMPRESS_OFFSET (highbit64(ARC_FLAG_COMPRESS_0) - 1) @@ -977,12 +1041,14 @@ struct arc_buf_hdr { #define ARC_BUF_LAST(buf) ((buf)->b_next == NULL) #define ARC_BUF_SHARED(buf) ((buf)->b_flags & ARC_BUF_FLAG_SHARED) #define ARC_BUF_COMPRESSED(buf) ((buf)->b_flags & ARC_BUF_FLAG_COMPRESSED) +#define ARC_BUF_ENCRYPTED(buf) ((buf)->b_flags & ARC_BUF_FLAG_ENCRYPTED) /* * Other sizes */ -#define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t)) +#define HDR_FULL_CRYPT_SIZE ((int64_t)sizeof (arc_buf_hdr_t)) +#define HDR_FULL_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_crypt_hdr)) #define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr)) /* @@ -1096,13 +1162,21 @@ static kcondvar_t l2arc_feed_thr_cv; static uint8_t l2arc_thread_exit; static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *); +typedef enum arc_fill_flags { + ARC_FILL_LOCKED = 1 << 0, /* hdr lock is held */ + 
ARC_FILL_COMPRESSED = 1 << 1, /* fill with compressed data */ + ARC_FILL_ENCRYPTED = 1 << 2, /* fill with encrypted data */ + ARC_FILL_NOAUTH = 1 << 3, /* don't attempt to authenticate */ + ARC_FILL_IN_PLACE = 1 << 4 /* fill in place (special case) */ +} arc_fill_flags_t; + static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *); static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *); static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *); static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *); static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag); -static void arc_hdr_free_pabd(arc_buf_hdr_t *); -static void arc_hdr_alloc_pabd(arc_buf_hdr_t *); +static void arc_hdr_free_pabd(arc_buf_hdr_t *, boolean_t); +static void arc_hdr_alloc_pabd(arc_buf_hdr_t *, boolean_t); static void arc_access(arc_buf_hdr_t *, kmutex_t *); static boolean_t arc_is_overflowing(); static void arc_buf_watch(arc_buf_t *); @@ -1251,7 +1325,9 @@ buf_hash_remove(arc_buf_hdr_t *hdr) /* * Global data structures and functions for the buf kmem cache. 
*/ + static kmem_cache_t *hdr_full_cache; +static kmem_cache_t *hdr_full_crypt_cache; static kmem_cache_t *hdr_l2only_cache; static kmem_cache_t *buf_cache; @@ -1265,6 +1341,7 @@ buf_fini(void) for (i = 0; i < BUF_LOCKS; i++) mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock); kmem_cache_destroy(hdr_full_cache); + kmem_cache_destroy(hdr_full_crypt_cache); kmem_cache_destroy(hdr_l2only_cache); kmem_cache_destroy(buf_cache); } @@ -1289,6 +1366,19 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag) return (0); } +/* ARGSUSED */ +static int +hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag) +{ + arc_buf_hdr_t *hdr = vbuf; + + (void) hdr_full_cons(vbuf, unused, kmflag); + bzero(&hdr->b_crypt_hdr, sizeof (hdr->b_crypt_hdr)); + arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS); + + return (0); +} + /* ARGSUSED */ static int hdr_l2only_cons(void *vbuf, void *unused, int kmflag) @@ -1332,6 +1422,16 @@ hdr_full_dest(void *vbuf, void *unused) arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS); } +/* ARGSUSED */ +static void +hdr_full_crypt_dest(void *vbuf, void *unused) +{ + arc_buf_hdr_t *hdr = vbuf; + + hdr_full_dest(hdr, unused); + arc_space_return(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS); +} + /* ARGSUSED */ static void hdr_l2only_dest(void *vbuf, void *unused) @@ -1395,6 +1495,9 @@ buf_init(void) hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE, 0, hdr_full_cons, hdr_full_dest, hdr_recl, NULL, NULL, 0); + hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt", + HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest, + hdr_recl, NULL, NULL, 0); hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only", HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, hdr_recl, NULL, NULL, 0); @@ -1429,6 +1532,47 @@ arc_buf_lsize(arc_buf_t *buf) return (HDR_GET_LSIZE(buf->b_hdr)); } +/* + * This function will return B_TRUE if the buffer is encrypted in memory. 
+ * This buffer can be decrypted by calling arc_untransform(). + */ +boolean_t +arc_is_encrypted(arc_buf_t *buf) +{ + return (ARC_BUF_ENCRYPTED(buf) != 0); +} + +/* + * Returns B_TRUE if the buffer represents data that has not had its MAC + * verified yet. + */ +boolean_t +arc_is_unauthenticated(arc_buf_t *buf) +{ + return (HDR_NOAUTH(buf->b_hdr) != 0); +} + +void +arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt, + uint8_t *iv, uint8_t *mac) +{ + arc_buf_hdr_t *hdr = buf->b_hdr; + + ASSERT(HDR_PROTECTED(hdr)); + + bcopy(hdr->b_crypt_hdr.b_salt, salt, ZIO_DATA_SALT_LEN); + bcopy(hdr->b_crypt_hdr.b_iv, iv, ZIO_DATA_IV_LEN); + bcopy(hdr->b_crypt_hdr.b_mac, mac, ZIO_DATA_MAC_LEN); + *byteorder = (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ? + /* CONSTCOND */ + ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER; +} + +/* + * Indicates how this buffer is compressed in memory. If it is not compressed + * the value will be ZIO_COMPRESS_OFF. It can be made normally readable with + * arc_untransform() as long as it is also unencrypted. + */ enum zio_compress arc_get_compression(arc_buf_t *buf) { @@ -1438,6 +1582,18 @@ arc_get_compression(arc_buf_t *buf) #define ARC_MINTIME (hz>>4) /* 62 ms */ +/* + * Return the compression algorithm used to store this data in the ARC. If ARC + * compression is enabled or this is an encrypted block, this will be the same + * as what's used to store it on-disk. Otherwise, this will be ZIO_COMPRESS_OFF. + */ +static inline enum zio_compress +arc_hdr_get_compress(arc_buf_hdr_t *hdr) +{ + return (HDR_COMPRESSION_ENABLED(hdr) ? 
+ HDR_GET_COMPRESS(hdr) : ZIO_COMPRESS_OFF); +} + static inline boolean_t arc_buf_is_shared(arc_buf_t *buf) { @@ -1465,6 +1621,7 @@ static inline void arc_cksum_free(arc_buf_hdr_t *hdr) { ASSERT(HDR_HAS_L1HDR(hdr)); + mutex_enter(&hdr->b_l1hdr.b_freeze_lock); if (hdr->b_l1hdr.b_freeze_cksum != NULL) { kmem_free(hdr->b_l1hdr.b_freeze_cksum, sizeof (zio_cksum_t)); @@ -1475,6 +1632,7 @@ arc_cksum_free(arc_buf_hdr_t *hdr) /* * Return true iff at least one of the bufs on hdr is not compressed. + * Encrypted buffers count as compressed. */ static boolean_t arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr) @@ -1521,55 +1679,17 @@ arc_cksum_verify(arc_buf_t *buf) mutex_exit(&hdr->b_l1hdr.b_freeze_lock); } +/* + * This function makes the assumption that data stored in the L2ARC + * will be transformed exactly as it is in the main pool. Because of + * this we can verify the checksum against the reading process's bp. + */ static boolean_t arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio) { - enum zio_compress compress = BP_GET_COMPRESS(zio->io_bp); - boolean_t valid_cksum; - ASSERT(!BP_IS_EMBEDDED(zio->io_bp)); VERIFY3U(BP_GET_PSIZE(zio->io_bp), ==, HDR_GET_PSIZE(hdr)); - /* - * We rely on the blkptr's checksum to determine if the block - * is valid or not. When compressed arc is enabled, the l2arc - * writes the block to the l2arc just as it appears in the pool. - * This allows us to use the blkptr's checksum to validate the - * data that we just read off of the l2arc without having to store - * a separate checksum in the arc_buf_hdr_t. However, if compressed - * arc is disabled, then the data written to the l2arc is always - * uncompressed and won't match the block as it exists in the main - * pool. When this is the case, we must first compress it if it is - * compressed on the main pool before we can validate the checksum. 
- */ - if (!HDR_COMPRESSION_ENABLED(hdr) && compress != ZIO_COMPRESS_OFF) { - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); - uint64_t lsize = HDR_GET_LSIZE(hdr); - uint64_t csize; - - void *cbuf = zio_buf_alloc(HDR_GET_PSIZE(hdr)); - csize = zio_compress_data(compress, zio->io_abd, cbuf, lsize); - - ASSERT3U(csize, <=, HDR_GET_PSIZE(hdr)); - if (csize < HDR_GET_PSIZE(hdr)) { - /* - * Compressed blocks are always a multiple of the - * smallest ashift in the pool. Ideally, we would - * like to round up the csize to the next - * spa_min_ashift but that value may have changed - * since the block was last written. Instead, - * we rely on the fact that the hdr's psize - * was set to the psize of the block when it was - * last written. We set the csize to that value - * and zero out any part that should not contain - * data. - */ - bzero((char *)cbuf + csize, HDR_GET_PSIZE(hdr) - csize); - csize = HDR_GET_PSIZE(hdr); - } - zio_push_transform(zio, cbuf, csize, HDR_GET_PSIZE(hdr), NULL); - } - /* * Block pointers always store the checksum for the logical data. * If the block pointer has the gang bit set, then the checksum @@ -1583,11 +1703,9 @@ arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio) * generated using the correct checksum algorithm and accounts for the * logical I/O size and not just a gang fragment. 
*/ - valid_cksum = (zio_checksum_error_impl(zio->io_spa, zio->io_bp, + return (zio_checksum_error_impl(zio->io_spa, zio->io_bp, BP_GET_CHECKSUM(zio->io_bp), zio->io_abd, zio->io_size, zio->io_offset, NULL) == 0); - zio_pop_transforms(zio); - return (valid_cksum); } /* @@ -1616,6 +1734,7 @@ arc_cksum_compute(arc_buf_t *buf) return; } + ASSERT(!ARC_BUF_ENCRYPTED(buf)); ASSERT(!ARC_BUF_COMPRESSED(buf)); hdr->b_l1hdr.b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), KM_SLEEP); @@ -1808,15 +1927,14 @@ arc_hdr_set_compress(arc_buf_hdr_t *hdr, enum zio_compress cmp) */ if (!zfs_compressed_arc_enabled || HDR_GET_PSIZE(hdr) == 0) { arc_hdr_clear_flags(hdr, ARC_FLAG_COMPRESSED_ARC); - HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF); ASSERT(!HDR_COMPRESSION_ENABLED(hdr)); - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); } else { arc_hdr_set_flags(hdr, ARC_FLAG_COMPRESSED_ARC); - HDR_SET_COMPRESS(hdr, cmp); - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, cmp); ASSERT(HDR_COMPRESSION_ENABLED(hdr)); } + + HDR_SET_COMPRESS(hdr, cmp); + ASSERT3U(HDR_GET_COMPRESS(hdr), ==, cmp); } /* @@ -1856,6 +1974,255 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf) return (copied); } +/* + * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t. 
+ */ +static uint64_t +arc_hdr_size(arc_buf_hdr_t *hdr) +{ + uint64_t size; + + if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF && + HDR_GET_PSIZE(hdr) > 0) { + size = HDR_GET_PSIZE(hdr); + } else { + ASSERT3U(HDR_GET_LSIZE(hdr), !=, 0); + size = HDR_GET_LSIZE(hdr); + } + return (size); +} + +static int +arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj) +{ + int ret; + uint64_t csize; + uint64_t lsize = HDR_GET_LSIZE(hdr); + uint64_t psize = HDR_GET_PSIZE(hdr); + void *tmpbuf = NULL; + abd_t *abd = hdr->b_l1hdr.b_pabd; + + ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT(HDR_AUTHENTICATED(hdr)); + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + + /* + * The MAC is calculated on the compressed data that is stored on disk. + * However, if compressed arc is disabled we will only have the + * decompressed data available to us now. Compress it into a temporary + * abd so we can verify the MAC. The performance overhead of this will + * be relatively low, since most objects in an encrypted objset will + * be encrypted (instead of authenticated) anyway. + */ + if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && + !HDR_COMPRESSION_ENABLED(hdr)) { + tmpbuf = zio_buf_alloc(lsize); + abd = abd_get_from_buf(tmpbuf, lsize); + abd_take_ownership_of_buf(abd, B_TRUE); + + csize = zio_compress_data(HDR_GET_COMPRESS(hdr), + hdr->b_l1hdr.b_pabd, tmpbuf, lsize); + ASSERT3U(csize, <=, psize); + abd_zero_off(abd, csize, psize - csize); + } + + /* + * Authentication is best effort. We authenticate whenever the key is + * available. If we succeed we clear ARC_FLAG_NOAUTH. 
+ */ + if (hdr->b_crypt_hdr.b_ot == DMU_OT_OBJSET) { + ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); + ASSERT3U(lsize, ==, psize); + ret = spa_do_crypt_objset_mac_abd(B_FALSE, spa, dsobj, abd, + psize, hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS); + } else { + ret = spa_do_crypt_mac_abd(B_FALSE, spa, dsobj, abd, psize, + hdr->b_crypt_hdr.b_mac); + } + + if (ret == 0) + arc_hdr_clear_flags(hdr, ARC_FLAG_NOAUTH); + else if (ret != ENOENT) + goto error; + + if (tmpbuf != NULL) + abd_free(abd); + + return (0); + +error: + if (tmpbuf != NULL) + abd_free(abd); + + return (ret); +} + +/* + * This function will take a header that only has raw encrypted data in + * b_crypt_hdr.b_rabd and decrypt it into a new buffer which is stored in + * b_l1hdr.b_pabd. If designated in the header flags, this function will + * also decompress the data. + */ +static int +arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj) +{ + int ret; + dsl_crypto_key_t *dck = NULL; + abd_t *cabd = NULL; + void *tmp = NULL; + boolean_t no_crypt = B_FALSE; + boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS); + + ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT(HDR_ENCRYPTED(hdr)); + + arc_hdr_alloc_pabd(hdr, B_FALSE); + + /* + * We must be careful to use the passed-in dsobj value here and + * not the value in b_dsobj. b_dsobj is meant to be a best guess for + * the L2ARC, which has the luxury of being able to fail without real + * consequences (the data simply won't make it to the L2ARC). In + * reality, the dsobj stored in the header may belong to a dataset + * that has been unmounted or otherwise disowned, meaning the key + * won't be accessible via that dsobj anymore. 
+ */ + ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); + if (ret != 0) { + ret = SET_ERROR(EACCES); + goto error; + } + + ret = zio_do_crypt_abd(B_FALSE, &dck->dck_key, + hdr->b_crypt_hdr.b_salt, hdr->b_crypt_hdr.b_ot, + hdr->b_crypt_hdr.b_iv, hdr->b_crypt_hdr.b_mac, + HDR_GET_PSIZE(hdr), bswap, hdr->b_l1hdr.b_pabd, + hdr->b_crypt_hdr.b_rabd, &no_crypt); + if (ret != 0) + goto error; + + if (no_crypt) { + abd_copy(hdr->b_l1hdr.b_pabd, hdr->b_crypt_hdr.b_rabd, + HDR_GET_PSIZE(hdr)); + } + + /* + * If this header has disabled arc compression but the b_pabd is + * compressed after decrypting it, we need to decompress the newly + * decrypted data. + */ + if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && + !HDR_COMPRESSION_ENABLED(hdr)) { + /* + * We want to make sure that we are correctly honoring the + * zfs_abd_scatter_enabled setting, so we allocate an abd here + * and then loan a buffer from it, rather than allocating a + * linear buffer and wrapping it in an abd later. + */ + cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); + tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr)); + + ret = zio_decompress_data(HDR_GET_COMPRESS(hdr), + hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr), + HDR_GET_LSIZE(hdr)); + if (ret != 0) { + abd_return_buf(cabd, tmp, arc_hdr_size(hdr)); + goto error; + } + + abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr)); + arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, + arc_hdr_size(hdr), hdr); + hdr->b_l1hdr.b_pabd = cabd; + } + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + return (0); + +error: + arc_hdr_free_pabd(hdr, B_FALSE); + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + if (cabd != NULL) + arc_free_data_buf(hdr, cabd, arc_hdr_size(hdr), hdr); + + return (ret); +} + +/* + * This function is called during arc_buf_fill() to prepare the header's + * abd plaintext pointer for use. This involves authenticated protected + * data and decrypting encrypted data into the plaintext abd. 
+ */ +static int +arc_fill_hdr_crypt(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, spa_t *spa, + uint64_t dsobj, boolean_t noauth) +{ + int ret; + + ASSERT(HDR_PROTECTED(hdr)); + + if (hash_lock != NULL) + mutex_enter(hash_lock); + + if (HDR_NOAUTH(hdr) && !noauth) { + /* + * The caller requested authenticated data but our data has + * not been authenticated yet. Verify the MAC now if we can. + */ + ret = arc_hdr_authenticate(hdr, spa, dsobj); + if (ret != 0) + goto error; + } else if (HDR_HAS_RABD(hdr) && hdr->b_l1hdr.b_pabd == NULL) { + /* + * If we only have the encrypted version of the data, but the + * unencrypted version was requested we take this opportunity + * to store the decrypted version in the header for future use. + */ + ret = arc_hdr_decrypt(hdr, spa, dsobj); + if (ret != 0) + goto error; + } + + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + + if (hash_lock != NULL) + mutex_exit(hash_lock); + + return (0); + +error: + if (hash_lock != NULL) + mutex_exit(hash_lock); + + return (ret); +} + +/* + * This function is used by the dbuf code to decrypt bonus buffers in place. + * The dbuf code itself doesn't have any locking for decrypting a shared dnode + * block, so we use the hash lock here to protect against concurrent calls to + * arc_buf_fill(). 
+ */ +/* ARGSUSED */ +static void +arc_buf_untransform_in_place(arc_buf_t *buf, kmutex_t *hash_lock) +{ + arc_buf_hdr_t *hdr = buf->b_hdr; + + ASSERT(HDR_ENCRYPTED(hdr)); + ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE); + ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + + zio_crypt_copy_dnode_bonus(hdr->b_l1hdr.b_pabd, buf->b_data, + arc_buf_size(buf)); + buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED; + buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED; + hdr->b_crypt_hdr.b_ebufcnt -= 1; +} + /* * Given a buf that has a data buffer attached to it, this function will * efficiently fill the buf with data of the specified compression setting from @@ -1870,15 +2237,79 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf) * the correct-sized data buffer. */ static int -arc_buf_fill(arc_buf_t *buf, boolean_t compressed) +arc_buf_fill(arc_buf_t *buf, spa_t *spa, uint64_t dsobj, arc_fill_flags_t flags) { + int error = 0; arc_buf_hdr_t *hdr = buf->b_hdr; - boolean_t hdr_compressed = (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF); + boolean_t hdr_compressed = + (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF); + boolean_t compressed = (flags & ARC_FILL_COMPRESSED) != 0; + boolean_t encrypted = (flags & ARC_FILL_ENCRYPTED) != 0; dmu_object_byteswap_t bswap = hdr->b_l1hdr.b_byteswap; + kmutex_t *hash_lock = (flags & ARC_FILL_LOCKED) ? NULL : HDR_LOCK(hdr); ASSERT3P(buf->b_data, !=, NULL); - IMPLY(compressed, hdr_compressed); + IMPLY(compressed, hdr_compressed || ARC_BUF_ENCRYPTED(buf)); IMPLY(compressed, ARC_BUF_COMPRESSED(buf)); + IMPLY(encrypted, HDR_ENCRYPTED(hdr)); + IMPLY(encrypted, ARC_BUF_ENCRYPTED(buf)); + IMPLY(encrypted, ARC_BUF_COMPRESSED(buf)); + IMPLY(encrypted, !ARC_BUF_SHARED(buf)); + + /* + * If the caller wanted encrypted data we just need to copy it from + * b_rabd and potentially byteswap it. We won't be able to do any + * further transforms on it. 
+ */ + if (encrypted) { + ASSERT(HDR_HAS_RABD(hdr)); + abd_copy_to_buf(buf->b_data, hdr->b_crypt_hdr.b_rabd, + HDR_GET_PSIZE(hdr)); + goto byteswap; + } + + /* + * Adjust encrypted and authenticated headers to accommodate the + * request if needed. + */ + if (HDR_PROTECTED(hdr)) { + error = arc_fill_hdr_crypt(hdr, hash_lock, spa, + dsobj, !!(flags & ARC_FILL_NOAUTH)); + if (error != 0) + return (error); + } + + /* + * There is a special case here for dnode blocks which are + * decrypting their bonus buffers. These blocks may request to + * be decrypted in-place. This is necessary because there may + * be many dnodes pointing into this buffer and there is + * currently no method to synchronize replacing the backing + * b_data buffer and updating all of the pointers. Here we use + * the hash lock to ensure there are no races. If the need + * arises for other types to be decrypted in-place, they must + * add handling here as well. + */ + if ((flags & ARC_FILL_IN_PLACE) != 0) { + ASSERT(!hdr_compressed); + ASSERT(!compressed); + ASSERT(!encrypted); + + if (HDR_ENCRYPTED(hdr) && ARC_BUF_ENCRYPTED(buf)) { + ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE); + + if (hash_lock != NULL) + mutex_enter(hash_lock); + arc_buf_untransform_in_place(buf, hash_lock); + if (hash_lock != NULL) + mutex_exit(hash_lock); + + /* Compute the hdr's checksum if necessary */ + arc_cksum_compute(buf); + } + + return (0); + } if (hdr_compressed == compressed) { if (!arc_buf_is_shared(buf)) { @@ -1897,7 +2328,7 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) if (arc_buf_is_shared(buf)) { ASSERT(ARC_BUF_COMPRESSED(buf)); - /* We need to give the buf it's own b_data */ + /* We need to give the buf its own b_data */ buf->b_flags &= ~ARC_BUF_FLAG_SHARED; buf->b_data = arc_get_data_buf(hdr, HDR_GET_LSIZE(hdr), buf); @@ -1933,7 +2364,7 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, !=, NULL); return (0); } else { - int error = 
zio_decompress_data(HDR_GET_COMPRESS(hdr), + error = zio_decompress_data(HDR_GET_COMPRESS(hdr), hdr->b_l1hdr.b_pabd, buf->b_data, HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr)); @@ -1944,13 +2375,14 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) if (error != 0) { zfs_dbgmsg( "hdr %p, compress %d, psize %d, lsize %d", - hdr, HDR_GET_COMPRESS(hdr), + hdr, arc_hdr_get_compress(hdr), HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr)); return (SET_ERROR(EIO)); } } } +byteswap: /* Byteswap the buf's data if necessary */ if (bswap != DMU_BSWAP_NUMFUNCS) { ASSERT(!HDR_SHARED_DATA(hdr)); @@ -1964,28 +2396,21 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) return (0); } -int -arc_decompress(arc_buf_t *buf) -{ - return (arc_buf_fill(buf, B_FALSE)); -} - /* - * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t. + * If this function is being called to decrypt an encrypted buffer or verify an + * authenticated one, the key must be loaded and a mapping must be made + * available in the keystore via spa_keystore_create_mapping() or one of its + * callers. 
*/ -static uint64_t -arc_hdr_size(arc_buf_hdr_t *hdr) +int +arc_untransform(arc_buf_t *buf, spa_t *spa, uint64_t dsobj, boolean_t in_place) { - uint64_t size; + arc_fill_flags_t flags = 0; - if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && - HDR_GET_PSIZE(hdr) > 0) { - size = HDR_GET_PSIZE(hdr); - } else { - ASSERT3U(HDR_GET_LSIZE(hdr), !=, 0); - size = HDR_GET_LSIZE(hdr); - } - return (size); + if (in_place) + flags |= ARC_FILL_IN_PLACE; + + return (arc_buf_fill(buf, spa, dsobj, flags)); } /* @@ -2004,6 +2429,7 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state) ASSERT0(hdr->b_l1hdr.b_bufcnt); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); (void) refcount_add_many(&state->arcs_esize[type], HDR_GET_LSIZE(hdr), hdr); return; @@ -2014,6 +2440,10 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state) (void) refcount_add_many(&state->arcs_esize[type], arc_hdr_size(hdr), hdr); } + if (HDR_HAS_RABD(hdr)) { + (void) refcount_add_many(&state->arcs_esize[type], + HDR_GET_PSIZE(hdr), hdr); + } for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { if (arc_buf_is_shared(buf)) @@ -2039,6 +2469,7 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state) ASSERT0(hdr->b_l1hdr.b_bufcnt); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); (void) refcount_remove_many(&state->arcs_esize[type], HDR_GET_LSIZE(hdr), hdr); return; @@ -2049,6 +2480,10 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state) (void) refcount_remove_many(&state->arcs_esize[type], arc_hdr_size(hdr), hdr); } + if (HDR_HAS_RABD(hdr)) { + (void) refcount_remove_many(&state->arcs_esize[type], + HDR_GET_PSIZE(hdr), hdr); + } for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { if (arc_buf_is_shared(buf)) @@ -2142,7 +2577,9 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t 
*hdr, old_state = hdr->b_l1hdr.b_state; refcnt = refcount_count(&hdr->b_l1hdr.b_refcnt); bufcnt = hdr->b_l1hdr.b_bufcnt; - update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL); + + update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL || + HDR_HAS_RABD(hdr)); } else { old_state = arc_l2c_only; refcnt = 0; @@ -2213,6 +2650,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, (void) refcount_add_many(&new_state->arcs_size, HDR_GET_LSIZE(hdr), hdr); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); } else { uint32_t buffers = 0; @@ -2244,8 +2682,11 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, if (hdr->b_l1hdr.b_pabd != NULL) { (void) refcount_add_many(&new_state->arcs_size, arc_hdr_size(hdr), hdr); - } else { - ASSERT(GHOST_STATE(old_state)); + } + + if (HDR_HAS_RABD(hdr)) { + (void) refcount_add_many(&new_state->arcs_size, + HDR_GET_PSIZE(hdr), hdr); } } } @@ -2255,6 +2696,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, if (GHOST_STATE(old_state)) { ASSERT0(bufcnt); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); /* * When moving a header off of a ghost state, @@ -2294,9 +2736,20 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, buf); } ASSERT3U(bufcnt, ==, buffers); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - (void) refcount_remove_many( - &old_state->arcs_size, arc_hdr_size(hdr), hdr); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || + HDR_HAS_RABD(hdr)); + + if (hdr->b_l1hdr.b_pabd != NULL) { + (void) refcount_remove_many( + &old_state->arcs_size, arc_hdr_size(hdr), + hdr); + } + + if (HDR_HAS_RABD(hdr)) { + (void) refcount_remove_many( + &old_state->arcs_size, HDR_GET_PSIZE(hdr), + hdr); + } } } @@ -2383,12 +2836,13 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) { /* * The criteria for sharing a hdr's data are: - * 1. the hdr's compression matches the buf's compression - * 2. the hdr doesn't need to be byteswapped - * 3. 
the hdr isn't already being shared - * 4. the buf is either compressed or it is the last buf in the hdr list + * 1. the buffer is not encrypted + * 2. the hdr's compression matches the buf's compression + * 3. the hdr doesn't need to be byteswapped + * 4. the hdr isn't already being shared + * 5. the buf is either compressed or it is the last buf in the hdr list * - * Criterion #4 maintains the invariant that shared uncompressed + * Criterion #5 maintains the invariant that shared uncompressed * bufs must be the final buf in the hdr's b_buf list. Reading this, you * might ask, "if a compressed buf is allocated first, won't that be the * last thing in the list?", but in that case it's impossible to create @@ -2403,9 +2857,11 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) * sharing if the new buf isn't the first to be added. */ ASSERT3P(buf->b_hdr, ==, hdr); - boolean_t hdr_compressed = HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF; + boolean_t hdr_compressed = arc_hdr_get_compress(hdr) != + ZIO_COMPRESS_OFF; boolean_t buf_compressed = ARC_BUF_COMPRESSED(buf) != 0; - return (buf_compressed == hdr_compressed && + return (!ARC_BUF_ENCRYPTED(buf) && + buf_compressed == hdr_compressed && hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS && !HDR_SHARED_DATA(hdr) && (ARC_BUF_LAST(buf) || ARC_BUF_COMPRESSED(buf))); @@ -2417,10 +2873,12 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) * copy was made successfully, or an error code otherwise. 
*/ static int -arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, +arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj, void *tag, + boolean_t encrypted, boolean_t compressed, boolean_t noauth, boolean_t fill, arc_buf_t **ret) { arc_buf_t *buf; + arc_fill_flags_t flags = ARC_FILL_LOCKED; ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT3U(HDR_GET_LSIZE(hdr), >, 0); @@ -2428,6 +2886,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, hdr->b_type == ARC_BUFC_METADATA); ASSERT3P(ret, !=, NULL); ASSERT3P(*ret, ==, NULL); + IMPLY(encrypted, compressed); buf = *ret = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); buf->b_hdr = hdr; @@ -2445,16 +2904,28 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, /* * Only honor requests for compressed bufs if the hdr is actually - * compressed. + * compressed. This must be overridden if the buffer is encrypted since + * encrypted buffers cannot be decompressed. */ - if (compressed && HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) + if (encrypted) { + buf->b_flags |= ARC_BUF_FLAG_COMPRESSED; + buf->b_flags |= ARC_BUF_FLAG_ENCRYPTED; + flags |= ARC_FILL_COMPRESSED | ARC_FILL_ENCRYPTED; + } else if (compressed && + arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF) { buf->b_flags |= ARC_BUF_FLAG_COMPRESSED; + flags |= ARC_FILL_COMPRESSED; + } + + if (noauth) { + ASSERT0(encrypted); + flags |= ARC_FILL_NOAUTH; + } /* * If the hdr's data can be shared then we share the data buffer and * set the appropriate bit in the hdr's b_flags to indicate the hdr is - * sharing it's b_pabd with the arc_buf_t. Otherwise, we allocate a new - * buffer to store the buf's data. + * sharing its b_pabd. Otherwise, we allocate a new buffer for the buf. * * There are two additional restrictions here because we're sharing * hdr -> buf instead of the usual buf -> hdr. First, the hdr can't be @@ -2465,7 +2936,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, * need to be ABD-aware. 
*/ boolean_t can_share = arc_can_share(hdr, buf) && !HDR_L2_WRITING(hdr) && - abd_is_linear(hdr->b_l1hdr.b_pabd); + hdr->b_l1hdr.b_pabd != NULL && abd_is_linear(hdr->b_l1hdr.b_pabd); /* Set up b_data and sharing */ if (can_share) { @@ -2481,13 +2952,15 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, hdr->b_l1hdr.b_buf = buf; hdr->b_l1hdr.b_bufcnt += 1; + if (encrypted) + hdr->b_crypt_hdr.b_ebufcnt += 1; /* * If the user wants the data from the hdr, we need to either copy or * decompress the data. */ if (fill) { - return (arc_buf_fill(buf, ARC_BUF_COMPRESSED(buf) != 0)); + return (arc_buf_fill(buf, spa, dsobj, flags)); } return (0); @@ -2533,6 +3006,19 @@ arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize, return (buf); } +arc_buf_t * +arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, + dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type) +{ + arc_buf_t *buf = arc_alloc_raw_buf(spa, arc_onloan_tag, dsobj, + byteorder, salt, iv, mac, ot, psize, lsize, compression_type); + + atomic_add_64(&arc_loaned_bytes, psize); + return (buf); +} + /* * Return a loaned arc buffer to the arc. @@ -2578,11 +3064,11 @@ l2arc_free_abd_on_write(abd_t *abd, size_t size, arc_buf_contents_t type) } static void -arc_hdr_free_on_write(arc_buf_hdr_t *hdr) +arc_hdr_free_on_write(arc_buf_hdr_t *hdr, boolean_t free_rdata) { arc_state_t *state = hdr->b_l1hdr.b_state; arc_buf_contents_t type = arc_buf_type(hdr); - uint64_t size = arc_hdr_size(hdr); + uint64_t size = (free_rdata) ? 
HDR_GET_PSIZE(hdr) : arc_hdr_size(hdr); /* protected by hash lock, if in the hash table */ if (multilist_link_active(&hdr->b_l1hdr.b_arc_node)) { @@ -2600,7 +3086,11 @@ arc_hdr_free_on_write(arc_buf_hdr_t *hdr) arc_space_return(size, ARC_SPACE_DATA); } - l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type); + if (free_rdata) { + l2arc_free_abd_on_write(hdr->b_crypt_hdr.b_rabd, size, type); + } else { + l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type); + } } /* @@ -2615,6 +3105,7 @@ arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) ASSERT(arc_can_share(hdr, buf)); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!ARC_BUF_ENCRYPTED(buf)); ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); /* @@ -2676,12 +3167,12 @@ arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) static arc_buf_t * arc_buf_remove(arc_buf_hdr_t *hdr, arc_buf_t *buf) { - ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); - arc_buf_t **bufp = &hdr->b_l1hdr.b_buf; arc_buf_t *lastbuf = NULL; + ASSERT(HDR_HAS_L1HDR(hdr)); + ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); + /* * Remove the buf from the hdr list and locate the last * remaining buffer on the list. @@ -2744,6 +3235,19 @@ arc_buf_destroy_impl(arc_buf_t *buf) ASSERT(hdr->b_l1hdr.b_bufcnt > 0); hdr->b_l1hdr.b_bufcnt -= 1; + + if (ARC_BUF_ENCRYPTED(buf)) + hdr->b_crypt_hdr.b_ebufcnt -= 1; + + /* + * If we have no more encrypted buffers and we've already + * gotten a copy of the decrypted data we can free b_rabd to + * save some space. 
+ */ + if (hdr->b_crypt_hdr.b_ebufcnt == 0 && HDR_HAS_RABD(hdr) && + hdr->b_l1hdr.b_pabd != NULL && !HDR_IO_IN_PROGRESS(hdr)) { + arc_hdr_free_pabd(hdr, B_TRUE); + } } arc_buf_t *lastbuf = arc_buf_remove(hdr, buf); @@ -2758,16 +3262,17 @@ arc_buf_destroy_impl(arc_buf_t *buf) * There is an equivalent case for compressed bufs, but since * they aren't guaranteed to be the last buf in the list and * that is an exceedingly rare case, we just allow that space be - * wasted temporarily. + * wasted temporarily. We must also be careful not to share + * encrypted buffers, since they cannot be shared. */ - if (lastbuf != NULL) { + if (lastbuf != NULL && !ARC_BUF_ENCRYPTED(lastbuf)) { /* Only one buf can be shared at once */ VERIFY(!arc_buf_is_shared(lastbuf)); /* hdr is uncompressed so can't have compressed buf */ VERIFY(!ARC_BUF_COMPRESSED(lastbuf)); ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); /* * We must setup a new shared block between the @@ -2788,7 +3293,7 @@ arc_buf_destroy_impl(arc_buf_t *buf) */ ASSERT3P(lastbuf, !=, NULL); ASSERT(arc_buf_is_shared(lastbuf) || - HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF); + arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF); } /* @@ -2805,26 +3310,43 @@ arc_buf_destroy_impl(arc_buf_t *buf) } static void -arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr) +arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata) { + uint64_t size; + ASSERT3U(HDR_GET_LSIZE(hdr), >, 0); ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT(!HDR_SHARED_DATA(hdr)); + ASSERT(!HDR_SHARED_DATA(hdr) || alloc_rdata); + IMPLY(alloc_rdata, HDR_PROTECTED(hdr)); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); - hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr)) + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - ARCSTAT_INCR(arcstat_compressed_size, arc_hdr_size(hdr)); + if 
(alloc_rdata) { + size = HDR_GET_PSIZE(hdr); + ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL); + hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr); + ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL); + } else { + size = arc_hdr_size(hdr); + ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr); + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + } + + ARCSTAT_INCR(arcstat_compressed_size, size); ARCSTAT_INCR(arcstat_uncompressed_size, HDR_GET_LSIZE(hdr)); } static void -arc_hdr_free_pabd(arc_buf_hdr_t *hdr) +arc_hdr_free_pabd(arc_buf_hdr_t *hdr, boolean_t free_rdata) { + uint64_t size = (free_rdata) ? HDR_GET_PSIZE(hdr) : arc_hdr_size(hdr); + ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); + IMPLY(free_rdata, HDR_HAS_RABD(hdr)); + /* * If the hdr is currently being written to the l2arc then @@ -2833,28 +3355,41 @@ arc_hdr_free_pabd(arc_buf_hdr_t *hdr) * writing it to the l2arc device. 
*/ if (HDR_L2_WRITING(hdr)) { - arc_hdr_free_on_write(hdr); + arc_hdr_free_on_write(hdr, free_rdata); ARCSTAT_BUMP(arcstat_l2_free_on_write); + } else if (free_rdata) { + arc_free_data_abd(hdr, hdr->b_crypt_hdr.b_rabd, size, hdr); } else { arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, - arc_hdr_size(hdr), hdr); + size, hdr); } - hdr->b_l1hdr.b_pabd = NULL; - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - ARCSTAT_INCR(arcstat_compressed_size, -arc_hdr_size(hdr)); + if (free_rdata) { + hdr->b_crypt_hdr.b_rabd = NULL; + } else { + hdr->b_l1hdr.b_pabd = NULL; + } + + if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr)) + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; + + ARCSTAT_INCR(arcstat_compressed_size, -size); ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr)); } static arc_buf_hdr_t * arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, - enum zio_compress compression_type, arc_buf_contents_t type) + boolean_t protected, enum zio_compress compression_type, + arc_buf_contents_t type, boolean_t alloc_rdata) { arc_buf_hdr_t *hdr; VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA); - - hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE); + if (protected) { + hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE); + } else { + hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE); + } ASSERT(HDR_EMPTY(hdr)); ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_thawed, ==, NULL); @@ -2865,6 +3400,8 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, hdr->b_flags = 0; arc_hdr_set_flags(hdr, arc_bufc_to_flags(type) | ARC_FLAG_HAS_L1HDR); arc_hdr_set_compress(hdr, compression_type); + if (protected) + arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED); hdr->b_l1hdr.b_state = arc_anon; hdr->b_l1hdr.b_arc_access = 0; @@ -2876,7 +3413,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, * the compressed or uncompressed data depending on the block * it references and compressed arc enablement. 
*/ - arc_hdr_alloc_pabd(hdr); + arc_hdr_alloc_pabd(hdr, alloc_rdata); ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); return (hdr); @@ -2900,6 +3437,16 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) || (old == hdr_l2only_cache && new == hdr_full_cache)); + /* + * if the caller wanted a new full header and the header is to be + * encrypted we will actually allocate the header from the full crypt + * cache instead. The same applies to freeing from the old cache. + */ + if (HDR_PROTECTED(hdr) && new == hdr_full_cache) + new = hdr_full_crypt_cache; + if (HDR_PROTECTED(hdr) && old == hdr_full_cache) + old = hdr_full_crypt_cache; + nhdr = kmem_cache_alloc(new, KM_PUSHPAGE); ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); @@ -2907,7 +3454,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) bcopy(hdr, nhdr, HDR_L2ONLY_SIZE); - if (new == hdr_full_cache) { + if (new == hdr_full_cache || new == hdr_full_crypt_cache) { arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR); /* * arc_access and arc_change_state need to be aware that a @@ -2918,6 +3465,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) /* Verify previous threads set to NULL before freeing */ ASSERT3P(nhdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); } else { ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT0(hdr->b_l1hdr.b_bufcnt); @@ -2940,6 +3488,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) */ VERIFY(!HDR_L2_WRITING(hdr)); VERIFY3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); #ifdef ZFS_DEBUG if (hdr->b_l1hdr.b_thawed != NULL) { @@ -2969,25 +3518,129 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) list_insert_after(&dev->l2ad_buflist, hdr, nhdr); list_remove(&dev->l2ad_buflist, hdr); - mutex_exit(&dev->l2ad_mtx); + mutex_exit(&dev->l2ad_mtx); + + /* + * Since we're using the pointer address as the tag 
when + * incrementing and decrementing the l2ad_alloc refcount, we + * must remove the old pointer (that we're about to destroy) and + * add the new pointer to the refcount. Otherwise we'd remove + * the wrong pointer address when calling arc_hdr_destroy() later. + */ + + (void) refcount_remove_many(&dev->l2ad_alloc, arc_hdr_size(hdr), hdr); + (void) refcount_add_many(&dev->l2ad_alloc, arc_hdr_size(nhdr), nhdr); + + buf_discard_identity(hdr); + kmem_cache_free(old, hdr); + + return (nhdr); +} + +/* + * This function allows an L1 header to be reallocated as a crypt + * header and vice versa. If we are going to a crypt header, the + * new fields will be zeroed out. + */ +static arc_buf_hdr_t * +arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) +{ + arc_buf_hdr_t *nhdr; + arc_buf_t *buf; + kmem_cache_t *ncache, *ocache; + + ASSERT(HDR_HAS_L1HDR(hdr)); + ASSERT3U(!!HDR_PROTECTED(hdr), !=, need_crypt); + ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); + ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); + + if (need_crypt) { + ncache = hdr_full_crypt_cache; + ocache = hdr_full_cache; + } else { + ncache = hdr_full_cache; + ocache = hdr_full_crypt_cache; + } + + nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE); + bcopy(hdr, nhdr, HDR_L2ONLY_SIZE); + nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum; + nhdr->b_l1hdr.b_bufcnt = hdr->b_l1hdr.b_bufcnt; + nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap; + nhdr->b_l1hdr.b_state = hdr->b_l1hdr.b_state; + nhdr->b_l1hdr.b_arc_access = hdr->b_l1hdr.b_arc_access; + nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb; + nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd; + nhdr->b_l1hdr.b_buf = hdr->b_l1hdr.b_buf; +#ifdef ZFS_DEBUG + if (hdr->b_l1hdr.b_thawed != NULL) { + nhdr->b_l1hdr.b_thawed = hdr->b_l1hdr.b_thawed; + hdr->b_l1hdr.b_thawed = NULL; + } +#endif /* - * Since we're using the pointer address as the tag when - * incrementing and decrementing the l2ad_alloc refcount, we - * must remove the old pointer (that 
we're about to destroy) and - * add the new pointer to the refcount. Otherwise we'd remove - * the wrong pointer address when calling arc_hdr_destroy() later. + * This refcount_add() exists only to ensure that the individual + * arc buffers always point to a header that is referenced, avoiding + * a small race condition that could trigger ASSERTs. */ + (void) refcount_add(&nhdr->b_l1hdr.b_refcnt, FTAG); - (void) refcount_remove_many(&dev->l2ad_alloc, arc_hdr_size(hdr), hdr); - (void) refcount_add_many(&dev->l2ad_alloc, arc_hdr_size(nhdr), nhdr); + for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { + mutex_enter(&buf->b_evict_lock); + buf->b_hdr = nhdr; + mutex_exit(&buf->b_evict_lock); + } + refcount_transfer(&nhdr->b_l1hdr.b_refcnt, &hdr->b_l1hdr.b_refcnt); + (void) refcount_remove(&nhdr->b_l1hdr.b_refcnt, FTAG); + if (need_crypt) { + arc_hdr_set_flags(nhdr, ARC_FLAG_PROTECTED); + } else { + arc_hdr_clear_flags(nhdr, ARC_FLAG_PROTECTED); + } buf_discard_identity(hdr); - kmem_cache_free(old, hdr); + kmem_cache_free(ocache, hdr); return (nhdr); } +/* + * This function is used by the send / receive code to convert a newly + * allocated arc_buf_t to one that is suitable for a raw encrypted write. It + * is also used to allow the root objset block to be updated without altering + * its embedded MACs. Both block types will always be uncompressed so we do not + * have to worry about compression type or psize. 
+ */ +void +arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder, + dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac) +{ + arc_buf_hdr_t *hdr = buf->b_hdr; + + ASSERT(ot == DMU_OT_DNODE || ot == DMU_OT_OBJSET); + ASSERT(HDR_HAS_L1HDR(hdr)); + ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); + + buf->b_flags |= (ARC_BUF_FLAG_COMPRESSED | ARC_BUF_FLAG_ENCRYPTED); + if (!HDR_PROTECTED(hdr)) + hdr = arc_hdr_realloc_crypt(hdr, B_TRUE); + hdr->b_crypt_hdr.b_dsobj = dsobj; + hdr->b_crypt_hdr.b_ot = ot; + hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ? + DMU_BSWAP_NUMFUNCS : DMU_OT_BYTESWAP(ot); + if (!arc_hdr_has_uncompressed_buf(hdr)) + arc_cksum_free(hdr); + + if (salt != NULL) + bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN); + if (iv != NULL) + bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN); + if (mac != NULL) + bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN); +} + /* * Allocate a new arc_buf_hdr_t and arc_buf_t and return the buf to the caller. * The buf is returned thawed since we expect the consumer to modify it. 
@@ -2996,11 +3649,12 @@ arc_buf_t * arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size) { arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size, - ZIO_COMPRESS_OFF, type); + B_FALSE, ZIO_COMPRESS_OFF, type, B_FALSE); ASSERT(!MUTEX_HELD(HDR_LOCK(hdr))); arc_buf_t *buf = NULL; - VERIFY0(arc_buf_alloc_impl(hdr, tag, B_FALSE, B_FALSE, &buf)); + VERIFY0(arc_buf_alloc_impl(hdr, spa, 0, tag, B_FALSE, B_FALSE, + B_FALSE, B_FALSE, &buf)); arc_buf_thaw(buf); return (buf); @@ -3016,33 +3670,76 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize, { ASSERT3U(lsize, >, 0); ASSERT3U(lsize, >=, psize); - ASSERT(compression_type > ZIO_COMPRESS_OFF); - ASSERT(compression_type < ZIO_COMPRESS_FUNCTIONS); + ASSERT3U(compression_type, >, ZIO_COMPRESS_OFF); + ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS); arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, - compression_type, ARC_BUFC_DATA); + B_FALSE, compression_type, ARC_BUFC_DATA, B_FALSE); ASSERT(!MUTEX_HELD(HDR_LOCK(hdr))); arc_buf_t *buf = NULL; - VERIFY0(arc_buf_alloc_impl(hdr, tag, B_TRUE, B_FALSE, &buf)); + VERIFY0(arc_buf_alloc_impl(hdr, spa, 0, tag, B_FALSE, + B_TRUE, B_FALSE, B_FALSE, &buf)); arc_buf_thaw(buf); ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); if (!arc_buf_is_shared(buf)) { /* * To ensure that the hdr has the correct data in it if we call - * arc_decompress() on this buf before it's been written to + * arc_untransform() on this buf before it's been written to * disk, it's easiest if we just set up sharing between the * buf and the hdr. 
*/ ASSERT(!abd_is_linear(hdr->b_l1hdr.b_pabd)); - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); arc_share_buf(hdr, buf); } return (buf); } +arc_buf_t * +arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, + dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type) +{ + arc_buf_hdr_t *hdr; + arc_buf_t *buf; + arc_buf_contents_t type = DMU_OT_IS_METADATA(ot) ? + ARC_BUFC_METADATA : ARC_BUFC_DATA; + + ASSERT3U(lsize, >, 0); + ASSERT3U(lsize, >=, psize); + ASSERT3U(compression_type, >=, ZIO_COMPRESS_OFF); + ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS); + + hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, B_TRUE, + compression_type, type, B_TRUE); + ASSERT(!MUTEX_HELD(HDR_LOCK(hdr))); + + hdr->b_crypt_hdr.b_dsobj = dsobj; + hdr->b_crypt_hdr.b_ot = ot; + hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ? + DMU_BSWAP_NUMFUNCS : DMU_OT_BYTESWAP(ot); + bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN); + bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN); + bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN); + + /* + * This buffer will be considered encrypted even if the ot is not an + * encrypted type. It will become authenticated instead in + * arc_write_ready(). 
+ */ + buf = NULL; + VERIFY0(arc_buf_alloc_impl(hdr, spa, dsobj, tag, B_TRUE, B_TRUE, + B_FALSE, B_FALSE, &buf)); + arc_buf_thaw(buf); + ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + + return (buf); +} + static void arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr) { @@ -3116,15 +3813,23 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr) #endif if (hdr->b_l1hdr.b_pabd != NULL) { - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); } + + if (HDR_HAS_RABD(hdr)) + arc_hdr_free_pabd(hdr, B_TRUE); } ASSERT3P(hdr->b_hash_next, ==, NULL); if (HDR_HAS_L1HDR(hdr)) { ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); - kmem_cache_free(hdr_full_cache, hdr); + + if (!HDR_PROTECTED(hdr)) { + kmem_cache_free(hdr_full_cache, hdr); + } else { + kmem_cache_free(hdr_full_crypt_cache, hdr); + } } else { kmem_cache_free(hdr_l2only_cache, hdr); } @@ -3158,7 +3863,7 @@ arc_buf_destroy(arc_buf_t *buf, void* tag) /* * Evict the arc_buf_hdr that is provided as a parameter. The resultant - * state of the header is dependent on it's state prior to entering this + * state of the header is dependent on its state prior to entering this * function. The following transitions are possible: * * - arc_mru -> arc_mru_ghost @@ -3184,9 +3889,9 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) /* * l2arc_write_buffers() relies on a header's L1 portion - * (i.e. its b_pabd field) during it's write phase. + * (i.e. its b_pabd field) during its write phase. * Thus, we cannot push a header onto the arc_l2c_only - * state (removing it's L1 piece) until the header is + * state (removing its L1 piece) until the header is * done being written to the l2arc. 
*/ if (HDR_HAS_L2HDR(hdr) && HDR_L2_WRITING(hdr)) { @@ -3199,8 +3904,9 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); if (HDR_HAS_L2HDR(hdr)) { + ASSERT(hdr->b_l1hdr.b_pabd == NULL); + ASSERT(!HDR_HAS_RABD(hdr)); /* * This buffer is cached on the 2nd Level ARC; * don't destroy the header. @@ -3267,7 +3973,11 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) * This ensures that the accounting is updated correctly * in arc_free_data_impl(). */ - arc_hdr_free_pabd(hdr); + if (hdr->b_l1hdr.b_pabd != NULL) + arc_hdr_free_pabd(hdr, B_FALSE); + + if (HDR_HAS_RABD(hdr)) + arc_hdr_free_pabd(hdr, B_TRUE); arc_change_state(evicted_state, hdr, hash_lock); ASSERT(HDR_IN_HASH_TABLE(hdr)); @@ -4196,7 +4906,7 @@ arc_reclaim_thread(void *unused) /* * Adapt arc info given the number of bytes we are trying to add and - * the state that we are comming from. This function is only called + * the state that we are coming from. This function is only called * when we are adding new content to the cache. */ static void @@ -4323,7 +5033,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) * upper limit, we must be adding data faster than the evict * thread can evict. Thus, to ensure we don't compound the * problem by adding more data and forcing arc_size to grow even - * further past it's target size, we halt and wait for the + * further past its target size, we halt and wait for the * eviction thread to catch up. 
* * It's also possible that the reclaim thread is unable to evict @@ -4582,22 +5292,23 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) } } -/* a generic arc_done_func_t which you can use */ +/* a generic arc_read_done_func_t which you can use */ /* ARGSUSED */ void -arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg) +arc_bcopy_func(zio_t *zio, int error, arc_buf_t *buf, void *arg) { - if (zio == NULL || zio->io_error == 0) + if (error == 0) bcopy(buf->b_data, arg, arc_buf_size(buf)); arc_buf_destroy(buf, arg); } -/* a generic arc_done_func_t */ +/* a generic arc_read_done_func_t */ +/* ARGSUSED */ void -arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg) +arc_getbuf_func(zio_t *zio, int error, arc_buf_t *buf, void *arg) { arc_buf_t **bufp = arg; - if (zio && zio->io_error) { + if (error != 0) { arc_buf_destroy(buf, arg); *bufp = NULL; } else { @@ -4611,20 +5322,22 @@ arc_hdr_verify(arc_buf_hdr_t *hdr, blkptr_t *bp) { if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) { ASSERT3U(HDR_GET_PSIZE(hdr), ==, 0); - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); + ASSERT3U(arc_hdr_get_compress(hdr), ==, ZIO_COMPRESS_OFF); } else { if (HDR_COMPRESSION_ENABLED(hdr)) { - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, + ASSERT3U(arc_hdr_get_compress(hdr), ==, BP_GET_COMPRESS(bp)); } ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(bp)); ASSERT3U(HDR_GET_PSIZE(hdr), ==, BP_GET_PSIZE(bp)); + ASSERT3U(!!HDR_PROTECTED(hdr), ==, BP_IS_PROTECTED(bp)); } } static void arc_read_done(zio_t *zio) { + blkptr_t *bp = zio->io_bp; arc_buf_hdr_t *hdr = zio->io_private; kmutex_t *hash_lock = NULL; arc_callback_t *callback_list; @@ -4656,6 +5369,26 @@ arc_read_done(zio_t *zio) ASSERT3P(hash_lock, !=, NULL); } + if (BP_IS_PROTECTED(bp)) { + hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp); + hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset; + zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt, + hdr->b_crypt_hdr.b_iv); + + if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) { + void *tmpbuf; + + tmpbuf = 
abd_borrow_buf_copy(zio->io_abd, + sizeof (zil_chain_t)); + zio_crypt_decode_mac_zil(tmpbuf, + hdr->b_crypt_hdr.b_mac); + abd_return_buf(zio->io_abd, tmpbuf, + sizeof (zil_chain_t)); + } else { + zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac); + } + } + if (no_zio_error) { /* byteswap if necessary */ if (BP_SHOULD_BYTESWAP(zio->io_bp)) { @@ -4701,19 +5434,40 @@ arc_read_done(zio_t *zio) /* This is a demand read since prefetches don't use callbacks */ callback_cnt++; - int error = arc_buf_alloc_impl(hdr, acb->acb_private, - acb->acb_compressed, no_zio_error, &acb->acb_buf); + int error = arc_buf_alloc_impl(hdr, zio->io_spa, + acb->acb_dsobj, acb->acb_private, acb->acb_encrypted, + acb->acb_compressed, acb->acb_noauth, no_zio_error, + &acb->acb_buf); + + /* + * Assert non-speculative zios didn't fail because an + * encryption key wasn't loaded + */ + ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) || error == 0); + + /* + * If we failed to decrypt, report an error now (as the zio + * layer would have done if it had done the transforms). 
+ */ + if (error == ECKSUM) { + ASSERT(BP_IS_PROTECTED(bp)); + error = SET_ERROR(EIO); + spa_log_error(zio->io_spa, &zio->io_bookmark); + if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { + zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, + zio->io_spa, NULL, &zio->io_bookmark, zio, + 0, 0); + } + } + if (no_zio_error) { zio->io_error = error; } } hdr->b_l1hdr.b_acb = NULL; arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); - if (callback_cnt == 0) { - ASSERT(HDR_PREFETCH(hdr)); - ASSERT0(hdr->b_l1hdr.b_bufcnt); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - } + if (callback_cnt == 0) + ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) || callback_list != NULL); @@ -4751,8 +5505,10 @@ arc_read_done(zio_t *zio) /* execute each callback and free its structure */ while ((acb = callback_list) != NULL) { - if (acb->acb_done) - acb->acb_done(zio, acb->acb_buf, acb->acb_private); + if (acb->acb_done) { + acb->acb_done(zio, zio->io_error, acb->acb_buf, + acb->acb_private); + } if (acb->acb_zio_dummy != NULL) { acb->acb_zio_dummy->io_error = zio->io_error; @@ -4786,15 +5542,20 @@ arc_read_done(zio_t *zio) * for readers of this block. 
*/ int -arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, - void *private, zio_priority_t priority, int zio_flags, - arc_flags_t *arc_flags, const zbookmark_phys_t *zb) +arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, + arc_read_done_func_t *done, void *private, zio_priority_t priority, + int zio_flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb) { arc_buf_hdr_t *hdr = NULL; kmutex_t *hash_lock = NULL; zio_t *rzio; uint64_t guid = spa_load_guid(spa); - boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW) != 0; + boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW_COMPRESS) != 0; + boolean_t encrypted_read = BP_IS_ENCRYPTED(bp) && + (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0; + boolean_t noauth_read = BP_IS_AUTHENTICATED(bp) && + (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0; + int rc = 0; ASSERT(!BP_IS_EMBEDDED(bp) || BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA); @@ -4808,7 +5569,15 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, hdr = buf_hash_find(guid, bp, &hash_lock); } - if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_pabd != NULL) { + /* + * Determine if we have an L1 cache hit or a cache miss. For simplicity + * we maintain encrypted data separately from compressed / uncompressed + * data. If the user is requesting raw encrypted data and we don't have + * that in the header we will read from disk to guarantee that we can + * get it even if the encryption keys aren't loaded. 
+ */ + if (hdr != NULL && HDR_HAS_L1HDR(hdr) && (HDR_HAS_RABD(hdr) || + (hdr->b_l1hdr.b_pabd != NULL && !encrypted_read))) { arc_buf_t *buf = NULL; *arc_flags |= ARC_FLAG_CACHED; @@ -4860,6 +5629,9 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, acb->acb_done = done; acb->acb_private = private; acb->acb_compressed = compressed_read; + acb->acb_encrypted = encrypted_read; + acb->acb_noauth = noauth_read; + acb->acb_dsobj = zb->zb_objset; if (pio != NULL) acb->acb_zio_dummy = zio_null(pio, spa, NULL, NULL, NULL, zio_flags); @@ -4895,8 +5667,10 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, ASSERT(!BP_IS_EMBEDDED(bp) || !BP_IS_HOLE(bp)); /* Get a buf with the desired data in it. */ - VERIFY0(arc_buf_alloc_impl(hdr, private, - compressed_read, B_TRUE, &buf)); + rc = arc_buf_alloc_impl(hdr, spa, zb->zb_objset, + private, encrypted_read, compressed_read, + noauth_read, B_TRUE, &buf); + ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) || rc == 0); } else if (*arc_flags & ARC_FLAG_PREFETCH && refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) { arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH); @@ -4912,7 +5686,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, data, metadata, hits); if (done) - done(NULL, buf, private); + done(NULL, rc, buf, private); } else { uint64_t lsize = BP_GET_LSIZE(bp); uint64_t psize = BP_GET_PSIZE(bp); @@ -4921,13 +5695,15 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, uint64_t addr = 0; boolean_t devw = B_FALSE; uint64_t size; + abd_t *hdr_abd; if (hdr == NULL) { /* this block is not in the cache */ arc_buf_hdr_t *exists = NULL; arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp); hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, - BP_GET_COMPRESS(bp), type); + BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), type, + encrypted_read); if (!BP_IS_EMBEDDED(bp)) { hdr->b_dva = *BP_IDENTITY(bp); @@ -4943,25 +5719,42 @@ arc_read(zio_t *pio, spa_t *spa, 
const blkptr_t *bp, arc_done_func_t *done, } } else { /* - * This block is in the ghost cache. If it was L2-only - * (and thus didn't have an L1 hdr), we realloc the - * header to add an L1 hdr. + * This block is in the ghost cache or encrypted data + * was requested and we didn't have it. If it was + * L2-only (and thus didn't have an L1 hdr), + * we realloc the header to add an L1 hdr. */ if (!HDR_HAS_L1HDR(hdr)) { hdr = arc_hdr_realloc(hdr, hdr_l2only_cache, hdr_full_cache); } - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); - ASSERT(GHOST_STATE(hdr->b_l1hdr.b_state)); - ASSERT(!HDR_IO_IN_PROGRESS(hdr)); - ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + + if (GHOST_STATE(hdr->b_l1hdr.b_state)) { + ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); + ASSERT(!HDR_IO_IN_PROGRESS(hdr)); + ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt)); + ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); + ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + } else if (HDR_IO_IN_PROGRESS(hdr)) { + /* + * If this header already had an IO in progress + * and we are performing another IO to fetch + * encrypted data we must wait until the first + * IO completes so as not to confuse + * arc_read_done(). This should be very rare + * and so the performance impact shouldn't + * matter. + */ + cv_wait(&hdr->b_l1hdr.b_cv, hash_lock); + mutex_exit(hash_lock); + goto top; + } /* * This is a delicate dance that we play here. - * This hdr is in the ghost list so we access it - * to move it out of the ghost list before we + * This hdr might be in the ghost list so we access + * it to move it out of the ghost list before we * initiate the read. If it's a prefetch then * it won't have a callback so we'll remove the * reference that arc_buf_alloc_impl() created. 
We @@ -4969,25 +5762,41 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, * avoid hitting an assert in remove_reference(). */ arc_access(hdr, hash_lock); - arc_hdr_alloc_pabd(hdr); + arc_hdr_alloc_pabd(hdr, encrypted_read); } - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - size = arc_hdr_size(hdr); - /* - * If compression is enabled on the hdr, then will do - * RAW I/O and will store the compressed data in the hdr's - * data block. Otherwise, the hdr's data block will contain - * the uncompressed data. - */ - if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) { + if (encrypted_read) { + ASSERT(HDR_HAS_RABD(hdr)); + size = HDR_GET_PSIZE(hdr); + hdr_abd = hdr->b_crypt_hdr.b_rabd; zio_flags |= ZIO_FLAG_RAW; + } else { + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + size = arc_hdr_size(hdr); + hdr_abd = hdr->b_l1hdr.b_pabd; + + if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF) { + zio_flags |= ZIO_FLAG_RAW_COMPRESS; + } + + /* + * For authenticated bp's, we do not ask the ZIO layer + * to authenticate them since this will cause the entire + * IO to fail if the key isn't loaded. Instead, we + * defer authentication until arc_buf_fill(), which will + * verify the data when the key is available. 
+ */ + if (BP_IS_AUTHENTICATED(bp)) + zio_flags |= ZIO_FLAG_RAW_ENCRYPT; } - if (*arc_flags & ARC_FLAG_PREFETCH) + if (*arc_flags & ARC_FLAG_PREFETCH && + refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH); if (*arc_flags & ARC_FLAG_L2CACHE) arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE); + if (BP_IS_AUTHENTICATED(bp)) + arc_hdr_set_flags(hdr, ARC_FLAG_NOAUTH); if (BP_GET_LEVEL(bp) > 0) arc_hdr_set_flags(hdr, ARC_FLAG_INDIRECT); if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH) @@ -4998,6 +5807,9 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, acb->acb_done = done; acb->acb_private = private; acb->acb_compressed = compressed_read; + acb->acb_encrypted = encrypted_read; + acb->acb_noauth = noauth_read; + acb->acb_dsobj = zb->zb_objset; ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); hdr->b_l1hdr.b_acb = acb; @@ -5069,7 +5881,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, HDR_ISTYPE_METADATA(hdr)); cb->l2rcb_abd = abd; } else { - abd = hdr->b_l1hdr.b_pabd; + abd = hdr_abd; } ASSERT(addr >= VDEV_LABEL_START_SIZE && @@ -5082,7 +5894,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, * Issue a null zio if the underlying buffer * was squashed to zero size by compression. 
*/ - ASSERT3U(HDR_GET_COMPRESS(hdr), !=, + ASSERT3U(arc_hdr_get_compress(hdr), !=, ZIO_COMPRESS_EMPTY); rzio = zio_read_phys(pio, vd, addr, asize, abd, @@ -5094,7 +5906,8 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, ZIO_FLAG_DONT_RETRY, B_FALSE); DTRACE_PROBE2(l2arc__read, vdev_t *, vd, zio_t *, rzio); - ARCSTAT_INCR(arcstat_l2_read_bytes, size); + ARCSTAT_INCR(arcstat_l2_read_bytes, + HDR_GET_PSIZE(hdr)); if (*arc_flags & ARC_FLAG_NOWAIT) { zio_nowait(rzio); @@ -5124,7 +5937,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, } } - rzio = zio_read(pio, spa, bp, hdr->b_l1hdr.b_pabd, size, + rzio = zio_read(pio, spa, bp, hdr_abd, size, arc_read_done, hdr, priority, zio_flags, zb); if (*arc_flags & ARC_FLAG_WAIT) @@ -5197,7 +6010,7 @@ arc_release(arc_buf_t *buf, void *tag) arc_buf_hdr_t *hdr = buf->b_hdr; /* - * It would be nice to assert that if it's DMU metadata (level > + * It would be nice to assert that if it's DMU metadata (level > * 0 || it's the dnode file), then it must be syncing context. * But we don't know that information at this level. */ @@ -5274,7 +6087,8 @@ arc_release(arc_buf_t *buf, void *tag) uint64_t spa = hdr->b_spa; uint64_t psize = HDR_GET_PSIZE(hdr); uint64_t lsize = HDR_GET_LSIZE(hdr); - enum zio_compress compress = HDR_GET_COMPRESS(hdr); + boolean_t protected = HDR_PROTECTED(hdr); + enum zio_compress compress = arc_hdr_get_compress(hdr); arc_buf_contents_t type = arc_buf_type(hdr); VERIFY3U(hdr->b_type, ==, type); @@ -5299,6 +6113,7 @@ arc_release(arc_buf_t *buf, void *tag) * buffer, then we must stop sharing that block. 
*/ if (arc_buf_is_shared(buf)) { + ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf); VERIFY(!arc_buf_is_shared(lastbuf)); /* @@ -5316,7 +6131,7 @@ arc_release(arc_buf_t *buf, void *tag) if (arc_can_share(hdr, lastbuf)) { arc_share_buf(hdr, lastbuf); } else { - arc_hdr_alloc_pabd(hdr); + arc_hdr_alloc_pabd(hdr, B_FALSE); abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data, psize); } @@ -5331,10 +6146,10 @@ arc_release(arc_buf_t *buf, void *tag) * if we have a compressed, shared buffer. */ ASSERT(arc_buf_is_shared(lastbuf) || - HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF); + arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF); ASSERT(!ARC_BUF_SHARED(buf)); } - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); ASSERT3P(state, !=, arc_l2c_only); (void) refcount_remove_many(&state->arcs_size, @@ -5347,16 +6162,24 @@ arc_release(arc_buf_t *buf, void *tag) } hdr->b_l1hdr.b_bufcnt -= 1; + if (ARC_BUF_ENCRYPTED(buf)) + hdr->b_crypt_hdr.b_ebufcnt -= 1; + arc_cksum_verify(buf); arc_buf_unwatch(buf); + /* if this is the last uncompressed buf free the checksum */ + if (!arc_hdr_has_uncompressed_buf(hdr)) + arc_cksum_free(hdr); + mutex_exit(hash_lock); /* * Allocate a new hdr. The new hdr will contain a b_pabd * buffer which will be freed in arc_write(). 
*/ - nhdr = arc_hdr_alloc(spa, psize, lsize, compress, type); + nhdr = arc_hdr_alloc(spa, psize, lsize, protected, + compress, type, HDR_HAS_RABD(hdr)); ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL); ASSERT0(nhdr->b_l1hdr.b_bufcnt); ASSERT0(refcount_count(&nhdr->b_l1hdr.b_refcnt)); @@ -5365,12 +6188,14 @@ arc_release(arc_buf_t *buf, void *tag) nhdr->b_l1hdr.b_buf = buf; nhdr->b_l1hdr.b_bufcnt = 1; + if (ARC_BUF_ENCRYPTED(buf)) + nhdr->b_crypt_hdr.b_ebufcnt = 1; (void) refcount_add(&nhdr->b_l1hdr.b_refcnt, tag); buf->b_hdr = nhdr; mutex_exit(&buf->b_evict_lock); (void) refcount_add_many(&arc_anon->arcs_size, - arc_buf_size(buf), buf); + HDR_GET_LSIZE(nhdr), buf); } else { mutex_exit(&buf->b_evict_lock); ASSERT(refcount_count(&hdr->b_l1hdr.b_refcnt) == 1); @@ -5379,8 +6204,8 @@ arc_release(arc_buf_t *buf, void *tag) ASSERT(!HDR_IO_IN_PROGRESS(hdr)); arc_change_state(arc_anon, hdr, hash_lock); hdr->b_l1hdr.b_arc_access = 0; - mutex_exit(hash_lock); + mutex_exit(hash_lock); buf_discard_identity(hdr); arc_buf_thaw(buf); } @@ -5435,11 +6260,15 @@ arc_write_ready(zio_t *zio) if (arc_buf_is_shared(buf)) { arc_unshare_buf(hdr, buf); } else { - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); } } + + if (HDR_HAS_RABD(hdr)) + arc_hdr_free_pabd(hdr, B_TRUE); } ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); ASSERT(!HDR_SHARED_DATA(hdr)); ASSERT(!arc_buf_is_shared(buf)); @@ -5448,9 +6277,37 @@ arc_write_ready(zio_t *zio) if (HDR_IO_IN_PROGRESS(hdr)) ASSERT(zio->io_flags & ZIO_FLAG_REEXECUTED); - arc_cksum_compute(buf); arc_hdr_set_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); + if (BP_IS_PROTECTED(zio->io_bp) != !!HDR_PROTECTED(hdr)) + hdr = arc_hdr_realloc_crypt(hdr, BP_IS_PROTECTED(zio->io_bp)); + + if (BP_IS_PROTECTED(zio->io_bp)) { + /* ZIL blocks are written through zio_rewrite */ + ASSERT3U(BP_GET_TYPE(zio->io_bp), !=, DMU_OT_INTENT_LOG); + ASSERT(HDR_PROTECTED(hdr)); + + hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(zio->io_bp); + hdr->b_crypt_hdr.b_dsobj = 
zio->io_bookmark.zb_objset; + zio_crypt_decode_params_bp(zio->io_bp, hdr->b_crypt_hdr.b_salt, + hdr->b_crypt_hdr.b_iv); + zio_crypt_decode_mac_bp(zio->io_bp, hdr->b_crypt_hdr.b_mac); + } + + /* + * If this block was written for raw encryption but the zio layer + * ended up only authenticating it, adjust the buffer flags now. + */ + if (BP_IS_AUTHENTICATED(zio->io_bp) && ARC_BUF_ENCRYPTED(buf)) { + arc_hdr_set_flags(hdr, ARC_FLAG_NOAUTH); + buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED; + if (BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF) + buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED; + } + + /* this must be done after the buffer flags are adjusted */ + arc_cksum_compute(buf); + enum zio_compress compress; if (BP_IS_HOLE(zio->io_bp) || BP_IS_EMBEDDED(zio->io_bp)) { compress = ZIO_COMPRESS_OFF; @@ -5461,10 +6318,14 @@ arc_write_ready(zio_t *zio) HDR_SET_PSIZE(hdr, psize); arc_hdr_set_compress(hdr, compress); + if (zio->io_error != 0 || psize == 0) + goto out; /* - * Fill the hdr with data. If the hdr is compressed, the data we want - * is available from the zio, otherwise we can take it from the buf. + * Fill the hdr with data. If the buffer is encrypted we have no choice + * but to copy the data into b_rabd. If the hdr is compressed, the data + * we want is available from the zio, otherwise we can take it from + * the buf. * * We might be able to share the buf's data with the hdr here. However, * doing so would cause the ARC to be full of linear ABDs if we write a @@ -5474,23 +6335,29 @@ arc_write_ready(zio_t *zio) * written. Therefore, if they're allowed then we allocate one and copy * the data into it; otherwise, we share the data directly if we can. 
*/ - if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) { - arc_hdr_alloc_pabd(hdr); - + if (ARC_BUF_ENCRYPTED(buf)) { + ASSERT3U(psize, >, 0); + ASSERT(ARC_BUF_COMPRESSED(buf)); + arc_hdr_alloc_pabd(hdr, B_TRUE); + abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize); + } else if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) { /* * Ideally, we would always copy the io_abd into b_pabd, but the * user may have disabled compressed ARC, thus we must check the * hdr's compression setting rather than the io_bp's. */ - if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) { - ASSERT3U(BP_GET_COMPRESS(zio->io_bp), !=, - ZIO_COMPRESS_OFF); + if (BP_IS_ENCRYPTED(zio->io_bp)) { ASSERT3U(psize, >, 0); - + arc_hdr_alloc_pabd(hdr, B_TRUE); + abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize); + } else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF && + !ARC_BUF_COMPRESSED(buf)) { + ASSERT3U(psize, >, 0); + arc_hdr_alloc_pabd(hdr, B_FALSE); abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize); } else { ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr)); - + arc_hdr_alloc_pabd(hdr, B_FALSE); abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data, arc_buf_size(buf)); } @@ -5498,10 +6365,10 @@ arc_write_ready(zio_t *zio) ASSERT3P(buf->b_data, ==, abd_to_buf(zio->io_orig_abd)); ASSERT3U(zio->io_orig_size, ==, arc_buf_size(buf)); ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1); - arc_share_buf(hdr, buf); } +out: arc_hdr_verify(hdr, zio->io_bp); } @@ -5612,9 +6479,9 @@ arc_write_done(zio_t *zio) zio_t * arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, - boolean_t l2arc, const zio_prop_t *zp, arc_done_func_t *ready, - arc_done_func_t *children_ready, arc_done_func_t *physdone, - arc_done_func_t *done, void *private, zio_priority_t priority, + boolean_t l2arc, const zio_prop_t *zp, arc_write_done_func_t *ready, + arc_write_done_func_t *children_ready, arc_write_done_func_t *physdone, + arc_write_done_func_t *done, void *private, zio_priority_t priority, int 
zio_flags, const zbookmark_phys_t *zb) { arc_buf_hdr_t *hdr = buf->b_hdr; @@ -5630,17 +6497,33 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0); if (l2arc) arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE); - if (ARC_BUF_COMPRESSED(buf)) { - /* - * We're writing a pre-compressed buffer. Make the - * compression algorithm requested by the zio_prop_t match - * the pre-compressed buffer's compression algorithm. - */ - localprop.zp_compress = HDR_GET_COMPRESS(hdr); - ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf)); + if (ARC_BUF_ENCRYPTED(buf)) { + ASSERT(ARC_BUF_COMPRESSED(buf)); + localprop.zp_encrypt = B_TRUE; + localprop.zp_compress = HDR_GET_COMPRESS(hdr); + /* CONSTCOND */ + localprop.zp_byteorder = + (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ? + ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER; + bcopy(hdr->b_crypt_hdr.b_salt, localprop.zp_salt, + ZIO_DATA_SALT_LEN); + bcopy(hdr->b_crypt_hdr.b_iv, localprop.zp_iv, + ZIO_DATA_IV_LEN); + bcopy(hdr->b_crypt_hdr.b_mac, localprop.zp_mac, + ZIO_DATA_MAC_LEN); + if (DMU_OT_IS_ENCRYPTED(localprop.zp_type)) { + localprop.zp_nopwrite = B_FALSE; + localprop.zp_copies = + MIN(localprop.zp_copies, SPA_DVAS_PER_BP - 1); + } zio_flags |= ZIO_FLAG_RAW; + } else if (ARC_BUF_COMPRESSED(buf)) { + ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf)); + localprop.zp_compress = HDR_GET_COMPRESS(hdr); + zio_flags |= ZIO_FLAG_RAW_COMPRESS; } + callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP); callback->awcb_ready = ready; callback->awcb_children_ready = children_ready; @@ -5663,11 +6546,16 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, if (arc_buf_is_shared(buf)) { arc_unshare_buf(hdr, buf); } else { - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); } VERIFY3P(buf->b_data, !=, NULL); - arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF); } + + if (HDR_HAS_RABD(hdr)) + arc_hdr_free_pabd(hdr, B_TRUE); + + 
arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF); + ASSERT(!arc_buf_is_shared(buf)); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); @@ -5850,8 +6738,8 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj) /* * The assumption here, is the hash value for a given - * arc_buf_hdr_t will remain constant throughout it's lifetime - * (i.e. it's b_spa, b_dva, and b_birth fields don't change). + * arc_buf_hdr_t will remain constant throughout its lifetime + * (i.e. its b_spa, b_dva, and b_birth fields don't change). * Thus, we don't need to store the header's sublist index * on insertion, as this index can be recalculated on removal. * @@ -5967,6 +6855,8 @@ arc_state_fini(void) multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]); multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]); multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]); } uint64_t @@ -6553,6 +7443,108 @@ l2arc_write_done(zio_t *zio) kmem_free(cb, sizeof (l2arc_write_callback_t)); } +static int +l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb) +{ + int ret; + spa_t *spa = zio->io_spa; + arc_buf_hdr_t *hdr = cb->l2rcb_hdr; + blkptr_t *bp = zio->io_bp; + dsl_crypto_key_t *dck = NULL; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + boolean_t no_crypt = B_FALSE; + + /* + * ZIL data is never written to the L2ARC, so we don't need + * special handling for its unique MAC storage. + */ + ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG); + ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + + /* + * If the data was encrypted, decrypt it now. Note that + * we must check the bp here and not the hdr, since the + * hdr does not have its encryption parameters updated + * until arc_read_done(). 
+ */ + if (BP_IS_ENCRYPTED(bp)) { + abd_t *eabd = arc_get_data_abd(hdr, + arc_hdr_size(hdr), hdr); + + zio_crypt_decode_params_bp(bp, salt, iv); + zio_crypt_decode_mac_bp(bp, mac); + + ret = spa_keystore_lookup_key(spa, + cb->l2rcb_zb.zb_objset, FTAG, &dck); + if (ret != 0) { + arc_free_data_abd(hdr, eabd, arc_hdr_size(hdr), hdr); + goto error; + } + + ret = zio_do_crypt_abd(B_FALSE, &dck->dck_key, + salt, BP_GET_TYPE(bp), iv, mac, HDR_GET_PSIZE(hdr), + BP_SHOULD_BYTESWAP(bp), eabd, hdr->b_l1hdr.b_pabd, + &no_crypt); + if (ret != 0) { + arc_free_data_abd(hdr, eabd, arc_hdr_size(hdr), hdr); + spa_keystore_dsl_key_rele(spa, dck, FTAG); + goto error; + } + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + /* + * If we actually performed decryption, replace b_pabd + * with the decrypted data. Otherwise we can just throw + * our decryption buffer away. + */ + if (!no_crypt) { + arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, + arc_hdr_size(hdr), hdr); + hdr->b_l1hdr.b_pabd = eabd; + zio->io_abd = eabd; + } else { + arc_free_data_abd(hdr, eabd, arc_hdr_size(hdr), hdr); + } + } + + /* + * If the L2ARC block was compressed, but ARC compression + * is disabled we decompress the data into a new buffer and + * replace the existing data. 
+ */ + if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && + !HDR_COMPRESSION_ENABLED(hdr)) { + abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); + void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr)); + + ret = zio_decompress_data(HDR_GET_COMPRESS(hdr), + hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr), + HDR_GET_LSIZE(hdr)); + if (ret != 0) { + abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr)); + arc_free_data_abd(hdr, cabd, arc_hdr_size(hdr), hdr); + goto error; + } + + abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr)); + arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, + arc_hdr_size(hdr), hdr); + hdr->b_l1hdr.b_pabd = cabd; + zio->io_abd = cabd; + zio->io_size = HDR_GET_LSIZE(hdr); + } + + return (0); + +error: + return (ret); +} + + /* * A read to a cache device completed. Validate buffer contents before * handing over to the regular ARC routines. @@ -6560,10 +7552,11 @@ l2arc_write_done(zio_t *zio) static void l2arc_read_done(zio_t *zio) { + int tfm_error = 0; l2arc_read_callback_t *cb; arc_buf_hdr_t *hdr; kmutex_t *hash_lock; - boolean_t valid_cksum; + boolean_t valid_cksum, using_rdata; ASSERT3P(zio->io_vd, !=, NULL); ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE); @@ -6603,7 +7596,16 @@ l2arc_read_done(zio_t *zio) */ abd_free(cb->l2rcb_abd); zio->io_size = zio->io_orig_size = arc_hdr_size(hdr); - zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd; + + if (BP_IS_ENCRYPTED(&cb->l2rcb_bp) && + (cb->l2rcb_flags & ZIO_FLAG_RAW_ENCRYPT)) { + ASSERT(HDR_HAS_RABD(hdr)); + zio->io_abd = zio->io_orig_abd = + hdr->b_crypt_hdr.b_rabd; + } else { + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd; + } } ASSERT3P(zio->io_abd, !=, NULL); @@ -6611,12 +7613,25 @@ l2arc_read_done(zio_t *zio) /* * Check this survived the L2ARC journey. 
*/ - ASSERT3P(zio->io_abd, ==, hdr->b_l1hdr.b_pabd); + ASSERT(zio->io_abd == hdr->b_l1hdr.b_pabd || + (HDR_HAS_RABD(hdr) && zio->io_abd == hdr->b_crypt_hdr.b_rabd)); zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */ zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */ valid_cksum = arc_cksum_is_equal(hdr, zio); - if (valid_cksum && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) { + using_rdata = (HDR_HAS_RABD(hdr) && + zio->io_abd == hdr->b_crypt_hdr.b_rabd); + + /* + * b_rabd will always match the data as it exists on disk if it is + * being used. Therefore if we are reading into b_rabd we do not + * attempt to untransform the data. + */ + if (valid_cksum && !using_rdata) + tfm_error = l2arc_untransform(zio, cb); + + if (valid_cksum && tfm_error == 0 && zio->io_error == 0 && + !HDR_L2_EVICTED(hdr)) { mutex_exit(hash_lock); zio->io_private = hdr; arc_read_done(zio); @@ -6631,7 +7646,7 @@ l2arc_read_done(zio_t *zio) } else { zio->io_error = SET_ERROR(EIO); } - if (!valid_cksum) + if (!valid_cksum || tfm_error != 0) ARCSTAT_BUMP(arcstat_l2_cksum_bad); /* @@ -6641,11 +7656,13 @@ l2arc_read_done(zio_t *zio) */ if (zio->io_waiter == NULL) { zio_t *pio = zio_unique_parent(zio); + void *abd = (using_rdata) ? + hdr->b_crypt_hdr.b_rabd : hdr->b_l1hdr.b_pabd; ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL); zio_nowait(zio_read(pio, zio->io_spa, zio->io_bp, - hdr->b_l1hdr.b_pabd, zio->io_size, arc_read_done, + abd, zio->io_size, arc_read_done, hdr, zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb)); } @@ -6804,6 +7821,123 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all) mutex_exit(&dev->l2ad_mtx); } +/* + * Handle any abd transforms that might be required for writing to the L2ARC. + * If successful, this function will always return an abd with the data + * transformed as it is on disk in a new abd of asize bytes. 
+ */ +static int +l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize, + abd_t **abd_out) +{ + int ret; + void *tmp = NULL; + abd_t *cabd = NULL, *eabd = NULL, *to_write = hdr->b_l1hdr.b_pabd; + enum zio_compress compress = HDR_GET_COMPRESS(hdr); + uint64_t psize = HDR_GET_PSIZE(hdr); + uint64_t size = arc_hdr_size(hdr); + boolean_t ismd = HDR_ISTYPE_METADATA(hdr); + boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS); + dsl_crypto_key_t *dck = NULL; + uint8_t mac[ZIO_DATA_MAC_LEN] = { 0 }; + boolean_t no_crypt = B_FALSE; + + ASSERT((HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && + !HDR_COMPRESSION_ENABLED(hdr)) || + HDR_ENCRYPTED(hdr) || HDR_SHARED_DATA(hdr) || psize != asize); + ASSERT3U(psize, <=, asize); + + /* + * If this data simply needs its own buffer, we simply allocate it + * and copy the data. This may be done to eliminate a dependency on a + * shared buffer or to reallocate the buffer to match asize. + */ + if (HDR_HAS_RABD(hdr) && asize != psize) { + ASSERT3U(size, ==, psize); + to_write = abd_alloc_for_io(asize, ismd); + abd_copy(to_write, hdr->b_crypt_hdr.b_rabd, size); + if (size != asize) + abd_zero_off(to_write, size, asize - size); + goto out; + } + + if ((compress == ZIO_COMPRESS_OFF || HDR_COMPRESSION_ENABLED(hdr)) && + !HDR_ENCRYPTED(hdr)) { + ASSERT3U(size, ==, psize); + to_write = abd_alloc_for_io(asize, ismd); + abd_copy(to_write, hdr->b_l1hdr.b_pabd, size); + if (size != asize) + abd_zero_off(to_write, size, asize - size); + goto out; + } + + if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) { + cabd = abd_alloc_for_io(asize, ismd); + tmp = abd_borrow_buf(cabd, asize); + + psize = zio_compress_data(compress, to_write, tmp, size); + ASSERT3U(psize, <=, HDR_GET_PSIZE(hdr)); + if (psize < asize) + bzero((char *)tmp + psize, asize - psize); + psize = HDR_GET_PSIZE(hdr); + abd_return_buf_copy(cabd, tmp, asize); + to_write = cabd; + } + + if (HDR_ENCRYPTED(hdr)) { + eabd = abd_alloc_for_io(asize, 
ismd); + + /* + * If the dataset was disowned before the buffer + * made it to this point, the key to re-encrypt + * it won't be available. In this case we simply + * won't write the buffer to the L2ARC. + */ + ret = spa_keystore_lookup_key(spa, hdr->b_crypt_hdr.b_dsobj, + FTAG, &dck); + if (ret != 0) + goto error; + + ret = zio_do_crypt_abd(B_TRUE, &dck->dck_key, + hdr->b_crypt_hdr.b_salt, hdr->b_crypt_hdr.b_ot, + hdr->b_crypt_hdr.b_iv, mac, psize, bswap, to_write, + eabd, &no_crypt); + if (ret != 0) + goto error; + + if (no_crypt) + abd_copy(eabd, to_write, psize); + + if (psize != asize) + abd_zero_off(eabd, psize, asize - psize); + + /* assert that the MAC we got here matches the one we saved */ + ASSERT0(bcmp(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN)); + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + if (to_write == cabd) + abd_free(cabd); + + to_write = eabd; + } + +out: + ASSERT3P(to_write, !=, hdr->b_l1hdr.b_pabd); + *abd_out = to_write; + return (0); + +error: + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + if (cabd != NULL) + abd_free(cabd); + if (eabd != NULL) + abd_free(eabd); + + *abd_out = NULL; + return (ret); +} + /* * Find and write ARC buffers to the L2ARC device. * @@ -6840,6 +7974,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) multilist_sublist_t *mls = l2arc_sublist_lock(try); uint64_t passed_sz = 0; + VERIFY3P(mls, !=, NULL); /* * L2ARC fast warmup. 
* @@ -6857,6 +7992,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) for (; hdr; hdr = hdr_prev) { kmutex_t *hash_lock; + abd_t *to_write = NULL; if (arc_warm == B_FALSE) hdr_prev = multilist_sublist_next(mls, hdr); @@ -6894,9 +8030,10 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT3U(HDR_GET_PSIZE(hdr), >, 0); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); ASSERT3U(arc_hdr_size(hdr), >, 0); - uint64_t psize = arc_hdr_size(hdr); + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL || + HDR_HAS_RABD(hdr)); + uint64_t psize = HDR_GET_PSIZE(hdr); uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, psize); @@ -6906,6 +8043,57 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) break; } + /* + * We rely on the L1 portion of the header below, so + * it's invalid for this header to have been evicted out + * of the ghost cache, prior to being written out. The + * ARC_FLAG_L2_WRITING bit ensures this won't happen. + */ + arc_hdr_set_flags(hdr, ARC_FLAG_L2_WRITING); + ASSERT(HDR_HAS_L1HDR(hdr)); + + ASSERT3U(HDR_GET_PSIZE(hdr), >, 0); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || + HDR_HAS_RABD(hdr)); + ASSERT3U(arc_hdr_size(hdr), >, 0); + + /* + * If this header has b_rabd, we can use this since it + * must always match the data exactly as it exists on + * disk. Otherwise, the L2ARC can normally use the + * hdr's data, but if we're sharing data between the + * hdr and one of its bufs, L2ARC needs its own copy of + * the data so that the ZIO below can't race with the + * buf consumer. To ensure that this copy will be + * available for the lifetime of the ZIO and be cleaned + * up afterwards, we add it to the l2arc_free_on_write + * queue. If we need to apply any transforms to the + * data (compression, encryption) we will also need the + * extra buffer. 
+ */ + if (HDR_HAS_RABD(hdr) && psize == asize) { + to_write = hdr->b_crypt_hdr.b_rabd; + } else if ((HDR_COMPRESSION_ENABLED(hdr) || + HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF) && + !HDR_ENCRYPTED(hdr) && !HDR_SHARED_DATA(hdr) && + psize == asize) { + to_write = hdr->b_l1hdr.b_pabd; + } else { + int ret; + arc_buf_contents_t type = arc_buf_type(hdr); + + ret = l2arc_apply_transforms(spa, hdr, asize, + &to_write); + if (ret != 0) { + arc_hdr_clear_flags(hdr, + ARC_FLAG_L2_WRITING); + mutex_exit(hash_lock); + continue; + } + + l2arc_free_abd_on_write(to_write, asize, type); + } + if (pio == NULL) { /* * Insert a dummy header on the buflist so @@ -6926,14 +8114,14 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) hdr->b_l2hdr.b_dev = dev; hdr->b_l2hdr.b_daddr = dev->l2ad_hand; - arc_hdr_set_flags(hdr, - ARC_FLAG_L2_WRITING | ARC_FLAG_HAS_L2HDR); + arc_hdr_set_flags(hdr, ARC_FLAG_HAS_L2HDR); mutex_enter(&dev->l2ad_mtx); list_insert_head(&dev->l2ad_buflist, hdr); mutex_exit(&dev->l2ad_mtx); - (void) refcount_add_many(&dev->l2ad_alloc, psize, hdr); + (void) refcount_add_many(&dev->l2ad_alloc, + arc_hdr_size(hdr), hdr); /* * Normally the L2ARC can use the hdr's data, but if @@ -6949,7 +8137,9 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) * lifetime of the ZIO and be cleaned up afterwards, we * add it to the l2arc_free_on_write queue. */ - abd_t *to_write; + + asize = vdev_psize_to_asize(dev->l2ad_vdev, + psize); if (!HDR_SHARED_DATA(hdr) && psize == asize) { to_write = hdr->b_l1hdr.b_pabd; } else { @@ -6973,8 +8163,11 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev, zio_t *, wzio); - write_psize += psize; write_asize += asize; + /* + * Keep the clock hand suitably device-aligned. 
+ */ + write_psize += psize; dev->l2ad_hand += asize; mutex_exit(hash_lock); diff --git a/usr/src/uts/common/fs/zfs/bptree.c b/usr/src/uts/common/fs/zfs/bptree.c index c74d07236c1b..1a432507f7ad 100644 --- a/usr/src/uts/common/fs/zfs/bptree.c +++ b/usr/src/uts/common/fs/zfs/bptree.c @@ -211,7 +211,8 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, err = 0; for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) { bptree_entry_phys_t bte; - int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST; + int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST + | TRAVERSE_NO_DECRYPT; err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte), &bte, DMU_READ_NO_PREFETCH); diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c index cf6b550c3ef6..5fbce4164b0d 100644 --- a/usr/src/uts/common/fs/zfs/dbuf.c +++ b/usr/src/uts/common/fs/zfs/dbuf.c @@ -122,7 +122,7 @@ int dbuf_cache_max_shift = 5; * cache size). Once the eviction thread is woken up and eviction is required, * it will continue evicting buffers until it's able to reduce the cache size * to the low water mark. If the cache size continues to grow and hits the high - * water mark, then callers adding elments to the cache will begin to evict + * water mark, then callers adding elements to the cache will begin to evict * directly from the cache until the cache is no longer above the high water * mark. */ @@ -284,7 +284,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db) dmu_buf_impl_t *dbf, **dbp; /* - * We musn't hold db_mtx to maintain lock ordering: + * We mustn't hold db_mtx to maintain lock ordering: * DBUF_HASH_MUTEX > db_mtx. 
*/ ASSERT(refcount_is_zero(&db->db_holds)); @@ -894,8 +894,9 @@ dbuf_whichblock(dnode_t *dn, int64_t level, uint64_t offset) } } +/* ARGSUSED */ static void -dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) +dbuf_read_done(zio_t *zio, int err, arc_buf_t *buf, void *vdb) { dmu_buf_impl_t *db = vdb; @@ -915,7 +916,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) db->db_freed_in_flight = FALSE; dbuf_set_data(db, buf); db->db_state = DB_CACHED; - } else if (zio == NULL || zio->io_error == 0) { + } else if (err == 0) { dbuf_set_data(db, buf); db->db_state = DB_CACHED; } else { @@ -928,12 +929,13 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) dbuf_rele_and_unlock(db, NULL); } -static void +static int dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) { dnode_t *dn; zbookmark_phys_t zb; arc_flags_t aflags = ARC_FLAG_NOWAIT; + int err, zio_flags = 0; DB_DNODE_ENTER(db); dn = DB_DNODE(db); @@ -946,6 +948,22 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) if (db->db_blkid == DMU_BONUS_BLKID) { int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen); + arc_buf_t *dn_buf = (dn->dn_dbuf != NULL) ? 
+ dn->dn_dbuf->db_buf : NULL; + + /* if the underlying dnode block is encrypted, decrypt it */ + if (dn_buf != NULL && dn->dn_objset->os_encrypted && + DMU_OT_IS_ENCRYPTED(dn->dn_bonustype) && + (flags & DB_RF_NO_DECRYPT) == 0 && + arc_is_encrypted(dn_buf)) { + err = arc_untransform(dn_buf, dn->dn_objset->os_spa, + dmu_objset_id(dn->dn_objset), B_TRUE); + if (err != 0) { + DB_DNODE_EXIT(db); + mutex_exit(&db->db_mtx); + return (err); + } + } ASSERT3U(bonuslen, <=, db->db.db_size); db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN); @@ -957,7 +975,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) DB_DNODE_EXIT(db); db->db_state = DB_CACHED; mutex_exit(&db->db_mtx); - return; + return (0); } /* @@ -997,7 +1015,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) DB_DNODE_EXIT(db); db->db_state = DB_CACHED; mutex_exit(&db->db_mtx); - return; + return (0); } DB_DNODE_EXIT(db); @@ -1012,12 +1030,29 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET, db->db.db_object, db->db_level, db->db_blkid); + /* + * All bps of an encrypted os should have the encryption bit set. + * If this is not true it indicates tampering and we report an error. + */ + if (db->db_objset->os_encrypted && !BP_USES_CRYPT(db->db_blkptr)) { + spa_log_error(db->db_objset->os_spa, &zb); + zfs_panic_recover("unencrypted block in encrypted " + "object set %llu", dmu_objset_id(db->db_objset)); + return (SET_ERROR(EIO)); + } + dbuf_add_ref(db, NULL); - (void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr, - dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, - (flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED, + zio_flags = (flags & DB_RF_CANFAIL) ? 
+ ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED; + + if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr)) + zio_flags |= ZIO_FLAG_RAW; + + err = arc_read(zio, db->db_objset->os_spa, db->db_blkptr, + dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); + return (err); } /* @@ -1055,25 +1090,38 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg) * or (if there a no active holders) * just null out the current db_data pointer. */ - ASSERT(dr->dr_txg >= txg - 2); + ASSERT3U(dr->dr_txg, >=, txg - 2); if (db->db_blkid == DMU_BONUS_BLKID) { /* Note that the data bufs here are zio_bufs */ dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN); arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN); } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) { + dnode_t *dn = DB_DNODE(db); int size = arc_buf_size(db->db_buf); arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); spa_t *spa = db->db_objset->os_spa; enum zio_compress compress_type = arc_get_compression(db->db_buf); - if (compress_type == ZIO_COMPRESS_OFF) { - dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size); - } else { + if (arc_is_encrypted(db->db_buf)) { + boolean_t byteorder; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + + arc_get_raw_params(db->db_buf, &byteorder, salt, + iv, mac); + dr->dt.dl.dr_data = arc_alloc_raw_buf(spa, db, + dmu_objset_id(dn->dn_objset), byteorder, salt, iv, + mac, dn->dn_type, size, arc_buf_lsize(db->db_buf), + compress_type); + } else if (compress_type != ZIO_COMPRESS_OFF) { ASSERT3U(type, ==, ARC_BUFC_DATA); dr->dt.dl.dr_data = arc_alloc_compressed_buf(spa, db, size, arc_buf_lsize(db->db_buf), compress_type); + } else { + dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size); } bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); } else { @@ -1109,16 +1157,21 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) 
mutex_enter(&db->db_mtx); if (db->db_state == DB_CACHED) { + spa_t *spa = dn->dn_objset->os_spa; + /* - * If the arc buf is compressed, we need to decompress it to - * read the data. This could happen during the "zfs receive" of - * a stream which is compressed and deduplicated. + * If the arc buf is compressed or encrypted, we need to + * untransform it to read the data. This could happen during + * the "zfs receive" of a stream which is deduplicated and + * either raw or compressed. We do not need to do this if the + * caller wants raw encrypted data. */ - if (db->db_buf != NULL && - arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF) { - dbuf_fix_old_data(db, - spa_syncing_txg(dmu_objset_spa(db->db_objset))); - err = arc_decompress(db->db_buf); + if (db->db_buf != NULL && (flags & DB_RF_NO_DECRYPT) == 0 && + (arc_is_encrypted(db->db_buf) || + arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) { + dbuf_fix_old_data(db, spa_syncing_txg(spa)); + err = arc_untransform(db->db_buf, spa, + dmu_objset_id(db->db_objset), B_FALSE); dbuf_set_data(db, db->db_buf); } mutex_exit(&db->db_mtx); @@ -1136,18 +1189,18 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); need_wait = B_TRUE; } - dbuf_read_impl(db, zio, flags); + err = dbuf_read_impl(db, zio, flags); /* dbuf_read_impl has dropped db_mtx for us */ - if (prefetch) + if (!err && prefetch) dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); if ((flags & DB_RF_HAVESTRUCT) == 0) rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(db); - if (need_wait) + if (!err && need_wait) err = zio_wait(zio); } else { /* @@ -1237,6 +1290,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; dr->dt.dl.dr_nopwrite = B_FALSE; + dr->dt.dl.dr_raw = B_FALSE; /* * Release the already-written buffer, so we leave it in @@ -1825,11 +1879,10 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) return (B_FALSE); } -void -dmu_buf_will_dirty(dmu_buf_t 
*db_fake, dmu_tx_t *tx) +static void +dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH; ASSERT(tx->tx_txg != 0); ASSERT(!refcount_is_zero(&db->db_holds)); @@ -1860,12 +1913,19 @@ dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx) DB_DNODE_ENTER(db); if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock)) - rf |= DB_RF_HAVESTRUCT; + flags |= DB_RF_HAVESTRUCT; DB_DNODE_EXIT(db); - (void) dbuf_read(db, NULL, rf); + (void) dbuf_read(db, NULL, flags); (void) dbuf_dirty(db, tx); } +void +dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx) +{ + dmu_buf_will_dirty_impl(db_fake, + DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH, tx); +} + void dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) { @@ -1893,6 +1953,29 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) (void) dbuf_dirty(db, tx); } +/* + * This function is effectively the same as dmu_buf_will_dirty(), but + * indicates the caller expects raw encrypted data in the db. It will + * also set the raw flag on the created dirty record. + */ +void +dmu_buf_will_change_crypt_params(dmu_buf_t *db_fake, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + dbuf_dirty_record_t *dr; + + dmu_buf_will_dirty_impl(db_fake, + DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_NO_DECRYPT, tx); + + dr = db->db_last_dirty; + while (dr != NULL && dr->dr_txg > tx->tx_txg) + dr = dr->dr_next; + + ASSERT3P(dr, !=, NULL); + ASSERT3U(dr->dr_txg, ==, tx->tx_txg); + dr->dt.dl.dr_raw = B_TRUE; +} + #pragma weak dmu_buf_fill_done = dbuf_fill_done /* ARGSUSED */ void @@ -1979,6 +2062,13 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) if (db->db_state == DB_CACHED && refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) { + /* + * In practice, we will never have a case where we have an + * encrypted arc buffer while additional holds exist on the + * dbuf. 
We don't handle this here so we simply assert that + * fact instead. + */ + ASSERT(!arc_is_encrypted(buf)); mutex_exit(&db->db_mtx); (void) dbuf_dirty(db, tx); bcopy(buf->b_data, db->db.db_data, db->db.db_size); @@ -1994,6 +2084,8 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) ASSERT(db->db_buf != NULL); if (dr != NULL && dr->dr_txg == tx->tx_txg) { ASSERT(dr->dt.dl.dr_data == db->db_buf); + IMPLY(arc_is_encrypted(buf), dr->dt.dl.dr_raw); + if (!arc_released(db->db_buf)) { ASSERT(dr->dt.dl.dr_override_state == DR_OVERRIDDEN); @@ -2031,10 +2123,11 @@ dbuf_destroy(dmu_buf_impl_t *db) } if (db->db_blkid == DMU_BONUS_BLKID) { - ASSERT(db->db.db_data != NULL); - zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN); - arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); - db->db_state = DB_UNCACHED; + if (db->db.db_data != NULL) { + zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN); + arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); + db->db_state = DB_UNCACHED; + } } dbuf_clear_data(db); @@ -2319,7 +2412,7 @@ dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp) * prefetch if the next block down is our target. 
*/ static void -dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private) +dbuf_prefetch_indirect_done(zio_t *zio, int err, arc_buf_t *abuf, void *private) { dbuf_prefetch_arg_t *dpa = private; @@ -2339,7 +2432,7 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private) */ if (zio != NULL) { ASSERT3S(BP_GET_LEVEL(zio->io_bp), ==, dpa->dpa_curlevel); - if (zio->io_flags & ZIO_FLAG_RAW) { + if (zio->io_flags & ZIO_FLAG_RAW_COMPRESS) { ASSERT3U(BP_GET_PSIZE(zio->io_bp), ==, zio->io_size); } else { ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size); @@ -2364,7 +2457,7 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private) (dpa->dpa_epbs * (dpa->dpa_curlevel - dpa->dpa_zb.zb_level)); blkptr_t *bp = ((blkptr_t *)abuf->b_data) + P2PHASE(nextblkid, 1ULL << dpa->dpa_epbs); - if (BP_IS_HOLE(bp) || (zio != NULL && zio->io_error != 0)) { + if (BP_IS_HOLE(bp) || err != 0) { kmem_free(dpa, sizeof (*dpa)); } else if (dpa->dpa_curlevel == dpa->dpa_zb.zb_level) { ASSERT3U(nextblkid, ==, dpa->dpa_zb.zb_blkid); @@ -2392,7 +2485,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private) * Issue prefetch reads for the given block on the given level. If the indirect * blocks above that block are not in memory, we will read them in * asynchronously. As a result, this call never blocks waiting for a read to - * complete. + * complete. Note that the prefetch might fail if the dataset is encrypted and + * the encryption key is unmapped before the IO completes. */ void dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio, @@ -2959,6 +3053,41 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db) } } +/* + * Ensure the dbuf's data is untransformed if the associated dirty + * record requires it. This is used by dbuf_sync_leaf() to ensure + * that a dnode block is decrypted before we write new data to it. + * For raw writes we assert that the buffer is already encrypted. 
+ */ +static void +dbuf_check_crypt(dbuf_dirty_record_t *dr) +{ + int err; + dmu_buf_impl_t *db = dr->dr_dbuf; + + ASSERT(MUTEX_HELD(&db->db_mtx)); + + if (!dr->dt.dl.dr_raw && arc_is_encrypted(db->db_buf)) { + /* + * Unfortunately, there is currently no mechanism for + * syncing context to handle decryption errors. An error + * here is only possible if an attacker maliciously + * changed a dnode block and updated the associated + * checksums going up the block tree. + */ + err = arc_untransform(db->db_buf, db->db_objset->os_spa, + dmu_objset_id(db->db_objset), B_TRUE); + if (err) + panic("Invalid dnode block MAC"); + } else if (dr->dt.dl.dr_raw) { + /* + * Writing raw encrypted data requires the db's arc buffer + * to be converted to raw by the caller. + */ + ASSERT(arc_is_encrypted(db->db_buf)); + } +} + static void dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx) { @@ -3054,8 +3183,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) ASSERT(*datap != NULL); ASSERT0(db->db_level); - ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN); - bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen); + ASSERT3U(DN_MAX_BONUS_LEN(dn->dn_phys), <=, + DN_SLOTS_TO_BONUSLEN(dn->dn_phys->dn_extra_slots + 1)); + bcopy(*datap, DN_BONUS(dn->dn_phys), + DN_MAX_BONUS_LEN(dn->dn_phys)); DB_DNODE_EXIT(db); if (*datap != db->db.db_data) { @@ -3096,6 +3227,13 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN); } + /* + * If this is a dnode block, ensure it is appropriately encrypted + * or decrypted, depending on what we are writing to it this txg. + */ + if (os->os_encrypted && dn->dn_object == DMU_META_DNODE_OBJECT) + dbuf_check_crypt(dr); + if (db->db_state != DB_NOFILL && dn->dn_object != DMU_META_DNODE_OBJECT && refcount_count(&db->db_holds) > 1 && @@ -3113,16 +3251,26 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) * DNONE_DNODE blocks). 
*/ int psize = arc_buf_size(*datap); + int lsize = arc_buf_lsize(*datap); arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); enum zio_compress compress_type = arc_get_compression(*datap); - if (compress_type == ZIO_COMPRESS_OFF) { - *datap = arc_alloc_buf(os->os_spa, db, type, psize); - } else { + if (arc_is_encrypted(*datap)) { + boolean_t byteorder; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + + arc_get_raw_params(*datap, &byteorder, salt, iv, mac); + *datap = arc_alloc_raw_buf(os->os_spa, db, + dmu_objset_id(os), byteorder, salt, iv, mac, + dn->dn_type, psize, lsize, compress_type); + } else if (compress_type != ZIO_COMPRESS_OFF) { ASSERT3U(type, ==, ARC_BUFC_DATA); - int lsize = arc_buf_lsize(*datap); *datap = arc_alloc_compressed_buf(os->os_spa, db, psize, lsize, compress_type); + } else { + *datap = arc_alloc_buf(os->os_spa, db, type, psize); } bcopy(db->db.db_data, (*datap)->b_data, psize); } @@ -3253,7 +3401,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) DB_DNODE_EXIT(db); if (!BP_IS_EMBEDDED(bp)) - bp->blk_fill = fill; + BP_SET_FILL(bp, fill); mutex_exit(&db->db_mtx); @@ -3538,6 +3686,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0; dmu_write_policy(os, dn, db->db_level, wp_flag, &zp); + DB_DNODE_EXIT(db); /* @@ -3584,7 +3733,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) * ready callback so that we can properly handle an indirect * block that only contains holes. 
*/ - arc_done_func_t *children_ready_cb = NULL; + arc_write_done_func_t *children_ready_cb = NULL; if (db->db_level != 0) children_ready_cb = dbuf_write_children_ready; diff --git a/usr/src/uts/common/fs/zfs/ddt.c b/usr/src/uts/common/fs/zfs/ddt.c index ba3e02cfb5b0..4338030eead4 100644 --- a/usr/src/uts/common/fs/zfs/ddt.c +++ b/usr/src/uts/common/fs/zfs/ddt.c @@ -253,6 +253,10 @@ ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg) BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth); } +/* + * The bp created via this function may be used for repairs and scrub, but it + * will be missing the salt / IV required to do a full decrypting read. + */ void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp) @@ -263,11 +267,12 @@ ddt_bp_create(enum zio_checksum checksum, ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth); bp->blk_cksum = ddk->ddk_cksum; - bp->blk_fill = 1; BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk)); BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk)); BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk)); + BP_SET_CRYPT(bp, DDK_GET_CRYPT(ddk)); + BP_SET_FILL(bp, 1); BP_SET_CHECKSUM(bp, checksum); BP_SET_TYPE(bp, DMU_OT_DEDUP); BP_SET_LEVEL(bp, 0); @@ -281,9 +286,12 @@ ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp) ddk->ddk_cksum = bp->blk_cksum; ddk->ddk_prop = 0; + ASSERT(BP_IS_ENCRYPTED(bp) || !BP_USES_CRYPT(bp)); + DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp)); DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp)); DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp)); + DDK_SET_CRYPT(ddk, BP_USES_CRYPT(bp)); } void @@ -367,7 +375,7 @@ ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds) if (ddp->ddp_phys_birth == 0) continue; - for (int d = 0; d < SPA_DVAS_PER_BP; d++) + for (int d = 0; d < DDE_GET_NDVAS(dde); d++) dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]); dds->dds_blocks += 1; @@ -521,6 +529,7 @@ ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) uint64_t ditto = spa->spa_dedup_ditto; int total_copies = 0; int desired_copies = 
0; + int copies_needed = 0; for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { ddt_phys_t *ddp = &dde->dde_phys[p]; @@ -546,7 +555,13 @@ ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) if (total_refcnt >= ditto * ditto) desired_copies++; - return (MAX(desired_copies, total_copies) - total_copies); + copies_needed = MAX(desired_copies, total_copies) - total_copies; + + /* encrypted blocks store their IV in DVA[2] */ + if (DDK_GET_CRYPT(&dde->dde_key)) + copies_needed = MIN(copies_needed, SPA_DVAS_PER_BP - 1); + + return (copies_needed); } int @@ -556,7 +571,7 @@ ddt_ditto_copies_present(ddt_entry_t *dde) dva_t *dva = ddp->ddp_dva; int copies = 0 - DVA_GET_GANG(dva); - for (int d = 0; d < SPA_DVAS_PER_BP; d++, dva++) + for (int d = 0; d < DDE_GET_NDVAS(dde); d++, dva++) if (DVA_IS_VALID(dva)) copies++; diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c index fde295d85129..dae57b4c0690 100644 --- a/usr/src/uts/common/fs/zfs/dmu.c +++ b/usr/src/uts/common/fs/zfs/dmu.c @@ -66,60 +66,60 @@ int zfs_nopwrite_enabled = 1; uint32_t zfs_per_txg_dirty_frees_percent = 30; const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { - { DMU_BSWAP_UINT8, TRUE, "unallocated" }, - { DMU_BSWAP_ZAP, TRUE, "object directory" }, - { DMU_BSWAP_UINT64, TRUE, "object array" }, - { DMU_BSWAP_UINT8, TRUE, "packed nvlist" }, - { DMU_BSWAP_UINT64, TRUE, "packed nvlist size" }, - { DMU_BSWAP_UINT64, TRUE, "bpobj" }, - { DMU_BSWAP_UINT64, TRUE, "bpobj header" }, - { DMU_BSWAP_UINT64, TRUE, "SPA space map header" }, - { DMU_BSWAP_UINT64, TRUE, "SPA space map" }, - { DMU_BSWAP_UINT64, TRUE, "ZIL intent log" }, - { DMU_BSWAP_DNODE, TRUE, "DMU dnode" }, - { DMU_BSWAP_OBJSET, TRUE, "DMU objset" }, - { DMU_BSWAP_UINT64, TRUE, "DSL directory" }, - { DMU_BSWAP_ZAP, TRUE, "DSL directory child map"}, - { DMU_BSWAP_ZAP, TRUE, "DSL dataset snap map" }, - { DMU_BSWAP_ZAP, TRUE, "DSL props" }, - { DMU_BSWAP_UINT64, TRUE, "DSL dataset" }, - { 
DMU_BSWAP_ZNODE, TRUE, "ZFS znode" }, - { DMU_BSWAP_OLDACL, TRUE, "ZFS V0 ACL" }, - { DMU_BSWAP_UINT8, FALSE, "ZFS plain file" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS directory" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS master node" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS delete queue" }, - { DMU_BSWAP_UINT8, FALSE, "zvol object" }, - { DMU_BSWAP_ZAP, TRUE, "zvol prop" }, - { DMU_BSWAP_UINT8, FALSE, "other uint8[]" }, - { DMU_BSWAP_UINT64, FALSE, "other uint64[]" }, - { DMU_BSWAP_ZAP, TRUE, "other ZAP" }, - { DMU_BSWAP_ZAP, TRUE, "persistent error log" }, - { DMU_BSWAP_UINT8, TRUE, "SPA history" }, - { DMU_BSWAP_UINT64, TRUE, "SPA history offsets" }, - { DMU_BSWAP_ZAP, TRUE, "Pool properties" }, - { DMU_BSWAP_ZAP, TRUE, "DSL permissions" }, - { DMU_BSWAP_ACL, TRUE, "ZFS ACL" }, - { DMU_BSWAP_UINT8, TRUE, "ZFS SYSACL" }, - { DMU_BSWAP_UINT8, TRUE, "FUID table" }, - { DMU_BSWAP_UINT64, TRUE, "FUID table size" }, - { DMU_BSWAP_ZAP, TRUE, "DSL dataset next clones"}, - { DMU_BSWAP_ZAP, TRUE, "scan work queue" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS user/group used" }, - { DMU_BSWAP_ZAP, TRUE, "ZFS user/group quota" }, - { DMU_BSWAP_ZAP, TRUE, "snapshot refcount tags"}, - { DMU_BSWAP_ZAP, TRUE, "DDT ZAP algorithm" }, - { DMU_BSWAP_ZAP, TRUE, "DDT statistics" }, - { DMU_BSWAP_UINT8, TRUE, "System attributes" }, - { DMU_BSWAP_ZAP, TRUE, "SA master node" }, - { DMU_BSWAP_ZAP, TRUE, "SA attr registration" }, - { DMU_BSWAP_ZAP, TRUE, "SA attr layouts" }, - { DMU_BSWAP_ZAP, TRUE, "scan translations" }, - { DMU_BSWAP_UINT8, FALSE, "deduplicated block" }, - { DMU_BSWAP_ZAP, TRUE, "DSL deadlist map" }, - { DMU_BSWAP_UINT64, TRUE, "DSL deadlist map hdr" }, - { DMU_BSWAP_ZAP, TRUE, "DSL dir clones" }, - { DMU_BSWAP_UINT64, TRUE, "bpobj subobj" } + { DMU_BSWAP_UINT8, TRUE, FALSE, "unallocated" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "object directory" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "object array" }, + { DMU_BSWAP_UINT8, TRUE, FALSE, "packed nvlist" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "packed nvlist size" }, 
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj header" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map header" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map" }, + { DMU_BSWAP_UINT64, TRUE, TRUE, "ZIL intent log" }, + { DMU_BSWAP_DNODE, TRUE, TRUE, "DMU dnode" }, + { DMU_BSWAP_OBJSET, TRUE, FALSE, "DMU objset" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL directory" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL directory child map"}, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dataset snap map" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL props" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL dataset" }, + { DMU_BSWAP_ZNODE, TRUE, FALSE, "ZFS znode" }, + { DMU_BSWAP_OLDACL, TRUE, TRUE, "ZFS V0 ACL" }, + { DMU_BSWAP_UINT8, FALSE, TRUE, "ZFS plain file" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS directory" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS master node" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS delete queue" }, + { DMU_BSWAP_UINT8, FALSE, TRUE, "zvol object" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "zvol prop" }, + { DMU_BSWAP_UINT8, FALSE, TRUE, "other uint8[]" }, + { DMU_BSWAP_UINT64, FALSE, TRUE, "other uint64[]" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "other ZAP" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "persistent error log" }, + { DMU_BSWAP_UINT8, TRUE, FALSE, "SPA history" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA history offsets" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "Pool properties" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL permissions" }, + { DMU_BSWAP_ACL, TRUE, TRUE, "ZFS ACL" }, + { DMU_BSWAP_UINT8, TRUE, TRUE, "ZFS SYSACL" }, + { DMU_BSWAP_UINT8, TRUE, TRUE, "FUID table" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "FUID table size" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dataset next clones"}, + { DMU_BSWAP_ZAP, TRUE, FALSE, "scan work queue" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group used" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group quota" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "snapshot refcount tags"}, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT ZAP 
algorithm" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT statistics" }, + { DMU_BSWAP_UINT8, TRUE, TRUE, "System attributes" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, "SA master node" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, "SA attr registration" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, "SA attr layouts" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "scan translations" }, + { DMU_BSWAP_UINT8, FALSE, TRUE, "deduplicated block" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL deadlist map" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL deadlist map hdr" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dir clones" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj subobj" } }; const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = { @@ -191,6 +191,8 @@ dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset, if (flags & DMU_READ_NO_PREFETCH) db_flags |= DB_RF_NOPREFETCH; + if (flags & DMU_READ_NO_DECRYPT) + db_flags |= DB_RF_NO_DECRYPT; err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp); if (err == 0) { @@ -214,6 +216,8 @@ dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, if (flags & DMU_READ_NO_PREFETCH) db_flags |= DB_RF_NOPREFETCH; + if (flags & DMU_READ_NO_DECRYPT) + db_flags |= DB_RF_NO_DECRYPT; err = dmu_buf_hold_noread(os, object, offset, tag, dbp); if (err == 0) { @@ -314,11 +318,18 @@ dmu_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx) * returns ENOENT, EIO, or 0. 
*/ int -dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) +dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, uint32_t flags, + dmu_buf_t **dbp) { dnode_t *dn; dmu_buf_impl_t *db; int error; + uint32_t db_flags = DB_RF_MUST_SUCCEED; + + if (flags & DMU_READ_NO_PREFETCH) + db_flags |= DB_RF_NOPREFETCH; + if (flags & DMU_READ_NO_DECRYPT) + db_flags |= DB_RF_NO_DECRYPT; error = dnode_hold(os, object, FTAG, &dn); if (error) @@ -348,12 +359,24 @@ dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) dnode_rele(dn, FTAG); - VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH)); + error = dbuf_read(db, NULL, db_flags); + if (error) { + dnode_evict_bonus(dn); + dbuf_rele(db, tag); + *dbp = NULL; + return (error); + } *dbp = &db->db; return (0); } +int +dmu_bonus_hold(objset_t *os, uint64_t obj, void *tag, dmu_buf_t **dbp) +{ + return (dmu_bonus_hold_impl(os, obj, tag, DMU_READ_NO_PREFETCH, dbp)); +} + /* * returns ENOENT, EIO, or 0. * @@ -589,8 +612,8 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) * indirect blocks prefeteched will be those that point to the blocks containing * the data starting at offset, and continuing to offset + len. * - * Note that if the indirect blocks above the blocks being prefetched are not in - * cache, they will be asychronously read in. + * Note that if the indirect blocks above the blocks being prefetched are not + * in cache, they will be asychronously read in. 
*/ void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, @@ -722,7 +745,7 @@ dmu_objset_zfs_unmounting(objset_t *os) static int dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, - uint64_t length) + uint64_t length, boolean_t raw) { uint64_t object_size = (dn->dn_maxblkid + 1) * dn->dn_datablksz; int err; @@ -800,6 +823,17 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len, uint64_t, dmu_tx_get_txg(tx)); dnode_free_range(dn, chunk_begin, chunk_len, tx); + + /* if this is a raw free, mark the dirty record as such */ + if (raw) { + dbuf_dirty_record_t *dr = dn->dn_dbuf->db_last_dirty; + + while (dr != NULL && dr->dr_txg > tx->tx_txg) + dr = dr->dr_next; + if (dr != NULL && dr->dr_txg == tx->tx_txg) + dr->dt.dl.dr_raw = B_TRUE; + } + dmu_tx_commit(tx); length -= chunk_len; @@ -817,7 +851,7 @@ dmu_free_long_range(objset_t *os, uint64_t object, err = dnode_hold(os, object, FTAG, &dn); if (err != 0) return (err); - err = dmu_free_long_range_impl(os, dn, offset, length); + err = dmu_free_long_range_impl(os, dn, offset, length, B_FALSE); /* * It is important to zero out the maxblkid when freeing the entire @@ -832,8 +866,37 @@ dmu_free_long_range(objset_t *os, uint64_t object, return (err); } +/* + * This function is equivalent to dmu_free_long_range(), but also + * marks the new dirty record as a raw write. 
+ */ int -dmu_free_long_object(objset_t *os, uint64_t object) +dmu_free_long_range_raw(objset_t *os, uint64_t object, + uint64_t offset, uint64_t length) +{ + dnode_t *dn; + int err; + + err = dnode_hold(os, object, FTAG, &dn); + if (err != 0) + return (err); + err = dmu_free_long_range_impl(os, dn, offset, length, B_TRUE); + + /* + * It is important to zero out the maxblkid when freeing the entire + * file, so that (a) subsequent calls to dmu_free_long_range_impl() + * will take the fast path, and (b) dnode_reallocate() can verify + * that the entire file has been freed. + */ + if (err == 0 && offset == 0 && length == DMU_OBJECT_END) + dn->dn_maxblkid = 0; + + dnode_rele(dn, FTAG); + return (err); +} + +static int +dmu_free_long_object_impl(objset_t *os, uint64_t object, boolean_t raw) { dmu_tx_t *tx; int err; @@ -848,7 +911,11 @@ dmu_free_long_object(objset_t *os, uint64_t object) dmu_tx_mark_netfree(tx); err = dmu_tx_assign(tx, TXG_WAIT); if (err == 0) { - err = dmu_object_free(os, object, tx); + if (raw) + err = dmu_object_dirty_raw(os, object, tx); + if (err == 0) + err = dmu_object_free(os, object, tx); + dmu_tx_commit(tx); } else { dmu_tx_abort(tx); @@ -857,6 +924,19 @@ dmu_free_long_object(objset_t *os, uint64_t object) return (err); } +int +dmu_free_long_object(objset_t *os, uint64_t object) +{ + return (dmu_free_long_object_impl(os, object, B_FALSE)); +} + +int +dmu_free_long_object_raw(objset_t *os, uint64_t object) +{ + return (dmu_free_long_object_impl(os, object, B_TRUE)); +} + + int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx) @@ -1470,28 +1550,95 @@ dmu_return_arcbuf(arc_buf_t *buf) arc_buf_destroy(buf, FTAG); } +void +dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, const uint8_t *salt, + const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx) +{ + dmu_object_type_t type; + dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle; + uint64_t dsobj = dmu_objset_id(db->db_objset); + + ASSERT3P(db->db_buf, 
!=, NULL); + ASSERT3U(dsobj, !=, 0); + + dmu_buf_will_change_crypt_params(handle, tx); + + DB_DNODE_ENTER(db); + type = DB_DNODE(db)->dn_type; + DB_DNODE_EXIT(db); + + /* + * This technically violates the assumption the dmu code makes + * that dnode blocks are only released in syncing context. + */ + (void) arc_release(db->db_buf, db); + arc_convert_to_raw(db->db_buf, dsobj, byteorder, type, salt, iv, mac); +} + +void +dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset, + dmu_buf_t *handle, dmu_tx_t *tx) +{ + dmu_buf_t *dst_handle; + dmu_buf_impl_t *dstdb; + dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle; + arc_buf_t *abuf; + uint64_t datalen; + boolean_t byteorder; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + + ASSERT3P(srcdb->db_buf, !=, NULL); + + /* hold the db that we want to write to */ + VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &dst_handle, + DMU_READ_NO_DECRYPT)); + dstdb = (dmu_buf_impl_t *)dst_handle; + datalen = arc_buf_size(srcdb->db_buf); + + /* allocated an arc buffer that matches the type of srcdb->db_buf */ + if (arc_is_encrypted(srcdb->db_buf)) { + arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac); + abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os), + byteorder, salt, iv, mac, DB_DNODE(dstdb)->dn_type, + datalen, arc_buf_lsize(srcdb->db_buf), + arc_get_compression(srcdb->db_buf)); + } else { + /* we won't get a compressed db back from dmu_buf_hold() */ + ASSERT3U(arc_get_compression(srcdb->db_buf), + ==, ZIO_COMPRESS_OFF); + abuf = arc_loan_buf(os->os_spa, + DMU_OT_IS_METADATA(DB_DNODE(dstdb)->dn_type), datalen); + } + + ASSERT3U(datalen, ==, arc_buf_size(abuf)); + + /* copy the data to the new buffer and assign it to the dstdb */ + bcopy(srcdb->db_buf->b_data, abuf->b_data, datalen); + dbuf_assign_arcbuf(dstdb, abuf, tx); + dmu_buf_rele(dst_handle, FTAG); +} + /* * When possible directly assign passed loaned arc buffer to a dbuf. 
* If this is not possible copy the contents of passed arc buf via * dmu_write(). */ void -dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, +dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf, dmu_tx_t *tx) { - dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle; - dnode_t *dn; dmu_buf_impl_t *db; + objset_t *os = dn->dn_objset; + uint64_t object = dn->dn_object; uint32_t blksz = (uint32_t)arc_buf_lsize(buf); uint64_t blkid; - DB_DNODE_ENTER(dbuf); - dn = DB_DNODE(dbuf); rw_enter(&dn->dn_struct_rwlock, RW_READER); blkid = dbuf_whichblock(dn, 0, offset); VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL); rw_exit(&dn->dn_struct_rwlock); - DB_DNODE_EXIT(dbuf); /* * We can only assign if the offset is aligned, the arc buf is the @@ -1501,19 +1648,10 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, dbuf_assign_arcbuf(db, buf, tx); dbuf_rele(db, FTAG); } else { - objset_t *os; - uint64_t object; - /* compressed bufs must always be assignable to their dbuf */ ASSERT3U(arc_get_compression(buf), ==, ZIO_COMPRESS_OFF); ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED)); - DB_DNODE_ENTER(dbuf); - dn = DB_DNODE(dbuf); - os = dn->dn_objset; - object = dn->dn_object; - DB_DNODE_EXIT(dbuf); - dbuf_rele(db, FTAG); dmu_write(os, object, offset, blksz, buf->b_data, tx); dmu_return_arcbuf(buf); @@ -1521,6 +1659,17 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, } } +void +dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, + dmu_tx_t *tx) +{ + dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle; + + DB_DNODE_ENTER(dbuf); + dmu_assign_arcbuf_by_dnode(DB_DNODE(dbuf), offset, buf, tx); + DB_DNODE_EXIT(dbuf); +} + typedef struct { dbuf_dirty_record_t *dsa_dr; dmu_sync_cb_t *dsa_done; @@ -1545,7 +1694,7 @@ dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg) BP_SET_LSIZE(bp, db->db_size); } else if (!BP_IS_EMBEDDED(bp)) { ASSERT(BP_GET_LEVEL(bp) == 0); - bp->blk_fill = 1; + 
BP_SET_FILL(bp, 1); } } } @@ -1857,6 +2006,20 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) return (0); } +int +dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels, dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + err = dnode_set_nlevels(dn, nlevels, tx); + dnode_rele(dn, FTAG); + return (err); +} + int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, dmu_tx_t *tx) @@ -1911,6 +2074,25 @@ dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, dnode_rele(dn, FTAG); } +/* + * Dirty an object and set the dirty record's raw flag. This is used + * when writing raw data to an object that will not affect the + * encryption parameters, specifically during raw receives. + */ +int +dmu_object_dirty_raw(objset_t *os, uint64_t object, dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + dmu_buf_will_change_crypt_params((dmu_buf_t *)dn->dn_dbuf, tx); + dnode_rele(dn, FTAG); + return (err); +} + int zfs_mdcomp_disable = 0; /* @@ -1931,6 +2113,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) boolean_t dedup = B_FALSE; boolean_t nopwrite = B_FALSE; boolean_t dedup_verify = os->os_dedup_verify; + boolean_t encrypt = B_FALSE; int copies = os->os_copies; /* @@ -2018,16 +2201,42 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled); } - zp->zp_checksum = checksum; - zp->zp_compress = compress; - ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT); + /* + * All objects in an encrypted objset are protected from modification + * via a MAC. Encrypted objects store their IV and salt in the last DVA + * in the bp, so we cannot use all copies.
Encrypted objects are also + * not subject to nopwrite since writing the same data will still + * result in a new ciphertext. Only encrypted blocks can be dedup'd + * to avoid ambiguity in the dedup code since the DDT does not store + * object types. + */ + if (os->os_encrypted && (wp & WP_NOFILL) == 0) { + encrypt = B_TRUE; + if (DMU_OT_IS_ENCRYPTED(type)) { + copies = MIN(copies, SPA_DVAS_PER_BP - 1); + nopwrite = B_FALSE; + } else { + dedup = B_FALSE; + } + + if (type == DMU_OT_DNODE || type == DMU_OT_OBJSET) + compress = ZIO_COMPRESS_EMPTY; + } + + zp->zp_compress = compress; + zp->zp_checksum = checksum; zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type; zp->zp_level = level; zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa)); zp->zp_dedup = dedup; zp->zp_dedup_verify = dedup && dedup_verify; zp->zp_nopwrite = nopwrite; + zp->zp_encrypt = encrypt; + zp->zp_byteorder = ZFS_HOST_BYTEORDER; + bzero(zp->zp_salt, ZIO_DATA_SALT_LEN); + bzero(zp->zp_iv, ZIO_DATA_IV_LEN); + bzero(zp->zp_mac, ZIO_DATA_MAC_LEN); } int diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c index 6bf61854b7e9..f62ed0f44aa7 100644 --- a/usr/src/uts/common/fs/zfs/dmu_objset.c +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c @@ -53,6 +53,7 @@ #include #include #include +#include /* * Needed to close a window in dnode_move() that allows the objset to be freed @@ -355,16 +356,23 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, if (!BP_IS_HOLE(os->os_rootbp)) { arc_flags_t aflags = ARC_FLAG_WAIT; zbookmark_phys_t zb; + enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; SET_BOOKMARK(&zb, ds ? 
ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); if (DMU_OS_IS_L2CACHEABLE(os)) aflags |= ARC_FLAG_L2CACHE; + if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) { + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + ASSERT(BP_IS_AUTHENTICATED(bp)); + zio_flags |= ZIO_FLAG_RAW; + } + dprintf_bp(os->os_rootbp, "reading %s", ""); err = arc_read(NULL, spa, os->os_rootbp, arc_getbuf_func, &os->os_phys_buf, - ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); + ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (err != 0) { kmem_free(os, sizeof (objset_t)); /* convert checksum errors into IO errors */ @@ -405,6 +413,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, if (ds != NULL) { boolean_t needlock = B_FALSE; + os->os_encrypted = (ds->ds_dir->dd_crypto_obj != 0); + /* * Note: it's valid to open the objset if the dataset is * long-held, in which case the pool_config lock will not @@ -414,6 +424,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, needlock = B_TRUE; dsl_pool_config_enter(dmu_objset_pool(os), FTAG); } + err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), primary_cache_changed_cb, os); @@ -476,6 +487,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, /* It's the meta-objset. */ os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; os->os_compress = ZIO_COMPRESS_ON; + os->os_encrypted = B_FALSE; os->os_copies = spa_max_replication(spa); os->os_dedup_checksum = ZIO_CHECKSUM_OFF; os->os_dedup_verify = B_FALSE; @@ -554,16 +566,18 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) * can be held at a time. */ int -dmu_objset_hold(const char *name, void *tag, objset_t **osp) +dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag, + objset_t **osp) { dsl_pool_t *dp; dsl_dataset_t *ds; int err; + ds_hold_flags_t flags = (decrypt) ? 
DS_HOLD_FLAG_DECRYPT : 0; err = dsl_pool_hold(name, tag, &dp); if (err != 0) return (err); - err = dsl_dataset_hold(dp, name, tag, &ds); + err = dsl_dataset_hold_flags(dp, name, flags, tag, &ds); if (err != 0) { dsl_pool_rele(dp, tag); return (err); @@ -578,23 +592,39 @@ dmu_objset_hold(const char *name, void *tag, objset_t **osp) return (err); } +int +dmu_objset_hold(const char *name, void *tag, objset_t **osp) +{ + return (dmu_objset_hold_flags(name, B_FALSE, tag, osp)); +} + +/* ARGSUSED */ static int dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp) + boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp) { int err; err = dmu_objset_from_ds(ds, osp); if (err != 0) { - dsl_dataset_disown(ds, tag); + return (err); } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) { - dsl_dataset_disown(ds, tag); return (SET_ERROR(EINVAL)); } else if (!readonly && dsl_dataset_is_snapshot(ds)) { - dsl_dataset_disown(ds, tag); return (SET_ERROR(EROFS)); } - return (err); + + /* if we are decrypting, we can now check MACs in os->os_phys_buf */ + if (decrypt && arc_is_unauthenticated((*osp)->os_phys_buf)) { + err = arc_untransform((*osp)->os_phys_buf, (*osp)->os_spa, + ds->ds_object, B_FALSE); + if (err != 0) + return (err); + + ASSERT0(arc_is_unauthenticated((*osp)->os_phys_buf)); + } + + return (0); } /* @@ -604,48 +634,70 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type, */ int dmu_objset_own(const char *name, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp) + boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp) { dsl_pool_t *dp; dsl_dataset_t *ds; int err; + ds_hold_flags_t flags = (decrypt) ? 
DS_HOLD_FLAG_DECRYPT : 0; err = dsl_pool_hold(name, FTAG, &dp); if (err != 0) return (err); - err = dsl_dataset_own(dp, name, tag, &ds); + err = dsl_dataset_own(dp, name, flags, tag, &ds); if (err != 0) { dsl_pool_rele(dp, FTAG); return (err); } - err = dmu_objset_own_impl(ds, type, readonly, tag, osp); + err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp); + if (err != 0) { + dsl_dataset_disown(ds, flags, tag); + dsl_pool_rele(dp, FTAG); + return (err); + } + dsl_pool_rele(dp, FTAG); - return (err); + return (0); } int dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp) + boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp) { dsl_dataset_t *ds; int err; + ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0; - err = dsl_dataset_own_obj(dp, obj, tag, &ds); + err = dsl_dataset_own_obj(dp, obj, flags, tag, &ds); if (err != 0) return (err); - return (dmu_objset_own_impl(ds, type, readonly, tag, osp)); + err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp); + if (err != 0) { + dsl_dataset_disown(ds, flags, tag); + return (err); + } + + return (0); } void -dmu_objset_rele(objset_t *os, void *tag) +dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag) { + ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0; + dsl_pool_t *dp = dmu_objset_pool(os); - dsl_dataset_rele(os->os_dsl_dataset, tag); + dsl_dataset_rele_flags(os->os_dsl_dataset, flags, tag); dsl_pool_rele(dp, tag); } +void +dmu_objset_rele(objset_t *os, void *tag) +{ + dmu_objset_rele_flags(os, B_FALSE, tag); +} + /* * When we are called, os MUST refer to an objset associated with a dataset * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner @@ -658,11 +710,11 @@ dmu_objset_rele(objset_t *os, void *tag) * same name so that it can be partially torn down and reconstructed. 
*/ void -dmu_objset_refresh_ownership(objset_t *os, void *tag) +dmu_objset_refresh_ownership(objset_t *os, boolean_t decrypt, void *tag) { dsl_pool_t *dp; dsl_dataset_t *ds, *newds; - char name[ZFS_MAX_DATASET_NAME_LEN]; + char name[MAXNAMELEN]; ds = os->os_dsl_dataset; VERIFY3P(ds, !=, NULL); @@ -672,16 +724,18 @@ dmu_objset_refresh_ownership(objset_t *os, void *tag) dsl_dataset_name(ds, name); dp = dmu_objset_pool(os); dsl_pool_config_enter(dp, FTAG); - dmu_objset_disown(os, tag); - VERIFY0(dsl_dataset_own(dp, name, tag, &newds)); + dmu_objset_disown(os, decrypt, tag); + VERIFY0(dsl_dataset_own(dp, name, + (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, &newds)); VERIFY3P(newds, ==, os->os_dsl_dataset); dsl_pool_config_exit(dp, FTAG); } void -dmu_objset_disown(objset_t *os, void *tag) +dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag) { - dsl_dataset_disown(os->os_dsl_dataset, tag); + dsl_dataset_disown(os->os_dsl_dataset, + (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag); } void @@ -758,6 +812,8 @@ dmu_objset_evict(objset_t *os) } else { mutex_exit(&os->os_lock); } + + } void @@ -800,16 +856,21 @@ dmu_objset_snap_cmtime(objset_t *os) return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir)); } -/* called from dsl for meta-objset */ +/* ARGSUSED */ objset_t * -dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, - dmu_objset_type_t type, dmu_tx_t *tx) +dmu_objset_create_impl_dnstats(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, + dmu_objset_type_t type, int levels, int blksz, int ibs, dmu_tx_t *tx) { objset_t *os; dnode_t *mdn; ASSERT(dmu_tx_is_syncing(tx)); + if (blksz == 0) + blksz = 1 << DNODE_BLOCK_SHIFT; + if (ibs == 0) + ibs = DN_MAX_INDBLKSHIFT; + if (ds != NULL) VERIFY0(dmu_objset_from_ds(ds, &os)); else @@ -817,8 +878,8 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, mdn = DMU_META_DNODE(os); - dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, - DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); + dnode_allocate(mdn, 
DMU_OT_DNODE, blksz, DN_MAX_INDBLKSHIFT, + DMU_OT_NONE, 0, tx); /* * We don't want to have to increase the meta-dnode's nlevels @@ -832,22 +893,25 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, * to convergence, so minimizing its dn_nlevels matters. */ if (ds != NULL) { - int levels = 1; - - /* - * Determine the number of levels necessary for the meta-dnode - * to contain DN_MAX_OBJECT dnodes. Note that in order to - * ensure that we do not overflow 64 bits, there has to be - * a nlevels that gives us a number of blocks > DN_MAX_OBJECT - * but < 2^64. Therefore, - * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT) (10) must be - * less than (64 - log2(DN_MAX_OBJECT)) (16). - */ - while ((uint64_t)mdn->dn_nblkptr << - (mdn->dn_datablkshift - DNODE_SHIFT + - (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < - DN_MAX_OBJECT) - levels++; + if (levels == 0) { + levels = 1; + + /* + * Determine the number of levels necessary for the + * meta-dnode to contain DN_MAX_OBJECT dnodes. Note + * that in order to ensure that we do not overflow + * 64 bits, there has to be a nlevels that gives us a + * number of blocks > DN_MAX_OBJECT but < 2^64. + * Therefore, (mdn->dn_indblkshift - SPA_BLKPTRSHIFT) + * (10) must be less than (64 - log2(DN_MAX_OBJECT)) + * (16). + */ + while ((uint64_t)mdn->dn_nblkptr << + (mdn->dn_datablkshift - DNODE_SHIFT + (levels - 1) * + (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < + DN_MAX_OBJECT) + levels++; + } mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = mdn->dn_nlevels = levels; @@ -857,7 +921,13 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, ASSERT(type != DMU_OST_ANY); ASSERT(type < DMU_OST_NUMTYPES); os->os_phys->os_type = type; - if (dmu_objset_userused_enabled(os)) { + + /* + * Enable user accounting if it is enabled and this is not an + * encrypted receive. 
+ */ + if (dmu_objset_userused_enabled(os) && + (!os->os_encrypted || !dmu_objset_is_receiving(os))) { os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; os->os_flags = os->os_phys->os_flags; } @@ -867,6 +937,14 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, return (os); } +/* called from dsl for meta-objset */ +objset_t * +dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, + dmu_objset_type_t type, dmu_tx_t *tx) +{ + return (dmu_objset_create_impl_dnstats(spa, ds, bp, type, 0, 0, 0, tx)); +} + typedef struct dmu_objset_create_arg { const char *doca_name; cred_t *doca_cred; @@ -875,6 +953,7 @@ typedef struct dmu_objset_create_arg { void *doca_userarg; dmu_objset_type_t doca_type; uint64_t doca_flags; + dsl_crypto_params_t *doca_dcp; } dmu_objset_create_arg_t; /*ARGSUSED*/ @@ -900,8 +979,16 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx) dsl_dir_rele(pdd, FTAG); return (SET_ERROR(EEXIST)); } + + error = dmu_objset_create_crypt_check(pdd, doca->doca_dcp); + if (error != 0) { + dsl_dir_rele(pdd, FTAG); + return (error); + } + error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL, doca->doca_cred); + dsl_dir_rele(pdd, FTAG); return (error); @@ -918,13 +1005,15 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx) uint64_t obj; blkptr_t *bp; objset_t *os; + zio_t *rzio; VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail)); obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags, - doca->doca_cred, tx); + doca->doca_cred, doca->doca_dcp, tx); - VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); + VERIFY0(dsl_dataset_hold_obj_flags(pdd->dd_pool, obj, + DS_HOLD_FLAG_DECRYPT, FTAG, &ds)); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); bp = dsl_dataset_get_blkptr(ds); os = dmu_objset_create_impl(pdd->dd_pool->dp_spa, @@ -936,16 +1025,61 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx) doca->doca_cred, tx); } + /* + * The doca_userfunc() may write out some data that needs to be + 
* encrypted if the dataset is encrypted (specifically the root + * directory). This data must be written out before the encryption + * key mapping is removed by dsl_dataset_rele_flags(). Force the + * I/O to occur immediately by invoking the relevant sections of + * dsl_pool_sync(). + */ + if (os->os_encrypted) { + dsl_dataset_t *tmpds = NULL; + boolean_t need_sync_done = B_FALSE; + + mutex_enter(&ds->ds_lock); + ds->ds_owner = FTAG; + mutex_exit(&ds->ds_lock); + + rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds, + tx->tx_txg); + if (tmpds != NULL) { + dsl_dataset_sync(ds, rzio, tx); + need_sync_done = B_TRUE; + } + VERIFY0(zio_wait(rzio)); + dmu_objset_do_userquota_updates(os, tx); + taskq_wait(dp->dp_sync_taskq); + + rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds, + tx->tx_txg); + if (tmpds != NULL) { + dmu_buf_rele(ds->ds_dbuf, ds); + dsl_dataset_sync(ds, rzio, tx); + } + VERIFY0(zio_wait(rzio)); + + if (need_sync_done) + dsl_dataset_sync_done(ds, tx); + + mutex_enter(&ds->ds_lock); + ds->ds_owner = NULL; + mutex_exit(&ds->ds_lock); + } + spa_history_log_internal_ds(ds, "create", tx, ""); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); dsl_dir_rele(pdd, FTAG); } int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, - void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg) + dsl_crypto_params_t *dcp, dmu_objset_create_sync_func_t func, void *arg) { dmu_objset_create_arg_t doca; + dsl_crypto_params_t tmp_dcp = { 0 }; doca.doca_name = name; doca.doca_cred = CRED(); @@ -954,9 +1088,19 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, doca.doca_userarg = arg; doca.doca_type = type; + /* + * Some callers (mostly for testing) do not provide a dcp on their + * own but various code inside the sync task will 
require it to be + * allocated. Rather than adding NULL checks throughout this code + * or adding dummy dcp's to all of the callers we simply create a + * dummy one here and use that. This zero dcp will have the same + * effect as asking for inheritance of all encryption params. + */ + doca.doca_dcp = (dcp != NULL) ? dcp : &tmp_dcp; + return (dsl_sync_task(name, dmu_objset_create_check, dmu_objset_create_sync, &doca, - 5, ZFS_SPACE_CHECK_NORMAL)); + 6, ZFS_SPACE_CHECK_NORMAL)); } typedef struct dmu_objset_clone_arg { @@ -996,18 +1140,29 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx) dsl_dir_rele(pdd, FTAG); return (SET_ERROR(EDQUOT)); } - dsl_dir_rele(pdd, FTAG); error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin); - if (error != 0) + if (error != 0) { + dsl_dir_rele(pdd, FTAG); return (error); + } /* You can only clone snapshots, not the head datasets. */ if (!origin->ds_is_snapshot) { dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(pdd, FTAG); return (SET_ERROR(EINVAL)); } + + error = dmu_objset_clone_crypt_check(pdd, origin->ds_dir); + if (error != 0) { + dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(pdd, FTAG); + return (error); + } + dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(pdd, FTAG); return (0); } @@ -1027,7 +1182,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx) VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin)); obj = dsl_dataset_create_sync(pdd, tail, origin, 0, - doca->doca_cred, tx); + doca->doca_cred, NULL, tx); VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); dsl_dataset_name(origin, namebuf); @@ -1049,7 +1204,7 @@ dmu_objset_clone(const char *clone, const char *origin) return (dsl_sync_task(clone, dmu_objset_clone_check, dmu_objset_clone_sync, &doca, - 5, ZFS_SPACE_CHECK_NORMAL)); + 6, ZFS_SPACE_CHECK_NORMAL)); } int @@ -1101,10 +1256,10 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) blkptr_t *bp = zio->io_bp; objset_t *os = arg; dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; +
uint64_t fill = 0; ASSERT(!BP_IS_EMBEDDED(bp)); ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); - ASSERT0(BP_GET_LEVEL(bp)); /* * Update rootbp fill count: it should be the number of objects @@ -1112,9 +1267,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) * objects that are stored in the objset_phys_t -- the meta * dnode and user/group accounting objects). */ - bp->blk_fill = 0; for (int i = 0; i < dnp->dn_nblkptr; i++) - bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]); + fill += BP_GET_FILL(&dnp->dn_blkptr[i]); + + BP_SET_FILL(bp, fill); + if (os->os_dsl_dataset != NULL) rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG); *os->os_rootbp = *bp; @@ -1203,6 +1360,19 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) dmu_write_policy(os, NULL, 0, 0, &zp); + /* + * If we are either claiming the ZIL or doing a raw receive write out + * the os_phys_buf raw. Neither of these actions will affect the MAC + * at this point. + */ + if (arc_is_unauthenticated(os->os_phys_buf) || os->os_next_write_raw) { + ASSERT(os->os_encrypted); + os->os_next_write_raw = B_FALSE; + arc_convert_to_raw(os->os_phys_buf, + os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER, + DMU_OT_OBJSET, NULL, NULL, NULL); + } + zio = arc_write(pio, os->os_spa, tx->tx_txg, blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done, @@ -1226,7 +1396,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) txgoff = tx->tx_txg & TXG_MASK; - if (dmu_objset_userused_enabled(os)) { + if (dmu_objset_userused_enabled(os) && + (!os->os_encrypted || !dmu_objset_is_receiving(os))) { /* * We must create the list here because it uses the * dn_dirty_link[] of this txg.
But it may already @@ -1463,6 +1634,10 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) if (!dmu_objset_userused_enabled(os)) return; + /* if this is a raw receive just return and handle accounting later */ + if (os->os_encrypted && dmu_objset_is_receiving(os)) + return; + /* Allocate the user/groupused objects if necessary. */ if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) { VERIFY0(zap_create_claim(os, @@ -1542,6 +1717,18 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx) if (!dmu_objset_userused_enabled(dn->dn_objset)) return; + /* + * Raw receives introduce a problem with user accounting. Raw + * receives cannot update the user accounting info because the + * user ids and the sizes are encrypted. To guarantee that we + * never end up with bad user accounting, we simply disable it + * during raw receives. We also disable this for normal receives + * so that an incremental raw receive may be done on top of an + * existing non-raw receive. + */ + if (os->os_encrypted && dmu_objset_is_receiving(os)) + return; + if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST| DN_ID_CHKED_SPILL))) return; diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c index 9a2dad407351..1c55b1e9c7e8 100644 --- a/usr/src/uts/common/fs/zfs/dmu_send.c +++ b/usr/src/uts/common/fs/zfs/dmu_send.c @@ -98,18 +98,17 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf, int len) ssize_t resid; /* have to get resid to get detailed errno */ /* - * The code does not rely on this (len being a multiple of 8). We keep + * The code does not rely on len being a multiple of 8. We keep * this assertion because of the corresponding assertion in * receive_read(). Keeping this assertion ensures that we do not * inadvertently break backwards compatibility (causing the assertion - * in receive_read() to trigger on old software). 
- * - * Removing the assertions could be rolled into a new feature that uses - * data that isn't 8-byte aligned; if the assertions were removed, a - * feature flag would have to be added. + * in receive_read() to trigger on old software). Newer feature flags + * (such as raw send) may break this assertion since they were + * introduced after the requirement was made obsolete. */ - ASSERT0(len % 8); + ASSERT(len % 8 == 0 || + (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0); dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp, (caddr_t)buf, len, @@ -248,11 +247,11 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, } static int -dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, - uint64_t object, uint64_t offset, int lsize, int psize, const blkptr_t *bp, - void *data) +dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, uint64_t object, + uint64_t offset, int lsize, int psize, const blkptr_t *bp, void *data) { uint64_t payload_size; + boolean_t raw = (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW); struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); /* @@ -285,16 +284,36 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, drrw->drr_toguid = dsp->dsa_toguid; drrw->drr_logical_size = lsize; - /* only set the compression fields if the buf is compressed */ - if (lsize != psize) { - ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_COMPRESSED); + /* only set the compression fields if the buf is compressed or raw */ + if (raw || lsize != psize) { ASSERT(!BP_IS_EMBEDDED(bp)); - ASSERT(!BP_SHOULD_BYTESWAP(bp)); - ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp))); - ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF); ASSERT3S(psize, >, 0); - ASSERT3S(lsize, >=, psize); + if (raw) { + ASSERT(BP_IS_PROTECTED(bp)); + + /* + * This is a raw protected block so we need to pass + * along everything the receiving side will need to + * interpret this block, including the byteswap, salt, + * IV, and MAC. 
+ */ + if (BP_SHOULD_BYTESWAP(bp)) + drrw->drr_flags |= DRR_RAW_BYTESWAP; + zio_crypt_decode_params_bp(bp, drrw->drr_salt, + drrw->drr_iv); + zio_crypt_decode_mac_bp(bp, drrw->drr_mac); + } else { + /* this is a compressed block */ + ASSERT(dsp->dsa_featureflags & + DMU_BACKUP_FEATURE_COMPRESSED); + ASSERT(!BP_SHOULD_BYTESWAP(bp)); + ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp))); + ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF); + ASSERT3S(lsize, >=, psize); + } + + /* set fields common to compressed and raw sends */ drrw->drr_compressiontype = BP_GET_COMPRESS(bp); drrw->drr_compressed_size = psize; payload_size = drrw->drr_compressed_size; @@ -302,22 +321,23 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, payload_size = drrw->drr_logical_size; } - if (bp == NULL || BP_IS_EMBEDDED(bp)) { + if (bp == NULL || BP_IS_EMBEDDED(bp) || (BP_IS_PROTECTED(bp) && !raw)) { /* - * There's no pre-computed checksum for partial-block - * writes or embedded BP's, so (like - * fletcher4-checkummed blocks) userland will have to - * compute a dedup-capable checksum itself. + * There's no pre-computed checksum for partial-block writes, + * embedded BP's, or encrypted BP's that are being sent as + * plaintext, so (like fletcher4-checksummed blocks) userland + * will have to compute a dedup-capable checksum itself. 
*/ drrw->drr_checksumtype = ZIO_CHECKSUM_OFF; } else { drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); if (zio_checksum_table[drrw->drr_checksumtype].ci_flags & ZCHECKSUM_FLAG_DEDUP) - drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; + drrw->drr_flags |= DRR_CHECKSUM_DEDUP; DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); + DDK_SET_CRYPT(&drrw->drr_key, BP_IS_PROTECTED(bp)); drrw->drr_key.ddk_cksum = bp->blk_cksum; } @@ -361,9 +381,10 @@ dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, } static int -dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) +dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data) { struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); + uint64_t blksz = BP_GET_LSIZE(bp); if (dsp->dsa_pending_op != PENDING_NONE) { if (dump_record(dsp, NULL, 0) != 0) @@ -378,6 +399,18 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) drrs->drr_length = blksz; drrs->drr_toguid = dsp->dsa_toguid; + /* handle raw send fields */ + if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) { + ASSERT(BP_IS_PROTECTED(bp)); + + if (BP_SHOULD_BYTESWAP(bp)) + drrs->drr_flags |= DRR_RAW_BYTESWAP; + drrs->drr_compressiontype = BP_GET_COMPRESS(bp); + drrs->drr_compressed_size = BP_GET_PSIZE(bp); + zio_crypt_decode_params_bp(bp, drrs->drr_salt, drrs->drr_iv); + zio_crypt_decode_mac_bp(bp, drrs->drr_mac); + } + if (dump_record(dsp, data, blksz) != 0) return (SET_ERROR(EINTR)); return (0); @@ -430,9 +463,11 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) } static int -dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) +dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, + dnode_phys_t *dnp) { struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); + int bonuslen; if (object < dsp->dsa_resume_object) { /* @@ -472,11 
+507,33 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE) drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE; - if (dump_record(dsp, DN_BONUS(dnp), - P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) { - return (SET_ERROR(EINTR)); + bonuslen = P2ROUNDUP(dnp->dn_bonuslen, 8); + + if ((dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW)) { + ASSERT(BP_IS_ENCRYPTED(bp)); + + if (BP_SHOULD_BYTESWAP(bp)) + drro->drr_flags |= DRR_RAW_BYTESWAP; + + /* needed for reconstructing dnp on recv side */ + drro->drr_indblkshift = dnp->dn_indblkshift; + drro->drr_nlevels = dnp->dn_nlevels; + drro->drr_nblkptr = dnp->dn_nblkptr; + + /* + * Since we encrypt the entire bonus area, the (raw) part + * beyond the bonuslen is actually nonzero, so we need + * to send it. + */ + if (bonuslen != 0) { + drro->drr_raw_bonuslen = DN_MAX_BONUS_LEN(dnp); + bonuslen = drro->drr_raw_bonuslen; + } } + if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0) + return (SET_ERROR(EINTR)); + /* Free anything past the end of the file. 
*/ if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0) @@ -486,6 +543,41 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) return (0); } +static int +dump_object_range(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t firstobj, + uint64_t numslots) +{ + struct drr_object_range *drror = + &(dsp->dsa_drr->drr_u.drr_object_range); + + /* we only use this record type for raw sends */ + ASSERT(BP_IS_PROTECTED(bp)); + ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW); + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_DNODE); + ASSERT0(BP_GET_LEVEL(bp)); + + if (dsp->dsa_pending_op != PENDING_NONE) { + if (dump_record(dsp, NULL, 0) != 0) + return (SET_ERROR(EINTR)); + dsp->dsa_pending_op = PENDING_NONE; + } + + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_OBJECT_RANGE; + drror->drr_firstobj = firstobj; + drror->drr_numslots = numslots; + drror->drr_toguid = dsp->dsa_toguid; + if (BP_SHOULD_BYTESWAP(bp)) + drror->drr_flags |= DRR_RAW_BYTESWAP; + zio_crypt_decode_params_bp(bp, drror->drr_salt, drror->drr_iv); + zio_crypt_decode_mac_bp(bp, drror->drr_mac); + + if (dump_record(dsp, NULL, 0) != 0) + return (SET_ERROR(EINTR)); + return (0); +} + static boolean_t backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp) { @@ -529,6 +621,7 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT || zb->zb_object >= sta->resume.zb_object); + ASSERT3P(sta->ds, !=, NULL); if (sta->cancel) return (SET_ERROR(EINTR)); @@ -601,6 +694,18 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT || zb->zb_object >= dsa->dsa_resume_object); + /* + * All bps of an encrypted os should have the encryption bit set. + * If this is not true it indicates tampering and we report an error. 
+ */ + if (dsa->dsa_os->os_encrypted && + !BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) { + spa_log_error(spa, zb); + zfs_panic_recover("unencrypted block in encrypted " + "object set %llu", ds->ds_object); + return (SET_ERROR(EIO)); + } + if (zb->zb_object != DMU_META_DNODE_OBJECT && DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { return (0); @@ -616,36 +721,60 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { return (0); } else if (type == DMU_OT_DNODE) { - int blksz = BP_GET_LSIZE(bp); + int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf; + enum zio_flag zioflags = ZIO_FLAG_CANFAIL; + + if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) { + ASSERT(BP_IS_ENCRYPTED(bp)); + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + zioflags |= ZIO_FLAG_RAW; + } ASSERT0(zb->zb_level); if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, - &aflags, zb) != 0) + ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) return (SET_ERROR(EIO)); dnode_phys_t *blk = abuf->b_data; - uint64_t dnobj = zb->zb_blkid * (blksz >> DNODE_SHIFT); - for (int i = 0; i < blksz >> DNODE_SHIFT; i++) { - err = dump_dnode(dsa, dnobj + i, blk + i); - if (err != 0) - break; + uint64_t dnobj = zb->zb_blkid * epb; + + /* + * Raw sends require sending encryption parameters for the + * block of dnodes. Regular sends do not need to send this + * info. 
+ */ + if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) { + ASSERT(arc_is_encrypted(abuf)); + err = dump_object_range(dsa, bp, dnobj, epb); + } + + if (err == 0) { + for (int i = 0; i < epb; + i += blk[i].dn_extra_slots + 1) { + err = dump_dnode(dsa, bp, dnobj + i, blk + i); + if (err != 0) + break; + } } arc_buf_destroy(abuf, &abuf); } else if (type == DMU_OT_SA) { arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf; - int blksz = BP_GET_LSIZE(bp); + enum zio_flag zioflags = ZIO_FLAG_CANFAIL; + + if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) { + ASSERT(BP_IS_PROTECTED(bp)); + zioflags |= ZIO_FLAG_RAW; + } if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, - &aflags, zb) != 0) + ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) return (SET_ERROR(EIO)); - err = dump_spill(dsa, zb->zb_object, blksz, abuf->b_data); + err = dump_spill(dsa, bp, zb->zb_object, abuf->b_data); arc_buf_destroy(abuf, &abuf); } else if (backup_do_embed(dsa, bp)) { /* it's an embedded level-0 block of a regular object */ @@ -667,6 +796,14 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) */ boolean_t split_large_blocks = blksz > SPA_OLD_MAXBLOCKSIZE && !(dsa->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS); + + /* + * Raw sends require that we always get raw data as it exists + * on disk, so we assert that we are not splitting blocks here. 
+ */ + boolean_t request_raw = + (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0; + /* * We should only request compressed data from the ARC if all * the following are true: @@ -682,6 +819,8 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) !split_large_blocks && !BP_SHOULD_BYTESWAP(bp) && !BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp)); + IMPLY(request_raw, !split_large_blocks); + IMPLY(request_raw, BP_IS_PROTECTED(bp)); ASSERT0(zb->zb_level); ASSERT(zb->zb_object > dsa->dsa_resume_object || (zb->zb_object == dsa->dsa_resume_object && @@ -695,8 +834,11 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) ASSERT3U(blksz, ==, BP_GET_LSIZE(bp)); enum zio_flag zioflags = ZIO_FLAG_CANFAIL; - if (request_compressed) + if (request_raw) zioflags |= ZIO_FLAG_RAW; + else if (request_compressed) + zioflags |= ZIO_FLAG_RAW_COMPRESS; + if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) { if (zfs_send_corrupt_data) { @@ -716,6 +858,7 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) offset = zb->zb_blkid * blksz; if (split_large_blocks) { + ASSERT0(arc_is_encrypted(abuf)); ASSERT3U(arc_get_compression(abuf), ==, ZIO_COMPRESS_OFF); char *buf = abuf->b_data; @@ -758,7 +901,7 @@ static int dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone, boolean_t embedok, boolean_t large_block_ok, boolean_t compressok, - int outfd, uint64_t resumeobj, uint64_t resumeoff, + boolean_t rawok, int outfd, uint64_t resumeobj, uint64_t resumeoff, vnode_t *vp, offset_t *off) { objset_t *os; @@ -775,6 +918,24 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, return (err); } + /* + * If this is a non-raw send of an encrypted ds, we can ensure that + * the objset_phys_t is authenticated. This is safe because this is + * either a snapshot or we have owned the dataset, ensuring that + * it can't be modified. 
+ */ + if (!rawok && os->os_encrypted && + arc_is_unauthenticated(os->os_phys_buf)) { + err = arc_untransform(os->os_phys_buf, os->os_spa, + to_ds->ds_object, B_FALSE); + if (err != 0) { + dsl_pool_rele(dp, tag); + return (err); + } + + ASSERT0(arc_is_unauthenticated(os->os_phys_buf)); + } + drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); drr->drr_type = DRR_BEGIN; drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; @@ -795,20 +956,27 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, } #endif - if (large_block_ok && to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS]) + /* raw sends imply large_block_ok */ + if ((large_block_ok || rawok) && + to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS]) featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS; - if (embedok && + + /* encrypted datasets will not have embedded blocks */ + if ((embedok || rawok) && !os->os_encrypted && spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) { featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA; - if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) - featureflags |= DMU_BACKUP_FEATURE_LZ4; } - if (compressok) { + + /* raw send implies compressok */ + if (compressok || rawok) featureflags |= DMU_BACKUP_FEATURE_COMPRESSED; - } + if (rawok && os->os_encrypted) + featureflags |= DMU_BACKUP_FEATURE_RAW; + if ((featureflags & - (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED)) != - 0 && spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) { + (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED | + DMU_BACKUP_FEATURE_RAW)) != 0 && + spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) { featureflags |= DMU_BACKUP_FEATURE_LZ4; } @@ -864,19 +1032,43 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, void *payload = NULL; size_t payload_len = 0; - if (resumeobj != 0 || resumeoff != 0) { - dmu_object_info_t to_doi; - err = dmu_object_info(os, resumeobj, &to_doi); - if (err != 0) - goto out; - 
SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0, - resumeoff / to_doi.doi_data_block_size); - + /* handle features that require a DRR_BEGIN payload */ + if (featureflags & + (DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_RAW)) { + nvlist_t *keynvl = NULL; nvlist_t *nvl = fnvlist_alloc(); - fnvlist_add_uint64(nvl, "resume_object", resumeobj); - fnvlist_add_uint64(nvl, "resume_offset", resumeoff); + + if (featureflags & DMU_BACKUP_FEATURE_RESUMING) { + dmu_object_info_t to_doi; + err = dmu_object_info(os, resumeobj, &to_doi); + if (err != 0) { + fnvlist_free(nvl); + goto out; + } + + SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, + resumeobj, 0, + resumeoff / to_doi.doi_data_block_size); + + fnvlist_add_uint64(nvl, "resume_object", resumeobj); + fnvlist_add_uint64(nvl, "resume_offset", resumeoff); + } + + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + ASSERT(os->os_encrypted); + + err = dsl_crypto_populate_key_nvlist(to_ds, &keynvl); + if (err != 0) { + fnvlist_free(nvl); + goto out; + } + + fnvlist_add_nvlist(nvl, "crypt_keydata", keynvl); + } + payload = fnvlist_pack(nvl, &payload_len); drr->drr_payloadlen = payload_len; + fnvlist_free(keynvl); fnvlist_free(nvl); } @@ -894,6 +1086,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, to_arg.ds = to_ds; to_arg.fromtxg = fromtxg; to_arg.flags = TRAVERSE_PRE | TRAVERSE_PREFETCH; + if (rawok) + to_arg.flags |= TRAVERSE_NO_DECRYPT; (void) thread_create(NULL, 0, send_traverse_thread, &to_arg, 0, curproc, TS_RUN, minclsyspri); @@ -940,7 +1134,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, if (dump_record(dsp, NULL, 0) != 0) err = dsp->dsa_err; - out: mutex_enter(&to_ds->ds_sendstream_lock); list_remove(&to_ds->ds_sendstreams, dsp); @@ -959,18 +1152,19 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, boolean_t embedok, boolean_t large_block_ok, boolean_t compressok, - int outfd, vnode_t 
*vp, offset_t *off) + boolean_t rawok, int outfd, vnode_t *vp, offset_t *off) { dsl_pool_t *dp; dsl_dataset_t *ds; dsl_dataset_t *fromds = NULL; + ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT; int err; err = dsl_pool_hold(pool, FTAG, &dp); if (err != 0) return (err); - err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds); + err = dsl_dataset_hold_obj_flags(dp, tosnap, dsflags, FTAG, &ds); if (err != 0) { dsl_pool_rele(dp, FTAG); return (err); @@ -982,7 +1176,7 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds); if (err != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); dsl_pool_rele(dp, FTAG); return (err); } @@ -995,24 +1189,27 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, is_clone = (fromds->ds_dir != ds->ds_dir); dsl_dataset_rele(fromds, FTAG); err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, - embedok, large_block_ok, compressok, outfd, 0, 0, vp, off); + embedok, large_block_ok, compressok, rawok, outfd, + 0, 0, vp, off); } else { err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, - embedok, large_block_ok, compressok, outfd, 0, 0, vp, off); + embedok, large_block_ok, compressok, rawok, outfd, + 0, 0, vp, off); } - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (err); } int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, - boolean_t large_block_ok, boolean_t compressok, int outfd, - uint64_t resumeobj, uint64_t resumeoff, - vnode_t *vp, offset_t *off) + boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, + int outfd, uint64_t resumeobj, uint64_t resumeoff, vnode_t *vp, + offset_t *off) { dsl_pool_t *dp; dsl_dataset_t *ds; int err; + ds_hold_flags_t dsflags = (rawok) ? 
0 : DS_HOLD_FLAG_DECRYPT; boolean_t owned = B_FALSE; if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL) @@ -1027,10 +1224,10 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, * We are sending a filesystem or volume. Ensure * that it doesn't change by owning the dataset. */ - err = dsl_dataset_own(dp, tosnap, FTAG, &ds); + err = dsl_dataset_own(dp, tosnap, dsflags, FTAG, &ds); owned = B_TRUE; } else { - err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); + err = dsl_dataset_hold_flags(dp, tosnap, dsflags, FTAG, &ds); } if (err != 0) { dsl_pool_rele(dp, FTAG); @@ -1070,22 +1267,27 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb); } if (err != 0) { - dsl_dataset_rele(ds, FTAG); + if (owned) + dsl_dataset_disown(ds, dsflags, FTAG); + else + dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_pool_rele(dp, FTAG); return (err); } err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, - embedok, large_block_ok, compressok, + embedok, large_block_ok, compressok, rawok, outfd, resumeobj, resumeoff, vp, off); } else { err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, - embedok, large_block_ok, compressok, + embedok, large_block_ok, compressok, rawok, outfd, resumeobj, resumeoff, vp, off); } if (owned) - dsl_dataset_disown(ds, FTAG); + dsl_dataset_disown(ds, dsflags, FTAG); else - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); + return (err); } @@ -1238,7 +1440,8 @@ dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg, * traverse the blocks of the snapshot with birth times after * from_txg, summing their uncompressed size */ - err = traverse_dataset(ds, from_txg, TRAVERSE_POST, + err = traverse_dataset(ds, from_txg, + TRAVERSE_POST | TRAVERSE_NO_DECRYPT, dmu_calculate_send_traversal, &size); if (err) return (err); @@ -1332,9 +1535,17 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, /* if full, then must be forced */ if 
(!drba->drba_cookie->drc_force) return (SET_ERROR(EEXIST)); - /* start from $ORIGIN@$ORIGIN, if supported */ - drba->drba_snapobj = dp->dp_origin_snap != NULL ? - dp->dp_origin_snap->ds_object : 0; + + /* + * We don't support using zfs recv -F to blow away + * encrypted filesystems. This would require the + * dsl dir to point to the old encryption key and + * the new one at the same time during the receive. + */ + if (ds->ds_dir->dd_crypto_obj != 0) + return (SET_ERROR(EINVAL)); + + drba->drba_snapobj = 0; } return (0); @@ -1349,6 +1560,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) struct drr_begin *drrb = drba->drba_cookie->drc_drrb; uint64_t fromguid = drrb->drr_fromguid; int flags = drrb->drr_flags; + ds_hold_flags_t dsflags = 0; int error; uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; @@ -1395,18 +1607,26 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS)) return (SET_ERROR(ENOTSUP)); - error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + if ((featureflags & DMU_BACKUP_FEATURE_RAW)) { + /* raw receives require the encryption feature */ + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) + return (SET_ERROR(ENOTSUP)); + } else { + dsflags |= DS_HOLD_FLAG_DECRYPT; + } + + error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds); if (error == 0) { /* target fs already exists; recv into temp clone */ /* Can't recv a clone into an existing fs */ if (flags & DRR_FLAG_CLONE || drba->drba_origin) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } error = recv_begin_check_existing_impl(drba, ds, fromguid); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); } else if (error == ENOENT) { /* target fs does not exist; must be a full backup or clone */ char buf[ZFS_MAX_DATASET_NAME_LEN]; @@ -1431,7 +1651,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* Open the parent of 
tofs */ ASSERT3U(strlen(tofs), <, sizeof (buf)); (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); - error = dsl_dataset_hold(dp, buf, FTAG, &ds); + error = dsl_dataset_hold_flags(dp, buf, dsflags, FTAG, &ds); if (error != 0) return (error); @@ -1443,39 +1663,40 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred); if (error != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (error); } error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred); if (error != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (error); } if (drba->drba_origin != NULL) { dsl_dataset_t *origin; - error = dsl_dataset_hold(dp, drba->drba_origin, - FTAG, &origin); + + error = dsl_dataset_hold_flags(dp, drba->drba_origin, + dsflags, FTAG, &origin); if (error != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (error); } if (!origin->ds_is_snapshot) { - dsl_dataset_rele(origin, FTAG); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(origin, dsflags, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } if (dsl_dataset_phys(origin)->ds_guid != fromguid && fromguid != 0) { - dsl_dataset_rele(origin, FTAG); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(origin, dsflags, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(ENODEV)); } - dsl_dataset_rele(origin, FTAG); + dsl_dataset_rele_flags(origin, dsflags, FTAG); } - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); error = 0; } return (error); @@ -1489,27 +1710,41 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) objset_t *mos = dp->dp_meta_objset; struct drr_begin *drrb = drba->drba_cookie->drc_drrb; const char *tofs = drba->drba_cookie->drc_tofs; + uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); 
dsl_dataset_t *ds, *newds; + objset_t *os; uint64_t dsobj; + ds_hold_flags_t dsflags = 0; int error; uint64_t crflags = 0; + dsl_crypto_params_t *dcpp = NULL; + dsl_crypto_params_t dcp = { 0 }; if (drrb->drr_flags & DRR_FLAG_CI_DATA) crflags |= DS_FLAG_CI_DATASET; + if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0) + dsflags |= DS_HOLD_FLAG_DECRYPT; + else + dcp.cp_cmd = DCP_CMD_RAW_RECV; - error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds); if (error == 0) { /* create temporary clone */ dsl_dataset_t *snap = NULL; + if (drba->drba_snapobj != 0) { VERIFY0(dsl_dataset_hold_obj(dp, drba->drba_snapobj, FTAG, &snap)); + } else { + /* we use the dcp whenever we are not making a clone */ + dcpp = &dcp; } + dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name, - snap, crflags, drba->drba_cred, tx); + snap, crflags, drba->drba_cred, dcpp, tx); if (drba->drba_snapobj != 0) dsl_dataset_rele(snap, FTAG); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); } else { dsl_dir_t *dd; const char *tail; @@ -1520,18 +1755,21 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) if (drba->drba_origin != NULL) { VERIFY0(dsl_dataset_hold(dp, drba->drba_origin, FTAG, &origin)); + } else { + /* we use the dcp whenever we are not making a clone */ + dcpp = &dcp; } /* Create new dataset. 
*/ - dsobj = dsl_dataset_create_sync(dd, - strrchr(tofs, '/') + 1, - origin, crflags, drba->drba_cred, tx); + dsobj = dsl_dataset_create_sync(dd, strrchr(tofs, '/') + 1, + origin, crflags, drba->drba_cred, dcpp, tx); if (origin != NULL) dsl_dataset_rele(origin, FTAG); dsl_dir_rele(dd, FTAG); drba->drba_cookie->drc_newfs = B_TRUE; } - VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); + VERIFY0(dsl_dataset_own_obj(dp, dsobj, dsflags, dmu_recv_tag, &newds)); + VERIFY0(dmu_objset_from_ds(newds, &os)); if (drba->drba_cookie->drc_resumable) { dsl_dataset_zapify(newds, tx); @@ -1551,32 +1789,46 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) 8, 1, &zero, tx)); VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_BYTES, 8, 1, &zero, tx)); - if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & - DMU_BACKUP_FEATURE_LARGE_BLOCKS) { + if (featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) { VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_LARGEBLOCK, 8, 1, &one, tx)); } - if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & - DMU_BACKUP_FEATURE_EMBED_DATA) { + if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) { VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_EMBEDOK, 8, 1, &one, tx)); } - if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & - DMU_BACKUP_FEATURE_COMPRESSED) { + if (featureflags & DMU_BACKUP_FEATURE_COMPRESSED) { VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_COMPRESSOK, 8, 1, &one, tx)); } + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_RAWOK, + 8, 1, &one, tx)); + } + } + + /* + * Usually the os->os_encrypted value is tied to the presence of a + * DSL Crypto Key object in the dd. However, that will not be received + * until dmu_recv_stream(), so we set the value manually for now. 
+ */ + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + os->os_encrypted = B_TRUE; + drba->drba_cookie->drc_raw = B_TRUE; } dmu_buf_will_dirty(newds->ds_dbuf, tx); dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT; /* - * If we actually created a non-clone, we need to create the - * objset in our new dataset. + * If we actually created a non-clone, we need to create the objset + * in our new dataset. If this is a raw send we postpone this until + * dmu_recv_stream() so that we can allocate the metadnode with the + * properties from the DRR_BEGIN payload. */ rrw_enter(&newds->ds_bp_rwlock, RW_READER, FTAG); - if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) { + if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds)) && + (featureflags & DMU_BACKUP_FEATURE_RAW) == 0) { (void) dmu_objset_create_impl(dp->dp_spa, newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); } @@ -1594,6 +1846,7 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) dsl_pool_t *dp = dmu_tx_pool(tx); struct drr_begin *drrb = drba->drba_cookie->drc_drrb; int error; + ds_hold_flags_t dsflags = 0; uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; const char *tofs = drba->drba_cookie->drc_tofs; @@ -1631,29 +1884,32 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) (void) snprintf(recvname, sizeof (recvname), "%s/%s", tofs, recv_clone_name); - if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) { + if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0) + dsflags |= DS_HOLD_FLAG_DECRYPT; + + if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) { /* %recv does not exist; continue in tofs */ - error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds); if (error != 0) return (error); } /* check that ds is marked inconsistent */ if (!DS_IS_INCONSISTENT(ds)) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } /* check that there is resuming data, and 
that the toguid matches */ if (!dsl_dataset_is_zapified(ds)) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } uint64_t val; error = zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val); if (error != 0 || drrb->drr_toguid != val) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } @@ -1663,13 +1919,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) * fails) because it will be marked inconsistent. */ if (dsl_dataset_has_owner(ds)) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EBUSY)); } /* There should not be any snapshots of this fs yet. */ if (ds->ds_prev != NULL && ds->ds_prev->ds_dir == ds->ds_dir) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } @@ -1683,11 +1939,11 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) (void) zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val); if (drrb->drr_fromguid != val) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (0); } @@ -1697,7 +1953,11 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) dmu_recv_begin_arg_t *drba = arg; dsl_pool_t *dp = dmu_tx_pool(tx); const char *tofs = drba->drba_cookie->drc_tofs; + struct drr_begin *drrb = drba->drba_cookie->drc_drrb; + uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; + objset_t *os; + ds_hold_flags_t dsflags = 0; uint64_t dsobj; /* 6 extra bytes for /%recv */ char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; @@ -1705,9 +1965,15 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) (void) snprintf(recvname, sizeof (recvname), "%s/%s", tofs, recv_clone_name); - if (dsl_dataset_hold(dp, recvname, 
FTAG, &ds) != 0) { + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + drba->drba_cookie->drc_raw = B_TRUE; + } else { + dsflags |= DS_HOLD_FLAG_DECRYPT; + } + + if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) { /* %recv does not exist; continue in tofs */ - VERIFY0(dsl_dataset_hold(dp, tofs, FTAG, &ds)); + VERIFY0(dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds)); drba->drba_cookie->drc_newfs = B_TRUE; } @@ -1716,15 +1982,17 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT; dsobj = ds->ds_object; - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); - VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &ds)); + VERIFY0(dsl_dataset_own_obj(dp, dsobj, dsflags, dmu_recv_tag, &ds)); + VERIFY0(dmu_objset_from_ds(ds, &os)); dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT; rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); - ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds))); + ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)) || + drba->drba_cookie->drc_raw); rrw_exit(&ds->ds_bp_rwlock, FTAG); drba->drba_cookie->drc_ds = ds; @@ -1786,7 +2054,7 @@ struct receive_record_arg { * If the record is a write, pointer to the arc_buf_t containing the * payload. */ - arc_buf_t *write_buf; + arc_buf_t *arc_buf; int payload_size; uint64_t bytes_read; /* bytes read from stream when record created */ boolean_t eos_marker; /* Marks the end of the stream */ @@ -1810,6 +2078,7 @@ struct receive_writer_arg { /* A map from guid to dataset to help handle dedup'd streams. 
*/ avl_tree_t *guid_to_ds_map; boolean_t resumable; + boolean_t raw; uint64_t last_object, last_offset; uint64_t bytes_read; /* bytes read when current record created */ }; @@ -1844,12 +2113,15 @@ struct receive_arg { zio_cksum_t prev_cksum; int err; boolean_t byteswap; + boolean_t raw; + uint64_t featureflags; /* Sorted list of objects not to issue prefetches for. */ struct objlist ignore_objlist; }; typedef struct guid_map_entry { uint64_t guid; + boolean_t raw; dsl_dataset_t *gme_ds; avl_node_t avlnode; } guid_map_entry_t; @@ -1876,7 +2148,8 @@ free_guid_map_onexit(void *arg) while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { dsl_dataset_long_rele(gmep->gme_ds, gmep); - dsl_dataset_rele(gmep->gme_ds, gmep); + dsl_dataset_rele_flags(gmep->gme_ds, + (gmep->raw) ? 0 : DS_HOLD_FLAG_DECRYPT, gmep); kmem_free(gmep, sizeof (guid_map_entry_t)); } avl_destroy(ca); @@ -1892,7 +2165,8 @@ receive_read(struct receive_arg *ra, int len, void *buf) * The code doesn't rely on this (lengths being multiples of 8). See * comment in dump_bytes. 
*/ - ASSERT0(len % 8); + ASSERT(len % 8 == 0 || + (ra->featureflags & DMU_BACKUP_FEATURE_RAW) != 0); while (done < len) { ssize_t resid; @@ -1945,6 +2219,7 @@ byteswap_record(dmu_replay_record_t *drr) DO32(drr_object.drr_bonustype); DO32(drr_object.drr_blksz); DO32(drr_object.drr_bonuslen); + DO32(drr_object.drr_raw_bonuslen); DO64(drr_object.drr_toguid); break; case DRR_FREEOBJECTS: @@ -1992,6 +2267,13 @@ byteswap_record(dmu_replay_record_t *drr) DO64(drr_spill.drr_object); DO64(drr_spill.drr_length); DO64(drr_spill.drr_toguid); + DO64(drr_spill.drr_compressed_size); + DO32(drr_spill.drr_type); + break; + case DRR_OBJECT_RANGE: + DO64(drr_object_range.drr_firstobj); + DO64(drr_object_range.drr_numslots); + DO64(drr_object_range.drr_toguid); break; case DRR_END: DO64(drr_end.drr_toguid); @@ -2076,6 +2358,21 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, return (SET_ERROR(EINVAL)); } + if (rwa->raw) { + if (drro->drr_raw_bonuslen < drro->drr_bonuslen || + drro->drr_indblkshift > SPA_MAXBLOCKSHIFT || + drro->drr_nlevels > DN_MAX_LEVELS || + drro->drr_nblkptr > DN_MAX_NBLKPTR || + DN_SLOTS_TO_BONUSLEN(drro->drr_dn_slots) < + drro->drr_raw_bonuslen) + return (SET_ERROR(EINVAL)); + } else { + if (drro->drr_flags != 0 || drro->drr_raw_bonuslen != 0 || + drro->drr_indblkshift != 0 || drro->drr_nlevels != 0 || + drro->drr_nblkptr != 0) + return (SET_ERROR(EINVAL)); + } + err = dmu_object_info(rwa->os, drro->drr_object, &doi); if (err != 0 && err != ENOENT) @@ -2086,15 +2383,24 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, * If we are losing blkptrs or changing the block size this must * be a new file instance. We must clear out the previous file * contents before we can change this type of metadata in the dnode. + * Raw receives will also check that the indirect structure of the + * dnode hasn't changed. 
*/ if (err == 0) { - int nblkptr; - - nblkptr = deduce_nblkptr(drro->drr_bonustype, + uint32_t indblksz = drro->drr_indblkshift ? + 1ULL << drro->drr_indblkshift : 0; + int nblkptr = deduce_nblkptr(drro->drr_bonustype, drro->drr_bonuslen); + /* nblkptr will be bounded by the bonus size and type */ + if (rwa->raw && nblkptr != drro->drr_nblkptr) + return (SET_ERROR(EINVAL)); + if (drro->drr_blksz != doi.doi_data_block_size || - nblkptr < doi.doi_nblkptr) { + nblkptr < doi.doi_nblkptr || + (rwa->raw && + (indblksz != doi.doi_metadata_block_size || + drro->drr_nlevels < doi.doi_indirection))) { err = dmu_free_long_range(rwa->os, drro->drr_object, 0, DMU_OBJECT_END); if (err != 0) @@ -2104,6 +2410,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, tx = dmu_tx_create(rwa->os); dmu_tx_hold_bonus(tx, object); + dmu_tx_hold_write(tx, object, 0, 0); err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { dmu_tx_abort(tx); @@ -2129,24 +2436,50 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, return (SET_ERROR(EINVAL)); } + if (rwa->raw) + VERIFY0(dmu_object_dirty_raw(rwa->os, drro->drr_object, tx)); + dmu_object_set_checksum(rwa->os, drro->drr_object, drro->drr_checksumtype, tx); dmu_object_set_compress(rwa->os, drro->drr_object, drro->drr_compress, tx); + /* handle more restrictive dnode structuring for raw recvs */ + if (rwa->raw) { + /* + * Set the indirect block shift and nlevels. This will not fail + * because we ensured all of the blocks were free earlier if + * this is a new object. 
+ */ + VERIFY0(dmu_object_set_blocksize(rwa->os, drro->drr_object, + drro->drr_blksz, drro->drr_indblkshift, tx)); + VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object, + drro->drr_nlevels, tx)); + } + if (data != NULL) { dmu_buf_t *db; + uint32_t flags = DMU_READ_NO_PREFETCH; + + if (rwa->raw) + flags |= DMU_READ_NO_DECRYPT; - VERIFY0(dmu_bonus_hold(rwa->os, drro->drr_object, FTAG, &db)); + VERIFY0(dmu_bonus_hold_impl(rwa->os, drro->drr_object, + FTAG, flags, &db)); dmu_buf_will_dirty(db, tx); ASSERT3U(db->db_size, >=, drro->drr_bonuslen); - bcopy(data, db->db_data, drro->drr_bonuslen); - if (rwa->byteswap) { + bcopy(data, db->db_data, DRR_OBJECT_PAYLOAD_SIZE(drro)); + + /* + * Raw bonus buffers have their byteorder determined by the + * DRR_OBJECT_RANGE record. + */ + if (rwa->byteswap && !rwa->raw) { dmu_object_byteswap_t byteswap = DMU_OT_BYTESWAP(drro->drr_bonustype); dmu_ot_byteswap[byteswap].ob_func(db->db_data, - drro->drr_bonuslen); + DRR_OBJECT_PAYLOAD_SIZE(drro)); } dmu_buf_rele(db, FTAG); } @@ -2174,7 +2507,11 @@ receive_freeobjects(struct receive_writer_arg *rwa, if (dmu_object_info(rwa->os, obj, NULL) != 0) continue; - err = dmu_free_long_object(rwa->os, obj); + if (rwa->raw) + err = dmu_free_long_object_raw(rwa->os, obj); + else + err = dmu_free_long_object(rwa->os, obj); + if (err != 0) return (err); } @@ -2187,8 +2524,9 @@ static int receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, arc_buf_t *abuf) { - dmu_tx_t *tx; int err; + dmu_tx_t *tx; + dnode_t *dn; if (drrw->drr_offset + drrw->drr_logical_size < drrw->drr_offset || !DMU_OT_IS_VALID(drrw->drr_type)) @@ -2210,7 +2548,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, return (SET_ERROR(EINVAL)); tx = dmu_tx_create(rwa->os); - dmu_tx_hold_write(tx, drrw->drr_object, drrw->drr_offset, drrw->drr_logical_size); err = dmu_tx_assign(tx, TXG_WAIT); @@ -2218,7 +2555,12 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, 
dmu_tx_abort(tx); return (err); } - if (rwa->byteswap) { + + if (rwa->raw) + VERIFY0(dmu_object_dirty_raw(rwa->os, drrw->drr_object, tx)); + + if (rwa->byteswap && !arc_is_encrypted(abuf) && + arc_get_compression(abuf) == ZIO_COMPRESS_OFF) { dmu_object_byteswap_t byteswap = DMU_OT_BYTESWAP(drrw->drr_type); dmu_ot_byteswap[byteswap].ob_func(abuf->b_data, @@ -2226,10 +2568,9 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, } /* use the bonus buf to look up the dnode in dmu_assign_arcbuf */ - dmu_buf_t *bonus; - if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0) - return (SET_ERROR(EINVAL)); - dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx); + VERIFY0(dnode_hold(rwa->os, drrw->drr_object, FTAG, &dn)); + dmu_assign_arcbuf_by_dnode(dn, drrw->drr_offset, abuf, tx); + dnode_rele(dn, FTAG); /* * Note: If the receive fails, we want the resume stream to start @@ -2239,7 +2580,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, */ save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx); dmu_tx_commit(tx); - dmu_buf_rele(bonus, FTAG); return (0); } @@ -2261,6 +2601,7 @@ receive_write_byref(struct receive_writer_arg *rwa, guid_map_entry_t *gmep; avl_index_t where; objset_t *ref_os = NULL; + int flags = DMU_READ_PREFETCH; dmu_buf_t *dbp; if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) @@ -2282,8 +2623,12 @@ receive_write_byref(struct receive_writer_arg *rwa, ref_os = rwa->os; } + if (rwa->raw) + flags |= DMU_READ_NO_DECRYPT; + + /* may return either a regular db or an encrypted one */ err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, - drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH); + drrwbr->drr_refoffset, FTAG, &dbp, flags); if (err != 0) return (err); @@ -2296,8 +2641,15 @@ receive_write_byref(struct receive_writer_arg *rwa, dmu_tx_abort(tx); return (err); } - dmu_write(rwa->os, drrwbr->drr_object, - drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); + + if (rwa->raw) { 
+ VERIFY0(dmu_object_dirty_raw(rwa->os, drrwbr->drr_object, tx)); + dmu_copy_from_buf(rwa->os, drrwbr->drr_object, + drrwbr->drr_offset, dbp, tx); + } else { + dmu_write(rwa->os, drrwbr->drr_object, + drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); + } dmu_buf_rele(dbp, FTAG); /* See comment in restore_write. */ @@ -2347,7 +2699,7 @@ receive_write_embedded(struct receive_writer_arg *rwa, static int receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, - void *data) + arc_buf_t *abuf) { dmu_tx_t *tx; dmu_buf_t *db, *db_spill; @@ -2357,6 +2709,13 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os))) return (SET_ERROR(EINVAL)); + if (rwa->raw) { + if (!DMU_OT_IS_VALID(drrs->drr_type) || + drrs->drr_compressiontype >= ZIO_COMPRESS_FUNCTIONS || + drrs->drr_compressed_size == 0) + return (SET_ERROR(EINVAL)); + } + if (dmu_object_info(rwa->os, drrs->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); @@ -2378,11 +2737,13 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, return (err); } dmu_buf_will_dirty(db_spill, tx); + if (rwa->raw) + VERIFY0(dmu_object_dirty_raw(rwa->os, drrs->drr_object, tx)); if (db_spill->db_size < drrs->drr_length) VERIFY(0 == dbuf_spill_set_blksz(db_spill, drrs->drr_length, tx)); - bcopy(data, db_spill->db_data, drrs->drr_length); + dbuf_assign_arcbuf((dmu_buf_impl_t *)db_spill, abuf, tx); dmu_buf_rele(db, FTAG); dmu_buf_rele(db_spill, FTAG); @@ -2404,24 +2765,113 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf) if (dmu_object_info(rwa->os, drrf->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); - err = dmu_free_long_range(rwa->os, drrf->drr_object, - drrf->drr_offset, drrf->drr_length); + if (rwa->raw) { + err = dmu_free_long_range_raw(rwa->os, drrf->drr_object, + drrf->drr_offset, drrf->drr_length); + } else { + err = dmu_free_long_range(rwa->os, drrf->drr_object, + drrf->drr_offset, 
drrf->drr_length); + } return (err); } +static int +receive_object_range(struct receive_writer_arg *rwa, + struct drr_object_range *drror) +{ + int ret; + dmu_tx_t *tx; + dnode_t *mdn = NULL; + dmu_buf_t *db = NULL; + uint64_t offset; + + /* + * By default, we assume this block is in our native format + * (ZFS_HOST_BYTEORDER). We then take into account whether + * the send stream is byteswapped (rwa->byteswap). Finally, + * we need to byteswap again if this particular block was + * in non-native format on the send side. + */ + boolean_t byteorder = ZFS_HOST_BYTEORDER ^ rwa->byteswap ^ + !!DRR_IS_RAW_BYTESWAPPED(drror->drr_flags); + + /* + * Since dnode block sizes are constant, we should not need to worry + * about making sure that the dnode block size is the same on the + * sending and receiving sides for the time being. For non-raw sends, + * this does not matter (and in fact we do not send a DRR_OBJECT_RANGE + * record at all). Raw sends require this record type because the + * encryption parameters are used to protect an entire block of bonus + * buffers. If the size of dnode blocks ever becomes variable, + * handling will need to be added to ensure that dnode block sizes + * match on the sending and receiving side. + */ + if (drror->drr_numslots != DNODES_PER_BLOCK || + P2PHASE(drror->drr_firstobj, DNODES_PER_BLOCK) != 0 || + !rwa->raw) + return (SET_ERROR(EINVAL)); + + offset = drror->drr_firstobj * sizeof (dnode_phys_t); + mdn = DMU_META_DNODE(rwa->os); + + tx = dmu_tx_create(rwa->os); + ret = dmu_tx_assign(tx, TXG_WAIT); + if (ret != 0) { + dmu_tx_abort(tx); + return (ret); + } + + ret = dmu_buf_hold_by_dnode(mdn, offset, FTAG, &db, + DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT); + if (ret != 0) { + dmu_tx_commit(tx); + return (ret); + } + + /* + * Convert the buffer associated with this range of dnodes to a + * raw buffer. This ensures that it will be written out as a raw + * buffer when we fill in the dnode objects in future records. 
+ * Since we are committing this tx now, it is technically possible + * for the dnode block to end up on-disk with the incorrect MAC. + * Despite this, the dataset is marked as inconsistent so no other + * code paths (apart from scrubs) will attempt to read this data. + * Scrubs will not be affected by this either since scrubs only + * read raw data and do not attempt to check the MAC. + */ + dmu_convert_to_raw(db, byteorder, drror->drr_salt, drror->drr_iv, + drror->drr_mac, tx); + dmu_buf_rele(db, FTAG); + dmu_tx_commit(tx); + return (0); +} + /* used to destroy the drc_ds on error */ static void dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) { - if (drc->drc_resumable) { - /* wait for our resume state to be written to disk */ - txg_wait_synced(drc->drc_ds->ds_dir->dd_pool, 0); - dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + dsl_dataset_t *ds = drc->drc_ds; + ds_hold_flags_t dsflags = (drc->drc_raw) ? 0 : DS_HOLD_FLAG_DECRYPT; + + /* + * Wait for the txg sync before cleaning up the receive. For + * resumable receives, this ensures that our resume state has + * been written out to disk. For raw receives, this ensures + * that the user accounting code will not attempt to do anything + * after we stopped receiving the dataset.
+ */ + txg_wait_synced(ds->ds_dir->dd_pool, 0); + + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); + if (drc->drc_resumable && !BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) { + rrw_exit(&ds->ds_bp_rwlock, FTAG); + dsl_dataset_disown(ds, dsflags, dmu_recv_tag); } else { char name[ZFS_MAX_DATASET_NAME_LEN]; - dsl_dataset_name(drc->drc_ds, name); - dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + rrw_exit(&ds->ds_bp_rwlock, FTAG); + dsl_dataset_name(ds, name); + dsl_dataset_disown(ds, dsflags, dmu_recv_tag); (void) dsl_destroy_head(name); } } @@ -2469,6 +2919,7 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf) err = receive_read(ra, sizeof (ra->next_rrd->header), &ra->next_rrd->header); ra->next_rrd->bytes_read = ra->bytes_read; + if (err != 0) { kmem_free(ra->next_rrd, sizeof (*ra->next_rrd)); ra->next_rrd = NULL; @@ -2610,9 +3061,10 @@ receive_read_record(struct receive_arg *ra) case DRR_OBJECT: { struct drr_object *drro = &ra->rrd->header.drr_u.drr_object; - uint32_t size = P2ROUNDUP(drro->drr_bonuslen, 8); + uint32_t size = DRR_OBJECT_PAYLOAD_SIZE(drro); void *buf = kmem_zalloc(size, KM_SLEEP); dmu_object_info_t doi; + err = receive_read_payload_and_next_header(ra, size, buf); if (err != 0) { kmem_free(buf, size); @@ -2640,7 +3092,18 @@ receive_read_record(struct receive_arg *ra) struct drr_write *drrw = &ra->rrd->header.drr_u.drr_write; arc_buf_t *abuf; boolean_t is_meta = DMU_OT_IS_METADATA(drrw->drr_type); - if (DRR_WRITE_COMPRESSED(drrw)) { + + if (ra->raw) { + boolean_t byteorder = ZFS_HOST_BYTEORDER ^ + !!DRR_IS_RAW_BYTESWAPPED(drrw->drr_flags) ^ + ra->byteswap; + + abuf = arc_loan_raw_buf(dmu_objset_spa(ra->os), + drrw->drr_object, byteorder, drrw->drr_salt, + drrw->drr_iv, drrw->drr_mac, drrw->drr_type, + drrw->drr_compressed_size, drrw->drr_logical_size, + drrw->drr_compressiontype); + } else if (DRR_WRITE_COMPRESSED(drrw)) { ASSERT3U(drrw->drr_compressed_size, >, 0); ASSERT3U(drrw->drr_logical_size, >=, 
drrw->drr_compressed_size); @@ -2660,7 +3123,7 @@ receive_read_record(struct receive_arg *ra) dmu_return_arcbuf(abuf); return (err); } - ra->rrd->write_buf = abuf; + ra->rrd->arc_buf = abuf; receive_read_prefetch(ra, drrw->drr_object, drrw->drr_offset, drrw->drr_logical_size); return (err); @@ -2710,11 +3173,38 @@ receive_read_record(struct receive_arg *ra) case DRR_SPILL: { struct drr_spill *drrs = &ra->rrd->header.drr_u.drr_spill; - void *buf = kmem_zalloc(drrs->drr_length, KM_SLEEP); - err = receive_read_payload_and_next_header(ra, drrs->drr_length, - buf); - if (err != 0) - kmem_free(buf, drrs->drr_length); + arc_buf_t *abuf; + int len = DRR_SPILL_PAYLOAD_SIZE(drrs); + + /* DRR_SPILL records are either raw or uncompressed */ + if (ra->raw) { + boolean_t byteorder = ZFS_HOST_BYTEORDER ^ + !!DRR_IS_RAW_BYTESWAPPED(drrs->drr_flags) ^ + ra->byteswap; + + abuf = arc_loan_raw_buf(dmu_objset_spa(ra->os), + drrs->drr_object, byteorder, drrs->drr_salt, + drrs->drr_iv, drrs->drr_mac, drrs->drr_type, + drrs->drr_compressed_size, drrs->drr_length, + drrs->drr_compressiontype); + } else { + abuf = arc_loan_buf(dmu_objset_spa(ra->os), + DMU_OT_IS_METADATA(drrs->drr_type), + drrs->drr_length); + } + + err = receive_read_payload_and_next_header(ra, len, + abuf->b_data); + if (err != 0) { + dmu_return_arcbuf(abuf); + return (err); + } + ra->rrd->arc_buf = abuf; + return (err); + } + case DRR_OBJECT_RANGE: + { + err = receive_read_payload_and_next_header(ra, 0, NULL); return (err); } default: @@ -2753,11 +3243,11 @@ receive_process_record(struct receive_writer_arg *rwa, case DRR_WRITE: { struct drr_write *drrw = &rrd->header.drr_u.drr_write; - err = receive_write(rwa, drrw, rrd->write_buf); + err = receive_write(rwa, drrw, rrd->arc_buf); /* if receive_write() is successful, it consumes the arc_buf */ if (err != 0) - dmu_return_arcbuf(rrd->write_buf); - rrd->write_buf = NULL; + dmu_return_arcbuf(rrd->arc_buf); + rrd->arc_buf = NULL; rrd->payload = NULL; return (err); } @@ 
-2784,11 +3274,20 @@ receive_process_record(struct receive_writer_arg *rwa, case DRR_SPILL: { struct drr_spill *drrs = &rrd->header.drr_u.drr_spill; - err = receive_spill(rwa, drrs, rrd->payload); - kmem_free(rrd->payload, rrd->payload_size); + err = receive_spill(rwa, drrs, rrd->arc_buf); + /* if receive_spill() is successful, it consumes the arc_buf */ + if (err != 0) + dmu_return_arcbuf(rrd->arc_buf); + rrd->arc_buf = NULL; rrd->payload = NULL; return (err); } + case DRR_OBJECT_RANGE: + { + struct drr_object_range *drror = + &rrd->header.drr_u.drr_object_range; + return (receive_object_range(rwa, drror)); + } default: return (SET_ERROR(EINVAL)); } @@ -2812,9 +3311,9 @@ receive_writer_thread(void *arg) */ if (rwa->err == 0) { rwa->err = receive_process_record(rwa, rrd); - } else if (rrd->write_buf != NULL) { - dmu_return_arcbuf(rrd->write_buf); - rrd->write_buf = NULL; + } else if (rrd->arc_buf != NULL) { + dmu_return_arcbuf(rrd->arc_buf); + rrd->arc_buf = NULL; rrd->payload = NULL; } else if (rrd->payload != NULL) { kmem_free(rrd->payload, rrd->payload_size); @@ -2879,6 +3378,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, nvlist_t *begin_nvl = NULL; ra.byteswap = drc->drc_byteswap; + ra.raw = drc->drc_raw; ra.cksum = drc->drc_cksum; ra.vp = vp; ra.voff = *voffp; @@ -2904,17 +3404,25 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT); featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); + ra.featureflags = featureflags; + + /* embedded data is incompatible with encrypted datasets */ + if (ra.os->os_encrypted && + (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)) { + err = SET_ERROR(EINVAL); + goto out; + } /* if this stream is dedup'ed, set up the avl tree for guid mapping */ if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { minor_t minor; if (cleanup_fd == -1) { - ra.err = SET_ERROR(EBADF); + err = SET_ERROR(EBADF); goto out; } - 
ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); - if (ra.err != 0) { + err = zfs_onexit_fd_hold(cleanup_fd, &minor); + if (err != 0) { cleanup_fd = -1; goto out; } @@ -2928,12 +3436,12 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, err = zfs_onexit_add_cb(minor, free_guid_map_onexit, rwa.guid_to_ds_map, action_handlep); - if (ra.err != 0) + if (err != 0) goto out; } else { err = zfs_onexit_cb_data(minor, *action_handlep, (void **)&rwa.guid_to_ds_map); - if (ra.err != 0) + if (err != 0) goto out; } @@ -2958,6 +3466,24 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, goto out; } + /* handle DSL encryption key payload */ + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + nvlist_t *keynvl = NULL; + + ASSERT(ra.os->os_encrypted); + ASSERT(drc->drc_raw); + + err = nvlist_lookup_nvlist(begin_nvl, "crypt_keydata", &keynvl); + if (err != 0) + goto out; + + err = dsl_crypto_recv_key(spa_name(ra.os->os_spa), + drc->drc_ds->ds_object, drc->drc_drrb->drr_type, + keynvl); + if (err != 0) + goto out; + } + if (featureflags & DMU_BACKUP_FEATURE_RESUMING) { err = resume_check(&ra, begin_nvl); if (err != 0) @@ -2971,6 +3497,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, rwa.os = ra.os; rwa.byteswap = drc->drc_byteswap; rwa.resumable = drc->drc_resumable; + rwa.raw = drc->drc_raw; (void) thread_create(NULL, 0, receive_writer_thread, &rwa, 0, curproc, TS_RUN, minclsyspri); @@ -3120,6 +3647,7 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx) { dmu_recv_cookie_t *drc = arg; dsl_pool_t *dp = dmu_tx_pool(tx); + boolean_t encrypted = drc->drc_ds->ds_dir->dd_crypto_obj != 0; spa_history_log_internal_ds(drc->drc_ds, "finish receiving", tx, "snap=%s", drc->drc_tosnap); @@ -3212,21 +3740,31 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx) drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj; } + /* * Release the hold from dmu_recv_begin. 
This must be done before - * we return to open context, so that when we free the dataset's dnode, - * we can evict its bonus buffer. + * we return to open context, so that when we free the dataset's dnode + * we can evict its bonus buffer. Since the dataset may be destroyed + * at this point (and therefore won't have a valid pointer to the spa) + * we release the key mapping manually here while we do have a valid + * pointer, if it exists. */ - dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + if (!drc->drc_raw && encrypted) { + (void) spa_keystore_remove_mapping(dmu_tx_pool(tx)->dp_spa, + drc->drc_ds->ds_object, drc->drc_ds); + } + dsl_dataset_disown(drc->drc_ds, 0, dmu_recv_tag); drc->drc_ds = NULL; } static int -add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) +add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj, + boolean_t raw) { dsl_pool_t *dp; dsl_dataset_t *snapds; guid_map_entry_t *gmep; + ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT; int err; ASSERT(guid_map != NULL); @@ -3235,9 +3773,10 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) if (err != 0) return (err); gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP); - err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds); + err = dsl_dataset_hold_obj_flags(dp, snapobj, dsflags, gmep, &snapds); if (err == 0) { gmep->guid = dsl_dataset_phys(snapds)->ds_guid; + gmep->raw = raw; gmep->gme_ds = snapds; avl_add(guid_map, gmep); dsl_dataset_long_hold(snapds, gmep); @@ -3292,9 +3831,8 @@ dmu_recv_end(dmu_recv_cookie_t *drc, void *owner) if (error != 0) { dmu_recv_cleanup_ds(drc); } else if (drc->drc_guid_to_ds_map != NULL) { - (void) add_ds_to_guidmap(drc->drc_tofs, - drc->drc_guid_to_ds_map, - drc->drc_newsnapobj); + (void) add_ds_to_guidmap(drc->drc_tofs, drc->drc_guid_to_ds_map, + drc->drc_newsnapobj, drc->drc_raw); } return (error); } diff --git a/usr/src/uts/common/fs/zfs/dmu_traverse.c 
b/usr/src/uts/common/fs/zfs/dmu_traverse.c index d3061a07f2c9..3012d0f7eb2c 100644 --- a/usr/src/uts/common/fs/zfs/dmu_traverse.c +++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c @@ -132,7 +132,7 @@ traverse_zil(traverse_data_t *td, zil_header_t *zh) zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh); (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td, - claim_txg); + claim_txg, !(td->td_flags & TRAVERSE_NO_DECRYPT)); zil_free(zilog); } @@ -181,6 +181,7 @@ traverse_prefetch_metadata(traverse_data_t *td, const blkptr_t *bp, const zbookmark_phys_t *zb) { arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; + int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA)) return; @@ -196,8 +197,11 @@ traverse_prefetch_metadata(traverse_data_t *td, if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE) return; + if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp)) + zio_flags |= ZIO_FLAG_RAW; + (void) arc_read(NULL, td->td_spa, bp, NULL, NULL, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); } static boolean_t @@ -296,6 +300,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; + ASSERT(!BP_IS_PROTECTED(bp)); + err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); if (err != 0) @@ -320,11 +326,18 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, } } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { arc_flags_t flags = ARC_FLAG_WAIT; + uint32_t zio_flags = ZIO_FLAG_CANFAIL; int i; int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; + /* + * dnode blocks might have their bonus buffers encrypted, so + * we must be careful to honor TRAVERSE_NO_DECRYPT + */ + if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp)) + zio_flags |= ZIO_FLAG_RAW; err = arc_read(NULL, td->td_spa, bp, 
arc_getbuf_func, &buf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); if (err != 0) goto post; dnode_phys_t *child_dnp = buf->b_data; @@ -342,10 +355,14 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, break; } } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { + uint32_t zio_flags = ZIO_FLAG_CANFAIL; arc_flags_t flags = ARC_FLAG_WAIT; + if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp)) + zio_flags |= ZIO_FLAG_RAW; + err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); if (err != 0) goto post; @@ -494,6 +511,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { prefetch_data_t *pfd = arg; + int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; ASSERT(pfd->pd_bytes_fetched >= 0); @@ -512,8 +530,11 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, cv_broadcast(&pfd->pd_cv); mutex_exit(&pfd->pd_mtx); + if ((pfd->pd_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp)) + zio_flags |= ZIO_FLAG_RAW; + (void) arc_read(NULL, spa, bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &aflags, zb); + zio_flags, &aflags, zb); return (0); } @@ -582,15 +603,22 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, mutex_init(&pd.pd_mtx, NULL, MUTEX_DEFAULT, NULL); cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL); + SET_BOOKMARK(&czb, td.td_objset, + ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); + /* See comment on ZIL traversal in dsl_scan_visitds. 
*/ if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) { + enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; arc_flags_t flags = ARC_FLAG_WAIT; objset_phys_t *osp; arc_buf_t *buf; - err = arc_read(NULL, td.td_spa, rootbp, - arc_getbuf_func, &buf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL); + if ((td.td_flags & TRAVERSE_NO_DECRYPT) && + BP_IS_PROTECTED(rootbp)) + zio_flags |= ZIO_FLAG_RAW; + + err = arc_read(NULL, td.td_spa, rootbp, arc_getbuf_func, + &buf, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, &czb); if (err != 0) return (err); @@ -604,8 +632,6 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, &td, TQ_NOQUEUE)) pd.pd_exited = B_TRUE; - SET_BOOKMARK(&czb, td.td_objset, - ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); err = traverse_visitbp(&td, NULL, rootbp, &czb); mutex_enter(&pd.pd_mtx); diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c index b439037d161e..718ff4dea6bc 100644 --- a/usr/src/uts/common/fs/zfs/dnode.c +++ b/usr/src/uts/common/fs/zfs/dnode.c @@ -1112,7 +1112,12 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, rw_exit(&mdn->dn_struct_rwlock); if (db == NULL) return (SET_ERROR(EIO)); - err = dbuf_read(db, NULL, DB_RF_CANFAIL); + + /* + * We do not need to decrypt to read the dnode so it doesn't matter + * if we get the encrypted or decrypted version. 
+ */ + err = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_NO_DECRYPT); if (err) { dbuf_rele(db, FTAG); return (err); @@ -1405,11 +1410,73 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx) return (SET_ERROR(ENOTSUP)); } +static void +dnode_set_nlevels_impl(dnode_t *dn, int new_nlevels, dmu_tx_t *tx) +{ + uint64_t txgoff = tx->tx_txg & TXG_MASK; + int old_nlevels = dn->dn_nlevels; + dmu_buf_impl_t *db; + list_t *list; + dbuf_dirty_record_t *new, *dr, *dr_next; + + ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); + + dn->dn_nlevels = new_nlevels; + + ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]); + dn->dn_next_nlevels[txgoff] = new_nlevels; + + /* dirty the left indirects */ + db = dbuf_hold_level(dn, old_nlevels, 0, FTAG); + ASSERT(db != NULL); + new = dbuf_dirty(db, tx); + dbuf_rele(db, FTAG); + + /* transfer the dirty records to the new indirect */ + mutex_enter(&dn->dn_mtx); + mutex_enter(&new->dt.di.dr_mtx); + list = &dn->dn_dirty_records[txgoff]; + for (dr = list_head(list); dr; dr = dr_next) { + dr_next = list_next(&dn->dn_dirty_records[txgoff], dr); + if (dr->dr_dbuf->db_level != new_nlevels-1 && + dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID && + dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) { + ASSERT(dr->dr_dbuf->db_level == old_nlevels-1); + list_remove(&dn->dn_dirty_records[txgoff], dr); + list_insert_tail(&new->dt.di.dr_children, dr); + dr->dr_parent = new; + } + } + mutex_exit(&new->dt.di.dr_mtx); + mutex_exit(&dn->dn_mtx); +} + +int +dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx) +{ + int ret = 0; + + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + + if (dn->dn_nlevels == nlevels) { + ret = 0; + goto out; + } else if (nlevels < dn->dn_nlevels) { + ret = SET_ERROR(EINVAL); + goto out; + } + + dnode_set_nlevels_impl(dn, nlevels, tx); + +out: + rw_exit(&dn->dn_struct_rwlock); + return (ret); +} + /* read-holding callers must not rely on the lock being continuously held */ void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t 
*tx, boolean_t have_read) { - uint64_t txgoff = tx->tx_txg & TXG_MASK; int epbs, new_nlevels; uint64_t sz; @@ -1447,41 +1514,8 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) sz <= blkid && sz >= dn->dn_nblkptr; sz <<= epbs) new_nlevels++; - if (new_nlevels > dn->dn_nlevels) { - int old_nlevels = dn->dn_nlevels; - dmu_buf_impl_t *db; - list_t *list; - dbuf_dirty_record_t *new, *dr, *dr_next; - - dn->dn_nlevels = new_nlevels; - - ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]); - dn->dn_next_nlevels[txgoff] = new_nlevels; - - /* dirty the left indirects */ - db = dbuf_hold_level(dn, old_nlevels, 0, FTAG); - ASSERT(db != NULL); - new = dbuf_dirty(db, tx); - dbuf_rele(db, FTAG); - - /* transfer the dirty records to the new indirect */ - mutex_enter(&dn->dn_mtx); - mutex_enter(&new->dt.di.dr_mtx); - list = &dn->dn_dirty_records[txgoff]; - for (dr = list_head(list); dr; dr = dr_next) { - dr_next = list_next(&dn->dn_dirty_records[txgoff], dr); - if (dr->dr_dbuf->db_level != new_nlevels-1 && - dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID && - dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) { - ASSERT(dr->dr_dbuf->db_level == old_nlevels-1); - list_remove(&dn->dn_dirty_records[txgoff], dr); - list_insert_tail(&new->dt.di.dr_children, dr); - dr->dr_parent = new; - } - } - mutex_exit(&new->dt.di.dr_mtx); - mutex_exit(&dn->dn_mtx); - } + if (new_nlevels > dn->dn_nlevels) + dnode_set_nlevels_impl(dn, new_nlevels, tx); out: if (have_read) @@ -1840,7 +1874,8 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, */ return (SET_ERROR(ESRCH)); } - error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT); + error = dbuf_read(db, NULL, + DB_RF_CANFAIL | DB_RF_HAVESTRUCT | DB_RF_NO_DECRYPT); if (error) { dbuf_rele(db, FTAG); return (error); diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c index bfa92db7a954..b1c34c16ded3 100644 --- a/usr/src/uts/common/fs/zfs/dnode_sync.c +++ 
b/usr/src/uts/common/fs/zfs/dnode_sync.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -536,7 +537,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); /* * Now that we've released our hold, the dnode may - * be evicted, so we musn't access it. + * be evicted, so we mustn't access it. */ } @@ -546,6 +547,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) void dnode_sync(dnode_t *dn, dmu_tx_t *tx) { + objset_t *os = dn->dn_objset; dnode_phys_t *dnp = dn->dn_phys; int txgoff = tx->tx_txg & TXG_MASK; list_t *list = &dn->dn_dirty_records[txgoff]; @@ -560,8 +562,13 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf)); - if (dmu_objset_userused_enabled(dn->dn_objset) && - !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { + /* + * Do user accounting if it is enabled and this is not + * an encrypted receive. + */ + if (dmu_objset_userused_enabled(os) && + !DMU_OBJECT_IS_SPECIAL(dn->dn_object) && + (!os->os_encrypted || !dmu_objset_is_receiving(os))) { mutex_enter(&dn->dn_mtx); dn->dn_oldused = DN_USED_BYTES(dn->dn_phys); dn->dn_oldflags = dn->dn_phys->dn_flags; @@ -569,7 +576,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) mutex_exit(&dn->dn_mtx); dmu_objset_userquota_get_ids(dn, B_FALSE, tx); } else { - /* Once we account for it, we should always account for it. */ + /* Once we account for it, we should always account for it */ ASSERT(!(dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED)); } diff --git a/usr/src/uts/common/fs/zfs/dsl_crypt.c b/usr/src/uts/common/fs/zfs/dsl_crypt.c new file mode 100644 index 000000000000..feda4d78e6b5 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/dsl_crypt.c @@ -0,0 +1,2640 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. 
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This file's primary purpose is for managing master encryption keys in + * memory and on disk. For more info on how these keys are used, see the + * block comment in zio_crypt.c. + * + * All master keys are stored encrypted on disk in the form of the DSL + * Crypto Key ZAP object. The binary key data in this object is always + * randomly generated and is encrypted with the user's wrapping key. This + * layer of indirection allows the user to change their key without + * needing to re-encrypt the entire dataset. The ZAP also holds on to the + * (non-encrypted) encryption algorithm identifier, IV, and MAC needed to + * safely decrypt the master key. For more info on the user's key see the + * block comment in libzfs_crypto.c + * + * In-memory encryption keys are managed through the spa_keystore. The + * keystore consists of 3 AVL trees, which are as follows: + * + * The Wrapping Key Tree: + * The wrapping key (wkey) tree stores the user's keys that are fed into the + * kernel through 'zfs load-key' and related commands. Datasets inherit their + * parent's wkey by default, so these structures are refcounted. The wrapping + * keys remain in memory until they are explicitly unloaded (with + * "zfs unload-key"). Unloading is only possible when no datasets are using + * them (refcount=0). + * + * The DSL Crypto Key Tree: + * The DSL Crypto Keys (DCK) are the in-memory representation of decrypted + * master keys. 
They are used by the functions in zio_crypt.c to perform + * encryption, decryption, and authentication. Snapshots and clones of a given + * dataset will share a DSL Crypto Key, so they are also refcounted. Once the + * refcount on a key hits zero, it is immediately zeroed out and freed. + * + * The Crypto Key Mapping Tree: + * The zio layer needs to lookup master keys by their dataset object id. Since + * the DSL Crypto Keys can belong to multiple datasets, we maintain a tree of + * dsl_key_mapping_t's which essentially just map the dataset object id to its + * appropriate DSL Crypto Key. The management for creating and destroying these + * mappings hooks into the code for owning and disowning datasets. Usually, + * there will only be one active dataset owner, but there are times + * (particularly during dataset creation and destruction) when this may not be + * true or the dataset may not be initialized enough to own. As a result, this + * object is also refcounted. + */ + +static void +dsl_wrapping_key_hold(dsl_wrapping_key_t *wkey, void *tag) +{ + (void) refcount_add(&wkey->wk_refcnt, tag); +} + +static void +dsl_wrapping_key_rele(dsl_wrapping_key_t *wkey, void *tag) +{ + (void) refcount_remove(&wkey->wk_refcnt, tag); +} + +static void +dsl_wrapping_key_free(dsl_wrapping_key_t *wkey) +{ + ASSERT0(refcount_count(&wkey->wk_refcnt)); + + if (wkey->wk_key.ck_data) { + bzero(wkey->wk_key.ck_data, + CRYPTO_BITS2BYTES(wkey->wk_key.ck_length)); + kmem_free(wkey->wk_key.ck_data, + CRYPTO_BITS2BYTES(wkey->wk_key.ck_length)); + } + + refcount_destroy(&wkey->wk_refcnt); + kmem_free(wkey, sizeof (dsl_wrapping_key_t)); +} + +static int +dsl_wrapping_key_create(uint8_t *wkeydata, zfs_keyformat_t keyformat, + uint64_t salt, uint64_t iters, dsl_wrapping_key_t **wkey_out) +{ + int ret; + dsl_wrapping_key_t *wkey; + + /* allocate the wrapping key */ + wkey = kmem_alloc(sizeof (dsl_wrapping_key_t), KM_SLEEP); + if (!wkey) + return (SET_ERROR(ENOMEM)); + + /* allocate and 
initialize the underlying crypto key */ + wkey->wk_key.ck_data = kmem_alloc(WRAPPING_KEY_LEN, KM_SLEEP); + if (!wkey->wk_key.ck_data) { + ret = ENOMEM; + goto error; + } + + wkey->wk_key.ck_format = CRYPTO_KEY_RAW; + wkey->wk_key.ck_length = CRYPTO_BYTES2BITS(WRAPPING_KEY_LEN); + bcopy(wkeydata, wkey->wk_key.ck_data, WRAPPING_KEY_LEN); + + /* initialize the rest of the struct */ + refcount_create(&wkey->wk_refcnt); + wkey->wk_keyformat = keyformat; + wkey->wk_salt = salt; + wkey->wk_iters = iters; + + *wkey_out = wkey; + return (0); + +error: + dsl_wrapping_key_free(wkey); + + *wkey_out = NULL; + return (ret); +} + +int +dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, + nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out) +{ + int ret; + uint64_t crypt = ZIO_CRYPT_INHERIT; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + uint64_t salt = 0, iters = 0; + dsl_crypto_params_t *dcp = NULL; + dsl_wrapping_key_t *wkey = NULL; + uint8_t *wkeydata = NULL; + uint_t wkeydata_len = 0; + char *keylocation = NULL; + + dcp = kmem_zalloc(sizeof (dsl_crypto_params_t), KM_SLEEP); + if (!dcp) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + /* get relevant properties from the nvlist */ + dcp->cp_cmd = cmd; + + /* get relevant arguments from the nvlists */ + if (props != NULL) { + (void) nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt); + (void) nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat); + (void) nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); + (void) nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), &salt); + (void) nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters); + dcp->cp_crypt = crypt; + } + + if (crypto_args != NULL) { + (void) nvlist_lookup_uint8_array(crypto_args, "wkeydata", + &wkeydata, &wkeydata_len); + } + + /* check for valid command */ + if (dcp->cp_cmd >= DCP_CMD_MAX) { + ret = SET_ERROR(EINVAL); + goto 
error; + } else { + dcp->cp_cmd = cmd; + } + + /* check for valid crypt */ + if (dcp->cp_crypt >= ZIO_CRYPT_FUNCTIONS) { + ret = SET_ERROR(EINVAL); + goto error; + } else { + dcp->cp_crypt = crypt; + } + + /* check for valid keyformat */ + if (keyformat >= ZFS_KEYFORMAT_FORMATS) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* check for a valid keylocation (of any kind) and copy it in */ + if (keylocation != NULL) { + if (!zfs_prop_valid_keylocation(keylocation, B_FALSE)) { + ret = SET_ERROR(EINVAL); + goto error; + } + + dcp->cp_keylocation = spa_strdup(keylocation); + } + + /* check wrapping key length, if given */ + if (wkeydata != NULL && wkeydata_len != WRAPPING_KEY_LEN) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* if the user asked for the deault crypt, determine that now */ + if (dcp->cp_crypt == ZIO_CRYPT_ON) + dcp->cp_crypt = ZIO_CRYPT_ON_VALUE; + + /* create the wrapping key from the raw data */ + if (wkeydata != NULL) { + /* create the wrapping key with the verified parameters */ + ret = dsl_wrapping_key_create(wkeydata, keyformat, salt, + iters, &wkey); + if (ret != 0) + goto error; + + dcp->cp_wkey = wkey; + } + + /* + * Remove the encryption properties from the nvlist since they are not + * maintained through the DSL. 
+ */ + (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION)); + (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_KEYFORMAT)); + (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT)); + (void) nvlist_remove_all(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS)); + + *dcp_out = dcp; + + return (0); + +error: + if (wkey != NULL) + dsl_wrapping_key_free(wkey); + if (dcp != NULL) + kmem_free(dcp, sizeof (dsl_crypto_params_t)); + + *dcp_out = NULL; + return (ret); +} + +void +dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload) +{ + if (dcp == NULL) + return; + + if (dcp->cp_keylocation != NULL) + spa_strfree(dcp->cp_keylocation); + if (unload && dcp->cp_wkey != NULL) + dsl_wrapping_key_free(dcp->cp_wkey); + + kmem_free(dcp, sizeof (dsl_crypto_params_t)); +} + +static int +spa_crypto_key_compare(const void *a, const void *b) +{ + const dsl_crypto_key_t *dcka = a; + const dsl_crypto_key_t *dckb = b; + + if (dcka->dck_obj < dckb->dck_obj) + return (-1); + if (dcka->dck_obj > dckb->dck_obj) + return (1); + return (0); +} + +static int +spa_key_mapping_compare(const void *a, const void *b) +{ + const dsl_key_mapping_t *kma = a; + const dsl_key_mapping_t *kmb = b; + + if (kma->km_dsobj < kmb->km_dsobj) + return (-1); + if (kma->km_dsobj > kmb->km_dsobj) + return (1); + return (0); +} + +static int +spa_wkey_compare(const void *a, const void *b) +{ + const dsl_wrapping_key_t *wka = a; + const dsl_wrapping_key_t *wkb = b; + + if (wka->wk_ddobj < wkb->wk_ddobj) + return (-1); + if (wka->wk_ddobj > wkb->wk_ddobj) + return (1); + return (0); +} + +void +spa_keystore_init(spa_keystore_t *sk) +{ + rw_init(&sk->sk_dk_lock, NULL, RW_DEFAULT, NULL); + rw_init(&sk->sk_km_lock, NULL, RW_DEFAULT, NULL); + rw_init(&sk->sk_wkeys_lock, NULL, RW_DEFAULT, NULL); + avl_create(&sk->sk_dsl_keys, spa_crypto_key_compare, + sizeof (dsl_crypto_key_t), + offsetof(dsl_crypto_key_t, dck_avl_link)); + avl_create(&sk->sk_key_mappings, 
spa_key_mapping_compare, + sizeof (dsl_key_mapping_t), + offsetof(dsl_key_mapping_t, km_avl_link)); + avl_create(&sk->sk_wkeys, spa_wkey_compare, sizeof (dsl_wrapping_key_t), + offsetof(dsl_wrapping_key_t, wk_avl_link)); +} + +void +spa_keystore_fini(spa_keystore_t *sk) +{ + dsl_wrapping_key_t *wkey; + void *cookie = NULL; + + ASSERT(avl_is_empty(&sk->sk_dsl_keys)); + ASSERT(avl_is_empty(&sk->sk_key_mappings)); + + while ((wkey = avl_destroy_nodes(&sk->sk_wkeys, &cookie)) != NULL) + dsl_wrapping_key_free(wkey); + + avl_destroy(&sk->sk_wkeys); + avl_destroy(&sk->sk_key_mappings); + avl_destroy(&sk->sk_dsl_keys); + rw_destroy(&sk->sk_wkeys_lock); + rw_destroy(&sk->sk_km_lock); + rw_destroy(&sk->sk_dk_lock); +} + +int +dsl_dir_get_encryption_root_ddobj(dsl_dir_t *dd, uint64_t *rddobj) +{ + if (dd->dd_crypto_obj == 0) + return (SET_ERROR(ENOENT)); + + return (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, rddobj)); +} + +static int +spa_keystore_wkey_hold_ddobj_impl(spa_t *spa, uint64_t ddobj, + void *tag, dsl_wrapping_key_t **wkey_out) +{ + int ret; + dsl_wrapping_key_t search_wkey; + dsl_wrapping_key_t *found_wkey; + + ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_wkeys_lock)); + + /* init the search wrapping key */ + search_wkey.wk_ddobj = ddobj; + + /* lookup the wrapping key */ + found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &search_wkey, NULL); + if (!found_wkey) { + ret = SET_ERROR(ENOENT); + goto error; + } + + /* increment the refcount */ + dsl_wrapping_key_hold(found_wkey, tag); + + *wkey_out = found_wkey; + return (0); + +error: + *wkey_out = NULL; + return (ret); +} + +static int +spa_keystore_wkey_hold_dd(spa_t *spa, dsl_dir_t *dd, void *tag, + dsl_wrapping_key_t **wkey_out) +{ + int ret; + dsl_wrapping_key_t *wkey; + uint64_t rddobj; + boolean_t locked = B_FALSE; + + if (!RW_WRITE_HELD(&spa->spa_keystore.sk_wkeys_lock)) { + rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_READER); + locked = B_TRUE; + } + + /* 
get the ddobj that the keylocation property was inherited from */ + ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj); + if (ret != 0) + goto error; + + /* lookup the wkey in the avl tree */ + ret = spa_keystore_wkey_hold_ddobj_impl(spa, rddobj, tag, &wkey); + if (ret != 0) + goto error; + + /* unlock the wkey tree if we locked it */ + if (locked) + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + *wkey_out = wkey; + return (0); + +error: + if (locked) + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + *wkey_out = NULL; + return (ret); +} + +int +dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation) +{ + int ret = 0; + dsl_dir_t *dd = NULL; + dsl_pool_t *dp = NULL; + uint64_t rddobj; + + /* hold the dsl dir */ + ret = dsl_pool_hold(dsname, FTAG, &dp); + if (ret != 0) + goto out; + + ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL); + if (ret != 0) + goto out; + + /* if dd is not encrypted, the value may only be "none" */ + if (dd->dd_crypto_obj == 0) { + if (strcmp(keylocation, "none") != 0) { + ret = SET_ERROR(EACCES); + goto out; + } + + ret = 0; + goto out; + } + + /* check for a valid keylocation for encrypted datasets */ + if (!zfs_prop_valid_keylocation(keylocation, B_TRUE)) { + ret = SET_ERROR(EINVAL); + goto out; + } + + /* check that this is an encryption root */ + ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj); + if (ret != 0) + goto out; + + if (rddobj != dd->dd_object) { + ret = SET_ERROR(EACCES); + goto out; + } + + dsl_dir_rele(dd, FTAG); + dsl_pool_rele(dp, FTAG); + + return (0); + +out: + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + if (dp != NULL) + dsl_pool_rele(dp, FTAG); + + return (ret); +} + +static void +dsl_crypto_key_free(dsl_crypto_key_t *dck) +{ + ASSERT(refcount_count(&dck->dck_holds) == 0); + + /* destroy the zio_crypt_key_t */ + zio_crypt_key_destroy(&dck->dck_key); + + /* free the refcount, wrapping key, and lock */ + refcount_destroy(&dck->dck_holds); + if (dck->dck_wkey) + 
dsl_wrapping_key_rele(dck->dck_wkey, dck); + + /* free the key */ + kmem_free(dck, sizeof (dsl_crypto_key_t)); +} + +static void +dsl_crypto_key_rele(dsl_crypto_key_t *dck, void *tag) +{ + if (refcount_remove(&dck->dck_holds, tag) == 0) + dsl_crypto_key_free(dck); +} + +static int +dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey, + uint64_t dckobj, void *tag, dsl_crypto_key_t **dck_out) +{ + int ret; + uint64_t crypt = 0, guid = 0; + uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; + uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; + uint8_t iv[WRAPPING_IV_LEN]; + uint8_t mac[WRAPPING_MAC_LEN]; + dsl_crypto_key_t *dck; + + /* allocate and initialize the key */ + dck = kmem_zalloc(sizeof (dsl_crypto_key_t), KM_SLEEP); + if (!dck) + return (SET_ERROR(ENOMEM)); + + /* fetch all of the values we need from the ZAP */ + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, + &crypt); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, &guid); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, + MASTER_KEY_MAX_LEN, raw_keydata); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, + SHA512_HMAC_KEYLEN, raw_hmac_keydata); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, + iv); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, + mac); + if (ret != 0) + goto error; + + /* + * Unwrap the keys. If there is an error return EACCES to indicate + * an authentication failure. 
+ */ + ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, guid, raw_keydata, + raw_hmac_keydata, iv, mac, &dck->dck_key); + if (ret != 0) { + ret = SET_ERROR(EACCES); + goto error; + } + + /* finish initializing the dsl_crypto_key_t */ + refcount_create(&dck->dck_holds); + dsl_wrapping_key_hold(wkey, dck); + dck->dck_wkey = wkey; + dck->dck_obj = dckobj; + (void) refcount_add(&dck->dck_holds, tag); + + *dck_out = dck; + return (0); + +error: + if (dck != NULL) { + bzero(dck, sizeof (dsl_crypto_key_t)); + kmem_free(dck, sizeof (dsl_crypto_key_t)); + } + + *dck_out = NULL; + return (ret); +} + +static int +spa_keystore_dsl_key_hold_impl(spa_t *spa, uint64_t dckobj, void *tag, + dsl_crypto_key_t **dck_out) +{ + int ret; + dsl_crypto_key_t search_dck; + dsl_crypto_key_t *found_dck; + + ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_dk_lock)); + + /* init the search key */ + search_dck.dck_obj = dckobj; + + /* find the matching key in the keystore */ + found_dck = avl_find(&spa->spa_keystore.sk_dsl_keys, &search_dck, NULL); + if (!found_dck) { + ret = SET_ERROR(ENOENT); + goto error; + } + + /* increment the refcount */ + (void) refcount_add(&found_dck->dck_holds, tag); + + *dck_out = found_dck; + return (0); + +error: + *dck_out = NULL; + return (ret); +} + +static int +spa_keystore_dsl_key_hold_dd(spa_t *spa, dsl_dir_t *dd, void *tag, + dsl_crypto_key_t **dck_out) +{ + int ret; + avl_index_t where; + dsl_crypto_key_t *dck = NULL; + dsl_wrapping_key_t *wkey = NULL; + uint64_t dckobj = dd->dd_crypto_obj; + + rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER); + + /* lookup the key in the tree of currently loaded keys */ + ret = spa_keystore_dsl_key_hold_impl(spa, dckobj, tag, &dck); + if (!ret) { + rw_exit(&spa->spa_keystore.sk_dk_lock); + *dck_out = dck; + return (0); + } + + /* lookup the wrapping key from the keystore */ + ret = spa_keystore_wkey_hold_dd(spa, dd, FTAG, &wkey); + if (ret != 0) { + ret = SET_ERROR(EACCES); + goto error_unlock; + } + + /* read the key from 
disk */ + ret = dsl_crypto_key_open(spa->spa_meta_objset, wkey, dckobj, + tag, &dck); + if (ret != 0) + goto error_unlock; + + /* + * add the key to the keystore (this should always succeed + * since we made sure it didn't exist before) + */ + (void) avl_find(&spa->spa_keystore.sk_dsl_keys, dck, &where); + avl_insert(&spa->spa_keystore.sk_dsl_keys, dck, where); + + /* release the wrapping key (the dsl key now has a reference to it) */ + dsl_wrapping_key_rele(wkey, FTAG); + + rw_exit(&spa->spa_keystore.sk_dk_lock); + + *dck_out = dck; + return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_dk_lock); + if (wkey != NULL) + dsl_wrapping_key_rele(wkey, FTAG); + + *dck_out = NULL; + return (ret); +} + +void +spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag) +{ + rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER); + + if (refcount_remove(&dck->dck_holds, tag) == 0) { + avl_remove(&spa->spa_keystore.sk_dsl_keys, dck); + dsl_crypto_key_free(dck); + } + + rw_exit(&spa->spa_keystore.sk_dk_lock); +} + +int +spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey) +{ + int ret; + avl_index_t where; + dsl_wrapping_key_t *found_wkey; + + rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); + + /* insert the wrapping key into the keystore */ + found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where); + if (found_wkey != NULL) { + ret = SET_ERROR(EEXIST); + goto error_unlock; + } + avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where); + + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + return (ret); +} + +int +spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp, + boolean_t noop) +{ + int ret; + dsl_dir_t *dd = NULL; + dsl_crypto_key_t *dck = NULL; + dsl_wrapping_key_t *wkey = dcp->cp_wkey; + dsl_pool_t *dp = NULL; + uint64_t keyformat, salt, iters; + + /* + * We don't validate the wrapping key's keyformat, salt, or iters + * since they will 
never be needed after the DCK has been wrapped. + */ + if (dcp->cp_wkey == NULL || + dcp->cp_cmd != DCP_CMD_NONE || + dcp->cp_crypt != ZIO_CRYPT_INHERIT || + dcp->cp_keylocation != NULL) + return (SET_ERROR(EINVAL)); + + ret = dsl_pool_hold(dsname, FTAG, &dp); + if (ret != 0) + goto error; + + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) { + ret = (SET_ERROR(ENOTSUP)); + goto error; + } + + /* hold the dsl dir */ + ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL); + if (ret != 0) + goto error; + + /* initialize the wkey's ddobj */ + wkey->wk_ddobj = dd->dd_object; + + /* verify that the wkey is correct by opening its dsl key */ + ret = dsl_crypto_key_open(dp->dp_meta_objset, wkey, + dd->dd_crypto_obj, FTAG, &dck); + if (ret != 0) + goto error; + + /* initialize the wkey encryption parameters from the DSL Crypto Key */ + ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &keyformat); + if (ret != 0) + goto error; + + ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &salt); + if (ret != 0) + goto error; + + ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters); + if (ret != 0) + goto error; + + ASSERT3U(keyformat, <, ZFS_KEYFORMAT_FORMATS); + ASSERT3U(keyformat, !=, ZFS_KEYFORMAT_NONE); + IMPLY(keyformat == ZFS_KEYFORMAT_PASSPHRASE, iters != 0); + IMPLY(keyformat == ZFS_KEYFORMAT_PASSPHRASE, salt != 0); + IMPLY(keyformat != ZFS_KEYFORMAT_PASSPHRASE, iters == 0); + IMPLY(keyformat != ZFS_KEYFORMAT_PASSPHRASE, salt == 0); + + wkey->wk_keyformat = keyformat; + wkey->wk_salt = salt; + wkey->wk_iters = iters; + + /* + * At this point we have verified the wkey and confirmed that it can + * be used to decrypt a DSL Crypto Key. We can simply cleanup and + * return if this is all the user wanted to do. 
+ */ + if (noop) + goto error; + + /* insert the wrapping key into the keystore */ + ret = spa_keystore_load_wkey_impl(dp->dp_spa, wkey); + if (ret != 0) + goto error; + + dsl_crypto_key_rele(dck, FTAG); + dsl_dir_rele(dd, FTAG); + dsl_pool_rele(dp, FTAG); + + return (0); + +error: + if (dck != NULL) + dsl_crypto_key_rele(dck, FTAG); + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + if (dp != NULL) + dsl_pool_rele(dp, FTAG); + + return (ret); +} + +int +spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj) +{ + int ret; + dsl_wrapping_key_t search_wkey; + dsl_wrapping_key_t *found_wkey; + + /* init the search wrapping key */ + search_wkey.wk_ddobj = ddobj; + + rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); + + /* remove the wrapping key from the keystore */ + found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, + &search_wkey, NULL); + if (!found_wkey) { + ret = SET_ERROR(ENOENT); + goto error_unlock; + } else if (refcount_count(&found_wkey->wk_refcnt) != 0) { + ret = SET_ERROR(EBUSY); + goto error_unlock; + } + avl_remove(&spa->spa_keystore.sk_wkeys, found_wkey); + + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + /* free the wrapping key */ + dsl_wrapping_key_free(found_wkey); + + return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + return (ret); +} + +int +spa_keystore_unload_wkey(const char *dsname) +{ + int ret = 0; + dsl_dir_t *dd = NULL; + dsl_pool_t *dp = NULL; + + /* hold the dsl dir */ + ret = dsl_pool_hold(dsname, FTAG, &dp); + if (ret != 0) + goto error; + + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) { + ret = (SET_ERROR(ENOTSUP)); + goto error; + } + + ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL); + if (ret != 0) + goto error; + + /* unload the wkey */ + ret = spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object); + if (ret != 0) + goto error; + + dsl_dir_rele(dd, FTAG); + dsl_pool_rele(dp, FTAG); + + return (0); + +error: + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + if (dp != NULL) + 
dsl_pool_rele(dp, FTAG); + + return (ret); +} + +int +spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj, + dsl_dir_t *dd, void *tag) +{ + int ret; + avl_index_t where; + dsl_key_mapping_t *km = NULL, *found_km; + boolean_t should_free = B_FALSE; + + /* allocate the mapping */ + km = kmem_alloc(sizeof (dsl_key_mapping_t), KM_SLEEP); + if (!km) + return (SET_ERROR(ENOMEM)); + + /* initialize the mapping */ + refcount_create(&km->km_refcnt); + + ret = spa_keystore_dsl_key_hold_dd(spa, dd, km, &km->km_key); + if (ret != 0) + goto error; + + km->km_dsobj = dsobj; + + rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER); + + /* + * If a mapping already exists, simply increment its refcount and + * cleanup the one we made. We want to allocate / free outside of + * the lock because this lock is also used by the zio layer to lookup + * key mappings. Otherwise, use the one we created. Normally, there will + * only be one active reference at a time (the objset owner), but there + * are times when there could be multiple async users. 
+ */ + found_km = avl_find(&spa->spa_keystore.sk_key_mappings, km, &where); + if (found_km != NULL) { + should_free = B_TRUE; + (void) refcount_add(&found_km->km_refcnt, tag); + } else { + (void) refcount_add(&km->km_refcnt, tag); + avl_insert(&spa->spa_keystore.sk_key_mappings, km, where); + } + + rw_exit(&spa->spa_keystore.sk_km_lock); + + if (should_free) { + spa_keystore_dsl_key_rele(spa, km->km_key, km); + refcount_destroy(&km->km_refcnt); + kmem_free(km, sizeof (dsl_key_mapping_t)); + } + + return (0); + +error: + if (km->km_key) + spa_keystore_dsl_key_rele(spa, km->km_key, km); + + refcount_destroy(&km->km_refcnt); + kmem_free(km, sizeof (dsl_key_mapping_t)); + + return (ret); +} + +int +spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, void *tag) +{ + return (spa_keystore_create_mapping_impl(spa, ds->ds_object, + ds->ds_dir, tag)); +} + +int +spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag) +{ + int ret; + dsl_key_mapping_t search_km; + dsl_key_mapping_t *found_km; + boolean_t should_free = B_FALSE; + + /* init the search key mapping */ + search_km.km_dsobj = dsobj; + + rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER); + + /* find the matching mapping */ + found_km = avl_find(&spa->spa_keystore.sk_key_mappings, + &search_km, NULL); + if (found_km == NULL) { + ret = SET_ERROR(ENOENT); + goto error_unlock; + } + + /* + * Decrement the refcount on the mapping and remove it from the tree if + * it is zero. Try to minimize time spent in this lock by deferring + * cleanup work. 
+ */ + if (refcount_remove(&found_km->km_refcnt, tag) == 0) { + should_free = B_TRUE; + avl_remove(&spa->spa_keystore.sk_key_mappings, found_km); + } + + rw_exit(&spa->spa_keystore.sk_km_lock); + + /* destroy the key mapping */ + if (should_free) { + spa_keystore_dsl_key_rele(spa, found_km->km_key, found_km); + kmem_free(found_km, sizeof (dsl_key_mapping_t)); + } + + return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_km_lock); + return (ret); +} + +/* + * This function is primarily used by the zio and arc layer to lookup + * DSL Crypto Keys for encryption. Callers must release the key with + * spa_keystore_dsl_key_rele(). The function may also be called with + * dck_out == NULL and tag == NULL to simply check that a key exists + * without getting a reference to it. + */ +int +spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag, + dsl_crypto_key_t **dck_out) +{ + int ret; + dsl_key_mapping_t search_km; + dsl_key_mapping_t *found_km; + + ASSERT((tag != NULL && dck_out != NULL) || + (tag == NULL && dck_out == NULL)); + + /* init the search key mapping */ + search_km.km_dsobj = dsobj; + + rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER); + + /* remove the mapping from the tree */ + found_km = avl_find(&spa->spa_keystore.sk_key_mappings, &search_km, + NULL); + if (found_km == NULL) { + ret = SET_ERROR(ENOENT); + goto error_unlock; + } + + if (found_km && tag) + (void) refcount_add(&found_km->km_key->dck_holds, tag); + + rw_exit(&spa->spa_keystore.sk_km_lock); + + if (dck_out != NULL) + *dck_out = found_km->km_key; + return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_km_lock); + + if (dck_out != NULL) + *dck_out = NULL; + return (ret); +} + +static int +dmu_objset_check_wkey_loaded(dsl_dir_t *dd) +{ + int ret; + dsl_wrapping_key_t *wkey = NULL; + + ret = spa_keystore_wkey_hold_dd(dd->dd_pool->dp_spa, dd, FTAG, + &wkey); + if (ret != 0) + return (SET_ERROR(EACCES)); + + dsl_wrapping_key_rele(wkey, FTAG); + + return (0); +} + +static 
zfs_keystatus_t +dsl_dataset_get_keystatus(dsl_dir_t *dd) +{ + /* check if this dd has a has a dsl key */ + if (dd->dd_crypto_obj == 0) + return (ZFS_KEYSTATUS_NONE); + + return (dmu_objset_check_wkey_loaded(dd) == 0 ? + ZFS_KEYSTATUS_AVAILABLE : ZFS_KEYSTATUS_UNAVAILABLE); +} + +static int +dsl_dir_get_crypt(dsl_dir_t *dd, uint64_t *crypt) +{ + if (dd->dd_crypto_obj == 0) { + *crypt = ZIO_CRYPT_OFF; + return (0); + } + + return (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, crypt)); +} + +static void +dsl_crypto_key_sync_impl(objset_t *mos, uint64_t dckobj, uint64_t crypt, + uint64_t root_ddobj, uint64_t guid, uint8_t *iv, uint8_t *mac, + uint8_t *keydata, uint8_t *hmac_keydata, uint64_t keyformat, + uint64_t salt, uint64_t iters, dmu_tx_t *tx) +{ + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, + &crypt, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, + &root_ddobj, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, + &guid, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, + iv, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, + mac, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, + MASTER_KEY_MAX_LEN, keydata, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, + SHA512_HMAC_KEYLEN, hmac_keydata, tx)); + VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), + 8, 1, &keyformat, tx)); + VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), + 8, 1, &salt, tx)); + VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), + 8, 1, &iters, tx)); +} + +static void +dsl_crypto_key_sync(dsl_crypto_key_t *dck, dmu_tx_t *tx) +{ + zio_crypt_key_t *key = &dck->dck_key; + dsl_wrapping_key_t *wkey = dck->dck_wkey; + uint8_t keydata[MASTER_KEY_MAX_LEN]; + uint8_t hmac_keydata[SHA512_HMAC_KEYLEN]; + uint8_t 
iv[WRAPPING_IV_LEN]; + uint8_t mac[WRAPPING_MAC_LEN]; + + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT3U(key->zk_crypt, <, ZIO_CRYPT_FUNCTIONS); + + /* encrypt and store the keys along with the IV and MAC */ + VERIFY0(zio_crypt_key_wrap(&dck->dck_wkey->wk_key, key, iv, mac, + keydata, hmac_keydata)); + + /* update the ZAP with the obtained values */ + dsl_crypto_key_sync_impl(tx->tx_pool->dp_meta_objset, dck->dck_obj, + key->zk_crypt, wkey->wk_ddobj, key->zk_guid, iv, mac, keydata, + hmac_keydata, wkey->wk_keyformat, wkey->wk_salt, wkey->wk_iters, + tx); +} + +typedef struct spa_keystore_change_key_args { + const char *skcka_dsname; + dsl_crypto_params_t *skcka_cp; +} spa_keystore_change_key_args_t; + +static int +spa_keystore_change_key_check(void *arg, dmu_tx_t *tx) +{ + int ret; + dsl_dir_t *dd = NULL; + dsl_pool_t *dp = dmu_tx_pool(tx); + spa_keystore_change_key_args_t *skcka = arg; + dsl_crypto_params_t *dcp = skcka->skcka_cp; + uint64_t rddobj; + + /* check for the encryption feature */ + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) { + ret = SET_ERROR(ENOTSUP); + goto error; + } + + /* check for valid key change command */ + if (dcp->cp_cmd != DCP_CMD_NEW_KEY && + dcp->cp_cmd != DCP_CMD_INHERIT && + dcp->cp_cmd != DCP_CMD_FORCE_NEW_KEY && + dcp->cp_cmd != DCP_CMD_FORCE_INHERIT) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* hold the dd */ + ret = dsl_dir_hold(dp, skcka->skcka_dsname, FTAG, &dd, NULL); + if (ret != 0) + goto error; + + /* verify that the dataset is encrypted */ + if (dd->dd_crypto_obj == 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* clones must always use their origin's key */ + if (dsl_dir_is_clone(dd)) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* lookup the ddobj we are inheriting the keylocation from */ + ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj); + if (ret != 0) + goto error; + + /* Handle inheritence */ + if (dcp->cp_cmd == DCP_CMD_INHERIT || + dcp->cp_cmd == DCP_CMD_FORCE_INHERIT) { + 
/* no other encryption params should be given */ + if (dcp->cp_crypt != ZIO_CRYPT_INHERIT || + dcp->cp_keylocation != NULL || + dcp->cp_wkey != NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* check that this is an encryption root */ + if (dd->dd_object != rddobj) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* check that the parent is encrypted */ + if (dd->dd_parent->dd_crypto_obj == 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* if we are rewrapping check that both keys are loaded */ + if (dcp->cp_cmd == DCP_CMD_INHERIT) { + ret = dmu_objset_check_wkey_loaded(dd); + if (ret != 0) + goto error; + + ret = dmu_objset_check_wkey_loaded(dd->dd_parent); + if (ret != 0) + goto error; + } + + dsl_dir_rele(dd, FTAG); + return (0); + } + + /* handle forcing an encryption root without rewrapping */ + if (dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) { + /* no other encryption params should be given */ + if (dcp->cp_crypt != ZIO_CRYPT_INHERIT || + dcp->cp_keylocation != NULL || + dcp->cp_wkey != NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* check that this is not an encryption root */ + if (dd->dd_object == rddobj) { + ret = SET_ERROR(EINVAL); + goto error; + } + + dsl_dir_rele(dd, FTAG); + return (0); + } + + /* crypt cannot be changed after creation */ + if (dcp->cp_crypt != ZIO_CRYPT_INHERIT) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* we are not inheritting our parent's wkey so we need one ourselves */ + if (dcp->cp_wkey == NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* check for a valid keyformat for the new wrapping key */ + if (dcp->cp_wkey->wk_keyformat >= ZFS_KEYFORMAT_FORMATS || + dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_NONE) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* + * If this dataset is not currently an encryption root we need a new + * keylocation for this dataset's new wrapping key. Otherwise we can + * just keep the one we already had. 
+ */ + if (dd->dd_object != rddobj && dcp->cp_keylocation == NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* check that the keylocation is valid if it is not NULL */ + if (dcp->cp_keylocation != NULL && + !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE)) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* passphrases require pbkdf2 salt and iters */ + if (dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_PASSPHRASE) { + if (dcp->cp_wkey->wk_salt == 0 || + dcp->cp_wkey->wk_iters < MIN_PBKDF2_ITERATIONS) { + ret = SET_ERROR(EINVAL); + goto error; + } + } else { + if (dcp->cp_wkey->wk_salt != 0 || dcp->cp_wkey->wk_iters != 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + } + + /* make sure the dd's wkey is loaded */ + ret = dmu_objset_check_wkey_loaded(dd); + if (ret != 0) + goto error; + + dsl_dir_rele(dd, FTAG); + + return (0); + +error: + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + + return (ret); +} + + +static void +spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, + uint64_t new_rddobj, dsl_wrapping_key_t *wkey, dmu_tx_t *tx) +{ + zap_cursor_t *zc; + zap_attribute_t *za; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *dd = NULL; + dsl_crypto_key_t *dck = NULL; + uint64_t curr_rddobj; + + ASSERT(RW_WRITE_HELD(&dp->dp_spa->spa_keystore.sk_wkeys_lock)); + + /* hold the dd */ + VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); + + /* ignore hidden dsl dirs */ + if (dd->dd_myname[0] == '$' || dd->dd_myname[0] == '%') { + dsl_dir_rele(dd, FTAG); + return; + } + + /* + * Stop recursing if this dsl dir didn't inherit from the root + * or if this dd is a clone. + */ + VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj)); + if (curr_rddobj != rddobj || dsl_dir_is_clone(dd)) { + dsl_dir_rele(dd, FTAG); + return; + } + + /* + * If we don't have a wrapping key just update the dck to reflect the + * new encryption root. Otherwise rewrap the entire dck and re-sync it + * to disk. 
+ */ + if (wkey == NULL) { + VERIFY0(zap_update(dp->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, &new_rddobj, tx)); + } else { + VERIFY0(spa_keystore_dsl_key_hold_dd(dp->dp_spa, dd, + FTAG, &dck)); + dsl_wrapping_key_hold(wkey, dck); + dsl_wrapping_key_rele(dck->dck_wkey, dck); + dck->dck_wkey = wkey; + dsl_crypto_key_sync(dck, tx); + spa_keystore_dsl_key_rele(dp->dp_spa, dck, FTAG); + } + + zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + + /* Recurse into all child dsl dirs. */ + for (zap_cursor_init(zc, dp->dp_meta_objset, + dsl_dir_phys(dd)->dd_child_dir_zapobj); + zap_cursor_retrieve(zc, za) == 0; + zap_cursor_advance(zc)) { + spa_keystore_change_key_sync_impl(rddobj, + za->za_first_integer, new_rddobj, wkey, tx); + } + zap_cursor_fini(zc); + + kmem_free(za, sizeof (zap_attribute_t)); + kmem_free(zc, sizeof (zap_cursor_t)); + + dsl_dir_rele(dd, FTAG); +} + +static void +spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_t *ds; + avl_index_t where; + dsl_pool_t *dp = dmu_tx_pool(tx); + spa_t *spa = dp->dp_spa; + spa_keystore_change_key_args_t *skcka = arg; + dsl_crypto_params_t *dcp = skcka->skcka_cp; + dsl_wrapping_key_t *wkey = NULL, *found_wkey; + dsl_wrapping_key_t wkey_search; + char *keylocation = dcp->cp_keylocation; + uint64_t rddobj, new_rddobj; + + /* create and initialize the wrapping key */ + VERIFY0(dsl_dataset_hold(dp, skcka->skcka_dsname, FTAG, &ds)); + ASSERT(!ds->ds_is_snapshot); + + if (dcp->cp_cmd == DCP_CMD_NEW_KEY || + dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) { + /* + * We are changing to a new wkey. Set additional properties + * which can be sent along with this ioctl. Note that this + * command can set keylocation even if it can't normally be + * set via 'zfs set' due to a non-local keylocation. 
+ */ + if (dcp->cp_cmd == DCP_CMD_NEW_KEY) { + wkey = dcp->cp_wkey; + wkey->wk_ddobj = ds->ds_dir->dd_object; + } else { + keylocation = "prompt"; + } + + if (keylocation != NULL) { + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, + keylocation, tx); + } + + VERIFY0(dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj)); + new_rddobj = ds->ds_dir->dd_object; + } else { + /* + * We are inheritting the parent's wkey. Unset any local + * keylocation and grab a reference to the wkey. + */ + if (dcp->cp_cmd == DCP_CMD_INHERIT) { + VERIFY0(spa_keystore_wkey_hold_dd(spa, + ds->ds_dir->dd_parent, FTAG, &wkey)); + } + + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), ZPROP_SRC_NONE, + 0, 0, NULL, tx); + + rddobj = ds->ds_dir->dd_object; + new_rddobj = ds->ds_dir->dd_parent->dd_object; + } + + if (wkey == NULL) { + ASSERT(dcp->cp_cmd == DCP_CMD_FORCE_INHERIT || + dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY); + } + + rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); + + /* recurse through all children and rewrap their keys */ + spa_keystore_change_key_sync_impl(rddobj, ds->ds_dir->dd_object, + new_rddobj, wkey, tx); + + /* + * All references to the old wkey should be released now (if it + * existed). Replace the wrapping key. 
+ */ + wkey_search.wk_ddobj = ds->ds_dir->dd_object; + found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &wkey_search, NULL); + if (found_wkey != NULL) { + ASSERT0(refcount_count(&found_wkey->wk_refcnt)); + avl_remove(&spa->spa_keystore.sk_wkeys, found_wkey); + dsl_wrapping_key_free(found_wkey); + } + + if (dcp->cp_cmd == DCP_CMD_NEW_KEY) { + /* avl_find() is only used here to compute the insertion point */ + (void) avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where); + avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where); + } else if (wkey != NULL) { + /* drop the hold taken on the parent's wkey for the rewrap */ + dsl_wrapping_key_rele(wkey, FTAG); + } + + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + dsl_dataset_rele(ds, FTAG); +} + +/* + * Apply the key change described by dcp to the dataset named dsname. The + * request is validated and applied by spa_keystore_change_key_check() and + * spa_keystore_change_key_sync() as a DSL sync task, whose result is + * returned. + */ +int +spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp) +{ + spa_keystore_change_key_args_t skcka; + + /* initialize the args struct */ + skcka.skcka_dsname = dsname; + skcka.skcka_cp = dcp; + + /* + * Perform the actual work in syncing context. The blocks modified + * here could be calculated but it would require holding the pool + * lock and traversing all of the datasets that will have their keys + * changed. 
+ */ + return (dsl_sync_task(dsname, spa_keystore_change_key_check, + spa_keystore_change_key_sync, &skcka, 15, + ZFS_SPACE_CHECK_RESERVED)); +} + +int +dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent) +{ + int ret; + uint64_t curr_rddobj, parent_rddobj; + + if (dd->dd_crypto_obj == 0) { + /* children of encrypted parents must be encrypted */ + if (newparent->dd_crypto_obj != 0) { + ret = SET_ERROR(EACCES); + goto error; + } + + return (0); + } + + ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj); + if (ret != 0) + goto error; + + /* + * if this is not an encryption root, we must make sure we are not + * moving dd to a new encryption root + */ + if (dd->dd_object != curr_rddobj) { + ret = dsl_dir_get_encryption_root_ddobj(newparent, + &parent_rddobj); + if (ret != 0) + goto error; + + if (parent_rddobj != curr_rddobj) { + ret = SET_ERROR(EACCES); + goto error; + } + } + + return (0); + +error: + return (ret); +} + +/* + * Check to make sure that a promote from targetdd to origindd will not require + * any key rewraps. + */ +int +dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin) +{ + int ret; + uint64_t rddobj, op_rddobj, tp_rddobj; + + /* If the dataset is not encrypted we don't need to check anything */ + if (origin->dd_crypto_obj == 0) + return (0); + + /* + * If we are not changing the first origin snapshot in a chain + * the encryption root won't change either. + */ + if (dsl_dir_is_clone(origin)) + return (0); + + /* + * If the origin is the encryption root we will update + * the DSL Crypto Key to point to the target instead. + */ + ret = dsl_dir_get_encryption_root_ddobj(origin, &rddobj); + if (ret != 0) + return (ret); + + if (rddobj == origin->dd_object) + return (0); + + /* + * The origin is inheriting its encryption root from its parent. + * Check that the parent of the target has the same encryption root. 
+ */ + ret = dsl_dir_get_encryption_root_ddobj(origin->dd_parent, &op_rddobj); + if (ret != 0) + return (ret); + + ret = dsl_dir_get_encryption_root_ddobj(target->dd_parent, &tp_rddobj); + if (ret != 0) + return (ret); + + if (op_rddobj != tp_rddobj) + return (SET_ERROR(EACCES)); + + return (0); +} + +/* + * Encryption bookkeeping for promoting target over origin. Nothing is + * done unless origin is encrypted, is not itself a clone, and is its own + * encryption root. In that case the keylocation property and the + * encryption root recorded in the affected DSL Crypto Keys are moved from + * origin to target. + */ +void +dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, + dmu_tx_t *tx) +{ + uint64_t rddobj; + dsl_pool_t *dp = target->dd_pool; + dsl_dataset_t *targetds; + dsl_dataset_t *originds; + char *keylocation; + + /* unencrypted or cloned origins need no key updates */ + if (origin->dd_crypto_obj == 0) + return; + if (dsl_dir_is_clone(origin)) + return; + + VERIFY0(dsl_dir_get_encryption_root_ddobj(origin, &rddobj)); + + /* origin inherits its encryption root, which stays unchanged */ + if (rddobj != origin->dd_object) + return; + + /* + * If the target is being promoted to the encryption root update the + * DSL Crypto Key and keylocation to reflect that. We also need to + * update the DSL Crypto Keys of all children inheriting their + * encryption root to point to the new target. Otherwise, the check + * function ensured that the encryption root will not change. 
+ */ + keylocation = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); + + VERIFY0(dsl_dataset_hold_obj(dp, + dsl_dir_phys(target)->dd_head_dataset_obj, FTAG, &targetds)); + VERIFY0(dsl_dataset_hold_obj(dp, + dsl_dir_phys(origin)->dd_head_dataset_obj, FTAG, &originds)); + + VERIFY0(dsl_prop_get_dd(origin, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + 1, ZAP_MAXVALUELEN, keylocation, NULL, B_FALSE)); + dsl_prop_set_sync_impl(targetds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, keylocation, tx); + dsl_prop_set_sync_impl(originds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + ZPROP_SRC_NONE, 0, 0, NULL, tx); + + rw_enter(&dp->dp_spa->spa_keystore.sk_wkeys_lock, RW_WRITER); + spa_keystore_change_key_sync_impl(rddobj, origin->dd_object, + target->dd_object, NULL, tx); + rw_exit(&dp->dp_spa->spa_keystore.sk_wkeys_lock); + + dsl_dataset_rele(targetds, FTAG); + dsl_dataset_rele(originds, FTAG); + kmem_free(keylocation, ZAP_MAXVALUELEN); +} + +int +dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd) +{ + int ret; + uint64_t pcrypt, crypt; + + /* + * Check that we are not making an unencrypted child of an + * encrypted parent. + */ + ret = dsl_dir_get_crypt(parentdd, &pcrypt); + if (ret != 0) + return (ret); + + ret = dsl_dir_get_crypt(origindd, &crypt); + if (ret != 0) + return (ret); + + ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT); + ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); + + if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) + return (SET_ERROR(EINVAL)); + + return (0); +} + + +int +dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp) +{ + int ret; + uint64_t pcrypt, crypt; + + if (dcp->cp_cmd != DCP_CMD_NONE) + return (SET_ERROR(EINVAL)); + + if (parentdd != NULL) { + ret = dsl_dir_get_crypt(parentdd, &pcrypt); + if (ret != 0) + return (ret); + } else { + pcrypt = ZIO_CRYPT_OFF; + } + + crypt = (dcp->cp_crypt == ZIO_CRYPT_INHERIT) ? 
pcrypt : dcp->cp_crypt; + + ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT); + ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); + + /* + * We can't create an unencrypted child of an encrypted parent + * under any circumstances. + */ + if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) + return (SET_ERROR(EINVAL)); + + /* check for valid dcp with no encryption (inherited or local) */ + if (crypt == ZIO_CRYPT_OFF) { + /* + * Must not specify encryption params. A keylocation of + * "none" is tolerated alongside a NULL keylocation. + */ + if (dcp->cp_wkey != NULL || + (dcp->cp_keylocation != NULL && + strcmp(dcp->cp_keylocation, "none") != 0)) + return (SET_ERROR(EINVAL)); + + return (0); + } + + /* + * We will now definitely be encrypting. Check the feature flag. When + * creating the pool the caller will check this for us since we won't + * technically have the feature activated yet. + */ + if (parentdd != NULL && + !spa_feature_is_enabled(parentdd->dd_pool->dp_spa, + SPA_FEATURE_ENCRYPTION)) { + return (SET_ERROR(EOPNOTSUPP)); + } + + /* handle inheritance */ + if (dcp->cp_wkey == NULL) { + ASSERT3P(parentdd, !=, NULL); + + /* key must be fully unspecified */ + if (dcp->cp_keylocation != NULL) + return (SET_ERROR(EINVAL)); + + /* parent must have a key to inherit */ + if (pcrypt == ZIO_CRYPT_OFF) + return (SET_ERROR(EINVAL)); + + /* check for parent key */ + ret = dmu_objset_check_wkey_loaded(parentdd); + if (ret != 0) + return (ret); + + return (0); + } + + /* At this point we should have a fully specified key. 
Check location */ + if (dcp->cp_keylocation == NULL || + !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE)) + return (SET_ERROR(EINVAL)); + + /* Must have fully specified keyformat */ + switch (dcp->cp_wkey->wk_keyformat) { + case ZFS_KEYFORMAT_HEX: + case ZFS_KEYFORMAT_RAW: + /* requires no pbkdf2 iters and salt */ + if (dcp->cp_wkey->wk_salt != 0 || + dcp->cp_wkey->wk_iters != 0) + return (SET_ERROR(EINVAL)); + break; + case ZFS_KEYFORMAT_PASSPHRASE: + /* requires pbkdf2 iters and salt */ + if (dcp->cp_wkey->wk_salt == 0 || + dcp->cp_wkey->wk_iters < MIN_PBKDF2_ITERATIONS) + return (SET_ERROR(EINVAL)); + break; + case ZFS_KEYFORMAT_NONE: + default: + /* keyformat must be specified and valid */ + return (SET_ERROR(EINVAL)); + } + + return (0); +} + +/* + * Set up the on-disk encryption state for a dsl dir that is being created. + * Clones share their origin's DSL Crypto Key object through a reference + * count; other encrypted datasets get a new DSL Crypto Key wrapped with + * either the wrapping key given in dcp or the parent's wrapping key. + */ +void +dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, + dsl_dataset_t *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx) +{ + dsl_pool_t *dp = dd->dd_pool; + uint64_t crypt; + dsl_wrapping_key_t *wkey; + + /* clones always use their origin's wrapping key */ + if (dsl_dir_is_clone(dd)) { + ASSERT3P(dcp, ==, NULL); + + /* + * If this is an encrypted clone we just need to clone the + * dck into dd. Zapify the dd so we can do that. + */ + if (origin->ds_dir->dd_crypto_obj != 0) { + dmu_buf_will_dirty(dd->dd_dbuf, tx); + dsl_dir_zapify(dd, tx); + + dd->dd_crypto_obj = + dsl_crypto_key_clone_sync(origin->ds_dir, tx); + VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object, + DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, + &dd->dd_crypto_obj, tx)); + } + + return; + } + + /* + * A NULL dcp at this point indicates this is the origin dataset + * which does not have an objset to encrypt. Raw receives will handle + * encryption separately later. In both cases we can simply return. 
+ */ + if (dcp == NULL || dcp->cp_cmd == DCP_CMD_RAW_RECV) + return; + + crypt = dcp->cp_crypt; + wkey = dcp->cp_wkey; + + /* figure out the effective crypt */ + if (crypt == ZIO_CRYPT_INHERIT && dd->dd_parent != NULL) + VERIFY0(dsl_dir_get_crypt(dd->dd_parent, &crypt)); + + /* if we aren't doing encryption just return */ + if (crypt == ZIO_CRYPT_OFF || crypt == ZIO_CRYPT_INHERIT) + return; + + /* zapify the dd so that we can add the crypto key obj to it */ + dmu_buf_will_dirty(dd->dd_dbuf, tx); + dsl_dir_zapify(dd, tx); + + /* use the new key if given or inherit from the parent */ + if (wkey == NULL) { + VERIFY0(spa_keystore_wkey_hold_dd(dp->dp_spa, + dd->dd_parent, FTAG, &wkey)); + } else { + wkey->wk_ddobj = dd->dd_object; + } + + ASSERT3P(wkey, !=, NULL); + + /* Create or clone the DSL crypto key and activate the feature */ + dd->dd_crypto_obj = dsl_crypto_key_create_sync(crypt, wkey, tx); + VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object, + DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj, + tx)); + dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION, tx); + + /* + * If we inherited the wrapping key we release our reference now. + * Otherwise, this is a new key and we need to load it into the + * keystore. 
+ */ + if (dcp->cp_wkey == NULL) { + dsl_wrapping_key_rele(wkey, FTAG); + } else { + VERIFY0(spa_keystore_load_wkey_impl(dp->dp_spa, wkey)); + } +} + +typedef struct dsl_crypto_recv_key_arg { + uint64_t dcrka_dsobj; + nvlist_t *dcrka_nvl; + dmu_objset_type_t dcrka_ostype; +} dsl_crypto_recv_key_arg_t; + +int +dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) +{ + int ret; + objset_t *mos = tx->tx_pool->dp_meta_objset; + objset_t *os; + dnode_t *mdn; + dsl_crypto_recv_key_arg_t *dcrka = arg; + nvlist_t *nvl = dcrka->dcrka_nvl; + dsl_dataset_t *ds = NULL; + uint8_t *buf = NULL; + uint_t len; + uint64_t intval, guid, nlevels, blksz, ibs, nblkptr; + boolean_t is_passphrase = B_FALSE; + + ret = dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_dsobj, FTAG, &ds); + if (ret != 0) + goto error; + + ASSERT(dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT); + + /* + * Read and check all the encryption values from the nvlist. We need + * all of the fields of a DSL Crypto Key, as well as a fully specified + * wrapping key. + */ + ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, &intval); + if (ret != 0 || intval >= ZIO_CRYPT_FUNCTIONS || + intval <= ZIO_CRYPT_OFF) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_GUID, &intval); + if (ret != 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* + * If this is an incremental receive make sure the given key guid + * matches the one we already have. 
+ */ + if (ds->ds_dir->dd_crypto_obj != 0) { + ret = zap_lookup(mos, ds->ds_dir->dd_crypto_obj, + DSL_CRYPTO_KEY_GUID, 8, 1, &guid); + if (ret != 0) + goto error; + + if (intval != guid) { + ret = SET_ERROR(EACCES); + goto error; + } + } + + ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, + &buf, &len); + if (ret != 0 || len != MASTER_KEY_MAX_LEN) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, + &buf, &len); + if (ret != 0 || len != SHA512_HMAC_KEYLEN) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &buf, &len); + if (ret != 0 || len != WRAPPING_IV_LEN) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &buf, &len); + if (ret != 0 || len != WRAPPING_MAC_LEN) { + ret = SET_ERROR(EINVAL); + goto error; + } + + + ret = nvlist_lookup_uint8_array(nvl, "portable_mac", &buf, &len); + if (ret != 0 || len != ZIO_OBJSET_MAC_LEN) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), + &intval); + if (ret != 0 || intval >= ZFS_KEYFORMAT_FORMATS || + intval == ZFS_KEYFORMAT_NONE) { + ret = SET_ERROR(EINVAL); + goto error; + } + + is_passphrase = (intval == ZFS_KEYFORMAT_PASSPHRASE); + + /* + * for raw receives we allow any number of pbkdf2iters since there + * won't be a chance for the user to change it. 
+ */ + ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), + &intval); + /* passphrase keys need non-zero iters; all other formats need zero */ + if (ret != 0 || (is_passphrase == (intval == 0))) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), + &intval); + /* likewise, a salt must be present exactly for passphrase keys */ + if (ret != 0 || (is_passphrase == (intval == 0))) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* raw receives also need info about the structure of the metadnode */ + ret = nvlist_lookup_uint64(nvl, "mdn_checksum", &intval); + if (ret != 0 || intval >= ZIO_CHECKSUM_LEGACY_FUNCTIONS) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint64(nvl, "mdn_compress", &intval); + if (ret != 0 || intval >= ZIO_COMPRESS_LEGACY_FUNCTIONS) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_uint64(nvl, "mdn_nlevels", &nlevels); + if (ret != 0 || nlevels > DN_MAX_LEVELS) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* too-small block sizes are invalid, too-large ones unsupported */ + ret = nvlist_lookup_uint64(nvl, "mdn_blksz", &blksz); + if (ret != 0 || blksz < SPA_MINBLOCKSIZE) { + ret = SET_ERROR(EINVAL); + goto error; + } else if (blksz > spa_maxblocksize(tx->tx_pool->dp_spa)) { + ret = SET_ERROR(ENOTSUP); + goto error; + } + + ret = nvlist_lookup_uint64(nvl, "mdn_indblkshift", &ibs); + if (ret != 0 || ibs < DN_MIN_INDBLKSHIFT || + ibs > DN_MAX_INDBLKSHIFT) { + ret = SET_ERROR(ENOTSUP); + goto error; + } + + ret = nvlist_lookup_uint64(nvl, "mdn_nblkptr", &nblkptr); + if (ret != 0 || nblkptr != DN_MAX_NBLKPTR) { + ret = SET_ERROR(ENOTSUP); + goto error; + } + + ret = dmu_objset_from_ds(ds, &os); + if (ret != 0) + goto error; + + /* + * Useraccounting is not portable and must be done with the keys loaded. + * Therefore, whenever we do any kind of receive the useraccounting + * must not be present. + */ + ASSERT0(os->os_flags & OBJSET_FLAG_USERACCOUNTING_COMPLETE); + + mdn = DMU_META_DNODE(os); + + /* + * If we already created the objset, make sure its unchangeable + * properties match the ones received in the nvlist. 
+ */ + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); + if (!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)) && + (mdn->dn_nlevels != nlevels || mdn->dn_datablksz != blksz || + mdn->dn_indblkshift != ibs || mdn->dn_nblkptr != nblkptr)) { + ret = SET_ERROR(EINVAL); + goto error; + } + rrw_exit(&ds->ds_bp_rwlock, FTAG); + + dsl_dataset_rele(ds, FTAG); + return (0); + +error: + if (ds != NULL) + dsl_dataset_rele(ds, FTAG); + return (ret); +} + +static void +dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) +{ + dsl_crypto_recv_key_arg_t *dcrka = arg; + uint64_t dsobj = dcrka->dcrka_dsobj; + nvlist_t *nvl = dcrka->dcrka_nvl; + dsl_pool_t *dp = tx->tx_pool; + objset_t *mos = dp->dp_meta_objset; + dsl_dataset_t *ds; + objset_t *os; + dnode_t *mdn; + uint8_t *keydata, *hmac_keydata, *iv, *mac, *portable_mac; + uint_t len; + uint64_t rddobj, one = 1; + uint64_t crypt, guid, keyformat, iters, salt; + uint64_t compress, checksum, nlevels, blksz, ibs; + char *keylocation = "prompt"; + + VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); + VERIFY0(dmu_objset_from_ds(ds, &os)); + mdn = DMU_META_DNODE(os); + + /* lookup the values we need to create the DSL Crypto Key and objset */ + crypt = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE); + guid = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_GUID); + keyformat = fnvlist_lookup_uint64(nvl, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT)); + iters = fnvlist_lookup_uint64(nvl, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS)); + salt = fnvlist_lookup_uint64(nvl, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT)); + VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, + &keydata, &len)); + VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, + &hmac_keydata, &len)); + VERIFY0(nvlist_lookup_uint8_array(nvl, "portable_mac", &portable_mac, + &len)); + VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &iv, &len)); + VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &mac, &len)); + compress = fnvlist_lookup_uint64(nvl, 
"mdn_compress"); + checksum = fnvlist_lookup_uint64(nvl, "mdn_checksum"); + nlevels = fnvlist_lookup_uint64(nvl, "mdn_nlevels"); + blksz = fnvlist_lookup_uint64(nvl, "mdn_blksz"); + ibs = fnvlist_lookup_uint64(nvl, "mdn_indblkshift"); + + /* if we haven't created an objset for the ds yet, do that now */ + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); + if (BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) { + (void) dmu_objset_create_impl_dnstats(dp->dp_spa, ds, + dsl_dataset_get_blkptr(ds), dcrka->dcrka_ostype, nlevels, + blksz, ibs, tx); + } + rrw_exit(&ds->ds_bp_rwlock, FTAG); + + /* + * Set the portable MAC. The local MAC will always be zero since the + * incoming data will all be portable and user accounting will be + * deferred until the next mount. Afterwards, flag the os to be + * written out raw next time. + */ + arc_release(os->os_phys_buf, &os->os_phys_buf); + bcopy(portable_mac, os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN); + bzero(os->os_phys->os_local_mac, ZIO_OBJSET_MAC_LEN); + os->os_next_write_raw = B_TRUE; + + /* set metadnode compression and checksum */ + mdn->dn_compress = compress; + mdn->dn_checksum = checksum; + dsl_dataset_dirty(ds, tx); + + /* if this is a new dataset setup the DSL Crypto Key. 
*/ + if (ds->ds_dir->dd_crypto_obj == 0) { + /* zapify the dsl dir so we can add the key object to it */ + dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); + dsl_dir_zapify(ds->ds_dir, tx); + + /* create the DSL Crypto Key on disk and activate the feature */ + ds->ds_dir->dd_crypto_obj = zap_create(mos, + DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, + ds->ds_dir->dd_crypto_obj, DSL_CRYPTO_KEY_REFCOUNT, + sizeof (uint64_t), 1, &one, tx)); + + dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION, tx); + ds->ds_feature_inuse[SPA_FEATURE_ENCRYPTION] = B_TRUE; + + /* save the dd_crypto_obj on disk */ + VERIFY0(zap_add(mos, ds->ds_dir->dd_object, + DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, + &ds->ds_dir->dd_crypto_obj, tx)); + + /* + * Set the keylocation to prompt by default. If keylocation + * has been provided via the properties, this will be overridden + * later. + */ + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, + keylocation, tx); + + /* a newly received dataset is recorded as its own encryption root */ + rddobj = ds->ds_dir->dd_object; + } else { + /* existing key: keep the encryption root it already has */ + VERIFY0(dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj)); + } + + /* sync the key data to the ZAP object on disk */ + dsl_crypto_key_sync_impl(mos, ds->ds_dir->dd_crypto_obj, crypt, + rddobj, guid, iv, mac, keydata, hmac_keydata, keyformat, salt, + iters, tx); + + dsl_dataset_rele(ds, FTAG); +} + +/* + * This function is used to sync an nvlist representing a DSL Crypto Key and + * the associated encryption parameters. The key will be written exactly as is + * without wrapping it. 
+ */ +int +dsl_crypto_recv_key(const char *poolname, uint64_t dsobj, + dmu_objset_type_t ostype, nvlist_t *nvl) +{ + dsl_crypto_recv_key_arg_t dcrka; + + dcrka.dcrka_dsobj = dsobj; + dcrka.dcrka_nvl = nvl; + dcrka.dcrka_ostype = ostype; + + return (dsl_sync_task(poolname, dsl_crypto_recv_key_check, + dsl_crypto_recv_key_sync, &dcrka, 1, ZFS_SPACE_CHECK_NORMAL)); +} + +int +dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out) +{ + int ret; + objset_t *os; + dnode_t *mdn; + uint64_t rddobj; + nvlist_t *nvl = NULL; + uint64_t dckobj = ds->ds_dir->dd_crypto_obj; + dsl_dir_t *rdd = NULL; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + uint64_t crypt = 0, guid = 0, format = 0, iters = 0, salt = 0; + uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; + uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; + uint8_t iv[WRAPPING_IV_LEN]; + uint8_t mac[WRAPPING_MAC_LEN]; + + ASSERT(dckobj != 0); + + VERIFY0(dmu_objset_from_ds(ds, &os)); + mdn = DMU_META_DNODE(os); + + ret = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP); + if (ret != 0) + goto error; + + /* lookup values from the DSL Crypto Key */ + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, + &crypt); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, &guid); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, + MASTER_KEY_MAX_LEN, raw_keydata); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, + SHA512_HMAC_KEYLEN, raw_hmac_keydata); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, + iv); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, + mac); + if (ret != 0) + goto error; + + /* + * Lookup wrapping key properties. 
An early version of the code did + * not correctly add these values to the wrapping key or the DSL + * Crypto Key on disk for non encryption roots, so to be safe we + * always take the slightly circuitous route of looking it up from + * the encryption root's key. + */ + ret = dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj); + if (ret != 0) + goto error; + + dsl_pool_config_enter(dp, FTAG); + + ret = dsl_dir_hold_obj(dp, rddobj, NULL, FTAG, &rdd); + if (ret != 0) + goto error_unlock; + + ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &format); + if (ret != 0) + goto error_unlock; + + if (format == ZFS_KEYFORMAT_PASSPHRASE) { + ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters); + if (ret != 0) + goto error_unlock; + + ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &salt); + if (ret != 0) + goto error_unlock; + } + + dsl_dir_rele(rdd, FTAG); + dsl_pool_config_exit(dp, FTAG); + + fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, crypt); + fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_GUID, guid); + VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, + raw_keydata, MASTER_KEY_MAX_LEN)); + VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, + raw_hmac_keydata, SHA512_HMAC_KEYLEN)); + VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_IV, iv, + WRAPPING_IV_LEN)); + VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, mac, + WRAPPING_MAC_LEN)); + VERIFY0(nvlist_add_uint8_array(nvl, "portable_mac", + os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN)); + fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), format); + fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), iters); + fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), salt); + fnvlist_add_uint64(nvl, "mdn_checksum", mdn->dn_checksum); + fnvlist_add_uint64(nvl, "mdn_compress", 
mdn->dn_compress); + fnvlist_add_uint64(nvl, "mdn_nlevels", mdn->dn_nlevels); + fnvlist_add_uint64(nvl, "mdn_blksz", mdn->dn_datablksz); + fnvlist_add_uint64(nvl, "mdn_indblkshift", mdn->dn_indblkshift); + fnvlist_add_uint64(nvl, "mdn_nblkptr", mdn->dn_nblkptr); + + *nvl_out = nvl; + return (0); + +error_unlock: + dsl_pool_config_exit(dp, FTAG); +error: + if (rdd != NULL) + dsl_dir_rele(rdd, FTAG); + nvlist_free(nvl); + + *nvl_out = NULL; + return (ret); +} + +uint64_t +dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, + dmu_tx_t *tx) +{ + dsl_crypto_key_t dck; + uint64_t one = 1; + + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(crypt, >, ZIO_CRYPT_OFF); + + /* create the DSL Crypto Key ZAP object */ + dck.dck_obj = zap_create(tx->tx_pool->dp_meta_objset, + DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); + + /* fill in the key (on the stack) and sync it to disk */ + dck.dck_wkey = wkey; + VERIFY0(zio_crypt_key_init(crypt, &dck.dck_key)); + + dsl_crypto_key_sync(&dck, tx); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, + DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &one, tx)); + + zio_crypt_key_destroy(&dck.dck_key); + bzero(&dck.dck_key, sizeof (zio_crypt_key_t)); + + return (dck.dck_obj); +} + +uint64_t +dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx) +{ + objset_t *mos = tx->tx_pool->dp_meta_objset; + + ASSERT(dmu_tx_is_syncing(tx)); + + VERIFY0(zap_increment(mos, origindd->dd_crypto_obj, + DSL_CRYPTO_KEY_REFCOUNT, 1, tx)); + + return (origindd->dd_crypto_obj); +} + +void +dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx) +{ + objset_t *mos = tx->tx_pool->dp_meta_objset; + uint64_t refcnt; + + /* Decrement the refcount, destroy if this is the last reference */ + VERIFY0(zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_REFCOUNT, + sizeof (uint64_t), 1, &refcnt)); + + if (refcnt != 1) { + VERIFY0(zap_increment(mos, dckobj, DSL_CRYPTO_KEY_REFCOUNT, + -1, tx)); + } else { + 
VERIFY0(zap_destroy(mos, dckobj, tx)); + } +} + +void +dsl_dataset_crypt_stats(dsl_dataset_t *ds, nvlist_t *nv) +{ + uint64_t intval; + dsl_dir_t *dd = ds->ds_dir; + dsl_dir_t *enc_root; + char buf[ZFS_MAX_DATASET_NAME_LEN]; + + if (dd->dd_crypto_obj == 0) + return; + + intval = dsl_dataset_get_keystatus(dd); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYSTATUS, intval); + + if (dsl_dir_get_crypt(dd, &intval) == 0) + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_ENCRYPTION, intval); + if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_GUID, 8, 1, &intval) == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEY_GUID, intval); + } + if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &intval) == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYFORMAT, intval); + } + if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &intval) == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_PBKDF2_SALT, intval); + } + if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &intval) == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_PBKDF2_ITERS, intval); + } + + if (dsl_dir_get_encryption_root_ddobj(dd, &intval) == 0) { + VERIFY0(dsl_dir_hold_obj(dd->dd_pool, intval, NULL, FTAG, + &enc_root)); + dsl_dir_name(enc_root, buf); + dsl_dir_rele(enc_root, FTAG); + dsl_prop_nvlist_add_string(nv, ZFS_PROP_ENCRYPTION_ROOT, buf); + } +} + +int +spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt) +{ + int ret; + dsl_crypto_key_t *dck = NULL; + + /* look up the key from the spa's keystore */ + ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); + if (ret != 0) + goto error; + + ret = zio_crypt_key_get_salt(&dck->dck_key, salt); + if (ret != 0) + goto error; + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + return (0); + +error: + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + 
return (ret); +} + +/* + * Objset blocks are a special case for MAC generation. These blocks have 2 + * 256-bit MACs which are embedded within the block itself, rather than a + * single 128 bit MAC. As a result, this function handles encoding and decoding + * the MACs on its own, unlike other functions in this file. + */ +int +spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, + abd_t *abd, uint_t datalen, boolean_t byteswap) +{ + int ret; + dsl_crypto_key_t *dck = NULL; + void *buf = abd_borrow_buf_copy(abd, datalen); + objset_phys_t *osp = buf; + uint8_t portable_mac[ZIO_OBJSET_MAC_LEN]; + uint8_t local_mac[ZIO_OBJSET_MAC_LEN]; + + /* look up the key from the spa's keystore */ + ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); + if (ret != 0) + goto error; + + /* calculate both HMACs */ + ret = zio_crypt_do_objset_hmacs(&dck->dck_key, buf, datalen, + byteswap, portable_mac, local_mac); + if (ret != 0) + goto error; + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + /* if we are generating encode the HMACs in the objset_phys_t */ + if (generate) { + bcopy(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN); + bcopy(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN); + abd_return_buf_copy(abd, buf, datalen); + return (0); + } + + if (bcmp(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN) != 0 || + bcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0) { + abd_return_buf(abd, buf, datalen); + return (SET_ERROR(ECKSUM)); + } + + abd_return_buf(abd, buf, datalen); + + return (0); + +error: + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + abd_return_buf(abd, buf, datalen); + return (ret); +} + +int +spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, abd_t *abd, + uint_t datalen, uint8_t *mac) +{ + int ret; + dsl_crypto_key_t *dck = NULL; + uint8_t *buf = abd_borrow_buf_copy(abd, datalen); + uint8_t digestbuf[ZIO_DATA_MAC_LEN]; + + /* look up the key from the spa's keystore */ + ret = 
spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); + if (ret != 0) + goto error; + + /* perform the hmac */ + ret = zio_crypt_do_hmac(&dck->dck_key, buf, datalen, + digestbuf, ZIO_DATA_MAC_LEN); + if (ret != 0) + goto error; + + abd_return_buf(abd, buf, datalen); + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + /* + * Truncate and fill in mac buffer if we were asked to generate a MAC. + * Otherwise verify that the MAC matched what we expected. + */ + if (generate) { + bcopy(digestbuf, mac, ZIO_DATA_MAC_LEN); + return (0); + } + + if (bcmp(digestbuf, mac, ZIO_DATA_MAC_LEN) != 0) + return (SET_ERROR(ECKSUM)); + + return (0); + +error: + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + abd_return_buf(abd, buf, datalen); + return (ret); +} + +/* + * This function serves as a multiplexer for encryption and decryption of + * all blocks (except the L2ARC). For encryption, it will populate the IV, + * salt, MAC, and cabd (the ciphertext). On decryption it will simply use + * these fields to populate pabd (the plaintext). + */ +/* ARGSUSED */ +int +spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, uint64_t dsobj, + const blkptr_t *bp, uint64_t txgid, uint_t datalen, abd_t *pabd, + abd_t *cabd, uint8_t *iv, uint8_t *mac, uint8_t *salt, boolean_t *no_crypt) +{ + int ret; + dmu_object_type_t ot = BP_GET_TYPE(bp); + dsl_crypto_key_t *dck = NULL; + uint8_t *plainbuf = NULL, *cipherbuf = NULL; + + ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION)); + ASSERT(!BP_IS_EMBEDDED(bp)); + ASSERT(BP_IS_ENCRYPTED(bp)); + + /* look up the key from the spa's keystore */ + ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); + if (ret != 0) + return (ret); + + if (encrypt) { + plainbuf = abd_borrow_buf_copy(pabd, datalen); + cipherbuf = abd_borrow_buf(cabd, datalen); + } else { + plainbuf = abd_borrow_buf(pabd, datalen); + cipherbuf = abd_borrow_buf_copy(cabd, datalen); + } + + /* + * Both encryption and decryption functions need a salt for key + * generation and an IV. 
When encrypting a non-dedup block, we + * generate the salt and IV randomly to be stored by the caller. Dedup + * blocks perform a (more expensive) HMAC of the plaintext to obtain + * the salt and the IV. ZIL blocks have their salt and IV generated + * at allocation time in zio_alloc_zil(). On decryption, we simply use + * the provided values. + */ + if (encrypt && ot != DMU_OT_INTENT_LOG && !BP_GET_DEDUP(bp)) { + ret = zio_crypt_key_get_salt(&dck->dck_key, salt); + if (ret != 0) + goto error; + + ret = zio_crypt_generate_iv(iv); + if (ret != 0) + goto error; + } else if (encrypt && BP_GET_DEDUP(bp)) { + ret = zio_crypt_generate_iv_salt_dedup(&dck->dck_key, + plainbuf, datalen, iv, salt); + if (ret != 0) + goto error; + } + + /* call lower level function to perform encryption / decryption */ + ret = zio_do_crypt_data(encrypt, &dck->dck_key, salt, ot, iv, mac, + datalen, BP_SHOULD_BYTESWAP(bp), plainbuf, cipherbuf, no_crypt); + if (ret != 0) + goto error; + + if (encrypt) { + abd_return_buf(pabd, plainbuf, datalen); + abd_return_buf_copy(cabd, cipherbuf, datalen); + } else { + abd_return_buf_copy(pabd, plainbuf, datalen); + abd_return_buf(cabd, cipherbuf, datalen); + } + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + return (0); + +error: + if (encrypt) { + /* zero out any state we might have changed while encrypting */ + bzero(salt, ZIO_DATA_SALT_LEN); + bzero(iv, ZIO_DATA_IV_LEN); + bzero(mac, ZIO_DATA_MAC_LEN); + abd_return_buf(pabd, plainbuf, datalen); + abd_return_buf_copy(cabd, cipherbuf, datalen); + } else { + abd_return_buf_copy(pabd, plainbuf, datalen); + abd_return_buf(cabd, cipherbuf, datalen); + } + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + return (ret); +} diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c index de16398e36b0..8109d757b31f 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dataset.c +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c @@ -376,8 +376,8 @@ dsl_dataset_try_add_ref(dsl_pool_t *dp, 
dsl_dataset_t *ds, void *tag) } int -dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, - dsl_dataset_t **dsp) +dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp) { objset_t *mos = dp->dp_meta_objset; dmu_buf_t *dbuf; @@ -535,11 +535,27 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); *dsp = ds; + + if ((flags & DS_HOLD_FLAG_DECRYPT) && ds->ds_dir->dd_crypto_obj != 0) { + err = spa_keystore_create_mapping(dp->dp_spa, ds, ds); + if (err != 0) { + dsl_dataset_rele(ds, tag); + return (SET_ERROR(EACCES)); + } + } + return (0); } int -dsl_dataset_hold(dsl_pool_t *dp, const char *name, +dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, + dsl_dataset_t **dsp) +{ + return (dsl_dataset_hold_obj_flags(dp, dsobj, 0, tag, dsp)); +} + +int +dsl_dataset_hold_flags(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp) { dsl_dir_t *dd; @@ -555,7 +571,7 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name, ASSERT(dsl_pool_config_held(dp)); obj = dsl_dir_phys(dd)->dd_head_dataset_obj; if (obj != 0) - err = dsl_dataset_hold_obj(dp, obj, tag, &ds); + err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, &ds); else err = SET_ERROR(ENOENT); @@ -564,16 +580,18 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name, dsl_dataset_t *snap_ds; if (*snapname++ != '@') { - dsl_dataset_rele(ds, tag); + dsl_dataset_rele_flags(ds, flags, tag); dsl_dir_rele(dd, FTAG); return (SET_ERROR(ENOENT)); } dprintf("looking for snapshot '%s'\n", snapname); err = dsl_dataset_snap_lookup(ds, snapname, &obj); - if (err == 0) - err = dsl_dataset_hold_obj(dp, obj, tag, &snap_ds); - dsl_dataset_rele(ds, tag); + if (err == 0) { + err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, + &snap_ds); + } + dsl_dataset_rele_flags(ds, flags, tag); if (err == 0) { 
mutex_enter(&snap_ds->ds_lock); @@ -591,14 +609,21 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name, } int -dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, +dsl_dataset_hold(dsl_pool_t *dp, const char *name, void *tag, + dsl_dataset_t **dsp) +{ + return (dsl_dataset_hold_flags(dp, name, 0, tag, dsp)); +} + +int +dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp) { - int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); + int err = dsl_dataset_hold_obj_flags(dp, dsobj, flags, tag, dsp); if (err != 0) return (err); if (!dsl_dataset_tryown(*dsp, tag)) { - dsl_dataset_rele(*dsp, tag); + dsl_dataset_rele_flags(*dsp, flags, tag); *dsp = NULL; return (SET_ERROR(EBUSY)); } @@ -606,14 +631,14 @@ dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, } int -dsl_dataset_own(dsl_pool_t *dp, const char *name, +dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp) { - int err = dsl_dataset_hold(dp, name, tag, dsp); + int err = dsl_dataset_hold_flags(dp, name, flags, tag, dsp); if (err != 0) return (err); if (!dsl_dataset_tryown(*dsp, tag)) { - dsl_dataset_rele(*dsp, tag); + dsl_dataset_rele_flags(*dsp, flags, tag); return (SET_ERROR(EBUSY)); } return (0); @@ -689,13 +714,24 @@ dsl_dataset_namelen(dsl_dataset_t *ds) } void -dsl_dataset_rele(dsl_dataset_t *ds, void *tag) +dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag) { + if (ds->ds_dir != NULL && ds->ds_dir->dd_crypto_obj != 0 && + (flags & DS_HOLD_FLAG_DECRYPT)) { + (void) spa_keystore_remove_mapping(ds->ds_dir->dd_pool->dp_spa, + ds->ds_object, ds); + } dmu_buf_rele(ds->ds_dbuf, tag); } void -dsl_dataset_disown(dsl_dataset_t *ds, void *tag) +dsl_dataset_rele(dsl_dataset_t *ds, void *tag) +{ + dsl_dataset_rele_flags(ds, 0, tag); +} + +void +dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag) { ASSERT3P(ds->ds_owner, ==, tag); ASSERT(ds->ds_dbuf != NULL); @@ -704,7 +740,7 
@@ dsl_dataset_disown(dsl_dataset_t *ds, void *tag) ds->ds_owner = NULL; mutex_exit(&ds->ds_lock); dsl_dataset_long_rele(ds, tag); - dsl_dataset_rele(ds, tag); + dsl_dataset_rele_flags(ds, flags, tag); } boolean_t @@ -733,7 +769,7 @@ dsl_dataset_has_owner(dsl_dataset_t *ds) return (rv); } -static void +void dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) { spa_t *spa = dmu_tx_pool(tx)->dp_spa; @@ -763,7 +799,7 @@ dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, - uint64_t flags, dmu_tx_t *tx) + dsl_crypto_params_t *dcp, uint64_t flags, dmu_tx_t *tx) { dsl_pool_t *dp = dd->dd_pool; dmu_buf_t *dbuf; @@ -862,6 +898,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, } } + /* handle encryption */ + dsl_dataset_create_crypt_sync(dsobj, dd, origin, dcp, tx); + if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; @@ -884,6 +923,8 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) zio_t *zio; bzero(&os->os_zil_header, sizeof (os->os_zil_header)); + if (os->os_encrypted) + os->os_next_write_raw = B_TRUE; zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); dsl_dataset_sync(ds, zio, tx); @@ -897,7 +938,8 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) uint64_t dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, - dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) + dsl_dataset_t *origin, uint64_t flags, cred_t *cr, + dsl_crypto_params_t *dcp, dmu_tx_t *tx) { dsl_pool_t *dp = pdd->dd_pool; uint64_t dsobj, ddobj; @@ -909,7 +951,7 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); - dsobj = dsl_dataset_create_sync_dd(dd, origin, + dsobj = dsl_dataset_create_sync_dd(dd, origin, dcp, flags & 
~DS_CREATE_FLAG_NODIRTY, tx); dsl_deleg_set_create_perms(dd, tx, cr); @@ -1795,6 +1837,10 @@ get_receive_resume_stats_impl(dsl_dataset_t *ds) DS_FIELD_RESUME_COMPRESSOK) == 0) { fnvlist_add_boolean(token_nv, "compressok"); } + if (zap_contains(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_RAWOK) == 0) { + fnvlist_add_boolean(token_nv, "rawok"); + } packed = fnvlist_pack(token_nv, &packed_size); fnvlist_free(token_nv); compressed = kmem_alloc(packed_size, KM_SLEEP); @@ -2116,6 +2162,7 @@ dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value, void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { + int err; dsl_pool_t *dp = ds->ds_dir->dd_pool; ASSERT(dsl_pool_config_held(dp)); @@ -2160,13 +2207,24 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, dsl_get_userrefs(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, - dsl_get_defer_destroy(ds)); + DS_IS_DEFER_DESTROY(ds) ? 1 : 0); + dsl_dataset_crypt_stats(ds, nv); if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { - uint64_t written; - if (dsl_get_written(ds, &written) == 0) { - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, - written); + uint64_t written, comp, uncomp; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + dsl_dataset_t *prev; + + err = dsl_dataset_hold_obj(dp, + dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); + if (err == 0) { + err = dsl_dataset_space_written(prev, ds, &written, + &comp, &uncomp); + dsl_dataset_rele(prev, FTAG); + if (err == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, + written); + } } } @@ -2568,7 +2626,7 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) fnvlist_add_string(ddra->ddra_result, "target", namebuf); cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", - ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); + ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, NULL, tx); VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); @@ -2650,6 +2708,23 @@ 
dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) return (SET_ERROR(EXDEV)); } + snap = list_head(&ddpa->shared_snaps); + if (snap == NULL) { + err = SET_ERROR(ENOENT); + goto out; + } + origin_ds = snap->ds; + + /* + * Encrypted clones share a DSL Crypto Key with their origin's dsl dir. + * When doing a promote we must make sure the encryption root for + * both the target and the target's origin does not change to avoid + * needing to rewrap encryption keys + */ + err = dsl_dataset_promote_crypt_check(hds->ds_dir, origin_ds->ds_dir); + if (err != 0) + goto out; + /* * Compute and check the amount of space to transfer. Since this is * so expensive, don't do the preliminary check. @@ -2833,6 +2908,8 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, NULL, FTAG, &odd)); + dsl_dataset_promote_crypt_sync(hds->ds_dir, odd, tx); + /* change origin's next snap */ dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj; diff --git a/usr/src/uts/common/fs/zfs/dsl_destroy.c b/usr/src/uts/common/fs/zfs/dsl_destroy.c index d11d8c5f72a6..d76f67cea42f 100644 --- a/usr/src/uts/common/fs/zfs/dsl_destroy.c +++ b/usr/src/uts/common/fs/zfs/dsl_destroy.c @@ -635,8 +635,8 @@ old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) ka.ds = ds; ka.tx = tx; VERIFY0(traverse_dataset(ds, - dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST, - kill_blkptr, &ka)); + dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST | + TRAVERSE_NO_DECRYPT, kill_blkptr, &ka)); ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || dsl_dataset_phys(ds)->ds_unique_bytes == 0); } @@ -739,6 +739,11 @@ dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx) for (t = 0; t < DD_USED_NUM; t++) ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]); + if (dd->dd_crypto_obj != 0) { + dsl_crypto_key_destroy_sync(dd->dd_crypto_obj, tx); + (void) spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object); + } + 
VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx)); VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx)); VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx)); @@ -982,7 +987,8 @@ dsl_destroy_head(const char *name) * remove the objects from open context so that the txg sync * is not too long. */ - error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os); + error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_FALSE, + FTAG, &os); if (error == 0) { uint64_t prev_snap_txg = dsl_dataset_phys(dmu_objset_ds(os))-> @@ -993,7 +999,7 @@ dsl_destroy_head(const char *name) (void) dmu_free_long_object(os, obj); /* sync out all frees */ txg_wait_synced(dmu_objset_pool(os), 0); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); } } diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c index ace9716cd025..9695ae5838bc 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dir.c +++ b/usr/src/uts/common/fs/zfs/dsl_dir.c @@ -158,6 +158,7 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, { dmu_buf_t *dbuf; dsl_dir_t *dd; + dmu_object_info_t doi; int err; ASSERT(dsl_pool_config_held(dp)); @@ -166,14 +167,11 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, if (err != 0) return (err); dd = dmu_buf_get_user(dbuf); -#ifdef ZFS_DEBUG - { - dmu_object_info_t doi; - dmu_object_info_from_db(dbuf, &doi); - ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR); - ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); - } -#endif + + dmu_object_info_from_db(dbuf, &doi); + ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR); + ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); + if (dd == NULL) { dsl_dir_t *winner; @@ -181,6 +179,15 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, dd->dd_object = ddobj; dd->dd_dbuf = dbuf; dd->dd_pool = dp; + + if (dsl_dir_is_zapified(dd) && + zap_contains(dp->dp_meta_objset, ddobj, + DD_FIELD_CRYPTO_KEY_OBJ) == 0) { + VERIFY0(zap_lookup(dp->dp_meta_objset, 
+ ddobj, DD_FIELD_CRYPTO_KEY_OBJ, + sizeof (uint64_t), 1, &dd->dd_crypto_obj)); + } + mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); dsl_prop_init(dd); @@ -911,6 +918,7 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; + dmu_buf_rele(dbuf, FTAG); return (ddobj); @@ -1023,6 +1031,8 @@ dsl_dir_get_snapshot_count(dsl_dir_t *dd, uint64_t *count) void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) { + uint64_t intval; + mutex_enter(&dd->dd_lock); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dsl_dir_get_quota(dd)); @@ -1042,14 +1052,19 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) } mutex_exit(&dd->dd_lock); - uint64_t count; - if (dsl_dir_get_filesystem_count(dd, &count) == 0) { - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_FILESYSTEM_COUNT, - count); - } - if (dsl_dir_get_snapshot_count(dd, &count) == 0) { - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_SNAPSHOT_COUNT, - count); + if (dsl_dir_is_zapified(dd)) { + objset_t *os = dd->dd_pool->dp_meta_objset; + + if (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, + sizeof (intval), 1, &intval) == 0) { + dsl_prop_nvlist_add_uint64(nv, + ZFS_PROP_FILESYSTEM_COUNT, intval); + } + if (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, + sizeof (intval), 1, &intval) == 0) { + dsl_prop_nvlist_add_uint64(nv, + ZFS_PROP_SNAPSHOT_COUNT, intval); + } } if (dsl_dir_is_clone(dd)) { @@ -1863,6 +1878,14 @@ dsl_dir_rename_check(void *arg, dmu_tx_t *tx) } } + /* check for encryption errors */ + error = dsl_dir_rename_crypt_check(dd, newparent); + if (error != 0) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); + return (SET_ERROR(EACCES)); + } + /* no rename into our descendant */ if (closest_common_ancestor(dd, newparent) == dd) { dsl_dir_rele(newparent, FTAG); diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c 
b/usr/src/uts/common/fs/zfs/dsl_pool.c index ac7079fdc757..9d84a32e9175 100644 --- a/usr/src/uts/common/fs/zfs/dsl_pool.c +++ b/usr/src/uts/common/fs/zfs/dsl_pool.c @@ -382,7 +382,8 @@ dsl_pool_close(dsl_pool_t *dp) } dsl_pool_t * -dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) +dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp, + uint64_t txg) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); @@ -396,6 +397,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); + spa->spa_meta_objset = dp->dp_meta_objset; /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, @@ -433,8 +435,19 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) dsl_pool_create_origin(dp, tx); + /* + * Some features may be needed when creating the root dataset, so we + * create the feature objects here. 
+ */ + if (spa_version(spa) >= SPA_VERSION_FEATURES) + spa_feature_create_zap_objects(spa, tx); + + if (dcp != NULL && dcp->cp_crypt != ZIO_CRYPT_OFF && + dcp->cp_crypt != ZIO_CRYPT_INHERIT) + spa_feature_enable(spa, SPA_FEATURE_ENCRYPTION, tx); + /* create the root dataset */ - obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); + obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, dcp, 0, tx); /* create the root objset */ VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); @@ -885,7 +898,7 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) /* create the origin dir, ds, & snap-ds */ dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, - NULL, 0, kcred, tx); + NULL, 0, kcred, NULL, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, diff --git a/usr/src/uts/common/fs/zfs/dsl_prop.c b/usr/src/uts/common/fs/zfs/dsl_prop.c index aeefbf39fa26..e45338ea83a5 100644 --- a/usr/src/uts/common/fs/zfs/dsl_prop.c +++ b/usr/src/uts/common/fs/zfs/dsl_prop.c @@ -922,7 +922,7 @@ typedef enum dsl_prop_getflags { DSL_PROP_GET_INHERITING = 0x1, /* searching parent of target ds */ DSL_PROP_GET_SNAPSHOT = 0x2, /* snapshot dataset */ DSL_PROP_GET_LOCAL = 0x4, /* local properties */ - DSL_PROP_GET_RECEIVED = 0x8 /* received properties */ + DSL_PROP_GET_RECEIVED = 0x8, /* received properties */ } dsl_prop_getflags_t; static int @@ -1089,6 +1089,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp, if (err) break; } + out: return (err); } diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c index 67ee4d48cd71..7b3fcdeb369c 100644 --- a/usr/src/uts/common/fs/zfs/dsl_scan.c +++ b/usr/src/uts/common/fs/zfs/dsl_scan.c @@ -641,7 +641,7 @@ dsl_scan_zil(dsl_pool_t *dp, zil_header_t *zh) zilog = zil_alloc(dp->dp_meta_objset, zh); (void) zil_parse(zilog, dsl_scan_zil_block, 
dsl_scan_zil_record, &zsa, - claim_txg); + claim_txg, B_FALSE); zil_free(zilog); } @@ -653,6 +653,7 @@ dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp, { zbookmark_phys_t czb; arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; + int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD; if (zfs_no_scrub_prefetch) return; @@ -661,11 +662,16 @@ dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp, (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)) return; + if (BP_IS_PROTECTED(bp)) { + ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_DNODE); + ASSERT3U(BP_GET_LEVEL(bp), ==, 0); + zio_flags |= ZIO_FLAG_RAW; + } + SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid); (void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, bp, - NULL, NULL, ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD, &flags, &czb); + NULL, NULL, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, &czb); } static boolean_t @@ -751,6 +757,11 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; arc_buf_t *buf; + if (BP_IS_PROTECTED(bp)) { + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + zio_flags |= ZIO_FLAG_RAW; + } + err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); if (err) { diff --git a/usr/src/uts/common/fs/zfs/hkdf.c b/usr/src/uts/common/fs/zfs/hkdf.c new file mode 100644 index 000000000000..1d6cc898e434 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/hkdf.c @@ -0,0 +1,173 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include + +static int +hkdf_sha512_extract(uint8_t *salt, uint_t salt_len, uint8_t *key_material, + uint_t km_len, uint8_t *out_buf) +{ + int ret; + crypto_mechanism_t mech; + crypto_key_t key; + crypto_data_t input_cd, output_cd; + + /* initialize HMAC mechanism */ + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + mech.cm_param = NULL; + mech.cm_param_len = 0; + + /* initialize the salt as a crypto key */ + key.ck_format = CRYPTO_KEY_RAW; + key.ck_length = CRYPTO_BYTES2BITS(salt_len); + key.ck_data = salt; + + /* initialize crypto data for the input and output data */ + input_cd.cd_format = CRYPTO_DATA_RAW; + input_cd.cd_offset = 0; + input_cd.cd_length = km_len; + input_cd.cd_raw.iov_base = (char *)key_material; + input_cd.cd_raw.iov_len = input_cd.cd_length; + + output_cd.cd_format = CRYPTO_DATA_RAW; + output_cd.cd_offset = 0; + output_cd.cd_length = SHA512_DIGEST_LENGTH; + output_cd.cd_raw.iov_base = (char *)out_buf; + output_cd.cd_raw.iov_len = output_cd.cd_length; + + ret = crypto_mac(&mech, &input_cd, &key, NULL, &output_cd, NULL); + if (ret != CRYPTO_SUCCESS) + return (SET_ERROR(EIO)); + + return (0); +} + +static int +hkdf_sha512_expand(uint8_t *extract_key, uint8_t *info, uint_t info_len, + uint8_t *out_buf, uint_t out_len) +{ + int ret; + crypto_mechanism_t mech; + crypto_context_t ctx; + crypto_key_t key; + crypto_data_t T_cd, info_cd, c_cd; + uint_t i, T_len = 0, pos = 0; + uint8_t c; + uint_t N = (out_len + SHA512_DIGEST_LENGTH - 1) / SHA512_DIGEST_LENGTH; + uint8_t T[SHA512_DIGEST_LENGTH]; + + if (N > 255) /* RFC 5869: at most 255 output blocks */ + return (SET_ERROR(EINVAL)); + + /* initialize HMAC mechanism */ + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + mech.cm_param = NULL; + mech.cm_param_len = 0; + + /* initialize the extracted key (PRK) as a crypto key */ + key.ck_format = CRYPTO_KEY_RAW; + key.ck_length = CRYPTO_BYTES2BITS(SHA512_DIGEST_LENGTH); + 
key.ck_data = extract_key; + + /* initialize crypto data for the input and output data */ + T_cd.cd_format = CRYPTO_DATA_RAW; + T_cd.cd_offset = 0; + T_cd.cd_raw.iov_base = (char *)T; + + c_cd.cd_format = CRYPTO_DATA_RAW; + c_cd.cd_offset = 0; + c_cd.cd_length = 1; + c_cd.cd_raw.iov_base = (char *)&c; + c_cd.cd_raw.iov_len = c_cd.cd_length; + + info_cd.cd_format = CRYPTO_DATA_RAW; + info_cd.cd_offset = 0; + info_cd.cd_length = info_len; + info_cd.cd_raw.iov_base = (char *)info; + info_cd.cd_raw.iov_len = info_cd.cd_length; + + for (i = 1; i <= N; i++) { + c = i; + + T_cd.cd_length = T_len; + T_cd.cd_raw.iov_len = T_cd.cd_length; + + ret = crypto_mac_init(&mech, &key, NULL, &ctx, NULL); + if (ret != CRYPTO_SUCCESS) + return (SET_ERROR(EIO)); + + ret = crypto_mac_update(ctx, &T_cd, NULL); + if (ret != CRYPTO_SUCCESS) + return (SET_ERROR(EIO)); + + ret = crypto_mac_update(ctx, &info_cd, NULL); + if (ret != CRYPTO_SUCCESS) + return (SET_ERROR(EIO)); + + ret = crypto_mac_update(ctx, &c_cd, NULL); + if (ret != CRYPTO_SUCCESS) + return (SET_ERROR(EIO)); + + T_len = SHA512_DIGEST_LENGTH; + T_cd.cd_length = T_len; + T_cd.cd_raw.iov_len = T_cd.cd_length; + + ret = crypto_mac_final(ctx, &T_cd, NULL); + if (ret != CRYPTO_SUCCESS) + return (SET_ERROR(EIO)); + + bcopy(T, out_buf + pos, + (i != N) ? SHA512_DIGEST_LENGTH : (out_len - pos)); + pos += SHA512_DIGEST_LENGTH; + } + + return (0); +} + +/* + * HKDF is designed to be a relatively fast function for deriving keys from a + * master key + a salt. We use this function to generate new encryption keys + * so as to avoid hitting the cryptographic limits of the underlying + * encryption modes. Note that, for the sake of deriving encryption keys, the + * info parameter is called the "salt" everywhere else in the code. 
+ *
+ * The extract step turns key_material and salt into a SHA512_DIGEST_LENGTH
+ * byte intermediate key; the expand step stretches that key, together with
+ * info, into out_len bytes written to output_key.
+ *
+ * Returns 0 on success or the error returned by the failing step. On
+ * failure output_key may have been partially written.
+ */
+int
+hkdf_sha512(uint8_t *key_material, uint_t km_len, uint8_t *salt,
+    uint_t salt_len, uint8_t *info, uint_t info_len, uint8_t *output_key,
+    uint_t out_len)
+{
+	int ret;
+	uint8_t extract_key[SHA512_DIGEST_LENGTH];
+
+	ret = hkdf_sha512_extract(salt, salt_len, key_material, km_len,
+	    extract_key);
+	if (ret == 0) {
+		ret = hkdf_sha512_expand(extract_key, info, info_len,
+		    output_key, out_len);
+	}
+
+	/*
+	 * The intermediate key can regenerate every key derived from it, so
+	 * scrub it on all paths instead of leaving it on the stack.
+	 */
+	bzero(extract_key, sizeof (extract_key));
+
+	return (ret);
+}
diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c index a68dd0daa835..82ed63d808ed 100644 --- a/usr/src/uts/common/fs/zfs/metaslab.c +++ b/usr/src/uts/common/fs/zfs/metaslab.c @@ -1920,7 +1920,8 @@ metaslab_passivate(metaslab_t *msp, uint64_t weight) * this metaslab again. In that case, it had better be empty, * or we would be leaving space on the table. */ - ASSERT(size >= SPA_MINBLOCKSIZE || + ASSERT(!WEIGHT_IS_SPACEBASED(msp->ms_weight) || + size >= SPA_MINBLOCKSIZE || range_tree_space(msp->ms_tree) == 0); ASSERT0(weight & METASLAB_ACTIVE_MASK); diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index e97aa59a4074..b014fd039549 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -1103,6 +1103,8 @@ spa_activate(spa_t *spa, int mode) avl_create(&spa->spa_errlist_last, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); + + spa_keystore_init(&spa->spa_keystore); } /* @@ -1142,10 +1144,11 @@ spa_deactivate(spa_t *spa) * still have errors left in the queues. Empty them just in case. 
*/ spa_errlog_drain(spa); - avl_destroy(&spa->spa_errlist_scrub); avl_destroy(&spa->spa_errlist_last); + spa_keystore_fini(&spa->spa_keystore); + spa->spa_state = POOL_STATE_UNINITIALIZED; mutex_enter(&spa->spa_proc_lock); @@ -1973,8 +1976,8 @@ spa_load_verify(spa_t *spa) if (spa_load_verify_metadata) { error = traverse_pool(spa, spa->spa_verify_min_txg, - TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, - spa_load_verify_cb, rio); + TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | + TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio); } (void) zio_wait(rio); @@ -2180,7 +2183,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, spa->spa_loaded_ts.tv_nsec = 0; } if (error != EBADF) { - zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); + zfs_ereport_post(ereport, spa, NULL, NULL, NULL, 0, 0); } } spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; @@ -3593,12 +3596,28 @@ spa_l2cache_drop(spa_t *spa) } } +/* + * Verify encryption parameters for spa creation. If we are encrypting, we must + * have the encryption feature flag enabled. + */ +static int +spa_create_check_encryption_params(dsl_crypto_params_t *dcp, + boolean_t has_encryption) +{ + if (dcp->cp_crypt != ZIO_CRYPT_OFF && + dcp->cp_crypt != ZIO_CRYPT_INHERIT && + !has_encryption) + return (SET_ERROR(ENOTSUP)); + + return (dmu_objset_create_crypt_check(NULL, dcp)); +} + /* * Pool Creation */ int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, - nvlist_t *zplprops) + nvlist_t *zplprops, dsl_crypto_params_t *dcp) { spa_t *spa; char *altroot = NULL; @@ -3609,8 +3628,11 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, uint64_t txg = TXG_INITIAL; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; - uint64_t version, obj; + uint64_t version, obj, root_dsobj = 0; boolean_t has_features; + boolean_t has_encryption; + spa_feature_t feat; + char *feat_name; /* * If this pool already exists, return failure. 
@@ -3637,10 +3659,27 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, } has_features = B_FALSE; + has_encryption = B_FALSE; for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); elem != NULL; elem = nvlist_next_nvpair(props, elem)) { - if (zpool_prop_feature(nvpair_name(elem))) + if (zpool_prop_feature(nvpair_name(elem))) { has_features = B_TRUE; + feat_name = strchr(nvpair_name(elem), '@') + 1; + VERIFY0(zfeature_lookup_name(feat_name, &feat)); + if (feat == SPA_FEATURE_ENCRYPTION) + has_encryption = B_TRUE; + } + } + + /* verify encryption params, if they were provided */ + if (dcp != NULL) { + error = spa_create_check_encryption_params(dcp, has_encryption); + if (error != 0) { + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); + } } if (has_features || nvlist_lookup_uint64(props, @@ -3730,8 +3769,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, } spa->spa_is_initializing = B_TRUE; - spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); - spa->spa_meta_objset = dp->dp_meta_objset; + spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, dcp, txg); spa->spa_is_initializing = B_FALSE; /* @@ -3756,9 +3794,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, cmn_err(CE_PANIC, "failed to add pool config"); } - if (spa_version(spa) >= SPA_VERSION_FEATURES) - spa_feature_create_zap_objects(spa, tx); - if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, sizeof (uint64_t), 1, &version, tx) != 0) { @@ -3818,14 +3853,25 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, dmu_tx_commit(tx); - spa->spa_sync_on = B_TRUE; - txg_sync_start(spa->spa_dsl_pool); - /* - * We explicitly wait for the first transaction to complete so that our - * bean counters are appropriately updated. 
+ * If the root dataset is encrypted we will need to create key mappings + * for the zio layer before we start to write any data to disk and hold + * them until after the first txg has been synced. Waiting for the first + * transaction to complete also ensures that our bean counters are + * appropriately updated. */ - txg_wait_synced(spa->spa_dsl_pool, txg); + if (dp->dp_root_dir->dd_crypto_obj != 0) { + root_dsobj = dsl_dir_phys(dp->dp_root_dir)->dd_head_dataset_obj; + VERIFY0(spa_keystore_create_mapping_impl(spa, root_dsobj, + dp->dp_root_dir, FTAG)); + } + + spa->spa_sync_on = B_TRUE; + txg_sync_start(dp); + txg_wait_synced(dp, txg); + + if (dp->dp_root_dir->dd_crypto_obj != 0) + VERIFY0(spa_keystore_remove_mapping(spa, root_dsobj, FTAG)); spa_config_sync(spa, B_FALSE, B_TRUE); spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_CREATE); diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c index 68807c4753a3..db4d50042972 100644 --- a/usr/src/uts/common/fs/zfs/spa_config.c +++ b/usr/src/uts/common/fs/zfs/spa_config.c @@ -273,7 +273,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) */ if (target->spa_ccw_fail_time == 0) { zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE, - target, NULL, NULL, 0, 0); + target, NULL, NULL, NULL, 0, 0); } target->spa_ccw_fail_time = gethrtime(); spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE); diff --git a/usr/src/uts/common/fs/zfs/spa_errlog.c b/usr/src/uts/common/fs/zfs/spa_errlog.c index 8ce780537abb..f717ebb8c093 100644 --- a/usr/src/uts/common/fs/zfs/spa_errlog.c +++ b/usr/src/uts/common/fs/zfs/spa_errlog.c @@ -90,9 +90,8 @@ name_to_bookmark(char *buf, zbookmark_phys_t *zb) * during spa_errlog_sync(). 
*/ void -spa_log_error(spa_t *spa, zio_t *zio) +spa_log_error(spa_t *spa, const zbookmark_phys_t *zb) { - zbookmark_phys_t *zb = &zio->io_logical->io_bookmark; spa_error_entry_t search; spa_error_entry_t *new; avl_tree_t *tree; diff --git a/usr/src/uts/common/fs/zfs/spa_history.c b/usr/src/uts/common/fs/zfs/spa_history.c index 2ad0dcfc5ce0..897d3c6e9a8f 100644 --- a/usr/src/uts/common/fs/zfs/spa_history.c +++ b/usr/src/uts/common/fs/zfs/spa_history.c @@ -384,11 +384,16 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) { int err = 0; dmu_tx_t *tx; - nvlist_t *nvarg; + nvlist_t *nvarg, *in_nvl = NULL; if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) return (SET_ERROR(EINVAL)); + err = nvlist_lookup_nvlist(nvl, ZPOOL_HIST_INPUT_NVL, &in_nvl); + if (err == 0) { + (void) nvlist_remove_all(in_nvl, ZPOOL_HIDDEN_ARGS); + } + tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c index 22d69b185bdb..b3d6bab00e86 100644 --- a/usr/src/uts/common/fs/zfs/spa_misc.c +++ b/usr/src/uts/common/fs/zfs/spa_misc.c @@ -1428,6 +1428,7 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp) char type[256]; char *checksum = NULL; char *compress = NULL; + char *crypt_type = NULL; if (bp != NULL) { if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) { @@ -1441,6 +1442,15 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp) (void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name, sizeof (type)); } + if (BP_IS_ENCRYPTED(bp)) { + crypt_type = "encrypted"; + } else if (BP_IS_AUTHENTICATED(bp)) { + crypt_type = "authenticated"; + } else if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) { + crypt_type = "indirect-MAC"; + } else { + crypt_type = "unencrypted"; + } if (!BP_IS_EMBEDDED(bp)) { checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name; @@ -1449,7 +1459,7 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp) } 
SNPRINTF_BLKPTR(snprintf, ' ', buf, buflen, bp, type, checksum, - compress); + crypt_type, compress); } void diff --git a/usr/src/uts/common/fs/zfs/sys/abd.h b/usr/src/uts/common/fs/zfs/sys/abd.h index 134846a12f56..1688f5918115 100644 --- a/usr/src/uts/common/fs/zfs/sys/abd.h +++ b/usr/src/uts/common/fs/zfs/sys/abd.h @@ -73,6 +73,7 @@ abd_t *abd_alloc_for_io(size_t, boolean_t); abd_t *abd_alloc_sametype(abd_t *, size_t); void abd_free(abd_t *); abd_t *abd_get_offset(abd_t *, size_t); +abd_t *abd_get_offset_size(abd_t *, size_t, size_t); abd_t *abd_get_from_buf(void *, size_t); void abd_put(abd_t *); diff --git a/usr/src/uts/common/fs/zfs/sys/arc.h b/usr/src/uts/common/fs/zfs/sys/arc.h index 10c920ff9d97..1dff8442052d 100644 --- a/usr/src/uts/common/fs/zfs/sys/arc.h +++ b/usr/src/uts/common/fs/zfs/sys/arc.h @@ -58,11 +58,22 @@ _NOTE(CONSTCOND) } while (0) typedef struct arc_buf_hdr arc_buf_hdr_t; typedef struct arc_buf arc_buf_t; -typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private); + +/* + * Because the ARC can store encrypted data, errors (not due to bugs) may arise + * while transforming data into its desired format - specifically, when + * decrypting, the key may not be present, or the HMAC may not be correct + * which signifies deliberate tampering with the on-disk state + * (assuming that the checksum was correct). The "error" parameter will be + * nonzero in this case, even if there is no associated zio. 
+ */ +typedef void arc_read_done_func_t(zio_t *zio, int error, arc_buf_t *buf, + void *private); +typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *private); /* generic arc_done_func_t's which you can use */ -arc_done_func_t arc_bcopy_func; -arc_done_func_t arc_getbuf_func; +arc_read_done_func_t arc_bcopy_func; +arc_read_done_func_t arc_getbuf_func; typedef enum arc_flags { @@ -90,20 +101,29 @@ typedef enum arc_flags ARC_FLAG_L2_WRITING = 1 << 11, /* write in progress */ ARC_FLAG_L2_EVICTED = 1 << 12, /* evicted during I/O */ ARC_FLAG_L2_WRITE_HEAD = 1 << 13, /* head of write list */ + /* + * Encrypted or authenticated on disk (may be plaintext in memory). + * This header has b_crypt_hdr allocated. Does not include indirect + * blocks with checksums of MACs which will also have their X + * (encrypted) bit set in the bp. + */ + ARC_FLAG_PROTECTED = 1 << 14, + /* data has not been authenticated yet */ + ARC_FLAG_NOAUTH = 1 << 15, /* indicates that the buffer contains metadata (otherwise, data) */ - ARC_FLAG_BUFC_METADATA = 1 << 14, + ARC_FLAG_BUFC_METADATA = 1 << 16, /* Flags specifying whether optional hdr struct fields are defined */ - ARC_FLAG_HAS_L1HDR = 1 << 15, - ARC_FLAG_HAS_L2HDR = 1 << 16, + ARC_FLAG_HAS_L1HDR = 1 << 17, + ARC_FLAG_HAS_L2HDR = 1 << 18, /* * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data. * This allows the l2arc to use the blkptr's checksum to verify * the data without having to store the checksum in the hdr. 
*/ - ARC_FLAG_COMPRESSED_ARC = 1 << 17, - ARC_FLAG_SHARED_DATA = 1 << 18, + ARC_FLAG_COMPRESSED_ARC = 1 << 19, + ARC_FLAG_SHARED_DATA = 1 << 20, /* * The arc buffer's compression mode is stored in the top 7 bits of the @@ -122,7 +142,12 @@ typedef enum arc_flags typedef enum arc_buf_flags { ARC_BUF_FLAG_SHARED = 1 << 0, - ARC_BUF_FLAG_COMPRESSED = 1 << 1 + ARC_BUF_FLAG_COMPRESSED = 1 << 1, + /* + * indicates whether this arc_buf_t is encrypted, regardless of + * state on-disk + */ + ARC_BUF_FLAG_ENCRYPTED = 1 << 2 } arc_buf_flags_t; struct arc_buf { @@ -155,15 +180,31 @@ typedef enum arc_space_type { void arc_space_consume(uint64_t space, arc_space_type_t type); void arc_space_return(uint64_t space, arc_space_type_t type); boolean_t arc_is_metadata(arc_buf_t *buf); +boolean_t arc_is_encrypted(arc_buf_t *buf); +boolean_t arc_is_unauthenticated(arc_buf_t *buf); enum zio_compress arc_get_compression(arc_buf_t *buf); -int arc_decompress(arc_buf_t *buf); +void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt, + uint8_t *iv, uint8_t *mac); +int arc_untransform(arc_buf_t *buf, spa_t *spa, uint64_t dsobj, + boolean_t in_place); +void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder, + dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac); arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size); arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize, enum zio_compress compression_type); +arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, + boolean_t byteorder, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type); arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size); arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize, enum zio_compress compression_type); +arc_buf_t 
*arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, + dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type); void arc_return_buf(arc_buf_t *buf, void *tag); void arc_loan_inuse_buf(arc_buf_t *buf, void *tag); void arc_buf_destroy(arc_buf_t *buf, void *tag); @@ -178,12 +219,12 @@ int arc_referenced(arc_buf_t *buf); #endif int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, - arc_done_func_t *done, void *private, zio_priority_t priority, int flags, - arc_flags_t *arc_flags, const zbookmark_phys_t *zb); + arc_read_done_func_t *done, void *private, zio_priority_t priority, + int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb); zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, - arc_done_func_t *ready, arc_done_func_t *child_ready, - arc_done_func_t *physdone, arc_done_func_t *done, + arc_write_done_func_t *ready, arc_write_done_func_t *child_ready, + arc_write_done_func_t *physdone, arc_write_done_func_t *done, void *private, zio_priority_t priority, int zio_flags, const zbookmark_phys_t *zb); void arc_freed(spa_t *spa, const blkptr_t *bp); diff --git a/usr/src/uts/common/fs/zfs/sys/dbuf.h b/usr/src/uts/common/fs/zfs/sys/dbuf.h index 6862599a6540..177ad5237db1 100644 --- a/usr/src/uts/common/fs/zfs/sys/dbuf.h +++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h @@ -54,6 +54,7 @@ extern "C" { #define DB_RF_NOPREFETCH (1 << 3) #define DB_RF_NEVERWAIT (1 << 4) #define DB_RF_CACHED (1 << 5) +#define DB_RF_NO_DECRYPT (1 << 6) /* * The simplified state transition diagram for dbufs looks like: @@ -146,6 +147,7 @@ typedef struct dbuf_dirty_record { override_states_t dr_override_state; uint8_t dr_copies; boolean_t dr_nopwrite; + boolean_t dr_raw; } dl; } dt; } dbuf_dirty_record_t; diff --git a/usr/src/uts/common/fs/zfs/sys/ddt.h b/usr/src/uts/common/fs/zfs/sys/ddt.h index 
15d2a9a7ad71..b193f8509cf3 100644 --- a/usr/src/uts/common/fs/zfs/sys/ddt.h +++ b/usr/src/uts/common/fs/zfs/sys/ddt.h @@ -67,9 +67,10 @@ enum ddt_class { typedef struct ddt_key { zio_cksum_t ddk_cksum; /* 256-bit block checksum */ /* - * Encoded with logical & physical size, and compression, as follows: + * Encoded with logical & physical size, encryption, and compression, + * as follows: * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | 0 | 0 | 0 | comp | PSIZE | LSIZE | + * | 0 | 0 | 0 |X| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ */ uint64_t ddk_prop; @@ -85,11 +86,17 @@ typedef struct ddt_key { #define DDK_SET_PSIZE(ddk, x) \ BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) -#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8) -#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x) +#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 7) +#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 7, x) + +#define DDK_GET_CRYPT(ddk) BF64_GET((ddk)->ddk_prop, 39, 1) +#define DDK_SET_CRYPT(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x) #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) +/* + * Number of DVAs usable by a dedup entry. Encrypted block pointers give up + * their last DVA slot to hold the salt and IV, so an entry whose key has the + * crypt bit set has one fewer DVA than SPA_DVAS_PER_BP. + * NOTE(review): the arms were previously the other way round; confirm + * against the callers that iterate ddp_dva[]. + */ +#define DDE_GET_NDVAS(dde) (DDK_GET_CRYPT(&(dde)->dde_key) \ + ? SPA_DVAS_PER_BP - 1 : SPA_DVAS_PER_BP)
+ typedef struct ddt_phys { dva_t ddp_dva[SPA_DVAS_PER_BP]; uint64_t ddp_refcnt; diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h index 7cb65596e48a..0c8f5ce6eedb 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h @@ -75,6 +75,7 @@ struct nvlist; struct arc_buf; struct zio_prop; struct sa_handle; +struct dsl_crypto_params; typedef struct objset objset_t; typedef struct dmu_tx dmu_tx_t; @@ -104,16 +105,18 @@ typedef enum dmu_object_byteswap { #define DMU_OT_NEWTYPE 0x80 #define DMU_OT_METADATA 0x40 -#define DMU_OT_BYTESWAP_MASK 0x3f +#define DMU_OT_ENCRYPTED 0x20 +#define DMU_OT_BYTESWAP_MASK 0x1f /* * Defines a uint8_t object type. Object types specify if the data * in the object is metadata (boolean) and how to byteswap the data * (dmu_object_byteswap_t). */ -#define DMU_OT(byteswap, metadata) \ +#define DMU_OT(byteswap, metadata, encrypted) \ (DMU_OT_NEWTYPE | \ ((metadata) ? DMU_OT_METADATA : 0) | \ + ((encrypted) ? DMU_OT_ENCRYPTED : 0) | \ ((byteswap) & DMU_OT_BYTESWAP_MASK)) #define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \ @@ -124,6 +127,10 @@ typedef enum dmu_object_byteswap { ((ot) & DMU_OT_METADATA) : \ dmu_ot[(ot)].ot_metadata) +#define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \ + ((ot) & DMU_OT_ENCRYPTED) : \ + dmu_ot[(int)(ot)].ot_encrypt) + /* * These object types use bp_fill != 1 for their L0 bp's. Therefore they can't * have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill @@ -219,16 +226,27 @@ typedef enum dmu_object_type { /* * Names for valid types declared with DMU_OT(). 
*/ - DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), - DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), - DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE), - DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), - DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), - DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), - DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), - DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), - DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), - DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), + DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_FALSE), + DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_FALSE), + DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_FALSE), + DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_FALSE), + DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_FALSE), + DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_FALSE), + DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_FALSE), + DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_FALSE), + DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_FALSE), + DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_FALSE), + + DMU_OTN_UINT8_ENC_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_TRUE), + DMU_OTN_UINT8_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_TRUE), + DMU_OTN_UINT16_ENC_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_TRUE), + DMU_OTN_UINT16_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_TRUE), + DMU_OTN_UINT32_ENC_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_TRUE), + DMU_OTN_UINT32_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_TRUE), + DMU_OTN_UINT64_ENC_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_TRUE), + DMU_OTN_UINT64_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_TRUE), + DMU_OTN_ZAP_ENC_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_TRUE), + DMU_OTN_ZAP_ENC_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_TRUE), } 
dmu_object_type_t; typedef enum txg_how { @@ -265,19 +283,24 @@ void zfs_znode_byteswap(void *buf, size_t size); */ #define DMU_BONUS_BLKID (-1ULL) #define DMU_SPILL_BLKID (-2ULL) + /* * Public routines to create, destroy, open, and close objsets. */ +typedef void dmu_objset_create_sync_func_t(objset_t *os, void *arg, + cred_t *cr, dmu_tx_t *tx); + int dmu_objset_hold(const char *name, void *tag, objset_t **osp); int dmu_objset_own(const char *name, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp); + boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp); void dmu_objset_rele(objset_t *os, void *tag); -void dmu_objset_disown(objset_t *os, void *tag); +void dmu_objset_disown(objset_t *os, boolean_t key_required, void *tag); int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp); void dmu_objset_evict_dbufs(objset_t *os); int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, - void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); + struct dsl_crypto_params *dcp, dmu_objset_create_sync_func_t func, + void *arg); int dmu_objset_clone(const char *name, const char *origin); int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer, struct nvlist *errlist); @@ -379,6 +402,13 @@ int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx); int dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg); +/* + * Set the number of levels on a dnode. nlevels must be greater than the + * current number of levels or an EINVAL will be returned. + */ +int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels, + dmu_tx_t *tx); + /* * Set the data blocksize for an object. 
* @@ -407,8 +437,9 @@ void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, dmu_tx_t *tx); -void -dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset, +int dmu_object_dirty_raw(objset_t *os, uint64_t object, dmu_tx_t *tx); + +void dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset, void *data, uint8_t etype, uint8_t comp, int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx); @@ -421,6 +452,11 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset, void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, struct zio_prop *zp); +void dmu_write_policy_override_compress(struct zio_prop *zp, + enum zio_compress compress); +void dmu_write_policy_override_encrypt(struct zio_prop *zp, + dmu_object_type_t ot, boolean_t byteorder, enum zio_compress compress, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac); /* * The bonus data is accessed more or less like a regular buffer. * You must dmu_bonus_hold() to get the buffer, which will give you a @@ -433,6 +469,8 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, * * Returns ENOENT, EIO, or 0. */ +int dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, + uint32_t flags, dmu_buf_t **dbp); int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **); int dmu_bonus_max(void); int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *); @@ -643,6 +681,7 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db); * (ie. you've called dmu_tx_hold_object(tx, db->db_object)). */ void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx); +void dmu_buf_will_change_crypt_params(dmu_buf_t *db, dmu_tx_t *tx); /* * You must create a transaction, then hold the objects which you will @@ -712,10 +751,13 @@ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func, * -1, the range from offset to end-of-file is freed. 
*/ int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, - uint64_t size, dmu_tx_t *tx); + uint64_t size, dmu_tx_t *tx); int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset, - uint64_t size); + uint64_t size); +int dmu_free_long_range_raw(objset_t *os, uint64_t object, uint64_t offset, + uint64_t size); int dmu_free_long_object(objset_t *os, uint64_t object); +int dmu_free_long_object_raw(objset_t *os, uint64_t object); /* * Convenience functions. @@ -725,6 +767,7 @@ int dmu_free_long_object(objset_t *os, uint64_t object); */ #define DMU_READ_PREFETCH 0 /* prefetch */ #define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ +#define DMU_READ_NO_DECRYPT 2 /* don't decrypt */ int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, void *buf, uint32_t flags); int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf, @@ -745,8 +788,15 @@ int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, struct page *pp, dmu_tx_t *tx); struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); void dmu_return_arcbuf(struct arc_buf *buf); -void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, - dmu_tx_t *tx); +void dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, + struct arc_buf *buf, dmu_tx_t *tx); +void dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, + struct arc_buf *buf, dmu_tx_t *tx); +#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf +void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx); +void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset, + dmu_buf_t *handle, dmu_tx_t *tx); int dmu_xuio_init(struct xuio *uio, int niov); void dmu_xuio_fini(struct xuio *uio); int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off, @@ -788,6 +838,7 @@ typedef void arc_byteswap_func_t(void *buf, size_t size); typedef struct 
dmu_object_type_info { dmu_object_byteswap_t ot_byteswap; boolean_t ot_metadata; + boolean_t ot_encrypt; char *ot_name; } dmu_object_type_info_t; diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h index b20a9f9557cb..b42ef585665f 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h @@ -58,13 +58,19 @@ struct dmu_tx; #define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0) +/* all flags are currently non-portable */ +#define OBJSET_CRYPT_PORTABLE_FLAGS_MASK (0) + typedef struct objset_phys { dnode_phys_t os_meta_dnode; zil_header_t os_zil_header; uint64_t os_type; uint64_t os_flags; + uint8_t os_portable_mac[ZIO_OBJSET_MAC_LEN]; + uint8_t os_local_mac[ZIO_OBJSET_MAC_LEN]; char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 - - sizeof (zil_header_t) - sizeof (uint64_t)*2]; + sizeof (zil_header_t) - sizeof (uint64_t)*2 - + 2*ZIO_OBJSET_MAC_LEN]; dnode_phys_t os_userused_dnode; dnode_phys_t os_groupused_dnode; } objset_phys_t; @@ -75,6 +81,8 @@ struct objset { spa_t *os_spa; arc_buf_t *os_phys_buf; objset_phys_t *os_phys; + boolean_t os_encrypted; + /* * The following "special" dnodes have no parent, are exempt * from dnode_move(), and are not recorded in os_dnodes, but they @@ -115,6 +123,9 @@ struct objset { uint64_t os_freed_dnodes; boolean_t os_rescan_dnodes; + /* os_phys_buf should be written raw next txg */ + boolean_t os_next_write_raw; + /* Protected by os_obj_lock */ kmutex_t os_obj_lock; uint64_t os_obj_next; @@ -149,13 +160,18 @@ struct objset { /* called from zpl */ int dmu_objset_hold(const char *name, void *tag, objset_t **osp); +int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag, + objset_t **osp); int dmu_objset_own(const char *name, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp); + boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp); int dmu_objset_own_obj(struct dsl_pool *dp, uint64_t obj, 
- dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp); -void dmu_objset_refresh_ownership(objset_t *os, void *tag); + dmu_objset_type_t type, boolean_t readonly, boolean_t decrypt, + void *tag, objset_t **osp); +void dmu_objset_refresh_ownership(objset_t *os, boolean_t key_needed, + void *tag); void dmu_objset_rele(objset_t *os, void *tag); -void dmu_objset_disown(objset_t *os, void *tag); +void dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag); +void dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag); int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); void dmu_objset_stats(objset_t *os, nvlist_t *nv); @@ -173,6 +189,9 @@ timestruc_t dmu_objset_snap_cmtime(objset_t *os); /* called from dsl */ void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx); boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg); +objset_t *dmu_objset_create_impl_dnstats(spa_t *spa, struct dsl_dataset *ds, + blkptr_t *bp, dmu_objset_type_t type, int levels, int blksz, int ibs, + dmu_tx_t *tx); objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx); int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_send.h b/usr/src/uts/common/fs/zfs/sys/dmu_send.h index 38b1b042e54e..781d1e62e5dc 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_send.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_send.h @@ -42,16 +42,15 @@ struct dmu_replay_record; extern const char *recv_clone_name; int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, - boolean_t large_block_ok, boolean_t compressok, int outfd, - uint64_t resumeobj, uint64_t resumeoff, - struct vnode *vp, offset_t *off); + boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd, + uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off); int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds, 
boolean_t stream_compressed, uint64_t *sizep); int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg, boolean_t stream_compressed, uint64_t *sizep); int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, boolean_t embedok, boolean_t large_block_ok, boolean_t compressok, - int outfd, struct vnode *vp, offset_t *off); + boolean_t rawok, int outfd, struct vnode *vp, offset_t *off); typedef struct dmu_recv_cookie { struct dsl_dataset *drc_ds; @@ -63,6 +62,7 @@ typedef struct dmu_recv_cookie { boolean_t drc_byteswap; boolean_t drc_force; boolean_t drc_resumable; + boolean_t drc_raw; struct avl_tree *drc_guid_to_ds_map; zio_cksum_t drc_cksum; uint64_t drc_newsnapobj; diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h b/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h index c010edd440d9..8ceef5cf13e1 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h @@ -49,6 +49,15 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, #define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA) #define TRAVERSE_HARD (1<<4) +/* + * Encrypted dnode blocks have encrypted bonus buffers while the rest + * of the dnode is left unencrypted. Callers can specify the + * TRAVERSE_NO_DECRYPT flag to indicate to the traversal code that + * they wish to receive the raw encrypted dnodes instead of attempting + * to read the logical data. 
+ */ +#define TRAVERSE_NO_DECRYPT (1<<5) + /* Special traverse error return value to indicate skipping of children */ #define TRAVERSE_VISIT_NO_CHILDREN -1 diff --git a/usr/src/uts/common/fs/zfs/sys/dnode.h b/usr/src/uts/common/fs/zfs/sys/dnode.h index 92f50a01774c..9baaa6069795 100644 --- a/usr/src/uts/common/fs/zfs/sys/dnode.h +++ b/usr/src/uts/common/fs/zfs/sys/dnode.h @@ -74,9 +74,7 @@ extern "C" { /* * dnode id flags * - * Note: a file will never ever have its - * ids moved from bonus->spill - * and only in a crypto environment would it be on spill + * Note: a file will never ever have its ids moved from bonus->spill */ #define DN_ID_CHKED_BONUS 0x1 #define DN_ID_CHKED_SPILL 0x2 @@ -87,6 +85,9 @@ extern "C" { * Derived constants. */ #define DNODE_SIZE (1 << DNODE_SHIFT) +#define DN_BONUS_SIZE(dnsize) ((dnsize) - DNODE_CORE_SIZE - \ + (1 << SPA_BLKPTRSHIFT)) +#define DN_SLOTS_TO_BONUSLEN(slots) DN_BONUS_SIZE((slots) << DNODE_SHIFT) #define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT) #define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT)) #define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT) @@ -109,6 +110,10 @@ extern "C" { #define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) +#define DN_MAX_BONUS_LEN(dnp) \ + (((dnp)->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \ + (uint8_t *)DN_SPILL_BLKPTR(dnp) - (uint8_t *)DN_BONUS(dnp) : \ + (uint8_t *)((dnp) + ((dnp)->dn_extra_slots + 1)) - (uint8_t *)DN_BONUS(dnp)) #define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \ (dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT) @@ -132,6 +137,8 @@ enum dnode_dirtycontext { /* Does dnode have a SA spill blkptr in bonus? 
*/ #define DNODE_FLAG_SPILL_BLKPTR (1<<2) +#define DNODE_CRYPT_PORTABLE_FLAGS_MASK (DNODE_FLAG_SPILL_BLKPTR) + typedef struct dnode_phys { uint8_t dn_type; /* dmu_object_type_t */ uint8_t dn_indblkshift; /* ln2(indirect block size) */ @@ -143,7 +150,15 @@ typedef struct dnode_phys { uint8_t dn_flags; /* DNODE_FLAG_* */ uint16_t dn_datablkszsec; /* data block size in 512b sectors */ uint16_t dn_bonuslen; /* length of dn_bonus */ - uint8_t dn_pad2[4]; + /* + * dn_extra_slots is a placeholder for a feature in other ZFS + * implementations. In this implementation, its value is always + * 0. We declare it here to ensure it isn't used for a different + * purpose, and to improve code portability with implementations + * which support extra dnode slots. + */ + uint8_t dn_extra_slots; /* # of subsequent slots consumed */ + uint8_t dn_pad2[3]; /* accounting is protected by dn_dirty_mtx */ uint64_t dn_maxblkid; /* largest allocated block ID */ @@ -156,6 +171,9 @@ typedef struct dnode_phys { blkptr_t dn_spill; } dnode_phys_t; +#define DN_SPILL_BLKPTR(dnp) ((blkptr_t *)((char *)(dnp) + \ + (((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT))) + struct dnode { /* * Protects the structure of the dnode, including the number of levels @@ -302,6 +320,7 @@ void dnode_free(dnode_t *dn, dmu_tx_t *tx); void dnode_byteswap(dnode_phys_t *dnp); void dnode_buf_byteswap(void *buf, size_t size); void dnode_verify(dnode_t *dn); +int dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx); int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx); void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx); void dnode_diduse_space(dnode_t *dn, int64_t space); diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h b/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h new file mode 100644 index 000000000000..c852f2b3781f --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h @@ -0,0 +1,218 @@ +/* + * CDDL HEADER START + * + * This file and its contents 
are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#ifndef _SYS_DSL_CRYPT_H +#define _SYS_DSL_CRYPT_H + +#include +#include +#include +#include +#include + +/* + * ZAP entry keys for DSL Crypto Keys stored on disk. In addition, + * ZFS_PROP_KEYFORMAT, ZFS_PROP_PBKDF2_SALT, and ZFS_PROP_PBKDF2_ITERS are + * also maintained here using their respective property names. + */ +#define DSL_CRYPTO_KEY_CRYPTO_SUITE "DSL_CRYPTO_SUITE" +#define DSL_CRYPTO_KEY_GUID "DSL_CRYPTO_GUID" +#define DSL_CRYPTO_KEY_IV "DSL_CRYPTO_IV" +#define DSL_CRYPTO_KEY_MAC "DSL_CRYPTO_MAC" +#define DSL_CRYPTO_KEY_MASTER_KEY "DSL_CRYPTO_MASTER_KEY_1" +#define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1" +#define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ" +#define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT" + + +/* + * In-memory representation of a wrapping key. One of these structs will exist + * for each encryption root with its key loaded. 
+ */ +typedef struct dsl_wrapping_key { + /* link on spa_keystore_t:sk_wkeys */ + avl_node_t wk_avl_link; + + /* keyformat property enum */ + zfs_keyformat_t wk_keyformat; + + /* the pbkdf2 salt, if the keyformat is of type passphrase */ + uint64_t wk_salt; + + /* the pbkdf2 iterations, if the keyformat is of type passphrase */ + uint64_t wk_iters; + + /* actual wrapping key */ + crypto_key_t wk_key; + + /* refcount of number of dsl_crypto_key_t's holding this struct */ + refcount_t wk_refcnt; + + /* dsl directory object that owns this wrapping key */ + uint64_t wk_ddobj; +} dsl_wrapping_key_t; + +/* enum of commands indicating special actions that should be run */ +typedef enum dcp_cmd { + /* key creation commands */ + DCP_CMD_NONE = 0, /* no specific command */ + DCP_CMD_RAW_RECV, /* raw receive */ + + /* key changing commands */ + DCP_CMD_NEW_KEY, /* rewrap key as an encryption root */ + DCP_CMD_INHERIT, /* rewrap key with parent's wrapping key */ + DCP_CMD_FORCE_NEW_KEY, /* change to encryption root without rewrap */ + DCP_CMD_FORCE_INHERIT, /* inherit parent's key without rewrap */ + + DCP_CMD_MAX +} dcp_cmd_t; + +/* + * This struct is a simple wrapper around all the parameters that are usually + * required to setup encryption. It exists so that all of the params can be + * passed around the kernel together for convenience. + */ +typedef struct dsl_crypto_params { + /* command indicating intended action */ + dcp_cmd_t cp_cmd; + + /* the encryption algorithm */ + enum zio_encrypt cp_crypt; + + /* keylocation property string */ + char *cp_keylocation; + + /* the wrapping key */ + dsl_wrapping_key_t *cp_wkey; +} dsl_crypto_params_t; + +/* + * In-memory representation of a DSL Crypto Key object. One of these structs + * (and corresponding on-disk ZAP object) will exist for each encrypted + * clone family that is mounted or otherwise reading protected data. 
+ */ +typedef struct dsl_crypto_key { + /* link on spa_keystore_t:sk_dsl_keys */ + avl_node_t dck_avl_link; + + /* refcount of dsl_key_mapping_t's holding this key */ + refcount_t dck_holds; + + /* master key used to derive encryption keys */ + zio_crypt_key_t dck_key; + + /* wrapping key for syncing this structure to disk */ + dsl_wrapping_key_t *dck_wkey; + + /* on-disk object id */ + uint64_t dck_obj; +} dsl_crypto_key_t; + +/* + * In-memory mapping of a dataset object id to a DSL Crypto Key. This is used + * to look up the corresponding dsl_crypto_key_t from the zio layer for + * performing data encryption and decryption. + */ +typedef struct dsl_key_mapping { + /* link on spa_keystore_t:sk_key_mappings */ + avl_node_t km_avl_link; + + /* refcount of how many users are depending on this mapping */ + refcount_t km_refcnt; + + /* dataset this crypto key belongs to (index) */ + uint64_t km_dsobj; + + /* crypto key (value) of this record */ + dsl_crypto_key_t *km_key; +} dsl_key_mapping_t; + +/* in memory structure for holding all wrapping and dsl keys */ +typedef struct spa_keystore { + /* lock for protecting sk_dsl_keys */ + krwlock_t sk_dk_lock; + + /* tree of all dsl_crypto_key_t's */ + avl_tree_t sk_dsl_keys; + + /* lock for protecting sk_key_mappings */ + krwlock_t sk_km_lock; + + /* tree of all dsl_key_mapping_t's, indexed by dsobj */ + avl_tree_t sk_key_mappings; + + /* lock for protecting the wrapping keys tree */ + krwlock_t sk_wkeys_lock; + + /* tree of all dsl_wrapping_key_t's, indexed by ddobj */ + avl_tree_t sk_wkeys; +} spa_keystore_t; + +int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, + nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out); +void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload); +void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv); +int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation); + +void spa_keystore_init(spa_keystore_t *sk); +void 
spa_keystore_fini(spa_keystore_t *sk); + +void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag); +int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey); +int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp, + boolean_t noop); +int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj); +int spa_keystore_unload_wkey(const char *dsname); + +int spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj, dsl_dir_t *dd, + void *tag); +int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, void *tag); +int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag); +int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag, + dsl_crypto_key_t **dck_out); + +int dsl_crypto_populate_key_nvlist(struct dsl_dataset *ds, nvlist_t **nvl_out); +int dsl_crypto_recv_key(const char *poolname, uint64_t dsobj, + dmu_objset_type_t ostype, nvlist_t *nvl); + +int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp); +int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent); +int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin); +void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, + dmu_tx_t *tx); +int dmu_objset_create_crypt_check(dsl_dir_t *parentdd, + dsl_crypto_params_t *dcp); +void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, + struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx); +uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, + dmu_tx_t *tx); +int dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd); +uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx); +void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx); + +int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt); +int spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, + abd_t *abd, uint_t datalen, uint8_t *mac); +int 
spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, + abd_t *abd, uint_t datalen, boolean_t byteswap); +int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, uint64_t dsobj, + const blkptr_t *bp, uint64_t txgid, uint_t datalen, abd_t *pabd, + abd_t *cabd, uint8_t *iv, uint8_t *mac, uint8_t *salt, boolean_t *no_crypt); + +#endif /* _SYS_DSL_CRYPT_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h index f01c33aea855..30b1160eafe1 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #ifdef __cplusplus @@ -49,6 +50,7 @@ extern "C" { struct dsl_dataset; struct dsl_dir; struct dsl_pool; +struct dsl_crypto_params; #define DS_FLAG_INCONSISTENT (1ULL<<0) #define DS_IS_INCONSISTENT(ds) \ @@ -99,6 +101,7 @@ struct dsl_pool; #define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok" #define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok" #define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok" +#define DS_FIELD_RESUME_RAWOK "com.datto:resume_rawok" /* * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose @@ -263,26 +266,38 @@ typedef struct dsl_dataset_snapshot_arg { #define DS_UNIQUE_IS_ACCURATE(ds) \ ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) +/* flags for holding the dataset */ +typedef enum ds_hold_flags { + DS_HOLD_FLAG_DECRYPT = 1 << 0 /* needs access to encrypted data */ +} ds_hold_flags_t; + int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag, dsl_dataset_t **dsp); +int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds, void *tag); int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag, dsl_dataset_t **); +int 
dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **); void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); +void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, + void *tag); int dsl_dataset_own(struct dsl_pool *dp, const char *name, - void *tag, dsl_dataset_t **dsp); + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, - void *tag, dsl_dataset_t **dsp); -void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); +void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag); void dsl_dataset_name(dsl_dataset_t *ds, char *name); boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag); int dsl_dataset_namelen(dsl_dataset_t *ds); boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, - dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); + dsl_dataset_t *origin, uint64_t flags, cred_t *, + struct dsl_crypto_params *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, - uint64_t flags, dmu_tx_t *tx); + struct dsl_crypto_params *dcp, uint64_t flags, dmu_tx_t *tx); void dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx); int dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx); int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors); @@ -397,6 +412,8 @@ void dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx); int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner, nvlist_t *result); +void dsl_dataset_activate_feature(uint64_t dsobj, + spa_feature_t f, dmu_tx_t *tx); void dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx); diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h index 59e8e055551a..922883ea4283 100644 --- 
a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h @@ -57,6 +57,8 @@ extern "C" { #define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_DELEG_PERM_BOOKMARK "bookmark" +#define ZFS_DELEG_PERM_LOAD_KEY "load-key" +#define ZFS_DELEG_PERM_CHANGE_KEY "change-key" /* * Note: the names of properties that are marked delegatable are also diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h index c5ba6e2f6e43..bbb9f7b077fb 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -47,6 +48,7 @@ struct dsl_dataset; #define DD_FIELD_FILESYSTEM_COUNT "com.joyent:filesystem_count" #define DD_FIELD_SNAPSHOT_COUNT "com.joyent:snapshot_count" +#define DD_FIELD_CRYPTO_KEY_OBJ "com.datto:crypto_key_obj" typedef enum dd_used { DD_USED_HEAD, @@ -89,6 +91,7 @@ struct dsl_dir { /* These are immutable; no lock needed: */ uint64_t dd_object; + uint64_t dd_crypto_obj; dsl_pool_t *dd_pool; /* Stable until user eviction; no lock needed: */ diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h index b6c51cb3cd69..ebc76e7368f8 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h @@ -49,6 +49,7 @@ struct dsl_dataset; struct dsl_pool; struct dmu_tx; struct dsl_scan; +struct dsl_crypto_params; extern uint64_t zfs_dirty_data_max; extern uint64_t zfs_dirty_data_max_max; @@ -139,7 +140,8 @@ typedef struct dsl_pool { int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp); int dsl_pool_open(dsl_pool_t *dp); void dsl_pool_close(dsl_pool_t *dp); -dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg); +dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, + struct dsl_crypto_params *dcp, uint64_t txg); void dsl_pool_sync(dsl_pool_t *dp, 
uint64_t txg); void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg); int dsl_pool_sync_context(dsl_pool_t *dp); diff --git a/usr/src/uts/common/fs/zfs/sys/hkdf.h b/usr/src/uts/common/fs/zfs/sys/hkdf.h new file mode 100644 index 000000000000..e0f7678c03d5 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/hkdf.h @@ -0,0 +1,29 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#ifndef _SYS_HKDF_H_ +#define _SYS_HKDF_H_ + +#include + +int hkdf_sha512(uint8_t *key_material, uint_t km_len, uint8_t *salt, + uint_t salt_len, uint8_t *info, uint_t info_len, uint8_t *output_key, + uint_t out_len); + +#endif /* _SYS_HKDF_H_ */ diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h index 91925a8cf241..68834dfd948c 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa.h +++ b/usr/src/uts/common/fs/zfs/sys/spa.h @@ -59,6 +59,7 @@ typedef struct ddt ddt_t; typedef struct ddt_entry ddt_entry_t; struct dsl_pool; struct dsl_dataset; +struct dsl_crypto_params; /* * General-purpose 32-bit and 64-bit bitfield encodings. 
@@ -213,7 +214,7 @@ typedef struct zio_cksum_salt { * G gang block indicator * B byteorder (endianness) * D dedup - * X encryption (on version 30, which is not supported) + * X encryption * E blkptr_t contains embedded data (see below) * lvl level of indirection * type DMU object type @@ -223,6 +224,83 @@ typedef struct zio_cksum_salt { * checksum[4] 256-bit checksum of the data this bp describes */ +/* + * The blkptr_t's of encrypted blocks also need to store the encryption + * parameters so that the block can be decrypted. This layout is as follows: + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 0 | vdev1 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 1 |G| offset1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 2 | vdev2 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 3 |G| offset2 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 4 | salt | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 5 | IV1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 7 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 8 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 9 | physical birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * a | logical birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * b | IV2 | fill count | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * c | checksum[0] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * d | checksum[1] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * e | 
MAC[0] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * f | MAC[1] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Legend: + * + * salt Salt for generating encryption keys + * IV1 First 64 bits of encryption IV + * X Block requires encryption handling (set to 1) + * E blkptr_t contains embedded data (set to 0, see below) + * fill count number of non-zero blocks under this bp (truncated to 32 bits) + * IV2 Last 32 bits of encryption IV + * checksum[2] 128-bit checksum of the data this bp describes + * MAC[2] 128-bit message authentication code for this data + * + * The X bit being set indicates that this block is one of 3 types. If this is + * a level 0 block with an encrypted object type, the block is encrypted + * (see BP_IS_ENCRYPTED()). If this is a level 0 block with an unencrypted + * object type, this block is authenticated with an HMAC (see + * BP_IS_AUTHENTICATED()). Otherwise (if level > 0), this bp will use the MAC + * words to store a checksum-of-MACs from the level below (see + * BP_HAS_INDIRECT_MAC_CKSUM()). For convenience in the code, BP_IS_PROTECTED() + * refers to both encrypted and authenticated blocks and BP_USES_CRYPT() + * refers to any of these 3 kinds of blocks. + * + * The additional encryption parameters are the salt, IV, and MAC which are + * explained in greater detail in the block comment at the top of zio_crypt.c. + * The MAC occupies half of the checksum space since it serves a very similar + * purpose: to prevent data corruption on disk. The only functional difference + * is that the checksum is used to detect on-disk corruption whether or not the + * encryption key is loaded and the MAC provides additional protection against + * malicious disk tampering. We use the 3rd DVA to store the salt and first + * 64 bits of the IV. As a result encrypted blocks can only have 2 copies + * maximum instead of the normal 3. 
The last 32 bits of the IV are stored in + * the upper bits of what is usually the fill count. Note that only blocks at + * level 0 or -2 are ever encrypted, which allows us to guarantee that these + * 32 bits are not trampled over by other code (see zio_crypt.c for details). + * The salt and IV are not used for authenticated bps or bps with an indirect + * MAC checksum, so these blocks can utilize all 3 DVAs and the full 64 bits + * for the fill count. + */ + /* * "Embedded" blkptr_t's don't actually point to a block, instead they * have a data payload embedded in the blkptr_t itself. See the comment @@ -278,7 +356,9 @@ typedef struct zio_cksum_salt { * BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must * be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before * other macros, as they assert that they are only used on BP's of the correct - * "embedded-ness". + * "embedded-ness". Encrypted blkptr_t's cannot be embedded because they use + * the payload space for encryption parameters (see the comment above on + * how encryption parameters are stored). 
*/ #define BPE_GET_ETYPE(bp) \ @@ -302,7 +382,7 @@ _NOTE(CONSTCOND) } while (0) BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1)) #define BPE_SET_PSIZE(bp, x) do { \ ASSERT(BP_IS_EMBEDDED(bp)); \ - BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \ + BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \ _NOTE(CONSTCOND) } while (0) typedef enum bp_embedded_type { @@ -402,6 +482,26 @@ _NOTE(CONSTCOND) } while (0) #define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) #define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) +/* encrypted, authenticated, and MAC cksum bps use the same bit */ +#define BP_USES_CRYPT(bp) BF64_GET((bp)->blk_prop, 61, 1) +#define BP_SET_CRYPT(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x) + +#define BP_IS_ENCRYPTED(bp) \ + (BP_USES_CRYPT(bp) && \ + BP_GET_LEVEL(bp) == 0 && \ + DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp))) + +#define BP_IS_AUTHENTICATED(bp) \ + (BP_USES_CRYPT(bp) && \ + BP_GET_LEVEL(bp) <= 0 && \ + !DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp))) + +#define BP_HAS_INDIRECT_MAC_CKSUM(bp) \ + (BP_USES_CRYPT(bp) && BP_GET_LEVEL(bp) > 0) + +#define BP_IS_PROTECTED(bp) \ + (BP_IS_ENCRYPTED(bp) || BP_IS_AUTHENTICATED(bp)) + #define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) #define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x) @@ -419,7 +519,26 @@ _NOTE(CONSTCOND) } while (0) (bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \ } -#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill) +#define BP_GET_FILL(bp) \ + ((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \ + ((BP_IS_EMBEDDED(bp)) ? 
1 : (bp)->blk_fill)) + +#define BP_SET_FILL(bp, fill) \ +{ \ + if (BP_IS_ENCRYPTED(bp)) \ + BF64_SET((bp)->blk_fill, 0, 32, fill); \ + else \ + (bp)->blk_fill = fill; \ +} + +#define BP_GET_IV2(bp) \ + (ASSERT(BP_IS_ENCRYPTED(bp)), \ + BF64_GET((bp)->blk_fill, 32, 32)) +#define BP_SET_IV2(bp, iv2) \ +{ \ + ASSERT(BP_IS_ENCRYPTED(bp)); \ + BF64_SET((bp)->blk_fill, 32, 32, iv2); \ +} #define BP_IS_METADATA(bp) \ (BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) @@ -428,7 +547,7 @@ _NOTE(CONSTCOND) } while (0) (BP_IS_EMBEDDED(bp) ? 0 : \ DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ - DVA_GET_ASIZE(&(bp)->blk_dva[2])) + (DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))) #define BP_GET_UCSIZE(bp) \ (BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) @@ -437,13 +556,13 @@ _NOTE(CONSTCOND) } while (0) (BP_IS_EMBEDDED(bp) ? 0 : \ !!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ - !!DVA_GET_ASIZE(&(bp)->blk_dva[2])) + (!!DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))) #define BP_COUNT_GANG(bp) \ (BP_IS_EMBEDDED(bp) ? 
0 : \ (DVA_GET_GANG(&(bp)->blk_dva[0]) + \ DVA_GET_GANG(&(bp)->blk_dva[1]) + \ - DVA_GET_GANG(&(bp)->blk_dva[2]))) + (DVA_GET_GANG(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))) #define DVA_EQUAL(dva1, dva2) \ ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ @@ -462,6 +581,10 @@ _NOTE(CONSTCOND) } while (0) ((zc1).zc_word[2] - (zc2).zc_word[2]) | \ ((zc1).zc_word[3] - (zc2).zc_word[3]))) +#define ZIO_CHECKSUM_MAC_EQUAL(zc1, zc2) \ + (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \ + ((zc1).zc_word[1] - (zc2).zc_word[1]))) + #define ZIO_CHECKSUM_IS_ZERO(zc) \ (0 == ((zc)->zc_word[0] | (zc)->zc_word[1] | \ (zc)->zc_word[2] | (zc)->zc_word[3])) @@ -522,14 +645,15 @@ _NOTE(CONSTCOND) } while (0) #define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER) -#define BP_SPRINTF_LEN 320 +#define BP_SPRINTF_LEN 400 /* * This macro allows code sharing between zfs, libzpool, and mdb. * 'func' is either snprintf() or mdb_snprintf(). * 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line. 
*/ -#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, compress) \ +#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, crypt_type, \ + compress) \ { \ static const char *copyname[] = \ { "zero", "single", "double", "triple" }; \ @@ -569,18 +693,27 @@ _NOTE(CONSTCOND) } while (0) (u_longlong_t)DVA_GET_ASIZE(dva), \ ws); \ } \ + if (BP_IS_ENCRYPTED(bp)) { \ + len += func(buf + len, size - len, \ + "salt=%llx iv=%llx:%llx%c", \ + (u_longlong_t)bp->blk_dva[2].dva_word[0], \ + (u_longlong_t)bp->blk_dva[2].dva_word[1], \ + (u_longlong_t)BP_GET_IV2(bp), \ + ws); \ + } \ if (BP_IS_GANG(bp) && \ DVA_GET_ASIZE(&bp->blk_dva[2]) <= \ DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \ copies--; \ len += func(buf + len, size - len, \ - "[L%llu %s] %s %s %s %s %s %s%c" \ + "[L%llu %s] %s %s %s %s %s %s %s%c" \ "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \ "cksum=%llx:%llx:%llx:%llx", \ (u_longlong_t)BP_GET_LEVEL(bp), \ type, \ checksum, \ compress, \ + crypt_type, \ BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \ BP_IS_GANG(bp) ? "gang" : "contiguous", \ BP_GET_DEDUP(bp) ? 
"dedup" : "unique", \ @@ -614,8 +747,8 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag, nvlist_t *policy, nvlist_t **config); extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot, size_t buflen); -extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, - nvlist_t *zplprops); +extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, + nvlist_t *zplprops, struct dsl_crypto_params *dcp); extern int spa_import_rootpool(char *devpath, char *devid); extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags); @@ -850,9 +983,10 @@ extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, /* error handling */ struct zbookmark_phys; -extern void spa_log_error(spa_t *spa, zio_t *zio); +extern void spa_log_error(spa_t *spa, const struct zbookmark_phys *zb); extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd, - zio_t *zio, uint64_t stateoroffset, uint64_t length); + struct zbookmark_phys *zb, struct zio *zio, uint64_t stateoroffset, + uint64_t length); extern void zfs_post_remove(spa_t *spa, vdev_t *vd); extern void zfs_post_state_change(spa_t *spa, vdev_t *vd); extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd); diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h index bca91312612c..57f7e6db3f84 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -268,6 +269,7 @@ struct spa { uint64_t spa_deadman_synctime; /* deadman expiration timer */ uint64_t spa_all_vdev_zaps; /* ZAP of per-vd ZAP obj #s */ spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? 
*/ + spa_keystore_t spa_keystore; /* loaded crypto keys */ /* * spa_iokstat_lock protects spa_iokstat and diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h index 03f62c0f09a0..ef2afeb74e72 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -94,6 +94,7 @@ typedef enum drr_headertype { /* flag #21 is reserved for a Delphix feature */ #define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22) /* flag #23 is reserved for the large dnode feature */ +#define DMU_BACKUP_FEATURE_RAW (1 << 24) /* * Mask of all supported backup features @@ -103,7 +104,8 @@ typedef enum drr_headertype { DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \ DMU_BACKUP_FEATURE_RESUMING | \ DMU_BACKUP_FEATURE_LARGE_BLOCKS | \ - DMU_BACKUP_FEATURE_COMPRESSED) + DMU_BACKUP_FEATURE_COMPRESSED | \ + DMU_BACKUP_FEATURE_RAW) /* Are all features in the given flag word currently supported? */ #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) @@ -149,18 +151,26 @@ typedef enum dmu_send_resume_token_version { #define DRR_FLAG_FREERECORDS (1<<2) /* - * flags in the drr_checksumflags field in the DRR_WRITE and - * DRR_WRITE_BYREF blocks + * flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT, + * DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks */ -#define DRR_CHECKSUM_DEDUP (1<<0) +#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */ +#define DRR_RAW_BYTESWAP (1<<1) #define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP) +#define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP) /* deal with compressed drr_write replay records */ #define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0) #define DRR_WRITE_PAYLOAD_SIZE(drrw) \ (DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \ (drrw)->drr_logical_size) +#define DRR_SPILL_PAYLOAD_SIZE(drrs) \ + ((drrs)->drr_compressed_size ? 
\ + (drrs)->drr_compressed_size : (drrs)->drr_length) +#define DRR_OBJECT_PAYLOAD_SIZE(drro) \ + ((drro)->drr_raw_bonuslen != 0 ? \ + (drro)->drr_raw_bonuslen : P2ROUNDUP((drro)->drr_bonuslen, 8)) /* * zfs ioctl command structure @@ -169,7 +179,8 @@ typedef struct dmu_replay_record { enum { DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF, - DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_NUMTYPES + DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE, + DRR_NUMTYPES } drr_type; uint32_t drr_payloadlen; union { @@ -195,8 +206,14 @@ typedef struct dmu_replay_record { uint32_t drr_bonuslen; uint8_t drr_checksumtype; uint8_t drr_compress; - uint8_t drr_pad[6]; + uint8_t drr_dn_slots; /* place holder for large dnode */ + uint8_t drr_flags; + uint32_t drr_raw_bonuslen; uint64_t drr_toguid; + /* only nonzero for raw streams */ + uint8_t drr_indblkshift; + uint8_t drr_nlevels; + uint8_t drr_nblkptr; /* bonus content follows */ } drr_object; struct drr_freeobjects { @@ -212,13 +229,17 @@ typedef struct dmu_replay_record { uint64_t drr_logical_size; uint64_t drr_toguid; uint8_t drr_checksumtype; - uint8_t drr_checksumflags; + uint8_t drr_flags; uint8_t drr_compressiontype; uint8_t drr_pad2[5]; /* deduplication key */ ddt_key_t drr_key; /* only nonzero if drr_compressiontype is not 0 */ uint64_t drr_compressed_size; + /* only nonzero for raw streams */ + uint8_t drr_salt[ZIO_DATA_SALT_LEN]; + uint8_t drr_iv[ZIO_DATA_IV_LEN]; + uint8_t drr_mac[ZIO_DATA_MAC_LEN]; /* content follows */ } drr_write; struct drr_free { @@ -239,7 +260,7 @@ typedef struct dmu_replay_record { uint64_t drr_refoffset; /* properties of the data */ uint8_t drr_checksumtype; - uint8_t drr_checksumflags; + uint8_t drr_flags; uint8_t drr_pad2[6]; ddt_key_t drr_key; /* deduplication key */ } drr_write_byref; @@ -247,7 +268,15 @@ typedef struct dmu_replay_record { uint64_t drr_object; uint64_t drr_length; uint64_t drr_toguid; - uint64_t drr_pad[4]; /* needed for crypto */ + uint8_t 
drr_flags; + uint8_t drr_compressiontype; + uint8_t drr_pad[6]; + /* only nonzero for raw streams */ + uint64_t drr_compressed_size; + uint8_t drr_salt[ZIO_DATA_SALT_LEN]; + uint8_t drr_iv[ZIO_DATA_IV_LEN]; + uint8_t drr_mac[ZIO_DATA_MAC_LEN]; + dmu_object_type_t drr_type; /* spill data follows */ } drr_spill; struct drr_write_embedded { @@ -263,6 +292,16 @@ typedef struct dmu_replay_record { uint32_t drr_psize; /* compr. (real) size of payload */ /* (possibly compressed) content follows */ } drr_write_embedded; + struct drr_object_range { + uint64_t drr_firstobj; + uint64_t drr_numslots; + uint64_t drr_toguid; + uint8_t drr_salt[ZIO_DATA_SALT_LEN]; + uint8_t drr_iv[ZIO_DATA_IV_LEN]; + uint8_t drr_mac[ZIO_DATA_MAC_LEN]; + uint8_t drr_flags; + uint8_t drr_pad[3]; + } drr_object_range; /* * Nore: drr_checksum is overlaid with all record types diff --git a/usr/src/uts/common/fs/zfs/sys/zil.h b/usr/src/uts/common/fs/zfs/sys/zil.h index 1f1298e8c627..d513b105cdca 100644 --- a/usr/src/uts/common/fs/zfs/sys/zil.h +++ b/usr/src/uts/common/fs/zfs/sys/zil.h @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -394,7 +395,8 @@ typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, struct lwb *lwb, zio_t *zio); extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, - zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg); + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, + boolean_t decrypt); extern void zil_init(void); extern void zil_fini(void); diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h index c9503297ab37..b53d83419baf 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio.h +++ b/usr/src/uts/common/fs/zfs/sys/zio.h @@ -104,6 +104,29 @@ enum zio_checksum { #define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256 #define ZIO_DEDUPDITTO_MIN 100 +/* supported encryption algorithms */ +enum zio_encrypt { + ZIO_CRYPT_INHERIT = 0, + ZIO_CRYPT_ON, + ZIO_CRYPT_OFF, + 
ZIO_CRYPT_AES_128_CCM, + ZIO_CRYPT_AES_192_CCM, + ZIO_CRYPT_AES_256_CCM, + ZIO_CRYPT_AES_128_GCM, + ZIO_CRYPT_AES_192_GCM, + ZIO_CRYPT_AES_256_GCM, + ZIO_CRYPT_FUNCTIONS +}; + +#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM +#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF + +/* macros defining encryption lengths */ +#define ZIO_OBJSET_MAC_LEN 32 +#define ZIO_DATA_IV_LEN 12 +#define ZIO_DATA_SALT_LEN 8 +#define ZIO_DATA_MAC_LEN 16 + /* * The number of "legacy" compression functions which can be set on individual * objects. @@ -185,16 +208,18 @@ enum zio_flag { ZIO_FLAG_DONT_PROPAGATE = 1 << 20, ZIO_FLAG_IO_BYPASS = 1 << 21, ZIO_FLAG_IO_REWRITE = 1 << 22, - ZIO_FLAG_RAW = 1 << 23, - ZIO_FLAG_GANG_CHILD = 1 << 24, - ZIO_FLAG_DDT_CHILD = 1 << 25, - ZIO_FLAG_GODFATHER = 1 << 26, - ZIO_FLAG_NOPWRITE = 1 << 27, - ZIO_FLAG_REEXECUTED = 1 << 28, - ZIO_FLAG_DELEGATED = 1 << 29, + ZIO_FLAG_RAW_COMPRESS = 1 << 23, + ZIO_FLAG_RAW_ENCRYPT = 1 << 24, + ZIO_FLAG_GANG_CHILD = 1 << 25, + ZIO_FLAG_DDT_CHILD = 1 << 26, + ZIO_FLAG_GODFATHER = 1 << 27, + ZIO_FLAG_NOPWRITE = 1 << 28, + ZIO_FLAG_REEXECUTED = 1 << 29, + ZIO_FLAG_DELEGATED = 1 << 30, }; #define ZIO_FLAG_MUSTSUCCEED 0 +#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT) #define ZIO_DDT_CHILD_FLAGS(zio) \ (((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \ @@ -296,12 +321,17 @@ typedef struct zio_prop { boolean_t zp_dedup; boolean_t zp_dedup_verify; boolean_t zp_nopwrite; + boolean_t zp_encrypt; + boolean_t zp_byteorder; + uint8_t zp_salt[ZIO_DATA_SALT_LEN]; + uint8_t zp_iv[ZIO_DATA_IV_LEN]; + uint8_t zp_mac[ZIO_DATA_MAC_LEN]; } zio_prop_t; typedef struct zio_cksum_report zio_cksum_report_t; typedef void zio_cksum_finish_f(zio_cksum_report_t *rep, - const void *good_data); + const abd_t *good_data); typedef void zio_cksum_free_f(void *cbdata, size_t size); struct zio_bad_cksum; /* defined in zio_checksum.h */ @@ -504,8 +534,8 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, extern zio_t 
*zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, enum zio_flag flags); -extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, - blkptr_t *old_bp, uint64_t size, boolean_t *slog); +extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, + blkptr_t *new_bp, blkptr_t *old_bp, uint64_t size, boolean_t *slog); extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp); extern void zio_flush(zio_t *zio, vdev_t *vd); extern void zio_shrink(zio_t *zio, uint64_t size); @@ -587,18 +617,19 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio); /* * Checksum ereport functions */ -extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio, - uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info); +extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, + zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length, + void *arg, struct zio_bad_cksum *info); extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report, - const void *good_data, const void *bad_data, boolean_t drop_if_identical); + const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical); extern void zfs_ereport_send_interim_checksum(zio_cksum_report_t *report); extern void zfs_ereport_free_checksum(zio_cksum_report_t *report); /* If we have the good data in hand, this function can be used */ extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, - struct zio *zio, uint64_t offset, uint64_t length, - const void *good_data, const void *bad_data, struct zio_bad_cksum *info); + zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length, + const abd_t *good_data, const abd_t *bad_data, struct zio_bad_cksum *info); /* Called from spa_sync(), but primarily an injection handler */ extern void spa_handle_ignored_writes(spa_t *spa); diff --git a/usr/src/uts/common/fs/zfs/sys/zio_checksum.h b/usr/src/uts/common/fs/zfs/sys/zio_checksum.h index 3eda057eae80..ee7a9bf7c766 100644 
--- a/usr/src/uts/common/fs/zfs/sys/zio_checksum.h +++ b/usr/src/uts/common/fs/zfs/sys/zio_checksum.h @@ -54,7 +54,7 @@ typedef enum zio_checksum_flags { /* Uses salt value */ ZCHECKSUM_FLAG_SALTED = (1 << 4), /* Strong enough for nopwrite? */ - ZCHECKSUM_FLAG_NOPWRITE = (1 << 5) + ZCHECKSUM_FLAG_NOPWRITE = (1 << 5), } zio_checksum_flags_t; /* diff --git a/usr/src/uts/common/fs/zfs/sys/zio_crypt.h b/usr/src/uts/common/fs/zfs/sys/zio_crypt.h new file mode 100644 index 000000000000..aa1a9ee31937 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/zio_crypt.h @@ -0,0 +1,146 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#ifndef _SYS_ZIO_CRYPT_H +#define _SYS_ZIO_CRYPT_H + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* forward declarations */ +struct zbookmark_phys; + +#define WRAPPING_KEY_LEN 32 +#define WRAPPING_IV_LEN ZIO_DATA_IV_LEN +#define WRAPPING_MAC_LEN ZIO_DATA_MAC_LEN +#define MASTER_KEY_MAX_LEN 32 +#define SHA512_HMAC_KEYLEN 64 + +typedef enum zio_crypt_type { + ZC_TYPE_NONE = 0, + ZC_TYPE_CCM, + ZC_TYPE_GCM +} zio_crypt_type_t; + +/* table of supported crypto algorithms, modes and keylengths. 
*/ +typedef struct zio_crypt_info { + /* mechanism name, needed by ICP */ + crypto_mech_name_t ci_mechname; + + /* cipher mode type (GCM, CCM) */ + zio_crypt_type_t ci_crypt_type; + + /* length of the encryption key */ + size_t ci_keylen; + + /* human-readable name of the encryption algorithm */ + char *ci_name; +} zio_crypt_info_t; + +extern zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS]; + +/* in memory representation of an unwrapped key that is loaded into memory */ +typedef struct zio_crypt_key { + /* encryption algorithm */ + uint64_t zk_crypt; + + /* GUID for uniquely identifying this key. Not encrypted on disk. */ + uint64_t zk_guid; + + /* buffer for master key */ + uint8_t zk_master_keydata[MASTER_KEY_MAX_LEN]; + + /* buffer for hmac key */ + uint8_t zk_hmac_keydata[SHA512_HMAC_KEYLEN]; + + /* buffer for current encryption key derived from master key */ + uint8_t zk_current_keydata[MASTER_KEY_MAX_LEN]; + + /* current 64 bit salt for deriving an encryption key */ + uint8_t zk_salt[ZIO_DATA_SALT_LEN]; + + /* count of how many times the current salt has been used */ + uint64_t zk_salt_count; + + /* illumos crypto api current encryption key */ + crypto_key_t zk_current_key; + + /* template of current encryption key for illumos crypto api */ + crypto_ctx_template_t zk_current_tmpl; + + /* illumos crypto api current hmac key */ + crypto_key_t zk_hmac_key; + + /* template of hmac key for illumos crypto api */ + crypto_ctx_template_t zk_hmac_tmpl; + + /* lock for changing the salt and dependent values */ + krwlock_t zk_salt_lock; +} zio_crypt_key_t; + +void zio_crypt_key_destroy(zio_crypt_key_t *key); +int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key); +int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out); + +int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, + uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out); +int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, +
uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, + zio_crypt_key_t *key); +int zio_crypt_generate_iv(uint8_t *ivbuf); +int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, + uint_t datalen, uint8_t *ivbuf, uint8_t *salt); + +void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv); +void zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv); +void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac); +void zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac); +void zio_crypt_encode_mac_zil(void *data, uint8_t *mac); +void zio_crypt_decode_mac_zil(const void *data, uint8_t *mac); +void zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen); + +int zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, + uint_t datalen, boolean_t byteswap, uint8_t *cksum); +int zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, + uint_t datalen, boolean_t byteswap, uint8_t *cksum); +int zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, + uint8_t *digestbuf, uint_t digestlen); +int zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, + boolean_t byteswap, uint8_t *portable_mac, uint8_t *local_mac); +int zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, + dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen, + boolean_t byteswap, uint8_t *plainbuf, uint8_t *cipherbuf, + boolean_t *no_crypt); +int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, + dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen, + boolean_t byteswap, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZIO_CRYPT_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/zio_impl.h b/usr/src/uts/common/fs/zfs/sys/zio_impl.h index a36749a308d6..703522b67df7 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio_impl.h +++ 
b/usr/src/uts/common/fs/zfs/sys/zio_impl.h @@ -99,6 +99,18 @@ extern "C" { * physical I/O. The nop write feature can handle writes in either * syncing or open context (i.e. zil writes) and as a result is mutually * exclusive with dedup. + * + * Encryption: + * Encryption and authentication are handled by the ZIO_STAGE_ENCRYPT stage. + * This stage determines how the encryption metadata is stored in the bp. + * Decryption and MAC verification are performed during zio_decrypt() as a + * transform callback. Encryption is mutually exclusive with nopwrite, because + * blocks with the same plaintext will be encrypted with different salts and + * IVs (if dedup is off), and therefore have different ciphertexts. For dedup + * blocks we deterministically generate the IV and salt by performing an HMAC + * of the plaintext, which is computationally expensive, but allows us to keep + * support for encrypted dedup. See the block comment in zio_crypt.c for + * details. */ /* @@ -113,32 +125,33 @@ enum zio_stage { ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */ ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */ - ZIO_STAGE_CHECKSUM_GENERATE = 1 << 6, /* -W--- */ + ZIO_STAGE_ENCRYPT = 1 << 6, /* -W--- */ + ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W--- */ - ZIO_STAGE_NOP_WRITE = 1 << 7, /* -W--- */ + ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W--- */ - ZIO_STAGE_DDT_READ_START = 1 << 8, /* R---- */ - ZIO_STAGE_DDT_READ_DONE = 1 << 9, /* R---- */ - ZIO_STAGE_DDT_WRITE = 1 << 10, /* -W--- */ - ZIO_STAGE_DDT_FREE = 1 << 11, /* --F-- */ + ZIO_STAGE_DDT_READ_START = 1 << 9, /* R---- */ + ZIO_STAGE_DDT_READ_DONE = 1 << 10, /* R---- */ + ZIO_STAGE_DDT_WRITE = 1 << 11, /* -W--- */ + ZIO_STAGE_DDT_FREE = 1 << 12, /* --F-- */ - ZIO_STAGE_GANG_ASSEMBLE = 1 << 12, /* RWFC- */ - ZIO_STAGE_GANG_ISSUE = 1 << 13, /* RWFC- */ + ZIO_STAGE_GANG_ASSEMBLE = 1 << 13, /* RWFC- */ + ZIO_STAGE_GANG_ISSUE = 1 << 14, /* RWFC- */ - ZIO_STAGE_DVA_THROTTLE = 1 << 14, /* -W--- */ - ZIO_STAGE_DVA_ALLOCATE = 1 << 15, /* -W--- */ -
ZIO_STAGE_DVA_FREE = 1 << 16, /* --F-- */ - ZIO_STAGE_DVA_CLAIM = 1 << 17, /* ---C- */ + ZIO_STAGE_DVA_THROTTLE = 1 << 15, /* -W--- */ + ZIO_STAGE_DVA_ALLOCATE = 1 << 16, /* -W--- */ + ZIO_STAGE_DVA_FREE = 1 << 17, /* --F-- */ + ZIO_STAGE_DVA_CLAIM = 1 << 18, /* ---C- */ - ZIO_STAGE_READY = 1 << 18, /* RWFCI */ + ZIO_STAGE_READY = 1 << 19, /* RWFCI */ - ZIO_STAGE_VDEV_IO_START = 1 << 19, /* RW--I */ - ZIO_STAGE_VDEV_IO_DONE = 1 << 20, /* RW--I */ - ZIO_STAGE_VDEV_IO_ASSESS = 1 << 21, /* RW--I */ + ZIO_STAGE_VDEV_IO_START = 1 << 20, /* RW--I */ + ZIO_STAGE_VDEV_IO_DONE = 1 << 21, /* RW--I */ + ZIO_STAGE_VDEV_IO_ASSESS = 1 << 22, /* RW--I */ - ZIO_STAGE_CHECKSUM_VERIFY = 1 << 22, /* R---- */ + ZIO_STAGE_CHECKSUM_VERIFY = 1 << 23, /* R---- */ - ZIO_STAGE_DONE = 1 << 23 /* RWFCI */ + ZIO_STAGE_DONE = 1 << 24 /* RWFCI */ }; #define ZIO_INTERLOCK_STAGES \ @@ -190,12 +203,14 @@ enum zio_stage { #define ZIO_REWRITE_PIPELINE \ (ZIO_WRITE_COMMON_STAGES | \ ZIO_STAGE_WRITE_COMPRESS | \ + ZIO_STAGE_ENCRYPT | \ ZIO_STAGE_WRITE_BP_INIT) #define ZIO_WRITE_PIPELINE \ (ZIO_WRITE_COMMON_STAGES | \ ZIO_STAGE_WRITE_BP_INIT | \ ZIO_STAGE_WRITE_COMPRESS | \ + ZIO_STAGE_ENCRYPT | \ ZIO_STAGE_DVA_THROTTLE | \ ZIO_STAGE_DVA_ALLOCATE) @@ -210,6 +225,7 @@ enum zio_stage { ZIO_STAGE_WRITE_BP_INIT | \ ZIO_STAGE_ISSUE_ASYNC | \ ZIO_STAGE_WRITE_COMPRESS | \ + ZIO_STAGE_ENCRYPT | \ ZIO_STAGE_CHECKSUM_GENERATE | \ ZIO_STAGE_DDT_WRITE) diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index 98e1c4833ed7..931f6e7cae4e 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -985,7 +985,7 @@ vdev_probe_done(zio_t *zio) } else { ASSERT(zio->io_error != 0); zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE, - spa, vd, NULL, 0, 0); + spa, vd, NULL, NULL, 0, 0); zio->io_error = SET_ERROR(ENXIO); } @@ -3322,7 +3322,8 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux) class = FM_EREPORT_ZFS_DEVICE_UNKNOWN; } - 
zfs_ereport_post(class, spa, vd, NULL, save_state, 0); + zfs_ereport_post(class, spa, vd, NULL, NULL, + save_state, 0); } /* Erase any notion of persistent removed state */ diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c index 109534f52963..20e01cc85280 100644 --- a/usr/src/uts/common/fs/zfs/vdev_raidz.c +++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c @@ -263,21 +263,17 @@ static void vdev_raidz_map_free(raidz_map_t *rm) { int c; - size_t size; for (c = 0; c < rm->rm_firstdatacol; c++) { abd_free(rm->rm_col[c].rc_abd); if (rm->rm_col[c].rc_gdata != NULL) - zio_buf_free(rm->rm_col[c].rc_gdata, - rm->rm_col[c].rc_size); + abd_free(rm->rm_col[c].rc_gdata); + } - size = 0; - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) abd_put(rm->rm_col[c].rc_abd); - size += rm->rm_col[c].rc_size; - } if (rm->rm_abd_copy != NULL) abd_free(rm->rm_abd_copy); @@ -310,14 +306,14 @@ vdev_raidz_cksum_free(void *arg, size_t ignored) } static void -vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) +vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data) { raidz_map_t *rm = zcr->zcr_cbdata; size_t c = zcr->zcr_cbinfo; - size_t x; + size_t x, offset; - const char *good = NULL; - char *bad; + const abd_t *good = NULL; + const abd_t *bad = rm->rm_col[c].rc_abd; if (good_data == NULL) { zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE); @@ -332,8 +328,6 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) */ if (rm->rm_col[0].rc_gdata == NULL) { abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY]; - char *buf; - int offset; /* * Set up the rm_col[]s to generate the parity for @@ -342,20 +336,21 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) */ for (x = 0; x < rm->rm_firstdatacol; x++) { bad_parity[x] = rm->rm_col[x].rc_abd; - rm->rm_col[x].rc_gdata = - zio_buf_alloc(rm->rm_col[x].rc_size); rm->rm_col[x].rc_abd = - 
abd_get_from_buf(rm->rm_col[x].rc_gdata, + rm->rm_col[x].rc_gdata = + abd_alloc_sametype(rm->rm_col[x].rc_abd, rm->rm_col[x].rc_size); } /* fill in the data columns from good_data */ - buf = (char *)good_data; + offset = 0; for (; x < rm->rm_cols; x++) { abd_put(rm->rm_col[x].rc_abd); - rm->rm_col[x].rc_abd = abd_get_from_buf(buf, - rm->rm_col[x].rc_size); - buf += rm->rm_col[x].rc_size; + + rm->rm_col[x].rc_abd = + abd_get_offset_size((abd_t *)good_data, + offset, rm->rm_col[x].rc_size); + offset += rm->rm_col[x].rc_size; } /* @@ -364,34 +359,35 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) vdev_raidz_generate_parity(rm); /* restore everything back to its original state */ - for (x = 0; x < rm->rm_firstdatacol; x++) { - abd_put(rm->rm_col[x].rc_abd); + for (x = 0; x < rm->rm_firstdatacol; x++) rm->rm_col[x].rc_abd = bad_parity[x]; - } offset = 0; for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) { abd_put(rm->rm_col[x].rc_abd); - rm->rm_col[x].rc_abd = abd_get_offset( - rm->rm_abd_copy, offset); + rm->rm_col[x].rc_abd = abd_get_offset_size( + rm->rm_abd_copy, offset, + rm->rm_col[x].rc_size); offset += rm->rm_col[x].rc_size; } } ASSERT3P(rm->rm_col[c].rc_gdata, !=, NULL); - good = rm->rm_col[c].rc_gdata; + good = abd_get_offset_size(rm->rm_col[c].rc_gdata, 0, + rm->rm_col[c].rc_size); } else { /* adjust good_data to point at the start of our column */ - good = good_data; - + offset = 0; for (x = rm->rm_firstdatacol; x < c; x++) - good += rm->rm_col[x].rc_size; + offset += rm->rm_col[x].rc_size; + + good = abd_get_offset_size((abd_t *)good_data, offset, + rm->rm_col[c].rc_size); } - bad = abd_borrow_buf_copy(rm->rm_col[c].rc_abd, rm->rm_col[c].rc_size); /* we drop the ereport if it ends up that the data was good */ zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE); - abd_return_buf(rm->rm_col[c].rc_abd, bad, rm->rm_col[c].rc_size); + abd_put((abd_t *)good); } /* @@ -434,14 +430,16 @@ vdev_raidz_cksum_report(zio_t *zio, 
zio_cksum_report_t *zcr, void *arg) for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) size += rm->rm_col[c].rc_size; - rm->rm_abd_copy = - abd_alloc_sametype(rm->rm_col[rm->rm_firstdatacol].rc_abd, size); + rm->rm_abd_copy = abd_alloc_for_io(size, B_FALSE); for (offset = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { raidz_col_t *col = &rm->rm_col[c]; - abd_t *tmp = abd_get_offset(rm->rm_abd_copy, offset); + abd_t *tmp = abd_get_offset_size(rm->rm_abd_copy, offset, + col->rc_size); - abd_copy(tmp, col->rc_abd, col->rc_size); + ASSERT3S(tmp->abd_size, >=, col->rc_size); + ASSERT3S(col->rc_abd->abd_size, >=, col->rc_size); + abd_copy_off(tmp, col->rc_abd, 0, 0, col->rc_size); abd_put(col->rc_abd); col->rc_abd = tmp; @@ -558,13 +556,15 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset, for (c = 0; c < rm->rm_firstdatacol; c++) rm->rm_col[c].rc_abd = - abd_alloc_linear(rm->rm_col[c].rc_size, B_TRUE); + abd_alloc_linear(rm->rm_col[c].rc_size, B_FALSE); - rm->rm_col[c].rc_abd = abd_get_offset(abd, 0); + rm->rm_col[c].rc_abd = abd_get_offset_size(abd, 0, + rm->rm_col[c].rc_size); off = rm->rm_col[c].rc_size; for (c = c + 1; c < acols; c++) { - rm->rm_col[c].rc_abd = abd_get_offset(abd, off); + rm->rm_col[c].rc_abd = abd_get_offset_size(abd, off, + rm->rm_col[c].rc_size); off += rm->rm_col[c].rc_size; } @@ -679,7 +679,8 @@ vdev_raidz_generate_parity_p(raidz_map_t *rm) p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); if (c == rm->rm_firstdatacol) { - abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); + ASSERT3U(src->abd_size, >=, rm->rm_col[c].rc_size); + abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size); } else { struct pqr_struct pqr = { p, NULL, NULL }; (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, @@ -707,20 +708,22 @@ vdev_raidz_generate_parity_pq(raidz_map_t *rm) ccnt = rm->rm_col[c].rc_size / sizeof (p[0]); if (c == rm->rm_firstdatacol) { - abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); - (void) memcpy(q, p, rm->rm_col[c].rc_size); 
- } else { - struct pqr_struct pqr = { p, q, NULL }; - (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, - vdev_raidz_pq_func, &pqr); - } + ASSERT(ccnt == pcnt || ccnt == 0); - if (c == rm->rm_firstdatacol) { + abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size); + (void) memcpy(q, p, rm->rm_col[c].rc_size); for (i = ccnt; i < pcnt; i++) { p[i] = 0; q[i] = 0; } } else { + struct pqr_struct pqr = { p, q, NULL }; + + ASSERT(ccnt <= pcnt); + + (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, + vdev_raidz_pq_func, &pqr); + /* * Treat short columns as though they are full of 0s. * Note that there's therefore nothing needed for P. @@ -754,22 +757,24 @@ vdev_raidz_generate_parity_pqr(raidz_map_t *rm) ccnt = rm->rm_col[c].rc_size / sizeof (p[0]); if (c == rm->rm_firstdatacol) { - abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); + ASSERT3S(src->abd_size, >=, rm->rm_col[c].rc_size); + ASSERT(ccnt == pcnt || ccnt == 0); + abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size); (void) memcpy(q, p, rm->rm_col[c].rc_size); (void) memcpy(r, p, rm->rm_col[c].rc_size); - } else { - struct pqr_struct pqr = { p, q, r }; - (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, - vdev_raidz_pqr_func, &pqr); - } - if (c == rm->rm_firstdatacol) { for (i = ccnt; i < pcnt; i++) { p[i] = 0; q[i] = 0; r[i] = 0; } } else { + struct pqr_struct pqr = { p, q, r }; + + ASSERT(ccnt <= pcnt); + (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, + vdev_raidz_pqr_func, &pqr); + /* * Treat short columns as though they are full of 0s. * Note that there's therefore nothing needed for P. 
@@ -937,7 +942,9 @@ vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts) src = rm->rm_col[VDEV_RAIDZ_P].rc_abd; dst = rm->rm_col[x].rc_abd; - abd_copy(dst, src, rm->rm_col[x].rc_size); + ASSERT3S(dst->abd_size, >=, rm->rm_col[x].rc_size); + ASSERT3S(src->abd_size, >=, rm->rm_col[x].rc_size); + abd_copy_off(dst, src, 0, 0, rm->rm_col[x].rc_size); for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { uint64_t size = MIN(rm->rm_col[x].rc_size, @@ -975,14 +982,19 @@ vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts) dst = rm->rm_col[x].rc_abd; if (c == rm->rm_firstdatacol) { - abd_copy(dst, src, size); + if (dst != src) { + ASSERT3S(dst->abd_size, >=, size); + ASSERT3S(src->abd_size, >=, size); + abd_copy_off(dst, src, 0, 0, size); + } if (rm->rm_col[x].rc_size > size) abd_zero_off(dst, size, rm->rm_col[x].rc_size - size); } else { ASSERT3U(size, <=, rm->rm_col[x].rc_size); - (void) abd_iterate_func2(dst, src, 0, 0, size, - vdev_raidz_reconst_q_pre_func, NULL); + if (src != dst) + (void) abd_iterate_func2(dst, src, 0, 0, size, + vdev_raidz_reconst_q_pre_func, NULL); (void) abd_iterate_func(dst, size, rm->rm_col[x].rc_size - size, vdev_raidz_reconst_q_pre_tail_func, NULL); @@ -1471,7 +1483,9 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) bufs[c] = col->rc_abd; col->rc_abd = abd_alloc_linear(col->rc_size, B_TRUE); - abd_copy(col->rc_abd, bufs[c], col->rc_size); + ASSERT3S(col->rc_abd->abd_size, >=, col->rc_size); + ASSERT3S(bufs[c]->abd_size, >=, col->rc_size); + abd_copy_off(col->rc_abd, bufs[c], 0, 0, col->rc_size); } } @@ -1567,7 +1581,9 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { raidz_col_t *col = &rm->rm_col[c]; - abd_copy(bufs[c], col->rc_abd, col->rc_size); + ASSERT3S(bufs[c]->abd_size, >=, col->rc_size); + ASSERT3S(col->rc_abd->abd_size, >=, col->rc_size); + abd_copy_off(bufs[c], col->rc_abd, 0, 0, col->rc_size); 
abd_free(col->rc_abd); col->rc_abd = bufs[c]; } @@ -1998,9 +2014,8 @@ vdev_raidz_io_start(zio_t *zio) * Report a checksum error for a child of a RAID-Z device. */ static void -raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data) +raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data) { - void *buf; vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx]; if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { @@ -2014,11 +2029,9 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data) zbc.zbc_has_cksum = 0; zbc.zbc_injected = rm->rm_ecksuminjected; - buf = abd_borrow_buf_copy(rc->rc_abd, rc->rc_size); - zfs_ereport_post_checksum(zio->io_spa, vd, zio, - rc->rc_offset, rc->rc_size, buf, bad_data, - &zbc); - abd_return_buf(rc->rc_abd, buf, rc->rc_size); + zfs_ereport_post_checksum(zio->io_spa, vd, + &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size, + rc->rc_abd, bad_data, &zbc); } } @@ -2048,7 +2061,7 @@ raidz_checksum_verify(zio_t *zio) static int raidz_parity_verify(zio_t *zio, raidz_map_t *rm) { - void *orig[VDEV_RAIDZ_MAXPARITY]; + abd_t *orig[VDEV_RAIDZ_MAXPARITY]; int c, ret = 0; raidz_col_t *rc; @@ -2063,8 +2076,8 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) rc = &rm->rm_col[c]; if (!rc->rc_tried || rc->rc_error != 0) continue; - orig[c] = zio_buf_alloc(rc->rc_size); - abd_copy_to_buf(orig[c], rc->rc_abd, rc->rc_size); + orig[c] = abd_alloc_sametype(rc->rc_abd, rc->rc_size); + abd_copy(orig[c], rc->rc_abd, rc->rc_size); } vdev_raidz_generate_parity(rm); @@ -2073,12 +2086,12 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) rc = &rm->rm_col[c]; if (!rc->rc_tried || rc->rc_error != 0) continue; - if (abd_cmp_buf(rc->rc_abd, orig[c], rc->rc_size) != 0) { + if (abd_cmp(orig[c], rc->rc_abd, rc->rc_abd->abd_size) != 0) { raidz_checksum_error(zio, rc, orig[c]); rc->rc_error = SET_ERROR(ECKSUM); ret++; } - zio_buf_free(orig[c], rc->rc_size); + abd_free(orig[c]); } return (ret); @@ -2113,7 +2126,7 @@ vdev_raidz_combrec(zio_t *zio, int 
total_errors, int data_errors) { raidz_map_t *rm = zio->io_vsd; raidz_col_t *rc; - void *orig[VDEV_RAIDZ_MAXPARITY]; + abd_t *orig[VDEV_RAIDZ_MAXPARITY]; int tstore[VDEV_RAIDZ_MAXPARITY + 2]; int *tgts = &tstore[1]; int current, next, i, c, n; @@ -2162,7 +2175,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) ASSERT(orig[i] != NULL); } - orig[n - 1] = zio_buf_alloc(rm->rm_col[0].rc_size); + orig[n - 1] = abd_alloc_sametype(rm->rm_col[0].rc_abd, + rm->rm_col[0].rc_size); current = 0; next = tgts[current]; @@ -2181,7 +2195,9 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) ASSERT3S(c, >=, 0); ASSERT3S(c, <, rm->rm_cols); rc = &rm->rm_col[c]; - abd_copy_to_buf(orig[i], rc->rc_abd, + ASSERT3S(orig[i]->abd_size, >=, rc->rc_size); + ASSERT3S(rc->rc_abd->abd_size, >=, rc->rc_size); + abd_copy_off(orig[i], rc->rc_abd, 0, 0, rc->rc_size); } @@ -2213,7 +2229,9 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) for (i = 0; i < n; i++) { c = tgts[i]; rc = &rm->rm_col[c]; - abd_copy_from_buf(rc->rc_abd, orig[i], + ASSERT3S(rc->rc_abd->abd_size, >=, rc->rc_size); + ASSERT3S(orig[i]->abd_size, >=, rc->rc_size); + abd_copy_off(rc->rc_abd, orig[i], 0, 0, rc->rc_size); } @@ -2251,9 +2269,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) } n--; done: - for (i = 0; i < n; i++) { - zio_buf_free(orig[i], rm->rm_col[0].rc_size); - } + for (i = 0; i < n; i++) + abd_free(orig[i]); return (ret); } @@ -2512,7 +2529,8 @@ vdev_raidz_io_done(zio_t *zio) zfs_ereport_start_checksum( zio->io_spa, vd->vdev_child[rc->rc_devidx], - zio, rc->rc_offset, rc->rc_size, + &zio->io_bookmark, zio, + rc->rc_offset, rc->rc_size, (void *)(uintptr_t)c, &zbc); } } diff --git a/usr/src/uts/common/fs/zfs/zfeature.c b/usr/src/uts/common/fs/zfs/zfeature.c index 35ce827979e4..dc1c9166d42c 100644 --- a/usr/src/uts/common/fs/zfs/zfeature.c +++ b/usr/src/uts/common/fs/zfs/zfeature.c @@ -413,8 +413,8 @@ spa_feature_create_zap_objects(spa_t 
*spa, dmu_tx_t *tx) * We create feature flags ZAP objects in two instances: during pool * creation and during pool upgrade. */ - ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on && - tx->tx_txg == TXG_INITIAL)); + ASSERT((!spa->spa_sync_on && tx->tx_txg == TXG_INITIAL) || + dsl_pool_sync_context(spa_get_dsl(spa))); spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, diff --git a/usr/src/uts/common/fs/zfs/zfs_fm.c b/usr/src/uts/common/fs/zfs/zfs_fm.c index fa5903a432dd..8a69967aedc7 100644 --- a/usr/src/uts/common/fs/zfs/zfs_fm.c +++ b/usr/src/uts/common/fs/zfs/zfs_fm.c @@ -104,8 +104,8 @@ #ifdef _KERNEL static void zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, - const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, - uint64_t stateoroffset, uint64_t size) + const char *subclass, spa_t *spa, vdev_t *vd, zbookmark_phys_t *zb, + zio_t *zio, uint64_t stateoroffset, uint64_t size) { nvlist_t *ereport, *detector; @@ -318,24 +318,6 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, DATA_TYPE_UINT64, zio->io_size, NULL); } - - /* - * Payload for I/Os with corresponding logical information. 
- */ - if (zio->io_logical != NULL) - fm_payload_set(ereport, - FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET, - DATA_TYPE_UINT64, - zio->io_logical->io_bookmark.zb_objset, - FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT, - DATA_TYPE_UINT64, - zio->io_logical->io_bookmark.zb_object, - FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL, - DATA_TYPE_INT64, - zio->io_logical->io_bookmark.zb_level, - FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID, - DATA_TYPE_UINT64, - zio->io_logical->io_bookmark.zb_blkid, NULL); } else if (vd != NULL) { /* * If we have a vdev but no zio, this is a device fault, and the @@ -347,6 +329,20 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, DATA_TYPE_UINT64, stateoroffset, NULL); } + /* + * Payload for I/Os with corresponding logical information. + */ + if (zb != NULL && (zio == NULL || zio->io_logical != NULL)) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET, + DATA_TYPE_UINT64, zb->zb_objset, + FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT, + DATA_TYPE_UINT64, zb->zb_object, + FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL, + DATA_TYPE_INT64, zb->zb_level, + FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID, + DATA_TYPE_UINT64, zb->zb_blkid, NULL); + mutex_exit(&spa->spa_errlist_lock); *ereport_out = ereport; @@ -501,11 +497,11 @@ range_total_size(zfs_ecksum_info_t *eip) static zfs_ecksum_info_t * annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, - const uint8_t *goodbuf, const uint8_t *badbuf, size_t size, + const abd_t *goodabd, const abd_t *badabd, size_t size, boolean_t drop_if_identical) { - const uint64_t *good = (const uint64_t *)goodbuf; - const uint64_t *bad = (const uint64_t *)badbuf; + const uint64_t *good; + const uint64_t *bad; uint64_t allset = 0; uint64_t allcleared = 0; @@ -549,14 +545,16 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, } } - if (badbuf == NULL || goodbuf == NULL) + if (badabd == NULL || goodabd == NULL) return (eip); - ASSERT3U(nui64s, <=, UINT16_MAX); ASSERT3U(size, ==, nui64s * sizeof (uint64_t)); ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); 
ASSERT3U(size, <=, UINT32_MAX); + good = (const uint64_t *) abd_borrow_buf_copy((abd_t *)goodabd, size); + bad = (const uint64_t *) abd_borrow_buf_copy((abd_t *)badabd, size); + /* build up the range list by comparing the two buffers. */ for (idx = 0; idx < nui64s; idx++) { if (good[idx] == bad[idx]) { @@ -586,6 +584,8 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, */ if (inline_size == 0 && drop_if_identical) { kmem_free(eip, sizeof (*eip)); + abd_return_buf((abd_t *)goodabd, (void *)good, size); + abd_return_buf((abd_t *)badabd, (void *)bad, size); return (NULL); } @@ -626,6 +626,10 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, eip->zei_ranges[range].zr_start *= sizeof (uint64_t); eip->zei_ranges[range].zr_end *= sizeof (uint64_t); } + + abd_return_buf((abd_t *)goodabd, (void *)good, size); + abd_return_buf((abd_t *)badabd, (void *)bad, size); + eip->zei_allowed_mingap *= sizeof (uint64_t); inline_size *= sizeof (uint64_t); @@ -666,15 +670,16 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, #endif void -zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, - uint64_t stateoroffset, uint64_t size) +zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, + struct zbookmark_phys *zb, zio_t *zio, uint64_t stateoroffset, + uint64_t size) { #ifdef _KERNEL nvlist_t *ereport = NULL; nvlist_t *detector = NULL; - zfs_ereport_start(&ereport, &detector, - subclass, spa, vd, zio, stateoroffset, size); + zfs_ereport_start(&ereport, &detector, subclass, spa, vd, + zb, zio, stateoroffset, size); if (ereport == NULL) return; @@ -687,7 +692,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, } void -zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, +zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length, void *arg, zio_bad_cksum_t *info) { @@ -709,7 +714,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, #ifdef 
_KERNEL zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector, - FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); + FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio, offset, length); if (report->zcr_ereport == NULL) { report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo); @@ -729,8 +734,8 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, } void -zfs_ereport_finish_checksum(zio_cksum_report_t *report, - const void *good_data, const void *bad_data, boolean_t drop_if_identical) +zfs_ereport_finish_checksum(zio_cksum_report_t *report, const abd_t *good_data, + const abd_t *bad_data, boolean_t drop_if_identical) { #ifdef _KERNEL zfs_ecksum_info_t *info = NULL; @@ -777,17 +782,17 @@ zfs_ereport_send_interim_checksum(zio_cksum_report_t *report) } void -zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, +zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length, - const void *good_data, const void *bad_data, zio_bad_cksum_t *zbc) + const abd_t *good_data, const abd_t *bad_data, zio_bad_cksum_t *zbc) { #ifdef _KERNEL nvlist_t *ereport = NULL; nvlist_t *detector = NULL; zfs_ecksum_info_t *info; - zfs_ereport_start(&ereport, &detector, - FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); + zfs_ereport_start(&ereport, &detector, FM_EREPORT_ZFS_CHECKSUM, + spa, vd, zb, zio, offset, length); if (ereport == NULL) return; diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index 52cf0145e3ce..52cebdc5f70e 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -32,7 +32,7 @@ * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Toomas Soome * Copyright 2017 RackTop Systems. - * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, Datto, Inc. All rights reserved. 
*/ /* @@ -188,6 +188,7 @@ #include #include #include +#include #include "zfs_namecheck.h" #include "zfs_prop.h" @@ -589,12 +590,12 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) * Try to own the dataset; abort if there is any error, * (e.g., already mounted, in use, or other error). */ - error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, + error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE, setsl_tag, &os); if (error != 0) return (SET_ERROR(EPERM)); - dmu_objset_disown(os, setsl_tag); + dmu_objset_disown(os, B_TRUE, setsl_tag); if (new_default) { needed_priv = PRIV_FILE_DOWNGRADE_SL; @@ -1273,6 +1274,22 @@ zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) return (0); } +/* ARGSUSED */ +static int +zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_LOAD_KEY, cr)); +} + +/* ARGSUSED */ +static int +zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_CHANGE_KEY, cr)); +} + /* * Policy for allowing temporary snapshots to be taken or released */ @@ -1469,7 +1486,7 @@ zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag) if (zfsvfs->z_vfs) { VFS_RELE(zfsvfs->z_vfs); } else { - dmu_objset_disown(zfsvfs->z_os, zfsvfs); + dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); zfsvfs_free(zfsvfs); } } @@ -1481,6 +1498,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) nvlist_t *config, *props = NULL; nvlist_t *rootprops = NULL; nvlist_t *zplprops = NULL; + dsl_crypto_params_t *dcp = NULL; if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) @@ -1495,6 +1513,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) if (props) { nvlist_t *nvl = NULL; + nvlist_t *hidden_args = NULL; uint64_t version = SPA_VERSION; (void) nvlist_lookup_uint64(props, @@ -1513,6 +1532,18 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) } (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS); } + + (void) 
nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS, + &hidden_args); + error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, + rootprops, hidden_args, &dcp); + if (error != 0) { + nvlist_free(config); + nvlist_free(props); + return (error); + } + (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS); + VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); error = zfs_fill_zplprops_root(version, rootprops, zplprops, NULL); @@ -1520,7 +1551,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) goto pool_props_bad; } - error = spa_create(zc->zc_name, config, props, zplprops); + error = spa_create(zc->zc_name, config, props, zplprops, dcp); /* * Set the remaining root properties @@ -1534,6 +1565,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) nvlist_free(zplprops); nvlist_free(config); nvlist_free(props); + dsl_crypto_params_free(dcp, !!error); return (error); } @@ -1812,15 +1844,16 @@ zfs_ioc_obj_to_path(zfs_cmd_t *zc) int error; /* XXX reading from objset not owned */ - if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) + if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, + FTAG, &os)) != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { - dmu_objset_rele(os, FTAG); + dmu_objset_rele_flags(os, B_TRUE, FTAG); return (SET_ERROR(EINVAL)); } error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value, sizeof (zc->zc_value)); - dmu_objset_rele(os, FTAG); + dmu_objset_rele_flags(os, B_TRUE, FTAG); return (error); } @@ -1841,15 +1874,16 @@ zfs_ioc_obj_to_stats(zfs_cmd_t *zc) int error; /* XXX reading from objset not owned */ - if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) + if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, + FTAG, &os)) != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { - dmu_objset_rele(os, FTAG); + dmu_objset_rele_flags(os, B_TRUE, FTAG); return (SET_ERROR(EINVAL)); } error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value, sizeof (zc->zc_value)); - dmu_objset_rele(os, FTAG); + dmu_objset_rele_flags(os, B_TRUE, 
FTAG); return (error); } @@ -2415,7 +2449,8 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, { const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); - uint64_t intval; + uint64_t intval = 0; + char *strval = NULL; int err = -1; if (prop == ZPROP_INVAL) { @@ -2431,10 +2466,12 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, &pair) == 0); } - if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) - return (-1); - - VERIFY(0 == nvpair_value_uint64(pair, &intval)); + /* all special properties are numeric except for keylocation */ + if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) { + strval = fnvpair_value_string(pair); + } else { + intval = fnvpair_value_uint64(pair); + } switch (prop) { case ZFS_PROP_QUOTA: @@ -2451,6 +2488,16 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, } else { err = dsl_dir_activate_fs_ss_limit(dsname); } + /* + * Set err to -1 to force the zfs_set_prop_nvlist code down the + * default path to set the value in the nvlist. + */ + if (err == 0) + err = -1; + break; + case ZFS_PROP_KEYLOCATION: + err = dsl_crypto_can_set_keylocation(dsname, strval); + /* * Set err to -1 to force the zfs_set_prop_nvlist code down the * default path to set the value in the nvlist. 
@@ -3161,6 +3208,8 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, * innvl: { * "type" -> dmu_objset_type_t (int32) * (optional) "props" -> { prop -> value } + * (optional) "hidden_args" -> { "wkeydata" -> value } + * raw uint8_t array of encryption wrapping key data (32 bytes) * } * * outnvl: propname -> error code (int32) @@ -3171,15 +3220,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) int error = 0; zfs_creat_t zct = { 0 }; nvlist_t *nvprops = NULL; + nvlist_t *hidden_args = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); int32_t type32; dmu_objset_type_t type; boolean_t is_insensitive = B_FALSE; + dsl_crypto_params_t *dcp = NULL; if (nvlist_lookup_int32(innvl, "type", &type32) != 0) return (SET_ERROR(EINVAL)); type = type32; (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); + (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); switch (type) { case DMU_OST_ZFS: @@ -3245,9 +3297,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) } } + error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops, + hidden_args, &dcp); + if (error != 0) { + nvlist_free(zct.zct_zplprops); + return (error); + } + error = dmu_objset_create(fsname, type, - is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); + is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct); + nvlist_free(zct.zct_zplprops); + dsl_crypto_params_free(dcp, !!error); /* * It would be nice to do this atomically. 
@@ -3265,6 +3326,8 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) * innvl: { * "origin" -> name of origin snapshot * (optional) "props" -> { prop -> value } + * (optional) "hidden_args" -> { "wkeydata" -> value } + * raw uint8_t array of encryption wrapping key data (32 bytes) * } * * outnvl: propname -> error code (int32) @@ -3286,9 +3349,8 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) if (dataset_namecheck(origin_name, NULL, NULL) != 0) return (SET_ERROR(EINVAL)); + error = dmu_objset_clone(fsname, origin_name); - if (error != 0) - return (error); /* * It would be nice to do this atomically. @@ -4154,7 +4216,11 @@ extract_delay_props(nvlist_t *props) { nvlist_t *delayprops; nvpair_t *nvp, *tmp; - static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 }; + static const zfs_prop_t delayable[] = { + ZFS_PROP_REFQUOTA, + ZFS_PROP_KEYLOCATION, + 0 + }; int i; VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); @@ -4343,7 +4409,7 @@ zfs_ioc_recv(zfs_cmd_t *zc) } } - if (delayprops != NULL) { + if (delayprops != NULL && props != NULL) { /* * Merge delayed props back in with initial props, in case * we're DEBUG and zfs_ioc_recv_inject_err is set (which means @@ -4453,6 +4519,7 @@ zfs_ioc_send(zfs_cmd_t *zc) boolean_t embedok = (zc->zc_flags & 0x1); boolean_t large_block_ok = (zc->zc_flags & 0x2); boolean_t compressok = (zc->zc_flags & 0x4); + boolean_t rawok = (zc->zc_flags & 0x8); if (zc->zc_obj != 0) { dsl_pool_t *dp; @@ -4484,7 +4551,8 @@ zfs_ioc_send(zfs_cmd_t *zc) if (error != 0) return (error); - error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap); + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, + FTAG, &tosnap); if (error != 0) { dsl_pool_rele(dp, FTAG); return (error); @@ -4500,7 +4568,7 @@ zfs_ioc_send(zfs_cmd_t *zc) } } - error = dmu_send_estimate(tosnap, fromsnap, compressok, + error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok, &zc->zc_objset_type); if 
(fromsnap != NULL) @@ -4514,7 +4582,7 @@ zfs_ioc_send(zfs_cmd_t *zc) off = fp->f_offset; error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, - zc->zc_fromobj, embedok, large_block_ok, compressok, + zc->zc_fromobj, embedok, large_block_ok, compressok, rawok, zc->zc_cookie, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) @@ -4892,7 +4960,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) error = zfs_suspend_fs(zfsvfs); if (error == 0) { dmu_objset_refresh_ownership(zfsvfs->z_os, - zfsvfs); + B_TRUE, zfsvfs); error = zfs_resume_fs(zfsvfs, ds); } } @@ -4901,12 +4969,12 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) VFS_RELE(zfsvfs->z_vfs); } else { /* XXX kind of reading contents without owning */ - error = dmu_objset_hold(zc->zc_name, FTAG, &os); + error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os); if (error != 0) return (error); error = dmu_objset_userspace_upgrade(os); - dmu_objset_rele(os, FTAG); + dmu_objset_rele_flags(os, B_TRUE, FTAG); } return (error); @@ -5064,7 +5132,7 @@ zfs_ioc_next_obj(zfs_cmd_t *zc) objset_t *os = NULL; int error; - error = dmu_objset_hold(zc->zc_name, FTAG, &os); + error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os); if (error != 0) return (error); @@ -5481,6 +5549,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) * presence indicates DRR_WRITE_EMBEDDED records are permitted * (optional) "compressok" -> (value ignored) * presence indicates compressed DRR_WRITE records are permitted + * (optional) "rawok" -> (value ignored) + * presence indicates raw encrypted records should be used. * (optional) "resume_object" and "resume_offset" -> (uint64) * if present, resume send stream from specified object and offset. 
* } @@ -5498,6 +5568,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) boolean_t largeblockok; boolean_t embedok; boolean_t compressok; + boolean_t rawok; uint64_t resumeobj = 0; uint64_t resumeoff = 0; @@ -5510,6 +5581,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) largeblockok = nvlist_exists(innvl, "largeblockok"); embedok = nvlist_exists(innvl, "embedok"); compressok = nvlist_exists(innvl, "compressok"); + rawok = nvlist_exists(innvl, "rawok"); (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj); (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff); @@ -5520,7 +5592,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) off = fp->f_offset; error = dmu_send(snapname, fromname, embedok, largeblockok, compressok, - fd, resumeobj, resumeoff, fp->f_vnode, &off); + rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; @@ -5559,6 +5631,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) /* LINTED E_FUNC_SET_NOT_USED */ boolean_t embedok; boolean_t compressok; + boolean_t rawok; uint64_t space; error = dsl_pool_hold(snapname, FTAG, &dp); @@ -5574,6 +5647,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) largeblockok = nvlist_exists(innvl, "largeblockok"); embedok = nvlist_exists(innvl, "embedok"); compressok = nvlist_exists(innvl, "compressok"); + rawok = nvlist_exists(innvl, "rawok"); error = nvlist_lookup_string(innvl, "from", &fromname); if (error == 0) { @@ -5587,8 +5661,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap); if (error != 0) goto out; - error = dmu_send_estimate(tosnap, fromsnap, compressok, - &space); + error = dmu_send_estimate(tosnap, fromsnap, + compressok || rawok, &space); dsl_dataset_rele(fromsnap, FTAG); } else if 
(strchr(fromname, '#') != NULL) { /* @@ -5603,7 +5677,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) if (error != 0) goto out; error = dmu_send_estimate_from_txg(tosnap, - frombm.zbm_creation_txg, compressok, &space); + frombm.zbm_creation_txg, compressok || rawok, + &space); } else { /* * from is not properly formatted as a snapshot or @@ -5614,7 +5689,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) } } else { // If estimating the size of a full send, use dmu_send_estimate - error = dmu_send_estimate(tosnap, NULL, compressok, &space); + error = dmu_send_estimate(tosnap, NULL, compressok || rawok, + &space); } fnvlist_add_uint64(outnvl, "space", space); @@ -5625,6 +5701,124 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) return (error); } +/* + * Load a user's wrapping key into the kernel. + * innvl: { + * "hidden_args" -> { "wkeydata" -> value } + * raw uint8_t array of encryption wrapping key data (32 bytes) + * (optional) "noop" -> (value ignored) + * presence indicated key should only be verified, not loaded + * } + */ +/* ARGSUSED */ +static int +zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) +{ + int ret = 0; + dsl_crypto_params_t *dcp = NULL; + nvlist_t *hidden_args; + boolean_t noop = nvlist_exists(innvl, "noop"); + + if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); + if (ret != 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL, + hidden_args, &dcp); + if (ret != 0) + goto error; + + ret = spa_keystore_load_wkey(dsname, dcp, noop); + if (ret != 0) + goto error; + + dsl_crypto_params_free(dcp, noop); + + return (0); + +error: + dsl_crypto_params_free(dcp, B_TRUE); + return (ret); +} + +/* + * Unload a user's wrapping key from the kernel. 
+ * Both innvl and outnvl are unused. + */ +/* ARGSUSED */ +static int +zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) +{ + int ret = 0; + + if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) { + ret = (SET_ERROR(EINVAL)); + goto out; + } + + ret = spa_keystore_unload_wkey(dsname); + if (ret != 0) + goto out; + +out: + return (ret); +} + +/* + * Changes a user's wrapping key used to decrypt a dataset. The keyformat, + * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified + * here to change how the key is derived in userspace. + * + * innvl: { + * "hidden_args" (optional) -> { "wkeydata" -> value } + * raw uint8_t array of new encryption wrapping key data (32 bytes) + * "props" (optional) -> { prop -> value } + * } + * + * outnvl is unused + */ +/* ARGSUSED */ +static int +zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) +{ + int ret; + uint64_t cmd = DCP_CMD_NONE; + dsl_crypto_params_t *dcp = NULL; + nvlist_t *args = NULL, *hidden_args = NULL; + + if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) { + ret = (SET_ERROR(EINVAL)); + goto error; + } + + (void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd); + (void) nvlist_lookup_nvlist(innvl, "props", &args); + (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); + + ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp); + if (ret != 0) + goto error; + + ret = spa_keystore_change_key(dsname, dcp); + if (ret != 0) + goto error; + + dsl_crypto_params_free(dcp, B_FALSE); + + return (0); + +error: + dsl_crypto_params_free(dcp, B_TRUE); + return (ret); +} + static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST]; static void @@ -5798,6 +5992,17 @@ zfs_ioctl_init(void) POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY, + zfs_ioc_load_key, zfs_secpolicy_load_key, + DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE); + 
zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY, + zfs_ioc_unload_key, zfs_secpolicy_load_key, + DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE); + zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY, + zfs_ioc_change_key, zfs_secpolicy_change_key, + DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, + B_TRUE, B_TRUE); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c index 3de658666a2c..e0269961c29e 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c @@ -974,8 +974,8 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) * We claim to always be readonly so we can open snapshots; * other ZPL code will prevent us from writing to snapshots. */ - - error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); + error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, B_TRUE, zfsvfs, + &os); if (error != 0) { kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); @@ -983,7 +983,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) error = zfsvfs_create_impl(zfvp, zfsvfs, os); if (error != 0) { - dmu_objset_disown(os, zfsvfs); + dmu_objset_disown(os, B_TRUE, zfsvfs); } return (error); } @@ -1009,6 +1009,7 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) error = zfsvfs_init(zfsvfs, os); if (error != 0) { + dmu_objset_disown(os, B_TRUE, zfsvfs); *zfvp = NULL; kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); @@ -1235,7 +1236,7 @@ zfs_domount(vfs_t *vfsp, char *osname) zfsctl_create(zfsvfs); out: if (error) { - dmu_objset_disown(zfsvfs->z_os, zfsvfs); + dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); zfsvfs_free(zfsvfs); } else { atomic_inc_32(&zfs_active_fs_count); @@ -1903,7 +1904,7 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) /* * Finally release the objset */ - dmu_objset_disown(os, zfsvfs); + dmu_objset_disown(os, 
B_TRUE, zfsvfs); } /* diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index 7b60c76b8b49..8928f7574b26 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ -914,8 +914,8 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) xuio_stat_wbuf_copied(); } else { ASSERT(xuio || tx_bytes == max_blksz); - dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), - woff, abuf, tx); + dmu_assign_arcbuf_by_dbuf( + sa_get_db(zp->z_sa_hdl), woff, abuf, tx); } ASSERT(tx_bytes <= uio->uio_resid); uioskip(uio, tx_bytes); diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c index bc0b8dec3f00..b6c8ac88c85b 100644 --- a/usr/src/uts/common/fs/zfs/zil.c +++ b/usr/src/uts/common/fs/zfs/zil.c @@ -198,8 +198,8 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp) * Read a log block and make sure it's valid. */ static int -zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, - char **end) +zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp, + blkptr_t *nbp, void *dst, char **end) { enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; arc_flags_t aflags = ARC_FLAG_WAIT; @@ -213,11 +213,14 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) zio_flags |= ZIO_FLAG_SPECULATIVE; + if (!decrypt) + zio_flags |= ZIO_FLAG_RAW; + SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); - error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, - ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); + error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, + &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (error == 0) { zio_cksum_t cksum = bp->blk_cksum; @@ -292,6 +295,14 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) if 
(zilog->zl_header->zh_claim_txg == 0) zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; + /* + * If we are not using the resulting data, we are just checking that + * it hasn't been corrupted so we don't need to waste CPU time + * decompressing and decrypting it. + */ + if (wbuf == NULL) + zio_flags |= ZIO_FLAG_RAW; + SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); @@ -312,7 +323,8 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) */ int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, - zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, + boolean_t decrypt) { const zil_header_t *zh = zilog->zl_header; boolean_t claimed = !!zh->zh_claim_txg; @@ -351,7 +363,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, if (blk_seq > claim_blk_seq) break; - if ((error = parse_blk_func(zilog, &blk, arg, txg)) != 0) + + error = parse_blk_func(zilog, &blk, arg, txg); + if (error != 0) break; ASSERT3U(max_blk_seq, <, blk_seq); max_blk_seq = blk_seq; @@ -360,7 +374,8 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq) break; - error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end); + error = zil_read_log_block(zilog, decrypt, &blk, &next_blk, + lrbuf, &end); if (error != 0) break; @@ -370,7 +385,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, ASSERT3U(reclen, >=, sizeof (lr_t)); if (lr->lrc_seq > claim_lr_seq) goto done; - if ((error = parse_lr_func(zilog, lr, arg, txg)) != 0) + + error = parse_lr_func(zilog, lr, arg, txg); + if (error != 0) goto done; ASSERT3U(max_lr_seq, <, lr->lrc_seq); max_lr_seq = lr->lrc_seq; @@ -385,7 +402,8 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, zilog->zl_parse_lr_count = lr_count; ASSERT(!claimed || !(zh->zh_flags & 
ZIL_CLAIM_LR_SEQ_VALID) || - (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq)); + (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq) || + (decrypt && error == EIO)); zil_bp_tree_fini(zilog); zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE); @@ -426,9 +444,12 @@ zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg) * waited for all writes to be stable first), so it is semantically * correct to declare this the end of the log. */ - if (lr->lr_blkptr.blk_birth >= first_txg && - (error = zil_read_log_data(zilog, lr, NULL)) != 0) - return (error); + if (lr->lr_blkptr.blk_birth >= first_txg) { + error = zil_read_log_data(zilog, lr, NULL); + if (error != 0) + return (error); + } + return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg)); } @@ -644,8 +665,8 @@ zil_create(zilog_t *zilog) BP_ZERO(&blk); } - error = zio_alloc_zil(zilog->zl_spa, txg, &blk, NULL, - ZIL_MIN_BLKSZ, &slog); + error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk, + NULL, ZIL_MIN_BLKSZ, &slog); if (error == 0) zil_init_log_chain(zilog, &blk); @@ -733,7 +754,7 @@ zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx) { ASSERT(list_is_empty(&zilog->zl_lwb_list)); (void) zil_parse(zilog, zil_free_log_block, - zil_free_log_record, tx, zilog->zl_header->zh_claim_txg); + zil_free_log_record, tx, zilog->zl_header->zh_claim_txg, B_FALSE); } int @@ -747,7 +768,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) int error; error = dmu_objset_own_obj(dp, ds->ds_object, - DMU_OST_ANY, B_FALSE, FTAG, &os); + DMU_OST_ANY, B_FALSE, B_FALSE, FTAG, &os); if (error != 0) { /* * EBUSY indicates that the objset is inconsistent, in which @@ -767,8 +788,10 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) if (!BP_IS_HOLE(&zh->zh_log)) zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log); BP_ZERO(&zh->zh_log); + if (os->os_encrypted) + os->os_next_write_raw = B_TRUE; dsl_dataset_dirty(dmu_objset_ds(os), tx); - dmu_objset_disown(os, FTAG); + 
dmu_objset_disown(os, B_FALSE, FTAG); return (0); } @@ -782,7 +805,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) ASSERT3U(zh->zh_claim_txg, <=, first_txg); if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { (void) zil_parse(zilog, zil_claim_log_block, - zil_claim_log_record, tx, first_txg); + zil_claim_log_record, tx, first_txg, B_FALSE); zh->zh_claim_txg = first_txg; zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq; zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq; @@ -793,7 +816,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) } ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (0); } @@ -851,7 +874,8 @@ zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx) * which will update spa_max_claim_txg. See spa_load() for details. */ error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx, - zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa)); + zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa), + B_FALSE); return ((error == ECKSUM || error == ENOENT) ? 0 : error); } @@ -1271,7 +1295,9 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) BP_ZERO(bp); /* pass the old blkptr in order to spread log blocks across devs */ - error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz, &slog); + error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, &lwb->lwb_blk, + zil_blksz, &slog); + if (error == 0) { ASSERT3U(bp->blk_birth, ==, txg); bp->blk_cksum = lwb->lwb_blk.blk_cksum; @@ -2977,6 +3003,21 @@ zil_suspend(const char *osname, void **cookiep) return (0); } + /* + * The ZIL has work to do. Ensure that the associated encryption + * key will remain mapped while we are committing the log by + * grabbing a reference to it. If the key isn't loaded we have no + * choice but to return an error until the wrapping key is loaded. 
+ */ + if (os->os_encrypted && spa_keystore_create_mapping(os->os_spa, + dmu_objset_ds(os), FTAG) != 0) { + zilog->zl_suspend--; + mutex_exit(&zilog->zl_lock); + dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag); + dsl_dataset_rele(dmu_objset_ds(os), suspend_tag); + return (SET_ERROR(EBUSY)); + } + zilog->zl_suspending = B_TRUE; mutex_exit(&zilog->zl_lock); @@ -2989,6 +3030,20 @@ zil_suspend(const char *osname, void **cookiep) cv_broadcast(&zilog->zl_cv_suspend); mutex_exit(&zilog->zl_lock); + if (os->os_encrypted) { + /* + * Encrypted datasets need to wait for all data to be + * synced out before removing the mapping. + * + * XXX: Depending on the number of datasets with + * outstanding ZIL data on a given log device, this + * might cause spa_offline_log() to take a long time. + */ + txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); + VERIFY0(spa_keystore_remove_mapping(os->os_spa, + dmu_objset_id(os), FTAG)); + } + if (cookiep == NULL) zil_resume(os); else @@ -3155,7 +3210,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE]) zilog->zl_replay_time = ddi_get_lbolt(); ASSERT(zilog->zl_replay_blks == 0); (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, - zh->zh_claim_txg); + zh->zh_claim_txg, B_TRUE); kmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE); zil_destroy(zilog, B_FALSE); diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index b7aba119ceb6..b57ad6520e6f 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -42,6 +42,7 @@ #include #include #include +#include /* * ========================================================================== @@ -267,6 +268,13 @@ zio_data_buf_free(void *buf, size_t size) kmem_cache_free(zio_data_buf_cache[c], buf); } +/* ARGSUSED */ +static void +zio_abd_free(void *abd, size_t size) +{ + abd_free((abd_t *)abd); +} + /* * ========================================================================== * Push and pop I/O 
transform buffers @@ -319,7 +327,7 @@ zio_pop_transforms(zio_t *zio) /* * ========================================================================== - * I/O transform callbacks for subblocks and decompression + * I/O transform callbacks for subblocks, decompression, and decryption * ========================================================================== */ static void @@ -345,6 +353,126 @@ zio_decompress(zio_t *zio, abd_t *data, uint64_t size) } } +static void +zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) +{ + int ret; + void *tmp; + blkptr_t *bp = zio->io_bp; + uint64_t lsize = BP_GET_LSIZE(bp); + dmu_object_type_t ot = BP_GET_TYPE(bp); + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + boolean_t no_crypt = B_FALSE; + + ASSERT(BP_USES_CRYPT(bp)); + ASSERT3U(size, !=, 0); + + if (zio->io_error != 0) + return; + + /* + * Verify the cksum of MACs stored in an indirect bp. It will always + * be possible to verify this since it does not require an encryption + * key. + */ + if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) { + zio_crypt_decode_mac_bp(bp, mac); + + if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) { + /* + * We haven't decompressed the data yet, but + * zio_crypt_do_indirect_mac_checksum() requires + * decompressed data to be able to parse out the MACs + * from the indirect block. We decompress it now and + * throw away the result after we are finished. 
+ */ + tmp = zio_buf_alloc(lsize); + ret = zio_decompress_data(BP_GET_COMPRESS(bp), + zio->io_abd, tmp, zio->io_size, lsize); + if (ret != 0) { + ret = SET_ERROR(EIO); + goto error; + } + ret = zio_crypt_do_indirect_mac_checksum(B_FALSE, + tmp, lsize, BP_SHOULD_BYTESWAP(bp), mac); + zio_buf_free(tmp, lsize); + } else { + ret = zio_crypt_do_indirect_mac_checksum_abd(B_FALSE, + zio->io_abd, size, BP_SHOULD_BYTESWAP(bp), mac); + } + abd_copy(data, zio->io_abd, size); + + if (ret != 0) + goto error; + + return; + } + + /* + * If this is an authenticated block, just check the MAC. It would be + * nice to separate this out into its own flag, but for the moment + * enum zio_flag is out of bits. + */ + if (BP_IS_AUTHENTICATED(bp)) { + if (ot == DMU_OT_OBJSET) { + ret = spa_do_crypt_objset_mac_abd(B_FALSE, zio->io_spa, + zio->io_bookmark.zb_objset, zio->io_abd, size, + BP_SHOULD_BYTESWAP(bp)); + } else { + zio_crypt_decode_mac_bp(bp, mac); + ret = spa_do_crypt_mac_abd(B_FALSE, zio->io_spa, + zio->io_bookmark.zb_objset, zio->io_abd, size, mac); + } + abd_copy(data, zio->io_abd, size); + + if (ret != 0) + goto error; + + return; + } + + zio_crypt_decode_params_bp(bp, salt, iv); + + if (ot == DMU_OT_INTENT_LOG) { + tmp = abd_borrow_buf_copy(zio->io_abd, sizeof (zil_chain_t)); + zio_crypt_decode_mac_zil(tmp, mac); + abd_return_buf(zio->io_abd, tmp, sizeof (zil_chain_t)); + } else { + zio_crypt_decode_mac_bp(bp, mac); + } + + ret = spa_do_crypt_abd(B_FALSE, zio->io_spa, zio->io_bookmark.zb_objset, + bp, bp->blk_birth, size, data, zio->io_abd, iv, mac, salt, + &no_crypt); + if (no_crypt) + abd_copy(data, zio->io_abd, size); + + if (ret != 0) + goto error; + + return; + +error: + /* assert that the key was found unless this was speculative */ + ASSERT(ret != ENOENT || (zio->io_flags & ZIO_FLAG_SPECULATIVE)); + + /* + * If there was a decryption / authentication error return EIO as + * the io_error. If this was not a speculative zio, create an ereport. 
+ */ + if (ret == ECKSUM) { + ret = SET_ERROR(EIO); + if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { + zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, + zio->io_spa, NULL, &zio->io_bookmark, zio, 0, 0); + } + } else { + zio->io_error = ret; + } +} + /* * ========================================================================== * I/O parent/child relationships and pipeline interlocks @@ -555,7 +683,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER)); ASSERT(vd || stage == ZIO_STAGE_OPEN); - IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW) != 0); + IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW_COMPRESS) != 0); zio = kmem_cache_alloc(zio_cache, KM_SLEEP); bzero(zio, sizeof (zio_t)); @@ -787,9 +915,12 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, * Data can be NULL if we are going to call zio_write_override() to * provide the already-allocated BP. But we may need the data to * verify a dedup hit (if requested). In this case, don't try to - * dedup (just take the already-allocated BP verbatim). + * dedup (just take the already-allocated BP verbatim). Encrypted + * dedup blocks need data as well so we also disable dedup in this + * case. */ - if (data == NULL && zio->io_prop.zp_dedup_verify) { + if (data == NULL && + (zio->io_prop.zp_dedup_verify || zio->io_prop.zp_encrypt)) { zio->io_prop.zp_dedup = zio->io_prop.zp_dedup_verify = B_FALSE; } @@ -1128,21 +1259,28 @@ static int zio_read_bp_init(zio_t *zio) { blkptr_t *bp = zio->io_bp; + uint64_t psize = + BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && zio->io_child_type == ZIO_CHILD_LOGICAL && - !(zio->io_flags & ZIO_FLAG_RAW)) { - uint64_t psize = - BP_IS_EMBEDDED(bp) ? 
BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); + !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) { zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize), psize, psize, zio_decompress); } - if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { - zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + if (((BP_IS_PROTECTED(bp) && !(zio->io_flags & ZIO_FLAG_RAW_ENCRYPT)) || + BP_HAS_INDIRECT_MAC_CKSUM(bp)) && + zio->io_child_type == ZIO_CHILD_LOGICAL) { + zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize), + psize, psize, zio_decrypt); + } + if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { int psize = BPE_GET_PSIZE(bp); void *data = abd_borrow_buf(zio->io_abd, psize); + + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; decode_embedded_bp_compressed(bp, data); abd_return_buf_copy(zio->io_abd, data, psize); } else { @@ -1202,7 +1340,8 @@ zio_write_bp_init(zio_t *zio) ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags & ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify); - if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) { + if (BP_GET_CHECKSUM(bp) == zp->zp_checksum && + !zp->zp_encrypt) { BP_SET_DEDUP(bp, 1); zio->io_pipeline |= ZIO_STAGE_DDT_WRITE; return (ZIO_PIPELINE_CONTINUE); @@ -1231,8 +1370,6 @@ zio_write_compress(zio_t *zio) uint64_t psize = zio->io_size; int pass = 1; - EQUIV(lsize != psize, (zio->io_flags & ZIO_FLAG_RAW) != 0); - /* * If our children haven't all reached the ready stage, * wait for them and then repeat this pipeline stage. @@ -1282,13 +1419,15 @@ zio_write_compress(zio_t *zio) } /* If it's a compressed write that is not raw, compress the buffer. 
*/ - if (compress != ZIO_COMPRESS_OFF && psize == lsize) { + if (compress != ZIO_COMPRESS_OFF && + !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) { void *cbuf = zio_buf_alloc(lsize); psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize); if (psize == 0 || psize == lsize) { compress = ZIO_COMPRESS_OFF; zio_buf_free(cbuf, lsize); - } else if (!zp->zp_dedup && psize <= BPE_PAYLOAD_SIZE && + } else if (!zp->zp_dedup && !zp->zp_encrypt && + psize <= BPE_PAYLOAD_SIZE && zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) && spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) { encode_embedded_bp_compressed(bp, @@ -1350,8 +1489,8 @@ zio_write_compress(zio_t *zio) if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == psize && pass >= zfs_sync_pass_rewrite) { - ASSERT(psize != 0); enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES; + ASSERT(psize != 0); zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages; zio->io_flags |= ZIO_FLAG_IO_REWRITE; } else { @@ -1381,6 +1520,8 @@ zio_write_compress(zio_t *zio) if (zp->zp_dedup) { ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE)); + ASSERT(!zp->zp_encrypt || + DMU_OT_IS_ENCRYPTED(zp->zp_type)); zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE; } if (zp->zp_nopwrite) { @@ -1737,7 +1878,8 @@ zio_suspend(spa_t *spa, zio_t *zio) "failure and the failure mode property for this pool " "is set to panic.", spa_name(spa)); - zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, 0, 0); + zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, + NULL, NULL, 0, 0); mutex_enter(&spa->spa_suspend_lock); @@ -2166,6 +2308,13 @@ zio_write_gang_block(zio_t *pio) zio_prop_t zp; int error; + /* + * encrypted blocks need DVA[2] free so encrypted gang headers can't + * have a third copy. 
+ */ + if (gio->io_prop.zp_encrypt && gbh_copies >= SPA_DVAS_PER_BP) + gbh_copies = SPA_DVAS_PER_BP - 1; + int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER; if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); @@ -2243,6 +2392,11 @@ zio_write_gang_block(zio_t *pio) zp.zp_dedup = B_FALSE; zp.zp_dedup_verify = B_FALSE; zp.zp_nopwrite = B_FALSE; + zp.zp_encrypt = gio->io_prop.zp_encrypt; + zp.zp_byteorder = gio->io_prop.zp_byteorder; + bzero(zp.zp_salt, ZIO_DATA_SALT_LEN); + bzero(zp.zp_iv, ZIO_DATA_IV_LEN); + bzero(zp.zp_mac, ZIO_DATA_MAC_LEN); zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g], abd_get_offset(pio->io_abd, pio->io_size - resid), lsize, @@ -2316,6 +2470,7 @@ zio_nop_write(zio_t *zio) if (BP_IS_HOLE(bp_orig) || !(zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_flags & ZCHECKSUM_FLAG_NOPWRITE) || + BP_IS_ENCRYPTED(bp) || BP_IS_ENCRYPTED(bp_orig) || BP_GET_CHECKSUM(bp) != BP_GET_CHECKSUM(bp_orig) || BP_GET_COMPRESS(bp) != BP_GET_COMPRESS(bp_orig) || BP_GET_DEDUP(bp) != BP_GET_DEDUP(bp_orig) || @@ -2928,8 +3083,8 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) * Try to allocate an intent log block. Return 0 on success, errno on failure. */ int -zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, - uint64_t size, boolean_t *slog) +zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, + blkptr_t *old_bp, uint64_t size, boolean_t *slog) { int error = 1; zio_alloc_list_t io_alloc_list; @@ -2961,6 +3116,23 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, BP_SET_LEVEL(new_bp, 0); BP_SET_DEDUP(new_bp, 0); BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER); + + /* + * encrypted blocks will require an IV and salt. We generate + * these now since we will not be rewriting the bp at + * rewrite time. 
+ */ + if (os->os_encrypted) { + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t salt[ZIO_DATA_SALT_LEN]; + + BP_SET_CRYPT(new_bp, B_TRUE); + VERIFY0(spa_crypt_get_salt(spa, + dmu_objset_id(os), salt)); + VERIFY0(zio_crypt_generate_iv(iv)); + + zio_crypt_encode_params_bp(new_bp, salt, iv); + } } else { zfs_dbgmsg("%s: zil block allocation failure: " "size %llu, error %d", spa_name(spa), size, error); @@ -3165,7 +3337,7 @@ zio_vdev_io_done(zio_t *zio) */ static void zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, - const void *good_buf) + const abd_t *good_buf) { /* no processing needed */ zfs_ereport_finish_checksum(zcr, good_buf, zcr->zcr_cbdata, B_FALSE); @@ -3175,14 +3347,14 @@ zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, void zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored) { - void *buf = zio_buf_alloc(zio->io_size); + void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size); - abd_copy_to_buf(buf, zio->io_abd, zio->io_size); + abd_copy(abd, zio->io_abd, zio->io_size); zcr->zcr_cbinfo = zio->io_size; - zcr->zcr_cbdata = buf; + zcr->zcr_cbdata = abd; zcr->zcr_finish = zio_vsd_default_cksum_finish; - zcr->zcr_free = zio_buf_free; + zcr->zcr_free = zio_abd_free; } static int @@ -3290,6 +3462,147 @@ zio_vdev_io_bypass(zio_t *zio) zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS >> 1; } +/* + * ========================================================================== + * Encrypt and store encryption parameters + * ========================================================================== + */ + + +/* + * This function is used for ZIO_STAGE_ENCRYPT. It is responsible for + * managing the storage of encryption parameters and passing them to the + * lower-level encryption functions. 
+ */ +static int +zio_encrypt(zio_t *zio) +{ + zio_prop_t *zp = &zio->io_prop; + spa_t *spa = zio->io_spa; + blkptr_t *bp = zio->io_bp; + uint64_t psize = BP_GET_PSIZE(bp); + dmu_object_type_t ot = BP_GET_TYPE(bp); + void *enc_buf = NULL; + abd_t *eabd = NULL; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + boolean_t no_crypt = B_FALSE; + + /* the root zio already encrypted the data */ + if (zio->io_child_type == ZIO_CHILD_GANG) + return (ZIO_PIPELINE_CONTINUE); + + /* only ZIL blocks are re-encrypted on rewrite */ + if (!IO_IS_ALLOCATING(zio) && ot != DMU_OT_INTENT_LOG) + return (ZIO_PIPELINE_CONTINUE); + + if (!(zp->zp_encrypt || BP_IS_ENCRYPTED(bp))) { + BP_SET_CRYPT(bp, B_FALSE); + return (ZIO_PIPELINE_CONTINUE); + } + + /* if we are doing raw encryption set the provided encryption params */ + if (zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) { + BP_SET_CRYPT(bp, B_TRUE); + BP_SET_BYTEORDER(bp, zp->zp_byteorder); + if (ot != DMU_OT_OBJSET) + zio_crypt_encode_mac_bp(bp, zp->zp_mac); + if (DMU_OT_IS_ENCRYPTED(ot)) + zio_crypt_encode_params_bp(bp, zp->zp_salt, zp->zp_iv); + return (ZIO_PIPELINE_CONTINUE); + } + + /* indirect blocks only maintain a cksum of the lower level MACs */ + if (BP_GET_LEVEL(bp) > 0) { + BP_SET_CRYPT(bp, B_TRUE); + VERIFY0(zio_crypt_do_indirect_mac_checksum_abd(B_TRUE, + zio->io_orig_abd, BP_GET_LSIZE(bp), BP_SHOULD_BYTESWAP(bp), + mac)); + zio_crypt_encode_mac_bp(bp, mac); + return (ZIO_PIPELINE_CONTINUE); + } + + /* + * Objset blocks are a special case since they have 2 256-bit MACs + * embedded within them. 
+ */ + if (ot == DMU_OT_OBJSET) { + ASSERT0(DMU_OT_IS_ENCRYPTED(ot)); + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + BP_SET_CRYPT(bp, B_TRUE); + VERIFY0(spa_do_crypt_objset_mac_abd(B_TRUE, spa, + zio->io_bookmark.zb_objset, zio->io_abd, psize, + BP_SHOULD_BYTESWAP(bp))); + return (ZIO_PIPELINE_CONTINUE); + } + + /* unencrypted object types are only authenticated with a MAC */ + if (!DMU_OT_IS_ENCRYPTED(ot)) { + BP_SET_CRYPT(bp, B_TRUE); + VERIFY0(spa_do_crypt_mac_abd(B_TRUE, spa, + zio->io_bookmark.zb_objset, zio->io_abd, psize, mac)); + zio_crypt_encode_mac_bp(bp, mac); + return (ZIO_PIPELINE_CONTINUE); + } + + /* + * Later passes of sync-to-convergence may decide to rewrite data + * in place to avoid more disk reallocations. This presents a problem + * for encryption because this consitutes rewriting the new data with + * the same encryption key and IV. However, this only applies to blocks + * in the MOS (particularly the spacemaps) and we do not encrypt the + * MOS. We assert that the zio is allocating or an intent log write + * to enforce this. + */ + ASSERT(IO_IS_ALLOCATING(zio) || ot == DMU_OT_INTENT_LOG); + ASSERT(BP_GET_LEVEL(bp) == 0 || ot == DMU_OT_INTENT_LOG); + ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION)); + ASSERT3U(psize, !=, 0); + + enc_buf = zio_buf_alloc(psize); + eabd = abd_get_from_buf(enc_buf, psize); + abd_take_ownership_of_buf(eabd, B_TRUE); + + /* + * For an explanation of what encryption parameters are stored + * where, see the block comment in zio_crypt.c. + */ + if (ot == DMU_OT_INTENT_LOG) { + zio_crypt_decode_params_bp(bp, salt, iv); + } else { + BP_SET_CRYPT(bp, B_TRUE); + } + + /* Perform the encryption. 
This should not fail */ + VERIFY0(spa_do_crypt_abd(B_TRUE, spa, zio->io_bookmark.zb_objset, bp, + zio->io_txg, psize, zio->io_abd, eabd, iv, mac, salt, + &no_crypt)); + + /* encode encryption metadata into the bp */ + if (ot == DMU_OT_INTENT_LOG) { + /* + * ZIL blocks store the MAC in the embedded checksum, so the + * transform must always be applied. + */ + zio_crypt_encode_mac_zil(enc_buf, mac); + zio_push_transform(zio, eabd, psize, psize, NULL); + } else { + BP_SET_CRYPT(bp, B_TRUE); + zio_crypt_encode_params_bp(bp, salt, iv); + zio_crypt_encode_mac_bp(bp, mac); + + if (no_crypt) { + ASSERT3U(ot, ==, DMU_OT_DNODE); + abd_free(eabd); + } else { + zio_push_transform(zio, eabd, psize, psize, NULL); + } + } + + return (ZIO_PIPELINE_CONTINUE); +} + /* * ========================================================================== * Generate and verify checksums @@ -3351,8 +3664,8 @@ zio_checksum_verify(zio_t *zio) if (error == ECKSUM && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { zfs_ereport_start_checksum(zio->io_spa, - zio->io_vd, zio, zio->io_offset, - zio->io_size, NULL, &info); + zio->io_vd, &zio->io_bookmark, zio, + zio->io_offset, zio->io_size, NULL, &info); } } @@ -3615,26 +3928,19 @@ zio_done(zio_t *zio) zio_cksum_report_t *zcr = zio->io_cksum_report; uint64_t align = zcr->zcr_align; uint64_t asize = P2ROUNDUP(psize, align); - char *abuf = NULL; abd_t *adata = zio->io_abd; if (asize != psize) { - adata = abd_alloc_linear(asize, B_TRUE); + adata = abd_alloc(asize, B_TRUE); abd_copy(adata, zio->io_abd, psize); abd_zero_off(adata, psize, asize - psize); } - if (adata != NULL) - abuf = abd_borrow_buf_copy(adata, asize); - zio->io_cksum_report = zcr->zcr_next; zcr->zcr_next = NULL; - zcr->zcr_finish(zcr, abuf); + zcr->zcr_finish(zcr, adata); zfs_ereport_free_checksum(zcr); - if (adata != NULL) - abd_return_buf(adata, abuf, asize); - if (asize != psize) abd_free(adata); } @@ -3652,7 +3958,8 @@ zio_done(zio_t *zio) * device is currently unavailable. 
*/ if (zio->io_error != ECKSUM && vd != NULL && !vdev_is_dead(vd)) - zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, zio, 0, 0); + zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, + &zio->io_bookmark, zio, 0, 0); if ((zio->io_error == EIO || !(zio->io_flags & (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && @@ -3661,9 +3968,9 @@ zio_done(zio_t *zio) * For logical I/O requests, tell the SPA to log the * error and generate a logical data ereport. */ - spa_log_error(spa, zio); - zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL, zio, - 0, 0); + spa_log_error(spa, &zio->io_bookmark); + zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL, + &zio->io_bookmark, zio, 0, 0); } } @@ -3854,6 +4161,7 @@ static zio_pipe_stage_t *zio_pipeline[] = { zio_free_bp_init, zio_issue_async, zio_write_compress, + zio_encrypt, zio_checksum_generate, zio_nop_write, zio_ddt_read_start, diff --git a/usr/src/uts/common/fs/zfs/zio_checksum.c b/usr/src/uts/common/fs/zfs/zio_checksum.c index e1c98b0b99c3..1d2f75476dac 100644 --- a/usr/src/uts/common/fs/zfs/zio_checksum.c +++ b/usr/src/uts/common/fs/zfs/zio_checksum.c @@ -287,6 +287,25 @@ zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa) mutex_exit(&spa->spa_cksum_tmpls_lock); } +/* convenience function to update a checksum to accomodate an encryption MAC */ +static void +zio_checksum_handle_crypt(zio_cksum_t *cksum, zio_cksum_t *saved, boolean_t xor) +{ + /* + * Weak checksums do not have their entropy spread evenly + * across the bits of the checksum. Therefore, when truncating + * a weak checksum we XOR the first 2 words with the last 2 so + * that we don't "lose" any entropy unnecessarily. + */ + if (xor) { + cksum->zc_word[0] ^= cksum->zc_word[2]; + cksum->zc_word[1] ^= cksum->zc_word[3]; + } + + cksum->zc_word[2] = saved->zc_word[2]; + cksum->zc_word[3] = saved->zc_word[3]; +} + /* * Generate the checksum. 
*/ @@ -294,11 +313,13 @@ void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, abd_t *abd, uint64_t size) { + static const uint64_t zec_magic = ZEC_MAGIC; blkptr_t *bp = zio->io_bp; uint64_t offset = zio->io_offset; zio_checksum_info_t *ci = &zio_checksum_table[checksum]; - zio_cksum_t cksum; + zio_cksum_t cksum, saved; spa_t *spa = zio->io_spa; + boolean_t insecure = (ci->ci_flags & ZCHECKSUM_FLAG_DEDUP) == 0; ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); ASSERT(ci->ci_func[0] != NULL); @@ -306,31 +327,57 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, zio_checksum_template_init(checksum, spa); if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { - zio_eck_t *eck; - void *data = abd_to_buf(abd); + zio_eck_t eck; + size_t eck_offset; + + bzero(&saved, sizeof (zio_cksum_t)); if (checksum == ZIO_CHECKSUM_ZILOG2) { - zil_chain_t *zilc = data; + zil_chain_t zilc; + abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t)); - size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ, + size = P2ROUNDUP_TYPED(zilc.zc_nused, ZIL_MIN_BLKSZ, uint64_t); - eck = &zilc->zc_eck; + eck = zilc.zc_eck; + eck_offset = offsetof(zil_chain_t, zc_eck); } else { - eck = (zio_eck_t *)((char *)data + size) - 1; + eck_offset = size - sizeof (zio_eck_t); + abd_copy_to_buf_off(&eck, abd, eck_offset, + sizeof (zio_eck_t)); } - if (checksum == ZIO_CHECKSUM_GANG_HEADER) - zio_checksum_gang_verifier(&eck->zec_cksum, bp); - else if (checksum == ZIO_CHECKSUM_LABEL) - zio_checksum_label_verifier(&eck->zec_cksum, offset); - else - bp->blk_cksum = eck->zec_cksum; - eck->zec_magic = ZEC_MAGIC; + + if (checksum == ZIO_CHECKSUM_GANG_HEADER) { + zio_checksum_gang_verifier(&eck.zec_cksum, bp); + } else if (checksum == ZIO_CHECKSUM_LABEL) { + zio_checksum_label_verifier(&eck.zec_cksum, offset); + } else { + saved = eck.zec_cksum; + eck.zec_cksum = bp->blk_cksum; + } + + abd_copy_from_buf_off(abd, &zec_magic, + eck_offset + offsetof(zio_eck_t, zec_magic), + sizeof (zec_magic)); + 
abd_copy_from_buf_off(abd, &eck.zec_cksum, + eck_offset + offsetof(zio_eck_t, zec_cksum), + sizeof (zio_cksum_t)); + ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], &cksum); - eck->zec_cksum = cksum; + if (bp != NULL && BP_USES_CRYPT(bp) && + BP_GET_TYPE(bp) != DMU_OT_OBJSET) + zio_checksum_handle_crypt(&cksum, &saved, insecure); + + abd_copy_from_buf_off(abd, &cksum, + eck_offset + offsetof(zio_eck_t, zec_cksum), + sizeof (zio_cksum_t)); } else { + saved = bp->blk_cksum; ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], - &bp->blk_cksum); + &cksum); + if (BP_USES_CRYPT(bp) && BP_GET_TYPE(bp) != DMU_OT_OBJSET) + zio_checksum_handle_crypt(&cksum, &saved, insecure); + bp->blk_cksum = cksum; } } @@ -340,6 +387,7 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, { zio_checksum_info_t *ci = &zio_checksum_table[checksum]; zio_cksum_t actual_cksum, expected_cksum; + zio_eck_t eck; int byteswap; if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) @@ -348,33 +396,37 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, zio_checksum_template_init(checksum, spa); if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { - zio_eck_t *eck; zio_cksum_t verifier; - uint64_t data_size = size; - void *data = abd_borrow_buf_copy(abd, data_size); + size_t eck_offset; if (checksum == ZIO_CHECKSUM_ZILOG2) { - zil_chain_t *zilc = data; + zil_chain_t zilc; uint64_t nused; - eck = &zilc->zc_eck; - if (eck->zec_magic == ZEC_MAGIC) { - nused = zilc->zc_nused; - } else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) { - nused = BSWAP_64(zilc->zc_nused); + abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t)); + + eck = zilc.zc_eck; + eck_offset = offsetof(zil_chain_t, zc_eck) + + offsetof(zio_eck_t, zec_cksum); + + if (eck.zec_magic == ZEC_MAGIC) { + nused = zilc.zc_nused; + } else if (eck.zec_magic == BSWAP_64(ZEC_MAGIC)) { + nused = BSWAP_64(zilc.zc_nused); } else { - abd_return_buf(abd, data, data_size); return 
(SET_ERROR(ECKSUM)); } - if (nused > data_size) { - abd_return_buf(abd, data, data_size); + if (nused > size) { return (SET_ERROR(ECKSUM)); } size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); } else { - eck = (zio_eck_t *)((char *)data + data_size) - 1; + eck_offset = size - sizeof (zio_eck_t); + abd_copy_to_buf_off(&eck, abd, eck_offset, + sizeof (zio_eck_t)); + eck_offset += offsetof(zio_eck_t, zec_cksum); } if (checksum == ZIO_CHECKSUM_GANG_HEADER) @@ -384,20 +436,21 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, else verifier = bp->blk_cksum; - byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); + byteswap = (eck.zec_magic == BSWAP_64(ZEC_MAGIC)); if (byteswap) byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); - size_t eck_offset = (size_t)(&eck->zec_cksum) - (size_t)data; - expected_cksum = eck->zec_cksum; - eck->zec_cksum = verifier; - abd_return_buf_copy(abd, data, data_size); + expected_cksum = eck.zec_cksum; + + abd_copy_from_buf_off(abd, &verifier, eck_offset, + sizeof (zio_cksum_t)); ci->ci_func[byteswap](abd, size, spa->spa_cksum_tmpls[checksum], &actual_cksum); - abd_copy_from_buf_off(abd, &expected_cksum, - eck_offset, sizeof (zio_cksum_t)); + + abd_copy_from_buf_off(abd, &expected_cksum, eck_offset, + sizeof (zio_cksum_t)); if (byteswap) { byteswap_uint64_array(&expected_cksum, @@ -410,6 +463,26 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, spa->spa_cksum_tmpls[checksum], &actual_cksum); } + /* + * MAC checksums are a special case since half of this checksum will + * actually be the encryption MAC. This will be verified by the + * decryption process, so we just check the truncated checksum now. + * Objset blocks use embedded MACs so we don't truncate the checksum + * for them. 
+ */ + if (bp != NULL && BP_USES_CRYPT(bp) && + BP_GET_TYPE(bp) != DMU_OT_OBJSET) { + if (!(ci->ci_flags & ZCHECKSUM_FLAG_DEDUP)) { + actual_cksum.zc_word[0] ^= actual_cksum.zc_word[2]; + actual_cksum.zc_word[1] ^= actual_cksum.zc_word[3]; + } + + actual_cksum.zc_word[2] = 0; + actual_cksum.zc_word[3] = 0; + expected_cksum.zc_word[2] = 0; + expected_cksum.zc_word[3] = 0; + } + if (info != NULL) { info->zbc_expected = expected_cksum; info->zbc_actual = actual_cksum; @@ -418,7 +491,6 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, info->zbc_injected = 0; info->zbc_has_cksum = 1; } - if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) return (SET_ERROR(ECKSUM)); diff --git a/usr/src/uts/common/fs/zfs/zio_crypt.c b/usr/src/uts/common/fs/zfs/zio_crypt.c new file mode 100644 index 000000000000..8de56d9e4258 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/zio_crypt.c @@ -0,0 +1,1889 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This file is responsible for handling all of the details of generating + * encryption parameters and performing encryption and authentication. + * + * BLOCK ENCRYPTION PARAMETERS: + * Encryption /Authentication Algorithm Suite (crypt): + * The encryption algorithm, mode, and key length we are going to use. We + * currently support AES in either GCM or CCM modes with 128, 192, and 256 bit + * keys. 
All authentication is currently done with SHA512-HMAC. + * + * Plaintext: + * The unencrypted data that we want to encrypt. + * + * Initialization Vector (IV): + * An initialization vector for the encryption algorithms. This is used to + * "tweak" the encryption algorithms so that two blocks of the same data are + * encrypted into different ciphertext outputs, thus obfuscating block patterns. + * The supported encryption modes (AES-GCM and AES-CCM) require that an IV is + * never reused with the same encryption key. This value is stored unencrypted + * and must simply be provided to the decryption function. We use a 96 bit IV + * (as recommended by NIST) for all block encryption. For non-dedup blocks we + * derive the IV randomly. The first 64 bits of the IV are stored in the second + * word of DVA[2] and the remaining 32 bits are stored in the upper 32 bits of + * blk_fill. This is safe because encrypted blocks can't use the upper 32 bits + * of blk_fill. We only encrypt level 0 blocks, which normally have a fill count + * of 1. The only exception is for DMU_OT_DNODE objects, where the fill count of + * level 0 blocks is the number of allocated dnodes in that block. The on-disk + * format supports at most 2^15 slots per L0 dnode block, because the maximum + * block size is 16MB (2^24). In either case, for level 0 blocks this number + * will still be smaller than UINT32_MAX so it is safe to store the IV in the + * top 32 bits of blk_fill, while leaving the bottom 32 bits of the fill count + * for the dnode code. + * + * Master key: + * This is the most important secret data of an encrypted dataset. It is used + * along with the salt to generate the actual encryption keys via HKDF. We + * do not use the master key to directly encrypt any data because there are + * theoretical limits on how much data can actually be safely encrypted with + * any encryption mode. The master key is stored encrypted on disk with the + * user's wrapping key.
Its length is determined by the encryption algorithm. + * For details on how this is stored see the block comment in dsl_crypt.c. + * + * Salt: + * Used as an input to the HKDF function, along with the master key. We use a + * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt + * can be used for encrypting many blocks, so we cache the current salt and the + * associated derived key in zio_crypt_t so we do not need to derive it again + * needlessly. + * + * Encryption Key: + * A secret binary key, generated from an HKDF function used to encrypt and + * decrypt data. + * + * Message Authentication Code (MAC): + * The MAC is an output of authenticated encryption modes such as AES-GCM and + * AES-CCM. Its purpose is to ensure that an attacker cannot modify encrypted + * data on disk and return garbage to the application. Effectively, it is a + * checksum that cannot be reproduced by an attacker. We store the MAC in the + * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated + * regular checksum of the ciphertext which can be used for scrubbing. + * + * OBJECT AUTHENTICATION: + * Some object types, such as DMU_OT_MASTER_NODE cannot be encrypted because + * they contain some info that always needs to be readable. To prevent this + * data from being altered, we authenticate this data using SHA512-HMAC. This + * will produce a MAC (similar to the one produced via encryption) which can + * be used to verify the object was not modified. HMACs do not require key + * rotation or IVs, so we can keep up to the full 3 copies of authenticated + * data. + * + * ZIL ENCRYPTION: + * ZIL blocks have their bp written to disk ahead of the associated data, so we + * cannot store the MAC there as we normally do. For these blocks the MAC is + * stored in the embedded checksum within the zil_chain_t header. The salt and + * IV are generated for the block on bp allocation instead of at encryption + * time.
In addition, ZIL blocks have some pieces that must be left in plaintext + * for claiming even though all of the sensitive user data still needs to be + * encrypted. The function zio_crypt_init_uios_zil() handles parsing which + * pieces of the block need to be encrypted. All data that is not encrypted is + * authenticated using the AAD mechanisms that the supported encryption modes + * provide for. In order to preserve the semantics of the ZIL for encrypted + * datasets, the ZIL is not protected at the objset level as described below. + * + * DNODE ENCRYPTION: + * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left + * in plaintext for scrubbing and claiming, but the bonus buffers might contain + * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing + * which pieces of the block need to be encrypted. For more details about + * dnode authentication and encryption, see zio_crypt_init_uios_dnode(). + * + * OBJECT SET AUTHENTICATION: + * Up to this point, everything we have encrypted and authenticated has been + * at level 0 (or -2 for the ZIL). If we did not do any further work the + * on-disk format would be susceptible to attacks that deleted or rearranged + * the order of level 0 blocks. Ideally, the cleanest solution would be to + * maintain a tree of authentication MACs going up the bp tree. However, this + * presents a problem for raw sends. Send files do not send information about + * indirect blocks so there would be no convenient way to transfer the MACs and + * they cannot be recalculated on the receive side without the master key which + * would defeat one of the purposes of raw sends in the first place. Instead, + * for the indirect levels of the bp tree, we use a regular SHA512 of the MACs + * from the level below. We also include some portable fields from blk_prop such + * as the lsize and compression algorithm to prevent the data from being + * misinterpreted.
+ * + * At the objset level, we maintain 2 separate 256 bit MACs in the + * objset_phys_t. The first one is "portable" and is the logical root of the + * MAC tree maintained in the metadnode's bps. The second is "local" and is + * used as the root MAC for the user accounting objects, which are also not + * transferred via "zfs send". The portable MAC is sent in the DRR_BEGIN payload + * of the send file. The useraccounting code ensures that the useraccounting + * info is not present upon a receive, so the local MAC can simply be cleared + * out at that time. For more info about objset_phys_t authentication, see + * zio_crypt_do_objset_hmacs(). + * + * CONSIDERATIONS FOR DEDUP: + * In order for dedup to work, blocks that we want to dedup with one another + * need to use the same IV and encryption key, so that they will have the same + * ciphertext. Normally, one should never reuse an IV with the same encryption + * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both + * blocks. In this case, however, since we are using the same plaindata as + * well all that we end up with is a duplicate of the original ciphertext we + * already had. As a result, an attacker with read access to the raw disk will + * be able to tell which blocks are the same but this information is given away + * by dedup anyway. In order to get the same IVs and encryption keys for + * equivalent blocks of data we use an HMAC of the plaindata. We use an HMAC + * here so that a reproducible checksum of the plaindata is never available to + * the attacker. The HMAC key is kept alongside the master key, encrypted on + * disk. The first 64 bits of the HMAC are used in place of the random salt, and + * the next 96 bits are used as the IV. As a result of this mechanism, dedup + * will only work within a clone family since encrypted dedup requires use of + * the same master and HMAC keys.
+ */ + +/* + * After encrypting many blocks with the same key we may start to run up + * against the theoretical limits of how much data can securely be encrypted + * with a single key using the supported encryption modes. The most obvious + * limitation is that our risk of generating 2 equivalent 96 bit IVs increases + * the more IVs we generate (which both GCM and CCM modes strictly forbid). + * This risk actually grows surprisingly quickly over time according to the + * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have + * generated n IVs with a cryptographically secure RNG, the approximate + * probability p(n) of a collision is given as: + * + * p(n) ~= 1 - e^(-n*(n-1)/(2*(2^96))) + * + * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html] + * + * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion + * we must not write more than 398,065,730 blocks with the same encryption key. + * Therefore, we rotate our keys after 400,000,000 blocks have been written by + * generating a new random 64 bit salt for our HKDF encryption key generation + * function.
+ */
+#define	ZFS_KEY_MAX_SALT_USES_DEFAULT	400000000
+#define	ZFS_CURRENT_MAX_SALT_USES	\
+	(MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT))
+unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT;
+
+/*
+ * Supported encryption suites, indexed by the crypt value. Each row pairs
+ * an ICP mechanism name and a cipher mode with a key length and a name
+ * string. The key length is consumed as a byte count by the functions
+ * below (it is converted with CRYPTO_BYTES2BITS() for the ICP).
+ */
+zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = {
+	{"",			ZC_TYPE_NONE,	0,	"inherit"},
+	{"",			ZC_TYPE_NONE,	0,	"on"},
+	{"",			ZC_TYPE_NONE,	0,	"off"},
+	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	16,	"aes-128-ccm"},
+	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	24,	"aes-192-ccm"},
+	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	32,	"aes-256-ccm"},
+	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	16,	"aes-128-gcm"},
+	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	24,	"aes-192-gcm"},
+	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	32,	"aes-256-gcm"}
+};
+
+/*
+ * Tear down a zio_crypt_key_t: destroy the salt lock and both ICP context
+ * templates, then zero the whole structure so that no key material is left
+ * behind. The structure itself is not freed. The salt lock must have been
+ * initialized with rw_init() before this is called.
+ */
+void
+zio_crypt_key_destroy(zio_crypt_key_t *key)
+{
+	rw_destroy(&key->zk_salt_lock);
+
+	/* free crypto templates */
+	crypto_destroy_ctx_template(key->zk_current_tmpl);
+	crypto_destroy_ctx_template(key->zk_hmac_tmpl);
+
+	/* zero out sensitive data */
+	bzero(key, sizeof (zio_crypt_key_t));
+}
+
+/*
+ * Initialize 'key' as a brand new key for the encryption suite 'crypt'.
+ * The guid, master key, HMAC key and salt are filled with random data and
+ * the current encryption key is derived from the master key and the salt
+ * with hkdf_sha512().
+ *
+ * Returns 0 on success. On failure the error from random_get_bytes() or
+ * hkdf_sha512() is returned and 'key' has been passed through
+ * zio_crypt_key_destroy() (i.e. it is zeroed).
+ */
+int
+zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	uint_t keydata_len;
+
+	ASSERT(key != NULL);
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+	bzero(key, sizeof (zio_crypt_key_t));
+
+	/*
+	 * Initialize the lock before anything can fail. The error path
+	 * calls zio_crypt_key_destroy(), which unconditionally destroys
+	 * the lock, so it must never see an uninitialized one.
+	 */
+	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
+
+	/* fill keydata buffers and salt with random data */
+	ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t));
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_master_keydata, keydata_len);
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_hmac_keydata, SHA512_HMAC_KEYLEN);
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	/* derive the current key from the master key */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
+	    keydata_len);
+	if (ret != 0)
+		goto error;
+
+	/* initialize keys for the ICP */
+	key->zk_current_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_current_key.ck_data = key->zk_current_keydata;
+	key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len);
+
+	/*
+	 * ck_data must reference the raw HMAC key bytes (as it does in
+	 * zio_crypt_key_unwrap()), not the crypto_key_t descriptor itself.
+	 */
+	key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_hmac_key.ck_data = key->zk_hmac_keydata;
+	key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN);
+
+	/*
+	 * Initialize the crypto templates. It's ok if this fails because
+	 * this is just an optimization.
+	 */
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname);
+	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key,
+	    &key->zk_hmac_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_hmac_tmpl = NULL;
+
+	key->zk_crypt = crypt;
+	key->zk_salt_count = 0;
+
+	return (0);
+
+error:
+	zio_crypt_key_destroy(key);
+	return (ret);
+}
+
+/*
+ * Rotate the salt of 'key': generate a new random salt, re-derive the
+ * current encryption key from the master key and the new salt, reset the
+ * usage count and rebuild the context template for the current key.
+ *
+ * The work is done with zk_salt_lock held as writer. If the usage count is
+ * already below ZFS_CURRENT_MAX_SALT_USES once the lock is held, another
+ * thread has rotated the salt and nothing is changed.
+ *
+ * Returns 0 on success (including the "already rotated" case), or the
+ * error from random_get_bytes() / hkdf_sha512().
+ */
+static int
+zio_crypt_key_change_salt(zio_crypt_key_t *key)
+{
+	int ret = 0;
+	uint8_t salt[ZIO_DATA_SALT_LEN];
+	crypto_mechanism_t mech;
+	uint_t keydata_len = zio_crypt_table[key->zk_crypt].ci_keylen;
+
+	/*
+	 * Describe the mechanism for the template rebuild below. The
+	 * template must be created for the same mechanism that
+	 * zio_crypt_key_init() / zio_crypt_key_unwrap() used.
+	 */
+	mech.cm_type =
+	    crypto_mech2id(zio_crypt_table[key->zk_crypt].ci_mechname);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	/* generate a new salt */
+	ret = random_get_bytes(salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	rw_enter(&key->zk_salt_lock, RW_WRITER);
+
+	/* someone beat us to the salt rotation, just unlock and return */
+	if (key->zk_salt_count < ZFS_CURRENT_MAX_SALT_USES)
+		goto out_unlock;
+
+	/* derive the current key from the master key and the new salt */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len);
+	if (ret != 0)
+		goto out_unlock;
+
+	/* assign the salt and reset the usage count */
+	bcopy(salt, key->zk_salt, ZIO_DATA_SALT_LEN);
+	key->zk_salt_count = 0;
+
+	/*
+	 * Destroy the old context template and create the new one. As in
+	 * zio_crypt_key_init(), a failure here is tolerated because the
+	 * template is only an optimization.
+	 */
+	crypto_destroy_ctx_template(key->zk_current_tmpl);
+	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	rw_exit(&key->zk_salt_lock);
+
+	return (0);
+
+out_unlock:
+	rw_exit(&key->zk_salt_lock);
+error:
+	return (ret);
+}
+
+/*
+ * Copy the current salt of 'key' into 'salt' (ZIO_DATA_SALT_LEN bytes) and
+ * count this use. Once the usage count reaches ZFS_CURRENT_MAX_SALT_USES
+ * the salt is rotated via zio_crypt_key_change_salt() after the caller's
+ * copy has been taken. Returns 0, or the error from the rotation.
+ *
+ * See comment above zfs_key_max_salt_uses definition for details.
+ */
+int
+zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt)
+{
+	int ret;
+	boolean_t salt_change;
+
+	rw_enter(&key->zk_salt_lock, RW_READER);
+
+	bcopy(key->zk_salt, salt, ZIO_DATA_SALT_LEN);
+	salt_change = (atomic_inc_64_nv(&key->zk_salt_count) >=
+	    ZFS_CURRENT_MAX_SALT_USES);
+
+	rw_exit(&key->zk_salt_lock);
+
+	if (salt_change) {
+		ret = zio_crypt_key_change_salt(key);
+		if (ret != 0)
+			goto error;
+	}
+
+	return (0);
+
+error:
+	return (ret);
+}
+
+/*
+ * This function handles all encryption and decryption in zfs. When
+ * encrypting it expects puio to reference the plaintext and cuio to
+ * reference the ciphertext. cuio must have enough space for the
+ * ciphertext + room for a MAC. datalen should be the length of the
+ * plaintext / ciphertext alone.
+ */
+/* ARGSUSED */
+static int
+zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key,
+    crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen,
+    uio_t *puio, uio_t *cuio, uint8_t *authbuf, uint_t auth_len)
+{
+	int ret;
+	crypto_data_t plaindata, cipherdata;
+	CK_AES_CCM_PARAMS ccmp;
+	CK_AES_GCM_PARAMS gcmp;
+	crypto_mechanism_t mech;
+	zio_crypt_info_t crypt_info;
+	uint_t plain_full_len, maclen;
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(key->ck_format, ==, CRYPTO_KEY_RAW);
+
+	/* lookup the encryption info */
+	crypt_info = zio_crypt_table[crypt];
+
+	/* the mac will always be the last iovec_t in the cipher uio */
+	maclen = cuio->uio_iov[cuio->uio_iovcnt - 1].iov_len;
+
+	ASSERT(maclen <= ZIO_DATA_MAC_LEN);
+
+	/* setup encryption mechanism (same as crypt) */
+	mech.cm_type = crypto_mech2id(crypt_info.ci_mechname);
+
+	/*
+	 * Strangely, the ICP requires that plain_full_len must include
+	 * the MAC length when decrypting, even though the UIO does not
+	 * need to have the extra space allocated.
+	 */
+	if (encrypt) {
+		plain_full_len = datalen;
+	} else {
+		plain_full_len = datalen + maclen;
+	}
+
+	/*
+	 * setup encryption params (currently only AES CCM and AES GCM
+	 * are supported)
+	 */
+	if (crypt_info.ci_crypt_type == ZC_TYPE_CCM) {
+		ccmp.ulNonceSize = ZIO_DATA_IV_LEN;
+		ccmp.ulAuthDataSize = auth_len;
+		ccmp.authData = authbuf;
+		ccmp.ulMACSize = maclen;
+		ccmp.nonce = ivbuf;
+		ccmp.ulDataSize = plain_full_len;
+
+		mech.cm_param = (char *)(&ccmp);
+		mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS);
+	} else {
+		gcmp.ulIvLen = ZIO_DATA_IV_LEN;
+		gcmp.ulIvBits = CRYPTO_BYTES2BITS(ZIO_DATA_IV_LEN);
+		gcmp.ulAADLen = auth_len;
+		gcmp.pAAD = authbuf;
+		gcmp.ulTagBits = CRYPTO_BYTES2BITS(maclen);
+		gcmp.pIv = ivbuf;
+
+		mech.cm_param = (char *)(&gcmp);
+		mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
+	}
+
+	/* populate the cipher and plain data structs. */
+	plaindata.cd_format = CRYPTO_DATA_UIO;
+	plaindata.cd_offset = 0;
+	plaindata.cd_uio = puio;
+	plaindata.cd_miscdata = NULL;
+	plaindata.cd_length = plain_full_len;
+
+	cipherdata.cd_format = CRYPTO_DATA_UIO;
+	cipherdata.cd_offset = 0;
+	cipherdata.cd_uio = cuio;
+	cipherdata.cd_miscdata = NULL;
+	cipherdata.cd_length = datalen + maclen;
+
+	/*
+	 * Perform the actual encryption / decryption. Any encryption
+	 * failure is reported as EIO; a decryption failure is reported as
+	 * ECKSUM and is expected to be a MAC mismatch.
+	 */
+	if (encrypt) {
+		ret = crypto_encrypt(&mech, &plaindata, key, tmpl, &cipherdata,
+		    NULL);
+		if (ret != CRYPTO_SUCCESS) {
+			ret = SET_ERROR(EIO);
+			goto error;
+		}
+	} else {
+		ret = crypto_decrypt(&mech, &cipherdata, key, tmpl, &plaindata,
+		    NULL);
+		if (ret != CRYPTO_SUCCESS) {
+			ASSERT3U(ret, ==, CRYPTO_INVALID_MAC);
+			ret = SET_ERROR(ECKSUM);
+			goto error;
+		}
+	}
+
+	return (0);
+
+error:
+	return (ret);
+}
+
+/*
+ * Wrap (encrypt) the master key and HMAC key of 'key' with the wrapping key
+ * 'cwkey', using the encryption suite recorded in key->zk_crypt.
+ *
+ * A fresh WRAPPING_IV_LEN byte IV is generated into 'iv'. The encrypted
+ * master key is written to 'keydata_out', the encrypted HMAC key to
+ * 'hmac_keydata_out' (SHA512_HMAC_KEYLEN bytes) and the WRAPPING_MAC_LEN
+ * byte MAC to 'mac'. The little endian key guid is passed as additional
+ * authenticated data.
+ *
+ * Returns 0 on success or the error from the IV generation / encryption.
+ */
+int
+zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
+    uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out)
+{
+	int ret;
+	uio_t puio, cuio;
+	iovec_t plain_iovecs[2], cipher_iovecs[3];
+	uint64_t crypt = key->zk_crypt;
+	uint64_t le_guid = LE_64(key->zk_guid);
+	uint_t enc_len, keydata_len;
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+
+	/* generate iv for wrapping the master and hmac key */
+	ret = random_get_pseudo_bytes(iv, WRAPPING_IV_LEN);
+	if (ret != 0)
+		goto error;
+
+	/* initialize uio_ts */
+	plain_iovecs[0].iov_base = (char *)key->zk_master_keydata;
+	plain_iovecs[0].iov_len = keydata_len;
+	plain_iovecs[1].iov_base = (char *)key->zk_hmac_keydata;
+	plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+
+	cipher_iovecs[0].iov_base = (char *)keydata_out;
+	cipher_iovecs[0].iov_len = keydata_len;
+	cipher_iovecs[1].iov_base = (char *)hmac_keydata_out;
+	cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+	cipher_iovecs[2].iov_base = (char *)mac;
+	cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN;
+
+	enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN;
+	puio.uio_iov = plain_iovecs;
+	puio.uio_iovcnt = 2;
+	puio.uio_segflg = UIO_SYSSPACE;
+	cuio.uio_iov = cipher_iovecs;
+	cuio.uio_iovcnt = 3;
+	cuio.uio_segflg = UIO_SYSSPACE;
+
+	/* encrypt the keys and store the resulting ciphertext and mac */
+	ret = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len,
+	    &puio, &cuio, (uint8_t *)&le_guid, sizeof (uint64_t));
+	if (ret != 0)
+		goto error;
+
+	return (0);
+
+error:
+	return (ret);
+}
+
+/*
+ * Unwrap (decrypt and authenticate) a wrapped master key / HMAC key pair
+ * into 'key' using the wrapping key 'cwkey' and encryption suite 'crypt'.
+ * 'keydata', 'hmac_keydata', 'iv' and 'mac' are the values produced by
+ * zio_crypt_key_wrap(); 'guid' must match the guid that was authenticated
+ * at wrap time.
+ *
+ * On success 'key' is fully initialized: a fresh random salt is generated,
+ * the current key is derived from it and the ICP keys and templates are
+ * set up. Returns 0 on success. On failure (ECKSUM from
+ * zio_do_crypt_uio() when the MAC does not verify, or an error from
+ * random_get_bytes() / hkdf_sha512()) 'key' has been passed through
+ * zio_crypt_key_destroy().
+ */
+int
+zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid,
+    uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac,
+    zio_crypt_key_t *key)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	uio_t puio, cuio;
+	iovec_t plain_iovecs[2], cipher_iovecs[3];
+	uint_t enc_len, keydata_len;
+	uint64_t le_guid = LE_64(guid);
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
+
+	/*
+	 * Put the lock and the template pointers into a known state before
+	 * anything can fail. The error path hands the key to
+	 * zio_crypt_key_destroy(), which destroys all three, and a failed
+	 * unwrap (e.g. a wrong wrapping key) is an expected event.
+	 */
+	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
+	key->zk_current_tmpl = NULL;
+	key->zk_hmac_tmpl = NULL;
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+
+	/* initialize uio_ts */
+	plain_iovecs[0].iov_base = (char *)key->zk_master_keydata;
+	plain_iovecs[0].iov_len = keydata_len;
+	plain_iovecs[1].iov_base = (char *)key->zk_hmac_keydata;
+	plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+
+	cipher_iovecs[0].iov_base = (char *)keydata;
+	cipher_iovecs[0].iov_len = keydata_len;
+	cipher_iovecs[1].iov_base = (char *)hmac_keydata;
+	cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+	cipher_iovecs[2].iov_base = (char *)mac;
+	cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN;
+
+	enc_len = keydata_len + SHA512_HMAC_KEYLEN;
+	puio.uio_iov = plain_iovecs;
+	puio.uio_segflg = UIO_SYSSPACE;
+	puio.uio_iovcnt = 2;
+	cuio.uio_iov = cipher_iovecs;
+	cuio.uio_iovcnt = 3;
+	cuio.uio_segflg = UIO_SYSSPACE;
+
+	/* decrypt the keys and store the result in the output buffers */
+	ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len,
+	    &puio, &cuio, (uint8_t *)&le_guid, sizeof (uint64_t));
+	if (ret != 0)
+		goto error;
+
+	/* generate a fresh salt */
+	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	/* derive the current key from the master key */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
+	    keydata_len);
+	if (ret != 0)
+		goto error;
+
+	/* initialize keys for ICP */
+	key->zk_current_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_current_key.ck_data = key->zk_current_keydata;
+	key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len);
+
+	key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_hmac_key.ck_data = key->zk_hmac_keydata;
+	key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN);
+
+	/*
+	 * Initialize the crypto templates. It's ok if this fails because
+	 * this is just an optimization.
+	 */
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname);
+	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key,
+	    &key->zk_hmac_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_hmac_tmpl = NULL;
+
+	key->zk_crypt = crypt;
+	key->zk_guid = guid;
+	key->zk_salt_count = 0;
+
+	return (0);
+
+error:
+	zio_crypt_key_destroy(key);
+	return (ret);
+}
+
+/*
+ * Fill 'ivbuf' with ZIO_DATA_IV_LEN random bytes. Returns 0 on success; on
+ * failure the error from random_get_pseudo_bytes() is returned and 'ivbuf'
+ * is zeroed.
+ */
+int
+zio_crypt_generate_iv(uint8_t *ivbuf)
+{
+	int ret;
+
+	/* randomly generate the IV */
+	ret = random_get_pseudo_bytes(ivbuf, ZIO_DATA_IV_LEN);
+	if (ret != 0)
+		goto error;
+
+	return (0);
+
+error:
+	bzero(ivbuf, ZIO_DATA_IV_LEN);
+	return (ret);
+}
+
+/*
+ * Compute the SHA512-HMAC of 'data' (datalen bytes) with the HMAC key of
+ * 'key' and copy the first 'digestlen' bytes of the result into
+ * 'digestbuf'. 'digestlen' may be at most SHA512_DIGEST_LENGTH.
+ *
+ * Returns 0 on success. If the ICP reports a failure, EIO is returned and
+ * 'digestbuf' is zeroed.
+ */
+int
+zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
+    uint8_t *digestbuf, uint_t digestlen)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	crypto_data_t in_data, digest_data;
+	uint8_t raw_digestbuf[SHA512_DIGEST_LENGTH];
+
+	ASSERT3U(digestlen, <=, SHA512_DIGEST_LENGTH);
+
+	/* initialize sha512-hmac mechanism and crypto data */
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	/* initialize the crypto data */
+	in_data.cd_format = CRYPTO_DATA_RAW;
+	in_data.cd_offset = 0;
+	in_data.cd_length = datalen;
+	in_data.cd_raw.iov_base = (char *)data;
+	in_data.cd_raw.iov_len = in_data.cd_length;
+
+	/* the full digest always lands in the local buffer first */
+	digest_data.cd_format = CRYPTO_DATA_RAW;
+	digest_data.cd_offset = 0;
+	digest_data.cd_length = SHA512_DIGEST_LENGTH;
+	digest_data.cd_raw.iov_base = (char *)raw_digestbuf;
+	digest_data.cd_raw.iov_len = digest_data.cd_length;
+
+	/* generate the hmac */
+	ret = crypto_mac(&mech, &in_data, &key->zk_hmac_key, key->zk_hmac_tmpl,
+	    &digest_data, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	bcopy(raw_digestbuf, digestbuf, digestlen);
+
+	return (0);
+
+error:
+	bzero(digestbuf, digestlen);
+	return (ret);
+}
+
+/*
+ * Derive the salt and IV for a dedup block from an HMAC of its data
+ * instead of generating them randomly: the first ZIO_DATA_SALT_LEN bytes
+ * of the HMAC become the salt and the following ZIO_DATA_IV_LEN bytes
+ * become the IV. Returns 0 or the error from zio_crypt_do_hmac(), in which
+ * case 'salt' and 'ivbuf' are left untouched.
+ */
+int
+zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
+    uint_t datalen, uint8_t *ivbuf, uint8_t *salt)
+{
+	int ret;
+	uint8_t digestbuf[SHA512_DIGEST_LENGTH];
+
+	ret = zio_crypt_do_hmac(key, data, datalen,
+	    digestbuf, SHA512_DIGEST_LENGTH);
+	if (ret != 0)
+		return (ret);
+
+	bcopy(digestbuf, salt, ZIO_DATA_SALT_LEN);
+	bcopy(digestbuf + ZIO_DATA_SALT_LEN, ivbuf, ZIO_DATA_IV_LEN);
+
+	return (0);
+}
+
+/*
+ * The following functions are used to encode and decode encryption parameters
+ * into blkptr_t and zil_header_t. The ICP wants to use these parameters as
+ * byte strings, which normally means that these strings would not need to deal
+ * with byteswapping at all. However, both blkptr_t and zil_header_t may be
+ * byteswapped by lower layers and so we must "undo" that byteswap here upon
+ * decoding.
+ */
+
+/*
+ * Store the salt and IV in an encrypted bp: the salt goes into word 0 of
+ * DVA[2], the first 64 bits of the IV into word 1 of DVA[2] and the
+ * remaining 32 bits of the IV are handed to BP_SET_IV2().
+ */
+void
+zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv)
+{
+	uint32_t iv_tail;
+
+	ASSERT(BP_IS_ENCRYPTED(bp));
+
+	bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t));
+	bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t));
+
+	bcopy(&iv[sizeof (uint64_t)], &iv_tail, sizeof (uint32_t));
+	BP_SET_IV2(bp, iv_tail);
+}
+
+/*
+ * Read the salt and IV back out of a protected bp, undoing any byteswap
+ * that BP_SHOULD_BYTESWAP() reports. Authenticated (non-encrypted) bps
+ * carry no salt or IV, so both outputs are simply zeroed for them.
+ */
+void
+zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv)
+{
+	uint64_t word;
+	uint32_t iv_tail;
+	uint8_t *iv_tailp = &iv[sizeof (uint64_t)];
+
+	ASSERT(BP_IS_PROTECTED(bp));
+
+	/* for convenience, so callers don't need to check */
+	if (BP_IS_AUTHENTICATED(bp)) {
+		bzero(salt, ZIO_DATA_SALT_LEN);
+		bzero(iv, ZIO_DATA_IV_LEN);
+		return;
+	}
+
+	if (BP_SHOULD_BYTESWAP(bp)) {
+		/* swap each word back before treating it as a byte string */
+		word = BSWAP_64(bp->blk_dva[2].dva_word[0]);
+		bcopy(&word, salt, sizeof (uint64_t));
+
+		word = BSWAP_64(bp->blk_dva[2].dva_word[1]);
+		bcopy(&word, iv, sizeof (uint64_t));
+
+		iv_tail = BSWAP_32((uint32_t)BP_GET_IV2(bp));
+	} else {
+		bcopy(&bp->blk_dva[2].dva_word[0], salt, sizeof (uint64_t));
+		bcopy(&bp->blk_dva[2].dva_word[1], iv, sizeof (uint64_t));
+
+		iv_tail = (uint32_t)BP_GET_IV2(bp);
+	}
+
+	bcopy(&iv_tail, iv_tailp, sizeof (uint32_t));
+}
+
+/*
+ * Store a 128 bit MAC in words 2 and 3 of the bp's checksum. Objset bps
+ * are not allowed here.
+ */
+void
+zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac)
+{
+	uint8_t *mac_hi = mac + sizeof (uint64_t);
+
+	ASSERT(BP_USES_CRYPT(bp));
+	ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET);
+
+	bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t));
+	bcopy(mac_hi, &bp->blk_cksum.zc_word[3], sizeof (uint64_t));
+}
+
+/*
+ * Read the MAC back out of words 2 and 3 of the bp's checksum, undoing any
+ * byteswap that BP_SHOULD_BYTESWAP() reports. For objset bps the output is
+ * zeroed (ZIO_DATA_MAC_LEN bytes) instead.
+ */
+void
+zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac)
+{
+	uint64_t word;
+	uint8_t *mac_hi = mac + sizeof (uint64_t);
+
+	ASSERT(BP_USES_CRYPT(bp) || BP_IS_HOLE(bp));
+
+	/* for convenience, so callers don't need to check */
+	if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
+		bzero(mac, ZIO_DATA_MAC_LEN);
+		return;
+	}
+
+	if (BP_SHOULD_BYTESWAP(bp)) {
+		word = BSWAP_64(bp->blk_cksum.zc_word[2]);
+		bcopy(&word, mac, sizeof (uint64_t));
+
+		word = BSWAP_64(bp->blk_cksum.zc_word[3]);
+		bcopy(&word, mac_hi, sizeof (uint64_t));
+		return;
+	}
+
+	bcopy(&bp->blk_cksum.zc_word[2], mac, sizeof (uint64_t));
+	bcopy(&bp->blk_cksum.zc_word[3], mac_hi, sizeof (uint64_t));
+}
+
+/*
+ * Store a 128 bit MAC in words 2 and 3 of the embedded checksum of the
+ * zil_chain_t found at the start of 'data'.
+ */
+void
+zio_crypt_encode_mac_zil(void *data, uint8_t *mac)
+{
+	zil_chain_t *zilc = data;
+	int w;
+
+	for (w = 0; w < 2; w++) {
+		bcopy(mac + w * sizeof (uint64_t),
+		    &zilc->zc_eck.zec_cksum.zc_word[2 + w],
+		    sizeof (uint64_t));
+	}
+}
+
+/*
+ * Read the MAC back out of words 2 and 3 of the embedded checksum of the
+ * zil_chain_t found at the start of 'data'.
+ */
+void
+zio_crypt_decode_mac_zil(const void *data, uint8_t *mac)
+{
+	/*
+	 * No byteswap handling is needed here: the ZIL MAC lives inside
+	 * the block it protects, and that block will not have been
+	 * byteswapped by the time this function is called.
+	 */
+	const zil_chain_t *zilc = data;
+	int w;
+
+	for (w = 0; w < 2; w++) {
+		bcopy(&zilc->zc_eck.zec_cksum.zc_word[2 + w],
+		    mac + w * sizeof (uint64_t), sizeof (uint64_t));
+	}
+}
+
+/*
+ * This routine takes a block of dnodes (src_abd) and copies only the bonus
+ * buffers to the same offsets in the dst buffer. datalen should be the size
+ * of both the src_abd and the dst buffer (not just the length of the bonus
+ * buffers).
+ */
+void
+zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen)
+{
+	uint_t i, max_dnp = datalen >> DNODE_SHIFT;
+	uint8_t *src;
+	dnode_phys_t *dnp, *sdnp, *ddnp;
+
+	src = abd_borrow_buf_copy(src_abd, datalen);
+
+	sdnp = (dnode_phys_t *)src;
+	ddnp = (dnode_phys_t *)dst;
+
+	/* each step also skips the extra slots the current dnode occupies */
+	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
+		dnp = &sdnp[i];
+		/*
+		 * Only allocated dnodes with a non-empty bonus buffer of an
+		 * encrypted bonus type are copied. Note that the whole
+		 * DN_MAX_BONUS_LEN() area is copied, not just dn_bonuslen.
+		 */
+		if (dnp->dn_type != DMU_OT_NONE &&
+		    DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) &&
+		    dnp->dn_bonuslen != 0) {
+			bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]),
+			    DN_MAX_BONUS_LEN(dnp));
+		}
+	}
+
+	abd_return_buf(src_abd, src, datalen);
+}
+
+/*
+ * Neutralize the blk_prop fields that are excluded from authentication:
+ * the dedup and checksum settings are cleared and psize is forced to a
+ * constant (SPA_MINBLOCKSIZE).
+ */
+static void
+zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp)
+{
+	int avoidlint = SPA_MINBLOCKSIZE;
+	BP_SET_DEDUP(bp, 0);
+	BP_SET_CHECKSUM(bp, 0);
+
+	/*
+	 * psize cannot be set to zero or it will trigger asserts, but the
+	 * value doesn't really matter as long as it is constant.
+	 */
+	BP_SET_PSIZE(bp, avoidlint);
+}
+
+/*
+ * Feed the authenticated parts of one bp into the HMAC context 'ctx':
+ * first the 64 bit blk_prop (with the non-portable fields neutralized),
+ * then the ZIO_DATA_MAC_LEN byte MAC decoded from the bp. All work is done
+ * on a local copy, so 'bp' itself is not modified. Returns 0, or EIO if
+ * the ICP rejects an update.
+ */
+static int
+zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, boolean_t should_bswap,
+    blkptr_t *bp)
+{
+	int ret;
+	crypto_data_t cd;
+	uint64_t le_blkprop;
+	blkptr_t tmpbp = *bp;
+	uint8_t mac[ZIO_DATA_MAC_LEN];
+
+	cd.cd_format = CRYPTO_DATA_RAW;
+	cd.cd_offset = 0;
+
+	if (should_bswap)
+		byteswap_uint64_array(&tmpbp, sizeof (blkptr_t));
+
+	ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp));
+	ASSERT0(BP_IS_EMBEDDED(&tmpbp));
+	zio_crypt_bp_zero_nonportable_blkprop(&tmpbp);
+
+	/* blk_prop is swapped unless ZFS_HOST_BYTEORDER is set */
+	le_blkprop = (ZFS_HOST_BYTEORDER) ?
+	    tmpbp.blk_prop : BSWAP_64(tmpbp.blk_prop);
+
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&le_blkprop;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	zio_crypt_decode_mac_bp(&tmpbp, mac);
+	cd.cd_length = ZIO_DATA_MAC_LEN;
+	cd.cd_raw.iov_base = (char *)mac;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	return (0);
+
+error:
+	return (ret);
+}
+
+/*
+ * Add the authenticated parts of one bp to the SHA2 context 'ctx': the 64
+ * bit blk_prop (with the non-portable fields neutralized) followed by the
+ * ZIO_DATA_MAC_LEN byte MAC. Works on a local copy of the bp.
+ */
+static void
+zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, boolean_t should_bswap,
+    blkptr_t *bp)
+{
+	blkptr_t tmpbp = *bp;
+	uint8_t mac[ZIO_DATA_MAC_LEN];
+
+	if (should_bswap)
+		byteswap_uint64_array(&tmpbp, sizeof (blkptr_t));
+
+	ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp));
+	ASSERT0(BP_IS_EMBEDDED(&tmpbp));
+	zio_crypt_bp_zero_nonportable_blkprop(&tmpbp);
+	zio_crypt_decode_mac_bp(&tmpbp, mac);
+
+	/*
+	 * Swap the copy back, so blk_prop is hashed in the same byte order
+	 * the bp was passed in with.
+	 */
+	if (should_bswap)
+		byteswap_uint64_array(&tmpbp, sizeof (blkptr_t));
+
+	SHA2Update(ctx, &tmpbp.blk_prop, sizeof (uint64_t));
+	SHA2Update(ctx, mac, ZIO_DATA_MAC_LEN);
+}
+
+/*
+ * Append the authenticated parts of one bp to an AAD buffer: 8 bytes of
+ * blk_prop (with the non-portable fields neutralized) followed by the
+ * ZIO_DATA_MAC_LEN byte MAC. *aadp is advanced past the appended bytes and
+ * *aad_len is increased by the same amount. No bounds check is done here;
+ * presumably the caller sized the buffer accordingly.
+ */
+static void
+zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len,
+    boolean_t should_bswap, blkptr_t *bp)
+{
+	uint_t crypt_len;
+	blkptr_t tmpbp = *bp;
+	uint8_t mac[ZIO_DATA_MAC_LEN];
+
+	if (should_bswap)
+		byteswap_uint64_array(&tmpbp, sizeof (blkptr_t));
+
+	ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp));
+	ASSERT0(BP_IS_EMBEDDED(&tmpbp));
+	zio_crypt_bp_zero_nonportable_blkprop(&tmpbp);
+	zio_crypt_decode_mac_bp(&tmpbp, mac);
+
+	/* swap the copy back to the byte order the bp was passed in with */
+	if (should_bswap)
+		byteswap_uint64_array(&tmpbp, sizeof (blkptr_t));
+
+	crypt_len = sizeof (uint64_t);
+	bcopy(&tmpbp.blk_prop, *aadp, crypt_len);
+	*aadp += crypt_len;
+	*aad_len += crypt_len;
+
+	crypt_len = ZIO_DATA_MAC_LEN;
+	bcopy(mac, *aadp, crypt_len);
+	*aadp += crypt_len;
+	*aad_len += crypt_len;
+}
+
+/*
+ * Feed one dnode into the HMAC context 'ctx': first the core of the
+ * dnode_phys_t (everything before dn_blkptr, with the non-portable flag
+ * bits masked out and dn_used zeroed), then each of its dn_nblkptr block
+ * pointers and, if DNODE_FLAG_SPILL_BLKPTR is set, the spill block pointer.
+ * The core is modified in a local copy only. Returns 0, or EIO if the ICP
+ * rejects an update.
+ */
+static int
+zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, boolean_t should_bswap,
+    dnode_phys_t *dnp)
+{
+	int ret, i;
+	dnode_phys_t *adnp;
+	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
+	crypto_data_t cd;
+	uint8_t tmp_dncore[offsetof(dnode_phys_t, dn_blkptr)];
+
+	cd.cd_format = CRYPTO_DATA_RAW;
+	cd.cd_offset = 0;
+
+	/* authenticate the core dnode (masking out non-portable bits) */
+	bcopy(dnp, tmp_dncore, sizeof (tmp_dncore));
+	adnp = (dnode_phys_t *)tmp_dncore;
+	if (le_bswap) {
+		adnp->dn_datablkszsec = BSWAP_16(adnp->dn_datablkszsec);
+		adnp->dn_bonuslen = BSWAP_16(adnp->dn_bonuslen);
+		adnp->dn_maxblkid = BSWAP_64(adnp->dn_maxblkid);
+		adnp->dn_used = BSWAP_64(adnp->dn_used);
+	}
+	adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK;
+	adnp->dn_used = 0;
+
+	cd.cd_length = sizeof (tmp_dncore);
+	cd.cd_raw.iov_base = (char *)adnp;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* the bps are read from the original dnode, not the masked copy */
+	for (i = 0; i < dnp->dn_nblkptr; i++) {
+		ret = zio_crypt_bp_do_hmac_updates(ctx,
+		    should_bswap, &dnp->dn_blkptr[i]);
+		if (ret != 0)
+			goto error;
+	}
+
+	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+		ret = zio_crypt_bp_do_hmac_updates(ctx,
+		    should_bswap, DN_SPILL_BLKPTR(dnp));
+		if (ret != 0)
+			goto error;
+	}
+
+	return (0);
+
+error:
+	return (ret);
+}
+
+/*
+ * objset_phys_t blocks introduce a number of exceptions to the normal
+ * authentication process. objset_phys_t's contain 2 separate HMACS for
+ * protecting the integrity of their data. The portable_mac protects the
+ * metadnode. This MAC can be sent with a raw send and protects against
+ * reordering of data within the metadnode. The local_mac protects the user
+ * accounting objects which are not sent from one system to another.
+ *
+ * In addition, objset blocks are the only blocks that can be modified and
+ * written to disk without the key loaded under certain circumstances.
During
+ * zil_claim() we need to be able to update the zil_header_t to complete
+ * claiming log blocks and during raw receives we need to write out the
+ * portable_mac from the send file. Both of these actions are possible
+ * because these fields are not protected by either MAC so neither one will
+ * need to modify the MACs without the key. However, when the modified blocks
+ * are written out they will be byteswapped into the host machine's native
+ * endianness which will modify fields protected by the MAC. As a result, MAC
+ * calculation for objset blocks works slightly differently from other block
+ * types. Where other block types MAC the data in whatever endianness is
+ * written to disk, objset blocks always MAC the little endian version of
+ * their values. In the code, should_bswap is the value from
+ * BP_SHOULD_BYTESWAP() and le_bswap indicates whether a byteswap is needed
+ * to get this block into little endian format.
+ *
+ * On success the first ZIO_OBJSET_MAC_LEN bytes of each SHA512-HMAC are
+ * copied to portable_mac and local_mac and 0 is returned. On failure both
+ * outputs are zeroed and an error is returned (EIO for ICP failures).
+ */
+/* ARGSUSED */
+int
+zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
+    boolean_t should_bswap, uint8_t *portable_mac, uint8_t *local_mac)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	crypto_context_t ctx;
+	crypto_data_t cd;
+	objset_phys_t *osp = data;
+	uint64_t intval;
+	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
+	uint8_t raw_portable_mac[SHA512_DIGEST_LENGTH];
+	uint8_t raw_local_mac[SHA512_DIGEST_LENGTH];
+
+	/* initialize HMAC mechanism */
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	cd.cd_format = CRYPTO_DATA_RAW;
+	cd.cd_offset = 0;
+
+	/* calculate the portable MAC from the portable fields and metadnode */
+	ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/*
+	 * add in the os_type
+	 *
+	 * NOTE(review): this uses the unswapped value when le_bswap is set
+	 * and the swapped value otherwise, which is the opposite of how
+	 * le_bswap is applied in zio_crypt_do_dnode_hmac_updates(). It is
+	 * left untouched because any change here changes the resulting
+	 * portable MAC -- confirm against existing on-disk data first.
+	 */
+	intval = (le_bswap) ? osp->os_type : BSWAP_64(osp->os_type);
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&intval;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in the portable os_flags */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	intval &= OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
+	/* CONSTCOND */
+	if (!ZFS_HOST_BYTEORDER)
+		intval = BSWAP_64(intval);
+
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&intval;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in fields from the metadnode */
+	ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap,
+	    &osp->os_meta_dnode);
+	if (ret)
+		goto error;
+
+	/* store the final digest in a temporary buffer and copy what we need */
+	cd.cd_length = SHA512_DIGEST_LENGTH;
+	cd.cd_raw.iov_base = (char *)raw_portable_mac;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_final(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN);
+
+	/*
+	 * The local MAC protects the user and group accounting. If these
+	 * objects are not present, the local MAC is zeroed out.
+	 */
+	if (osp->os_userused_dnode.dn_type == DMU_OT_NONE &&
+	    osp->os_groupused_dnode.dn_type == DMU_OT_NONE) {
+		bzero(local_mac, ZIO_OBJSET_MAC_LEN);
+		return (0);
+	}
+
+	/* calculate the local MAC from the userused and groupused dnodes */
+	ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in the non-portable os_flags */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	intval &= ~OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
+	/* CONSTCOND */
+	if (!ZFS_HOST_BYTEORDER)
+		intval = BSWAP_64(intval);
+
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&intval;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in fields from the user accounting dnodes */
+	ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap,
+	    &osp->os_userused_dnode);
+	if (ret)
+		goto error;
+
+	ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap,
+	    &osp->os_groupused_dnode);
+	if (ret)
+		goto error;
+
+	/* store the final digest in a temporary buffer and copy what we need */
+	cd.cd_length = SHA512_DIGEST_LENGTH;
+	cd.cd_raw.iov_base = (char *)raw_local_mac;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_final(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	bcopy(raw_local_mac, local_mac, ZIO_OBJSET_MAC_LEN);
+
+	return (0);
+
+error:
+	/* never hand back a partially computed MAC */
+	bzero(portable_mac, ZIO_OBJSET_MAC_LEN);
+	bzero(local_mac, ZIO_OBJSET_MAC_LEN);
+	return (ret);
+}
+
+/*
+ * Free the iovec array of 'uio', if it has one. The array is assumed to
+ * have been allocated with uio_iovcnt entries; the uio_t itself is not
+ * freed.
+ */
+static void
+zio_crypt_destroy_uio(uio_t *uio)
+{
+	if (uio->uio_iov)
+		kmem_free(uio->uio_iov, uio->uio_iovcnt * sizeof (iovec_t));
+}
+
+/*
+ * This function parses an uncompressed indirect block and returns a checksum
+ * of all the portable fields from all of the contained bps.
The portable + * fields are the MAC and all of the fields from blk_prop except for the dedup, + * checksum, and psize bits. For an explanation of the purpose of this, see + * the comment block on object set authentication. + */ +int +zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, + uint_t datalen, boolean_t byteswap, uint8_t *cksum) +{ + blkptr_t *bp; + int i, epb = datalen >> SPA_BLKPTRSHIFT; + SHA2_CTX ctx; + uint8_t digestbuf[SHA512_DIGEST_LENGTH]; + + /* checksum all of the MACs from the layer below */ + SHA2Init(SHA512, &ctx); + for (i = 0, bp = buf; i < epb; i++, bp++) { + zio_crypt_bp_do_indrect_checksum_updates(&ctx, byteswap, bp); + } + SHA2Final(digestbuf, &ctx); + + if (generate) { + bcopy(digestbuf, cksum, ZIO_DATA_MAC_LEN); + return (0); + } + + if (bcmp(digestbuf, cksum, ZIO_DATA_MAC_LEN) != 0) + return (SET_ERROR(ECKSUM)); + + return (0); +} + +int +zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, + uint_t datalen, boolean_t byteswap, uint8_t *cksum) +{ + + int ret; + void *buf; + + buf = abd_borrow_buf_copy(abd, datalen); + ret = zio_crypt_do_indirect_mac_checksum(generate, buf, datalen, + byteswap, cksum); + abd_return_buf(abd, buf, datalen); + + return (ret); +} + +/* + * Special case handling routine for encrypting / decrypting ZIL blocks. + * We do not check for the older ZIL chain because the encryption feature + * was not available before the newer ZIL chain was introduced. The goal + * here is to encrypt everything except the blkptr_t of a lr_write_t and + * the zil_chain_t header. Everything that is not encrypted is authenticated. 
+ */ + +/* ARGSUSED */ +static int +zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, + uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio, + uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, + boolean_t *no_crypt) +{ + int ret; + uint64_t txtype, lr_len; + uint_t nr_src, nr_dst, crypt_len; + uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; + iovec_t *src_iovecs = NULL, *dst_iovecs = NULL; + uint8_t *src, *dst, *slrp, *dlrp, *blkend, *aadp; + zil_chain_t *zilc; + lr_t *lr; + uint8_t *aadbuf = zio_buf_alloc(datalen); + + /* cipherbuf always needs an extra iovec for the MAC */ + if (encrypt) { + src = plainbuf; + dst = cipherbuf; + nr_src = 0; + nr_dst = 1; + } else { + src = cipherbuf; + dst = plainbuf; + nr_src = 1; + nr_dst = 0; + } + + /* find the start and end record of the log block */ + zilc = (zil_chain_t *)src; + slrp = src + sizeof (zil_chain_t); + aadp = aadbuf; + blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused); + + /* calculate the number of encrypted iovecs we will need */ + for (; slrp < blkend; slrp += lr_len) { + lr = (lr_t *)slrp; + + if (!byteswap) { + txtype = lr->lrc_txtype; + lr_len = lr->lrc_reclen; + } else { + txtype = BSWAP_64(lr->lrc_txtype); + lr_len = BSWAP_64(lr->lrc_reclen); + } + + nr_iovecs++; + if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t)) + nr_iovecs++; + } + + nr_src += nr_iovecs; + nr_dst += nr_iovecs; + + /* allocate the iovec arrays */ + if (nr_src != 0) { + src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP); + if (src_iovecs == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + if (nr_dst != 0) { + dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP); + if (dst_iovecs == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + /* + * Copy the plain zil header over and authenticate everything except + * the checksum that will store our MAC. 
If we are writing the data + * the embedded checksum will not have been calculated yet, so we don't + * authenticate that. + */ + bcopy(src, dst, sizeof (zil_chain_t)); + bcopy(src, aadp, sizeof (zil_chain_t) - sizeof (zio_eck_t)); + aadp += sizeof (zil_chain_t) - sizeof (zio_eck_t); + aad_len += sizeof (zil_chain_t) - sizeof (zio_eck_t); + + /* loop over records again, filling in iovecs */ + nr_iovecs = 0; + slrp = src + sizeof (zil_chain_t); + dlrp = dst + sizeof (zil_chain_t); + + for (; slrp < blkend; slrp += lr_len, dlrp += lr_len) { + lr = (lr_t *)slrp; + + if (!byteswap) { + txtype = lr->lrc_txtype; + lr_len = lr->lrc_reclen; + } else { + txtype = BSWAP_64(lr->lrc_txtype); + lr_len = BSWAP_64(lr->lrc_reclen); + } + + /* copy the common lr_t */ + bcopy(slrp, dlrp, sizeof (lr_t)); + bcopy(slrp, aadp, sizeof (lr_t)); + aadp += sizeof (lr_t); + aad_len += sizeof (lr_t); + + ASSERT3P(src_iovecs, !=, NULL); + ASSERT3P(dst_iovecs, !=, NULL); + + /* + * If this is a TX_WRITE record we want to encrypt everything + * except the bp if it exists. If the bp does exist we want to + * authenticate it.
+ */ + if (txtype == TX_WRITE) { + crypt_len = sizeof (lr_write_t) - + sizeof (lr_t) - sizeof (blkptr_t); + src_iovecs[nr_iovecs].iov_base = (char *)slrp + + sizeof (lr_t); + src_iovecs[nr_iovecs].iov_len = crypt_len; + dst_iovecs[nr_iovecs].iov_base = (char *)dlrp + + sizeof (lr_t); + dst_iovecs[nr_iovecs].iov_len = crypt_len; + + /* copy the bp now since it will not be encrypted */ + bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t), + dlrp + sizeof (lr_write_t) - sizeof (blkptr_t), + sizeof (blkptr_t)); + bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t), + aadp, sizeof (blkptr_t)); + aadp += sizeof (blkptr_t); + aad_len += sizeof (blkptr_t); + nr_iovecs++; + total_len += crypt_len; + + if (lr_len != sizeof (lr_write_t)) { + crypt_len = lr_len - sizeof (lr_write_t); + src_iovecs[nr_iovecs].iov_base = (char *) + slrp + sizeof (lr_write_t); + src_iovecs[nr_iovecs].iov_len = crypt_len; + dst_iovecs[nr_iovecs].iov_base = (char *) + dlrp + sizeof (lr_write_t); + dst_iovecs[nr_iovecs].iov_len = crypt_len; + nr_iovecs++; + total_len += crypt_len; + } + } else { + crypt_len = lr_len - sizeof (lr_t); + src_iovecs[nr_iovecs].iov_base = (char *)slrp + + sizeof (lr_t); + src_iovecs[nr_iovecs].iov_len = crypt_len; + dst_iovecs[nr_iovecs].iov_base = (char *)dlrp + + sizeof (lr_t); + dst_iovecs[nr_iovecs].iov_len = crypt_len; + nr_iovecs++; + total_len += crypt_len; + } + } + + *no_crypt = (nr_iovecs == 0); + *enc_len = total_len; + *authbuf = aadbuf; + *auth_len = aad_len; + + if (encrypt) { + puio->uio_iov = src_iovecs; + puio->uio_iovcnt = nr_src; + cuio->uio_iov = dst_iovecs; + cuio->uio_iovcnt = nr_dst; + } else { + puio->uio_iov = dst_iovecs; + puio->uio_iovcnt = nr_dst; + cuio->uio_iov = src_iovecs; + cuio->uio_iovcnt = nr_src; + } + + return (0); + +error: + zio_buf_free(aadbuf, datalen); + if (src_iovecs != NULL) + kmem_free(src_iovecs, nr_src * sizeof (iovec_t)); + if (dst_iovecs != NULL) + kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t)); + + *enc_len = 0; + 
*authbuf = NULL; + *auth_len = 0; + *no_crypt = B_FALSE; + puio->uio_iov = NULL; + puio->uio_iovcnt = 0; + cuio->uio_iov = NULL; + cuio->uio_iovcnt = 0; + return (ret); +} + +/* + * Special case handling routine for encrypting / decrypting dnode blocks. + */ +static int +zio_crypt_init_uios_dnode(boolean_t encrypt, uint8_t *plainbuf, + uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio, + uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, + boolean_t *no_crypt) +{ + int ret; + uint_t nr_src, nr_dst, crypt_len; + uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; + uint_t i, j, max_dnp = datalen >> DNODE_SHIFT; + iovec_t *src_iovecs = NULL, *dst_iovecs = NULL; + uint8_t *src, *dst, *aadp; + dnode_phys_t *dnp, *adnp, *sdnp, *ddnp; + uint8_t *aadbuf = zio_buf_alloc(datalen); + + if (encrypt) { + src = plainbuf; + dst = cipherbuf; + nr_src = 0; + nr_dst = 1; + } else { + src = cipherbuf; + dst = plainbuf; + nr_src = 1; + nr_dst = 0; + } + + sdnp = (dnode_phys_t *)src; + ddnp = (dnode_phys_t *)dst; + aadp = aadbuf; + + /* + * Count the number of iovecs we will need to do the encryption by + * counting the number of bonus buffers that need to be encrypted. + */ + for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { + /* + * This block may still be byteswapped. However, all of the + * values we use are either uint8_t's (for which byteswapping + * is a noop) or a * != 0 check, which will work regardless + * of whether or not we byteswap. 
+ */ + if (sdnp[i].dn_type != DMU_OT_NONE && + DMU_OT_IS_ENCRYPTED(sdnp[i].dn_bonustype) && + sdnp[i].dn_bonuslen != 0) { + nr_iovecs++; + } + } + + nr_src += nr_iovecs; + nr_dst += nr_iovecs; + + if (nr_src != 0) { + src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP); + if (src_iovecs == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + if (nr_dst != 0) { + dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP); + if (dst_iovecs == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + nr_iovecs = 0; + + /* + * Iterate through the dnodes again, this time filling in the uios + * we allocated earlier. We also concatenate any data we want to + * authenticate onto aadbuf. + */ + for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { + dnp = &sdnp[i]; + /* copy over the core fields and blkptrs (kept as plaintext) */ + bcopy(dnp, &ddnp[i], (uint8_t *)DN_BONUS(dnp) - (uint8_t *)dnp); + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + bcopy(DN_SPILL_BLKPTR(dnp), DN_SPILL_BLKPTR(&ddnp[i]), + sizeof (blkptr_t)); + } + + /* + * Handle authenticated data. We authenticate everything in + * the dnode that can be brought over when we do a raw send. + * This includes all of the core fields as well as the MACs + * stored in the bp checksums and all of the portable bits + * from blk_prop. We include the dnode padding here in case it + * ever gets used in the future. Some dn_flags and dn_used are + * not portable so we mask those values out of the + * authenticated data.
+ */ + crypt_len = offsetof(dnode_phys_t, dn_blkptr); + bcopy(dnp, aadp, crypt_len); + adnp = (dnode_phys_t *)aadp; + adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK; + adnp->dn_used = 0; + aadp += crypt_len; + aad_len += crypt_len; + + for (j = 0; j < dnp->dn_nblkptr; j++) { + zio_crypt_bp_do_aad_updates(&aadp, &aad_len, + byteswap, &dnp->dn_blkptr[j]); + } + + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + zio_crypt_bp_do_aad_updates(&aadp, &aad_len, + byteswap, DN_SPILL_BLKPTR(dnp)); + } + + /* + * If this bonus buffer needs to be encrypted, we prepare an + * iovec_t. The encryption / decryption functions will fill + * this in for us with the encrypted or decrypted data. + * Otherwise we add the bonus buffer to the authenticated + * data buffer and copy it over to the destination. The + * encrypted iovec extends to DN_MAX_BONUS_LEN(dnp) so that + * we can guarantee alignment with the AES block size + * (128 bits). + */ + crypt_len = DN_MAX_BONUS_LEN(dnp); + if (dnp->dn_type != DMU_OT_NONE && + DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) && + dnp->dn_bonuslen != 0) { + ASSERT3U(nr_iovecs, <, nr_src); + ASSERT3U(nr_iovecs, <, nr_dst); + ASSERT3P(src_iovecs, !=, NULL); + ASSERT3P(dst_iovecs, !=, NULL); + src_iovecs[nr_iovecs].iov_base = DN_BONUS(dnp); + src_iovecs[nr_iovecs].iov_len = crypt_len; + dst_iovecs[nr_iovecs].iov_base = DN_BONUS(&ddnp[i]); + dst_iovecs[nr_iovecs].iov_len = crypt_len; + + nr_iovecs++; + total_len += crypt_len; + } else { + bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), crypt_len); + bcopy(DN_BONUS(dnp), aadp, crypt_len); + aadp += crypt_len; + aad_len += crypt_len; + } + } + + *no_crypt = (nr_iovecs == 0); + *enc_len = total_len; + *authbuf = aadbuf; + *auth_len = aad_len; + + if (encrypt) { + puio->uio_iov = src_iovecs; + puio->uio_iovcnt = nr_src; + cuio->uio_iov = dst_iovecs; + cuio->uio_iovcnt = nr_dst; + } else { + puio->uio_iov = dst_iovecs; + puio->uio_iovcnt = nr_dst; + cuio->uio_iov = src_iovecs; + cuio->uio_iovcnt = nr_src; + } + + 
return (0); + +error: + zio_buf_free(aadbuf, datalen); + if (src_iovecs != NULL) + kmem_free(src_iovecs, nr_src * sizeof (iovec_t)); + if (dst_iovecs != NULL) + kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t)); + + *enc_len = 0; + *authbuf = NULL; + *auth_len = 0; + *no_crypt = B_FALSE; + puio->uio_iov = NULL; + puio->uio_iovcnt = 0; + cuio->uio_iov = NULL; + cuio->uio_iovcnt = 0; + return (ret); +} + +/* ARGSUSED */ +static int +zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf, + uint8_t *cipherbuf, uint_t datalen, uio_t *puio, uio_t *cuio, + uint_t *enc_len) +{ + int ret; + uint_t nr_plain = 1, nr_cipher = 2; + iovec_t *plain_iovecs = NULL, *cipher_iovecs = NULL; + + /* allocate the iovecs for the plain and cipher data */ + plain_iovecs = kmem_alloc(nr_plain * sizeof (iovec_t), + KM_SLEEP); + if (!plain_iovecs) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + cipher_iovecs = kmem_alloc(nr_cipher * sizeof (iovec_t), + KM_SLEEP); + if (!cipher_iovecs) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + plain_iovecs[0].iov_base = (void *)plainbuf; + plain_iovecs[0].iov_len = datalen; + cipher_iovecs[0].iov_base = (void *)cipherbuf; + cipher_iovecs[0].iov_len = datalen; + + *enc_len = datalen; + puio->uio_iov = plain_iovecs; + puio->uio_iovcnt = nr_plain; + cuio->uio_iov = cipher_iovecs; + cuio->uio_iovcnt = nr_cipher; + + return (0); + +error: + if (plain_iovecs != NULL) + kmem_free(plain_iovecs, nr_plain * sizeof (iovec_t)); + if (cipher_iovecs != NULL) + kmem_free(cipher_iovecs, nr_cipher * sizeof (iovec_t)); + + *enc_len = 0; + puio->uio_iov = NULL; + puio->uio_iovcnt = 0; + cuio->uio_iov = NULL; + cuio->uio_iovcnt = 0; + return (ret); +} + +/* + * This function builds up the plaintext (puio) and ciphertext (cuio) uios so + * that they can be used for encryption and decryption by zio_do_crypt_uio(). 
+ * Most blocks will use zio_crypt_init_uios_normal(), with ZIL and dnode blocks + * requiring special handling to parse out pieces that are to be encrypted. The + * authbuf is used by these special cases to store additional authenticated + * data (AAD) for the encryption modes. + */ +/* ARGSUSED */ +static int +zio_crypt_init_uios(boolean_t encrypt, dmu_object_type_t ot, uint8_t *plainbuf, + uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uint8_t *mac, + uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, + uint_t *auth_len, boolean_t *no_crypt) +{ + int ret; + iovec_t *mac_iov; + + ASSERT(DMU_OT_IS_ENCRYPTED(ot) || ot == DMU_OT_NONE); + + /* route to handler */ + switch (ot) { + case DMU_OT_INTENT_LOG: + ret = zio_crypt_init_uios_zil(encrypt, plainbuf, cipherbuf, + datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len, + no_crypt); + break; + case DMU_OT_DNODE: + ret = zio_crypt_init_uios_dnode(encrypt, plainbuf, cipherbuf, + datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len, + no_crypt); + break; + default: + ret = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf, + datalen, puio, cuio, enc_len); + *authbuf = NULL; + *auth_len = 0; + *no_crypt = B_FALSE; + break; + } + + if (ret != 0) + goto error; + + /* populate the uios */ + puio->uio_segflg = UIO_SYSSPACE; + cuio->uio_segflg = UIO_SYSSPACE; + + mac_iov = ((iovec_t *)&cuio->uio_iov[cuio->uio_iovcnt - 1]); + mac_iov->iov_base = (void *)mac; + mac_iov->iov_len = ZIO_DATA_MAC_LEN; + + return (0); + +error: + return (ret); +} + +/* + * Primary encryption / decryption entrypoint for zio data. 
+ */ +int +zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, + dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen, + boolean_t byteswap, uint8_t *plainbuf, uint8_t *cipherbuf, + boolean_t *no_crypt) +{ + int ret; + boolean_t locked = B_FALSE; + uint64_t crypt = key->zk_crypt; + uint_t keydata_len = zio_crypt_table[crypt].ci_keylen; + uint_t enc_len, auth_len; + uio_t puio, cuio; + uint8_t enc_keydata[MASTER_KEY_MAX_LEN]; + crypto_key_t tmp_ckey, *ckey = NULL; + crypto_ctx_template_t tmpl; + uint8_t *authbuf = NULL; + + bzero(&puio, sizeof (uio_t)); + bzero(&cuio, sizeof (uio_t)); + + /* create uios for encryption */ + ret = zio_crypt_init_uios(encrypt, ot, plainbuf, cipherbuf, datalen, + byteswap, mac, &puio, &cuio, &enc_len, &authbuf, &auth_len, + no_crypt); + if (ret != 0) + return (ret); + + /* + * If the needed key is the current one, just use it. Otherwise we + * need to generate a temporary one from the given salt + master key. + * If we are encrypting, we must return a copy of the current salt + * so that it can be stored in the blkptr_t. 
+ */ + rw_enter(&key->zk_salt_lock, RW_READER); + locked = B_TRUE; + + if (bcmp(salt, key->zk_salt, ZIO_DATA_SALT_LEN) == 0) { + ckey = &key->zk_current_key; + tmpl = key->zk_current_tmpl; + } else { + rw_exit(&key->zk_salt_lock); + locked = B_FALSE; + + ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, + salt, ZIO_DATA_SALT_LEN, enc_keydata, keydata_len); + if (ret != 0) + goto error; + + tmp_ckey.ck_format = CRYPTO_KEY_RAW; + tmp_ckey.ck_data = enc_keydata; + tmp_ckey.ck_length = CRYPTO_BYTES2BITS(keydata_len); + + ckey = &tmp_ckey; + tmpl = NULL; + } + + /* perform the encryption / decryption */ + ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len, + &puio, &cuio, authbuf, auth_len); + if (ret != 0) + goto error; + + if (locked) { + rw_exit(&key->zk_salt_lock); + locked = B_FALSE; + } + + if (authbuf != NULL) + zio_buf_free(authbuf, datalen); + if (ckey == &tmp_ckey) + bzero(enc_keydata, keydata_len); + zio_crypt_destroy_uio(&puio); + zio_crypt_destroy_uio(&cuio); + + return (0); + +error: + if (locked) + rw_exit(&key->zk_salt_lock); + if (authbuf != NULL) + zio_buf_free(authbuf, datalen); + if (ckey == &tmp_ckey) + bzero(enc_keydata, keydata_len); + zio_crypt_destroy_uio(&puio); + zio_crypt_destroy_uio(&cuio); + + return (ret); +} + +/* + * Simple wrapper around zio_do_crypt_data() to work with abd's instead of + * linear buffers. 
+ */ +int +zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, + dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen, + boolean_t byteswap, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt) +{ + int ret; + void *ptmp, *ctmp; + + if (encrypt) { + ptmp = abd_borrow_buf_copy(pabd, datalen); + ctmp = abd_borrow_buf(cabd, datalen); + } else { + ptmp = abd_borrow_buf(pabd, datalen); + ctmp = abd_borrow_buf_copy(cabd, datalen); + } + + ret = zio_do_crypt_data(encrypt, key, salt, ot, iv, mac, + datalen, byteswap, ptmp, ctmp, no_crypt); + if (ret != 0) + goto error; + + if (encrypt) { + abd_return_buf(pabd, ptmp, datalen); + abd_return_buf_copy(cabd, ctmp, datalen); + } else { + abd_return_buf_copy(pabd, ptmp, datalen); + abd_return_buf(cabd, ctmp, datalen); + } + + return (0); + +error: + if (encrypt) { + abd_return_buf(pabd, ptmp, datalen); + abd_return_buf_copy(cabd, ctmp, datalen); + } else { + abd_return_buf_copy(pabd, ptmp, datalen); + abd_return_buf(cabd, ctmp, datalen); + } + + return (ret); +} + +#if defined(_KERNEL) && defined(HAVE_SPL) +/* BEGIN CSTYLED */ +module_param(zfs_key_max_salt_uses, ulong, 0644); +MODULE_PARM_DESC(zfs_key_max_salt_uses, "Max number of times a salt value " + "can be used for generating encryption keys before it is rotated"); +/* END CSTYLED */ +#endif diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c index 1e0529964b93..35161ad7ad5f 100644 --- a/usr/src/uts/common/fs/zfs/zvol.c +++ b/usr/src/uts/common/fs/zfs/zvol.c @@ -501,7 +501,7 @@ zvol_create_minor(const char *name) } /* lie and say we're read-only */ - error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os); + error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os); if (error) { mutex_exit(&zfsdev_state_lock); @@ -509,13 +509,13 @@ zvol_create_minor(const char *name) } if ((minor = zfsdev_minor_alloc()) == 0) { - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); 
mutex_exit(&zfsdev_state_lock); return (SET_ERROR(ENXIO)); } if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) { - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); mutex_exit(&zfsdev_state_lock); return (SET_ERROR(EAGAIN)); } @@ -527,7 +527,7 @@ zvol_create_minor(const char *name) if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) { ddi_soft_state_free(zfsdev_state, minor); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); mutex_exit(&zfsdev_state_lock); return (SET_ERROR(EAGAIN)); } @@ -538,7 +538,7 @@ zvol_create_minor(const char *name) minor, DDI_PSEUDO, 0) == DDI_FAILURE) { ddi_remove_minor_node(zfs_dip, chrbuf); ddi_soft_state_free(zfsdev_state, minor); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); mutex_exit(&zfsdev_state_lock); return (SET_ERROR(EAGAIN)); } @@ -568,7 +568,7 @@ zvol_create_minor(const char *name) else zil_replay(os, zv, zvol_replay_vector); } - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); zv->zv_objset = NULL; zvol_minors++; @@ -633,7 +633,7 @@ zvol_first_open(zvol_state_t *zv) uint64_t readonly; /* lie and say we're read-only */ - error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, + error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, B_TRUE, zvol_tag, &os); if (error) return (error); @@ -642,13 +642,13 @@ zvol_first_open(zvol_state_t *zv) error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) { ASSERT(error == 0); - dmu_objset_disown(os, zvol_tag); + dmu_objset_disown(os, 1, zvol_tag); return (error); } error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); if (error) { - dmu_objset_disown(os, zvol_tag); + dmu_objset_disown(os, 1, zvol_tag); return (error); } @@ -682,7 +682,7 @@ zvol_last_close(zvol_state_t *zv) txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); dmu_objset_evict_dbufs(zv->zv_objset); - dmu_objset_disown(zv->zv_objset, zvol_tag); + dmu_objset_disown(zv->zv_objset, 1, 
zvol_tag); zv->zv_objset = NULL; } @@ -850,7 +850,7 @@ zvol_set_volsize(const char *name, uint64_t volsize) zv = zvol_minor_lookup(name); if (zv == NULL || zv->zv_objset == NULL) { - if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, + if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, B_TRUE, FTAG, &os)) != 0) { mutex_exit(&zfsdev_state_lock); return (error); @@ -872,7 +872,7 @@ zvol_set_volsize(const char *name, uint64_t volsize) error = zvol_update_live_volsize(zv, volsize); out: if (owned) { - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); if (zv != NULL) zv->zv_objset = NULL; } @@ -2056,6 +2056,9 @@ zvol_dumpify(zvol_state_t *zv) if (zv->zv_flags & ZVOL_RDONLY) return (SET_ERROR(EROFS)); + if (os->os_encrypted) + return (SET_ERROR(ENOTSUP)); + if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { boolean_t resize = (dumpsize > 0); diff --git a/usr/src/uts/common/sys/fm/fs/zfs.h b/usr/src/uts/common/sys/fm/fs/zfs.h index 029af540b3c7..51a12f6817c9 100644 --- a/usr/src/uts/common/sys/fm/fs/zfs.h +++ b/usr/src/uts/common/sys/fm/fs/zfs.h @@ -33,6 +33,7 @@ extern "C" { #define ZFS_ERROR_CLASS "fs.zfs" #define FM_EREPORT_ZFS_CHECKSUM "checksum" +#define FM_EREPORT_ZFS_AUTHENTICATION "authentication" #define FM_EREPORT_ZFS_IO "io" #define FM_EREPORT_ZFS_DATA "data" #define FM_EREPORT_ZFS_POOL "zpool" diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index c12cb65084ca..299aa7f9abc6 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -160,6 +160,14 @@ typedef enum { ZFS_PROP_REDUNDANT_METADATA, ZFS_PROP_PREV_SNAP, ZFS_PROP_RECEIVE_RESUME_TOKEN, + ZFS_PROP_ENCRYPTION, + ZFS_PROP_KEYLOCATION, + ZFS_PROP_KEYFORMAT, + ZFS_PROP_PBKDF2_SALT, + ZFS_PROP_PBKDF2_ITERS, + ZFS_PROP_ENCRYPTION_ROOT, + ZFS_PROP_KEY_GUID, + ZFS_PROP_KEYSTATUS, ZFS_NUM_PROPS } zfs_prop_t; @@ -268,6 +276,8 @@ boolean_t 
zfs_prop_readonly(zfs_prop_t); boolean_t zfs_prop_visible(zfs_prop_t prop); boolean_t zfs_prop_inheritable(zfs_prop_t); boolean_t zfs_prop_setonce(zfs_prop_t); +boolean_t zfs_prop_encryption_key_param(zfs_prop_t); +boolean_t zfs_prop_valid_keylocation(const char *, boolean_t); const char *zfs_prop_to_name(zfs_prop_t); zfs_prop_t zfs_name_to_prop(const char *); boolean_t zfs_prop_user(const char *); @@ -368,6 +378,30 @@ typedef enum { ZFS_REDUNDANT_METADATA_MOST } zfs_redundant_metadata_type_t; +typedef enum zfs_keystatus { + ZFS_KEYSTATUS_NONE = 0, + ZFS_KEYSTATUS_UNAVAILABLE, + ZFS_KEYSTATUS_AVAILABLE +} zfs_keystatus_t; + +typedef enum zfs_keyformat { + ZFS_KEYFORMAT_NONE = 0, + ZFS_KEYFORMAT_RAW, + ZFS_KEYFORMAT_HEX, + ZFS_KEYFORMAT_PASSPHRASE, + ZFS_KEYFORMAT_FORMATS +} zfs_keyformat_t; + +typedef enum zfs_key_location { + ZFS_KEYLOCATION_NONE = 0, + ZFS_KEYLOCATION_PROMPT, + ZFS_KEYLOCATION_URI, + ZFS_KEYLOCATION_LOCATIONS +} zfs_keylocation_t; + +#define DEFAULT_PBKDF2_ITERATIONS 350000 +#define MIN_PBKDF2_ITERATIONS 100000 + /* * On-disk version number. */ @@ -895,6 +929,9 @@ typedef enum zfs_ioc { ZFS_IOC_GET_BOOKMARKS, ZFS_IOC_DESTROY_BOOKMARKS, ZFS_IOC_CHANNEL_PROGRAM, + ZFS_IOC_LOAD_KEY, + ZFS_IOC_UNLOAD_KEY, + ZFS_IOC_CHANGE_KEY, ZFS_IOC_LAST } zfs_ioc_t; @@ -941,6 +978,12 @@ typedef enum { #define ZPOOL_HIST_DSID "dsid" #define ZPOOL_HIST_ERRNO "errno" +/* + * Special nvlist name that will not have its args recorded in the pool's + * history log. + */ +#define ZPOOL_HIDDEN_ARGS "hidden_args" + /* * Flags for ZFS_IOC_VDEV_SET_STATE */ @@ -958,6 +1001,7 @@ typedef enum { #define ZFS_IMPORT_ANY_HOST 0x2 #define ZFS_IMPORT_MISSING_LOG 0x4 #define ZFS_IMPORT_ONLY 0x8 +#define ZFS_IMPORT_LOAD_KEYS 0x20 /* * Channel program argument/return nvlist keys and defaults. 
diff --git a/usr/src/uts/common/sys/mount.h b/usr/src/uts/common/sys/mount.h index 07151d1d1391..b8f67a8e253a 100644 --- a/usr/src/uts/common/sys/mount.h +++ b/usr/src/uts/common/sys/mount.h @@ -58,6 +58,12 @@ extern "C" { #define MS_SYSSPACE 0x0008 /* Mounta already in kernel space */ #define MS_NOSPLICE 0x1000 /* Don't splice fs instance into name space */ #define MS_NOCHECK 0x2000 /* Clustering: suppress mount busy checks */ +/* + * MS_CRYPT indicates that encryption keys should be loaded if they are not + * already available. This is not defined in glibc, but it is never seen by + * the kernel so it will not cause any problems. + */ +#define MS_CRYPT 0x4000 /* * Mask to sift out flag bits allowable from mount(2). */