From 891c8f23a7a7d6d01e5a4ae6ff5621e51654ecb1 Mon Sep 17 00:00:00 2001 From: Mariusz Zaborski Date: Thu, 31 Oct 2024 14:26:33 +0000 Subject: [PATCH] Add TXG timestamp database This feature enables tracking of when TXGs are committed to disk, providing an estimated timestamp for each TXG. With this information, it becomes possible to perform scrubs based on specific date ranges, improving the granularity of data management and recovery operations. Signed-off-by: Mariusz Zaborski --- cmd/zpool/zpool_main.c | 43 ++++++- include/Makefile.am | 1 + include/libzfs.h | 3 +- include/sys/dmu.h | 3 + include/sys/spa_impl.h | 8 ++ include/zfeature_common.h | 1 + include/zfs_crrd.h | 67 ++++++++++ lib/libnvpair/libnvpair.abi | 5 + lib/libuutil/libuutil.abi | 5 + lib/libzfs/libzfs.abi | 23 +++- lib/libzfs/libzfs_pool.c | 5 +- lib/libzfs_core/libzfs_core.abi | 5 + lib/libzfsbootenv/libzfsbootenv.abi | 2 +- lib/libzpool/Makefile.am | 1 + man/man7/zpool-features.7 | 22 +++- man/man8/zpool-scrub.8 | 45 ++++++- module/Kbuild.in | 1 + module/Makefile.bsd | 1 + module/zcommon/zfeature_common.c | 12 ++ module/zfs/spa.c | 121 +++++++++++++++++++ module/zfs/spa_misc.c | 2 + module/zfs/zfs_crrd.c | 181 ++++++++++++++++++++++++++++ module/zfs/zfs_ioctl.c | 27 ++++- 23 files changed, 571 insertions(+), 13 deletions(-) create mode 100644 include/zfs_crrd.h create mode 100644 module/zfs/zfs_crrd.c diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 506427a10672..bd1cb75127a9 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -8367,6 +8367,8 @@ zpool_do_reopen(int argc, char **argv) typedef struct scrub_cbdata { int cb_type; pool_scrub_cmd_t cb_scrub_cmd; + time_t cb_date_start; + time_t cb_date_end; } scrub_cbdata_t; static boolean_t @@ -8410,7 +8412,8 @@ scrub_callback(zpool_handle_t *zhp, void *data) return (1); } - err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd); + err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd, cb->cb_date_start, + 
cb->cb_date_end); if (err == 0 && zpool_has_checkpoint(zhp) && cb->cb_type == POOL_SCAN_SCRUB) { @@ -8429,10 +8432,32 @@ wait_callback(zpool_handle_t *zhp, void *data) return (zpool_wait(zhp, *act)); } +static time_t +date_string_to_sec(const char *timestr) +{ + int ret; + struct tm tm = { .tm_isdst = -1 }; + + ret = sscanf(timestr, "%4d-%2d-%2d %2d:%2d", &tm.tm_year, &tm.tm_mon, + &tm.tm_mday, &tm.tm_hour, &tm.tm_min); + if (ret < 3) { + fprintf(stderr, gettext("Failed to parse the date.\n")); + usage(B_FALSE); + } + + /* Adjust to struct tm conventions; mktime() assumes local time. */ + tm.tm_year -= 1900; + tm.tm_mon -= 1; + + return (mktime(&tm)); +} + /* * zpool scrub [-e | -s | -p | -C] [-w] ... * * -e Only scrub blocks in the error log. + * -E End date of scrub. + * -S Start date of scrub. * -s Stop. Stops any in-progress scrub. * -p Pause. Pause in-progress scrub. * -w Wait. Blocks until scrub has completed. @@ -8448,6 +8473,7 @@ zpool_do_scrub(int argc, char **argv) cb.cb_type = POOL_SCAN_SCRUB; cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; + cb.cb_date_start = cb.cb_date_end = 0; boolean_t is_error_scrub = B_FALSE; boolean_t is_pause = B_FALSE; @@ -8455,14 +8481,20 @@ zpool_do_scrub(int argc, char **argv) boolean_t is_txg_continue = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, "spweC")) != -1) { + while ((c = getopt(argc, argv, "spweCE:S:")) != -1) { switch (c) { case 'e': is_error_scrub = B_TRUE; break; + case 'E': + cb.cb_date_end = date_string_to_sec(optarg); + break; case 's': is_stop = B_TRUE; break; + case 'S': + cb.cb_date_start = date_string_to_sec(optarg); + break; case 'p': is_pause = B_TRUE; break; @@ -8510,6 +8542,13 @@ zpool_do_scrub(int argc, char **argv) } } + if ((cb.cb_date_start != 0 || cb.cb_date_end != 0) && + cb.cb_scrub_cmd != POOL_SCRUB_NORMAL) { + (void) fprintf(stderr, gettext("invalid option combination: " + "start/end date is available only with normal scrub\n")); + usage(B_FALSE); + } + if (wait && (cb.cb_type == POOL_SCAN_NONE || cb.cb_scrub_cmd == POOL_SCRUB_PAUSE)) { (void)
fprintf(stderr, gettext("invalid option combination: " diff --git a/include/Makefile.am b/include/Makefile.am index f173064efc99..99c9ce5ed292 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -10,6 +10,7 @@ COMMON_H = \ cityhash.h \ zfeature_common.h \ zfs_comutil.h \ + zfs_crrd.h \ zfs_deleg.h \ zfs_fletcher.h \ zfs_namecheck.h \ diff --git a/include/libzfs.h b/include/libzfs.h index 01d51999f4eb..3121fdb79b50 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -290,7 +290,8 @@ typedef struct trimflags { /* * Functions to manipulate pool and vdev state */ -_LIBZFS_H int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t); +_LIBZFS_H int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t, + time_t, time_t); _LIBZFS_H int zpool_initialize(zpool_handle_t *, pool_initialize_func_t, nvlist_t *); _LIBZFS_H int zpool_initialize_wait(zpool_handle_t *, pool_initialize_func_t, diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 29f715039d29..c5a121670d87 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -393,6 +393,9 @@ typedef struct dmu_buf { #define DMU_POOL_ZPOOL_CHECKPOINT "com.delphix:zpool_checkpoint" #define DMU_POOL_LOG_SPACEMAP_ZAP "com.delphix:log_spacemap_zap" #define DMU_POOL_DELETED_CLONES "com.delphix:deleted_clones" +#define DMU_POOL_TXG_LOG_TIME_MINUTES "com.klarasystems:txg_log_time:minutes" +#define DMU_POOL_TXG_LOG_TIME_DAYS "com.klarasystems:txg_log_time:days" +#define DMU_POOL_TXG_LOG_TIME_MONTHS "com.klarasystems:txg_log_time:months" /* * Allocate an object from this objset.
The range of object numbers diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index b0a2d46ff2c4..cd9ab8088f9d 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -54,6 +54,8 @@ #include #include +#include "zfs_crrd.h" + #ifdef __cplusplus extern "C" { #endif @@ -353,6 +355,12 @@ struct spa { spa_checkpoint_info_t spa_checkpoint_info; /* checkpoint accounting */ zthr_t *spa_checkpoint_discard_zthr; + kmutex_t spa_txg_log_time_lock; /* for spa_txg_log_time */ + dbrrd_t spa_txg_log_time; + uint64_t spa_last_noted_txg; + uint64_t spa_last_noted_txg_time; + uint64_t spa_last_flush_txg_time; + space_map_t *spa_syncing_log_sm; /* current log space map */ avl_tree_t spa_sm_logs_by_txg; kmutex_t spa_flushed_ms_lock; /* for metaslabs_by_flushed */ diff --git a/include/zfeature_common.h b/include/zfeature_common.h index ac42b5c0cd6b..7d9045f7b6ad 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -86,6 +86,7 @@ typedef enum spa_feature { SPA_FEATURE_FAST_DEDUP, SPA_FEATURE_LONGNAME, SPA_FEATURE_LARGE_MICROZAP, + SPA_FEATURE_TXG_TIMELOG, SPA_FEATURES } spa_feature_t; diff --git a/include/zfs_crrd.h b/include/zfs_crrd.h new file mode 100644 index 000000000000..9fed99a0cc38 --- /dev/null +++ b/include/zfs_crrd.h @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2024 Klara Inc. + * + * This software was developed by + * Fred Weigel + * Mariusz Zaborski + * under sponsorship from Wasabi Technology, Inc. and Klara Inc. + */ + +#ifndef _CRRD_H_ +#define _CRRD_H_ + +#define RRD_MAX_ENTRIES 256 + +typedef struct { + hrtime_t rrdd_time; + uint64_t rrdd_txg; +} rrd_data_t; + +typedef struct { + int rrd_head; /* head (beginning) */ + int rrd_tail; /* tail (end) */ + size_t rrd_length; + + rrd_data_t rrd_entries[RRD_MAX_ENTRIES]; +} rrd_t; + +typedef struct { + rrd_t dbr_minutes; + rrd_t dbr_days; + rrd_t dbr_months; +} dbrrd_t; + +rrd_t *rrd_create(void); +size_t rrd_len(rrd_t *rrd); + +const rrd_data_t *rrd_entry(rrd_t *r, size_t i); +const rrd_data_t *rrd_tail_entry(rrd_t *rrd); +uint64_t rrd_tail(rrd_t *rrd); +uint64_t rrd_get(rrd_t *rrd, size_t i); + +void rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg); + +void dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg); +uint64_t dbrrd_query(dbrrd_t *r, hrtime_t tv); + +#endif diff --git a/lib/libnvpair/libnvpair.abi b/lib/libnvpair/libnvpair.abi index e3eacb195463..fa8efbcd9d7e 100644 --- a/lib/libnvpair/libnvpair.abi +++ b/lib/libnvpair/libnvpair.abi @@ -2194,6 +2194,7 @@ + @@ -2306,6 +2307,10 @@ + + + + diff --git a/lib/libuutil/libuutil.abi b/lib/libuutil/libuutil.abi index 7cb92ac9f3f8..220b26228dac 100644 --- a/lib/libuutil/libuutil.abi +++ b/lib/libuutil/libuutil.abi @@ -652,6 +652,7 @@ + @@ -763,6 +764,10 @@ + + + + diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 1f9fde6677d8..b1e163159b24 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -629,7 +629,7 @@ - + @@ -1170,6 +1170,7 @@ + @@ -1275,6 +1276,10 @@ + + + + @@ -6197,7 +6202,8 @@ - + + @@ -6720,6 +6726,8 @@ + + @@ -8368,6 
+8376,11 @@ + + + + + @@ -9376,8 +9389,8 @@ - - + + @@ -9454,7 +9467,7 @@ - + diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index f256535e8ea0..bd668cf42a20 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -2728,7 +2728,8 @@ zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds, * Scan the pool. */ int -zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) +zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd, + time_t date_start, time_t date_end) { char errbuf[ERRBUFLEN]; int err; @@ -2737,6 +2738,8 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) nvlist_t *args = fnvlist_alloc(); fnvlist_add_uint64(args, "scan_type", (uint64_t)func); fnvlist_add_uint64(args, "scan_command", (uint64_t)cmd); + fnvlist_add_uint64(args, "scan_date_start", (uint64_t)date_start); + fnvlist_add_uint64(args, "scan_date_end", (uint64_t)date_end); err = lzc_scrub(ZFS_IOC_POOL_SCRUB, zhp->zpool_name, args, NULL); fnvlist_free(args); diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi index 6a9c20a2bb88..1f6711c7fb58 100644 --- a/lib/libzfs_core/libzfs_core.abi +++ b/lib/libzfs_core/libzfs_core.abi @@ -651,6 +651,7 @@ + @@ -762,6 +763,10 @@ + + + + diff --git a/lib/libzfsbootenv/libzfsbootenv.abi b/lib/libzfsbootenv/libzfsbootenv.abi index 5903d5dcbe21..bf866b0fa61b 100644 --- a/lib/libzfsbootenv/libzfsbootenv.abi +++ b/lib/libzfsbootenv/libzfsbootenv.abi @@ -1,6 +1,6 @@ - + diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 404b737c204d..47644cc2a4e8 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -178,6 +178,7 @@ nodist_libzpool_la_SOURCES = \ module/zfs/zfeature.c \ module/zfs/zfs_byteswap.c \ module/zfs/zfs_chksum.c \ + module/zfs/zfs_crrd.c \ module/zfs/zfs_fm.c \ module/zfs/zfs_fuid.c \ module/zfs/zfs_ratelimit.c \ diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7 
index 7b392a896150..476071d6349d 100644 --- a/man/man7/zpool-features.7 +++ b/man/man7/zpool-features.7 @@ -18,7 +18,7 @@ .\" Copyright (c) 2019, Allan Jude .\" Copyright (c) 2021, Colm Buckley .\" -.Dd October 2, 2024 +.Dd November 20, 2024 .Dt ZPOOL-FEATURES 7 .Os . @@ -958,6 +958,26 @@ once it is and never returns back to being .Sy enabled . . +.feature com.klarasystems txg_log_time yes extensible_dataset +This feature enables the use of the TXG time database, allowing tracking of +when TXGs have been committed to disk. +While this feature does not retain all TXGs, it provides a good estimate of +the time range in which TXGs were committed. +This functionality can be used with +.Xr zpool-scrub 8 +to scrub only the TXGs flushed within a specified date range. +.Pp +The database is created after this feature is enabled. +As a result, TXGs flushed before the feature was enabled will not be +timestamped. +.Pp +This feature becomes +.Sy active +once it is +.Sy enabled , +and never returns back to being +.Sy enabled . +. .feature org.zfsonlinux userobj_accounting yes extensible_dataset This feature allows administrators to account the object usage information by user and group. diff --git a/man/man8/zpool-scrub.8 b/man/man8/zpool-scrub.8 index 676286b038da..f5aa23d1b4f3 100644 --- a/man/man8/zpool-scrub.8 +++ b/man/man8/zpool-scrub.8 @@ -26,7 +26,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd November 18, 2024 +.Dd December 11, 2024 .Dt ZPOOL-SCRUB 8 .Os . @@ -38,6 +38,8 @@ .Cm scrub .Op Ns Fl e | Ns Fl p | Fl s Ns | Fl C Ns .Op Fl w +.Op Fl S Ar date +.Op Fl E Ar date .Ar pool Ns … . .Sh DESCRIPTION @@ -117,6 +119,47 @@ resilvering, nor can it be run when a regular scrub is paused. Continue scrub from last saved txg (see zpool .Sy last_scrubbed_txg property). +.It Fl S Ar date , Fl E Ar date +Restricts the scrub to blocks created within the specified date range.
+.Bl -bullet -compact -offset indent +.It +.Fl S +Defines a start date. +If not specified, scrubbing begins from the start of the pool's +existence. +.It +.Fl E +Defines an end date. +If not specified, scrubbing continues up to the most recent data. +.El +The provided date should be in the format: +.Dq YYYY-MM-DD HH:MM . +Where: +.Bl -bullet -compact -offset indent +.It +.Dq YYYY +is the year. +.It +.Dq MM +is the numeric representation of the month. +.It +.Dq DD +is the day of the month. +.It +.Dq HH +is the hour. +.It +.Dq MM +is the minutes. +.El +The hour and minutes parameters can be omitted. +The time should be provided in the machine's local time zone. +The pool must have the +.Sy txg_log_time +feature enabled to use this option. +This feature only gathers dates after it is enabled. +Specifying dates prior to enabling this feature will result in scrubbing +starting from the date the pool was created. .El .Sh EXAMPLES .Ss Example 1 diff --git a/module/Kbuild.in b/module/Kbuild.in index fc14d5cb535e..8e8b4589e5be 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -412,6 +412,7 @@ ZFS_OBJS := \ zfeature.o \ zfs_byteswap.o \ zfs_chksum.o \ + zfs_crrd.o \ zfs_fm.o \ zfs_fuid.o \ zfs_impl.o \ diff --git a/module/Makefile.bsd b/module/Makefile.bsd index c605069d07d3..9e60d402c854 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -204,6 +204,7 @@ SRCS+= abd_os.c \ vdev_label_os.c \ zfs_acl.c \ zfs_ctldir.c \ + zfs_crrd.c \ zfs_debug.c \ zfs_dir.c \ zfs_file_os.c \ diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index 96f0086d7858..febcae3cf527 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -785,6 +785,18 @@ zpool_feature_init(void) ZFEATURE_TYPE_BOOLEAN, large_microzap_deps, sfeatures); } + { + static const spa_feature_t txg_log_time_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_TXG_TIMELOG, + "com.klarasystems:txg_log_time",
"txg_log_time", + "Log history of txg.", + ZFEATURE_FLAG_PER_DATASET | ZFEATURE_FLAG_READONLY_COMPAT, + ZFEATURE_TYPE_BOOLEAN, txg_log_time_deps, sfeatures); + } + zfs_mod_list_supported_free(sfeatures); } diff --git a/module/zfs/spa.c b/module/zfs/spa.c index b83c982c13fd..187f0f2e3e21 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -99,6 +99,7 @@ #include #endif /* _KERNEL */ +#include "zfs_crrd.h" #include "zfs_prop.h" #include "zfs_comutil.h" #include @@ -309,6 +310,17 @@ static int zfs_livelist_condense_zthr_cancel = 0; */ static int zfs_livelist_condense_new_alloc = 0; +/* + * Time variable to decide how often the txg should be added into the + * database. + */ +static uint_t spa_note_txg_time = 60; + +/* + * How often flush txg database to a disk. + */ +static uint_t spa_flush_txg_time = 5 * 60; + /* * ========================================================================== * SPA properties routines @@ -3326,6 +3338,103 @@ spa_start_livelist_condensing_thread(spa_t *spa) spa_livelist_condense_cb, spa, minclsyspri); } +static void +spa_sync_time_logger(spa_t *spa, dmu_tx_t *tx) +{ + uint64_t curtime; + + if (!spa_feature_is_enabled(spa, SPA_FEATURE_TXG_TIMELOG)) { + return; + } + if (!spa_writeable(spa)) { + return; + } + if (tx->tx_txg == spa->spa_last_noted_txg) { + return; + } + curtime = gethrestime_sec(); + if (curtime < spa->spa_last_noted_txg_time + spa_note_txg_time) { + return; + } + + spa->spa_last_noted_txg_time = curtime; + spa->spa_last_noted_txg = tx->tx_txg; + + mutex_enter(&spa->spa_txg_log_time_lock); + dbrrd_add(&spa->spa_txg_log_time, curtime, tx->tx_txg); + + if (curtime < spa->spa_last_flush_txg_time + spa_flush_txg_time) { + goto out; + } + spa->spa_last_flush_txg_time = curtime; + + if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_TXG_LOG_TIME_MINUTES) == ENOENT) { + VERIFY0(zap_add(spa_meta_objset(spa), + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TXG_LOG_TIME_MINUTES, + 1, sizeof 
(spa->spa_txg_log_time.dbr_minutes), + &spa->spa_txg_log_time.dbr_minutes, tx)); + VERIFY0(zap_add(spa_meta_objset(spa), + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TXG_LOG_TIME_DAYS, + 1, sizeof (spa->spa_txg_log_time.dbr_days), + &spa->spa_txg_log_time.dbr_days, tx)); + VERIFY0(zap_add(spa_meta_objset(spa), + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TXG_LOG_TIME_MONTHS, + 1, sizeof (spa->spa_txg_log_time.dbr_months), + &spa->spa_txg_log_time.dbr_months, tx)); + spa_feature_incr(spa, SPA_FEATURE_TXG_TIMELOG, tx); + } else { + VERIFY0(zap_update(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TXG_LOG_TIME_MINUTES, + 1, sizeof (spa->spa_txg_log_time.dbr_minutes), + &spa->spa_txg_log_time.dbr_minutes, tx)); + VERIFY0(zap_update(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TXG_LOG_TIME_DAYS, + 1, sizeof (spa->spa_txg_log_time.dbr_days), + &spa->spa_txg_log_time.dbr_days, tx)); + VERIFY0(zap_update(spa_meta_objset(spa), + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TXG_LOG_TIME_MONTHS, + 1, sizeof (spa->spa_txg_log_time.dbr_months), + &spa->spa_txg_log_time.dbr_months, tx)); + } + +out: + mutex_exit(&spa->spa_txg_log_time_lock); +} + +static int +spa_load_txg_log_time(spa_t *spa) +{ + int error; + + error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_TXG_LOG_TIME_MINUTES, + 1, sizeof (spa->spa_txg_log_time.dbr_minutes), + &spa->spa_txg_log_time.dbr_minutes); + + if (error == ENOENT) + return (0); + if (error != 0) + return (error); + + error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_TXG_LOG_TIME_DAYS, + 1, sizeof (spa->spa_txg_log_time.dbr_days), + &spa->spa_txg_log_time.dbr_days); + if (error != 0) + return (error); + + error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_TXG_LOG_TIME_MONTHS, + 1, sizeof (spa->spa_txg_log_time.dbr_months), + &spa->spa_txg_log_time.dbr_months); + if (error != 0) + return (error); + + return (0); +} + + static void spa_spawn_aux_threads(spa_t 
*spa) { @@ -4714,6 +4823,11 @@ spa_ld_get_props(spa_t *spa) if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + /* Load time log */ + error = spa_load_txg_log_time(spa); + if (error != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + /* * Load the persistent error log. If we have an older pool, this will * not be present. @@ -10229,6 +10343,7 @@ spa_sync(spa_t *spa, uint64_t txg) } spa_sync_rewrite_vdev_config(spa, tx); + spa_sync_time_logger(spa, tx); dmu_tx_commit(tx); taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid); @@ -11053,6 +11168,12 @@ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT, "Whether extra ALLOC blkptrs were added to a livelist entry while it " "was being condensed"); +ZFS_MODULE_PARAM(zfs_spa, spa_, note_txg_time, UINT, ZMOD_RW, + "How often txg should be registered in database"); + +ZFS_MODULE_PARAM(zfs_spa, spa_, flush_txg_time, UINT, ZMOD_RW, + "How often txg database should be flushed"); + #ifdef _KERNEL ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read, spa_taskq_read_param_set, spa_taskq_read_param_get, ZMOD_RW, diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 7fae51cc2c52..e93430a852a7 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -714,6 +714,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_flushed_ms_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_activities_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_txg_log_time_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL); @@ -916,6 +917,7 @@ spa_remove(spa_t *spa) mutex_destroy(&spa->spa_vdev_top_lock); mutex_destroy(&spa->spa_feat_stats_lock); mutex_destroy(&spa->spa_activities_lock); + mutex_destroy(&spa->spa_txg_log_time_lock);
kmem_free(spa, sizeof (spa_t)); } diff --git a/module/zfs/zfs_crrd.c b/module/zfs/zfs_crrd.c new file mode 100644 index 000000000000..492ab6be3073 --- /dev/null +++ b/module/zfs/zfs_crrd.c @@ -0,0 +1,181 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2024 Klara Inc. + * + * This software was developed by + * Fred Weigel + * Mariusz Zaborski + * under sponsorship from Wasabi Technology, Inc. and Klara Inc. + */ + +#include + +#include "zfs_crrd.h" + +#define rrd_abs(x) ((x) < 0 ? -(x) : (x)) + +const rrd_data_t * +rrd_tail_entry(rrd_t *rrd) +{ + size_t n; + + if (rrd_len(rrd) == 0) + return (NULL); + + if (rrd->rrd_tail == 0) + n = RRD_MAX_ENTRIES - 1; + else + n = rrd->rrd_tail - 1; + + return (&rrd->rrd_entries[n]); +} + +uint64_t +rrd_tail(rrd_t *rrd) +{ + const rrd_data_t *tail; + + tail = rrd_tail_entry(rrd); + + return (tail == NULL ? 0 : tail->rrdd_time); +} + +/* + * Return length of data in the rrd. + * rrd_get works from 0..rrd_len()-1. 
+ */ +size_t +rrd_len(rrd_t *rrd) +{ + + return (rrd->rrd_length); +} + +const rrd_data_t * +rrd_entry(rrd_t *rrd, size_t i) +{ + size_t n; + + if (i >= rrd_len(rrd)) { + return (NULL); + } + + n = (rrd->rrd_head + i) % RRD_MAX_ENTRIES; + return (&rrd->rrd_entries[n]); +} + +uint64_t +rrd_get(rrd_t *rrd, size_t i) +{ + const rrd_data_t *data = rrd_entry(rrd, i); + + return (data == NULL ? 0 : data->rrdd_txg); +} + +/* Add value to database. */ +void +rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg) +{ + + rrd->rrd_entries[rrd->rrd_tail].rrdd_time = time; + rrd->rrd_entries[rrd->rrd_tail].rrdd_txg = txg; + + rrd->rrd_tail = (rrd->rrd_tail + 1) % RRD_MAX_ENTRIES; + + if (rrd->rrd_length < RRD_MAX_ENTRIES) { + rrd->rrd_length++; + } else { + rrd->rrd_head = (rrd->rrd_head + 1) % RRD_MAX_ENTRIES; + } +} + +void +dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg) +{ + hrtime_t daydiff, monthdiff; /* seconds; callers pass wall-clock seconds */ + + daydiff = time - rrd_tail(&db->dbr_days); + monthdiff = time - rrd_tail(&db->dbr_months); + + rrd_add(&db->dbr_minutes, time, txg); + if (daydiff >= 0 && daydiff >= 24 * 60 * 60) + rrd_add(&db->dbr_days, time, txg); + if (monthdiff >= 0 && monthdiff >= 30 * 24 * 60 * 60) + rrd_add(&db->dbr_months, time, txg); +} + +/* + * XXXosho: We might want to do a binary search here, + * although the data is small, and the routine + * isn't used so often that we stick to simple methods.
+ */ +static const rrd_data_t * +rrd_query(rrd_t *rrd, hrtime_t tv) +{ + hrtime_t mindiff; + const rrd_data_t *data; + + data = NULL; + for (size_t i = 0; i < rrd_len(rrd); i++) { + const rrd_data_t *cur = rrd_entry(rrd, i); + + if (data == NULL || mindiff > rrd_abs(tv - cur->rrdd_time)) { + data = cur; + mindiff = rrd_abs(tv - cur->rrdd_time); + } + + if (cur->rrdd_time > tv) + break; + } + + return (data); +} + +uint64_t +dbrrd_query(dbrrd_t *r, hrtime_t tv) +{ + const rrd_data_t *data, *dm, *dd, *dy; + + data = NULL; + + dm = rrd_query(&r->dbr_minutes, tv); + if (dm != NULL) + data = dm; + dd = rrd_query(&r->dbr_days, tv); + if (dd != NULL) { + if (data == NULL || + rrd_abs(data->rrdd_time - tv) > + rrd_abs(dd->rrdd_time - tv)) { + data = dd; + } + } + dy = rrd_query(&r->dbr_months, tv); + if (dy != NULL) { + if (data == NULL || + rrd_abs(data->rrdd_time - tv) > + rrd_abs(dy->rrdd_time - tv)) { + data = dy; + } + } + + return (data == NULL ? 0 : data->rrdd_txg); +} diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index b1b0ae54460b..f6de6b0d6d19 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1694,6 +1694,8 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc) static const zfs_ioc_key_t zfs_keys_pool_scrub[] = { {"scan_type", DATA_TYPE_UINT64, 0}, {"scan_command", DATA_TYPE_UINT64, 0}, + {"scan_date_start", DATA_TYPE_UINT64, 0}, + {"scan_date_end", DATA_TYPE_UINT64, 0}, }; static int @@ -1702,6 +1704,7 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) spa_t *spa; int error; uint64_t scan_type, scan_cmd; + uint64_t date_start, date_end; if (nvlist_lookup_uint64(innvl, "scan_type", &scan_type) != 0) return (SET_ERROR(EINVAL)); @@ -1711,6 +1714,11 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) if (scan_cmd >= POOL_SCRUB_FLAGS_END) return (SET_ERROR(EINVAL)); + if (nvlist_lookup_uint64(innvl, "scan_date_start", &date_start) != 0) + date_start = 0; + if (nvlist_lookup_uint64(innvl, 
"scan_date_end", &date_end) != 0) + date_end = 0; + if ((error = spa_open(poolname, &spa, FTAG)) != 0) return (error); @@ -1722,7 +1730,24 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) error = spa_scan_range(spa, scan_type, spa_get_last_scrubbed_txg(spa), 0); } else { - error = spa_scan(spa, scan_type); + uint64_t txg_start, txg_end; + + txg_start = txg_end = 0; + if (date_start || date_end) { + mutex_enter(&spa->spa_txg_log_time_lock); + if (date_start != 0) { + txg_start = dbrrd_query(&spa->spa_txg_log_time, + date_start); + } + + if (date_end != 0) { + txg_end = dbrrd_query(&spa->spa_txg_log_time, + date_end); + } + mutex_exit(&spa->spa_txg_log_time_lock); + } + + error = spa_scan_range(spa, scan_type, txg_start, txg_end); } spa_close(spa, FTAG);