diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 506427a10672..bd1cb75127a9 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -8367,6 +8367,8 @@ zpool_do_reopen(int argc, char **argv) typedef struct scrub_cbdata { int cb_type; pool_scrub_cmd_t cb_scrub_cmd; + time_t cb_date_start; + time_t cb_date_end; } scrub_cbdata_t; static boolean_t @@ -8410,7 +8412,8 @@ scrub_callback(zpool_handle_t *zhp, void *data) return (1); } - err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd); + err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd, cb->cb_date_start, + cb->cb_date_end); if (err == 0 && zpool_has_checkpoint(zhp) && cb->cb_type == POOL_SCAN_SCRUB) { @@ -8429,10 +8432,32 @@ wait_callback(zpool_handle_t *zhp, void *data) return (zpool_wait(zhp, *act)); } +static time_t +date_string_to_sec(const char *timestr) +{ + int ret; + struct tm tm = {0}; + + ret = sscanf(timestr, "%4d-%2d-%2d %2d:%2d", &tm.tm_year, &tm.tm_mon, + &tm.tm_mday, &tm.tm_hour, &tm.tm_min); + if (ret < 3) { + fprintf(stderr, gettext("Failed to parse the date.\n")); + usage(B_FALSE); + } + + // Adjust struct + tm.tm_year -= 1900; + tm.tm_mon -= 1; + + return (timegm(&tm)); +} + /* * zpool scrub [-e | -s | -p | -C] [-w] ... * * -e Only scrub blocks in the error log. + * -E End date of scrub. + * -S Start date of scrub. * -s Stop. Stops any in-progress scrub. * -p Pause. Pause in-progress scrub. * -w Wait. Blocks until scrub has completed. 
@@ -8448,6 +8473,7 @@ zpool_do_scrub(int argc, char **argv) cb.cb_type = POOL_SCAN_SCRUB; cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; + cb.cb_date_start = cb.cb_date_end = 0; boolean_t is_error_scrub = B_FALSE; boolean_t is_pause = B_FALSE; @@ -8455,14 +8481,20 @@ zpool_do_scrub(int argc, char **argv) boolean_t is_txg_continue = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, "spweC")) != -1) { + while ((c = getopt(argc, argv, "spweCE:S:")) != -1) { switch (c) { case 'e': is_error_scrub = B_TRUE; break; + case 'E': + cb.cb_date_end = date_string_to_sec(optarg); + break; case 's': is_stop = B_TRUE; break; + case 'S': + cb.cb_date_start = date_string_to_sec(optarg); + break; case 'p': is_pause = B_TRUE; break; @@ -8510,6 +8542,13 @@ zpool_do_scrub(int argc, char **argv) } } + if ((cb.cb_date_start != 0 || cb.cb_date_end != 0) && + cb.cb_scrub_cmd != POOL_SCRUB_NORMAL) { + (void) fprintf(stderr, gettext("invalid option combination: " + "start/end date is avlilable only with normal scrub\n")); + usage(B_FALSE); + } + if (wait && (cb.cb_type == POOL_SCAN_NONE || cb.cb_scrub_cmd == POOL_SCRUB_PAUSE)) { (void) fprintf(stderr, gettext("invalid option combination: " diff --git a/include/Makefile.am b/include/Makefile.am index f173064efc99..99c9ce5ed292 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -10,6 +10,7 @@ COMMON_H = \ cityhash.h \ zfeature_common.h \ zfs_comutil.h \ + zfs_crrd.h \ zfs_deleg.h \ zfs_fletcher.h \ zfs_namecheck.h \ diff --git a/include/libzfs.h b/include/libzfs.h index 01d51999f4eb..3121fdb79b50 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -290,7 +290,8 @@ typedef struct trimflags { /* * Functions to manipulate pool and vdev state */ -_LIBZFS_H int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t); +_LIBZFS_H int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t, + time_t, time_t); _LIBZFS_H int zpool_initialize(zpool_handle_t *, pool_initialize_func_t, nvlist_t *); _LIBZFS_H int 
/*
 * Pool-directory ZAP entry names under which the three resolutions of the
 * TXG time log are stored.  The vendor prefix is "com.klarasystems", which
 * is the spelling zpool-features(7) uses for this feature.
 */
#define	DMU_POOL_TXG_LOG_TIME_MINUTES	"com.klarasystems:txg_log_time:minutes"
#define	DMU_POOL_TXG_LOG_TIME_DAYS	"com.klarasystems:txg_log_time:days"
#define	DMU_POOL_TXG_LOG_TIME_MONTHS	"com.klarasystems:txg_log_time:months"
* CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2024 Klara Inc. + * + * This software was developed by + * Fred Weigel + * Mariusz Zaborski + * under sponsorship from Wasabi Technology, Inc. and Klara Inc. 
#ifndef _CRRD_H_
#define _CRRD_H_

/* Number of samples each ring buffer can hold before it starts overwriting. */
#define RRD_MAX_ENTRIES 256

/*
 * One sample: a timestamp and the txg that was syncing at that time.
 * The callers visible in this change pass wall-clock seconds as the time.
 */
typedef struct {
	hrtime_t rrdd_time;
	uint64_t rrdd_txg;
} rrd_data_t;

/*
 * Fixed-size ring buffer of samples.
 *
 * rrd_add() stores the new sample at rrd_tail and advances it; once the
 * buffer is full it advances rrd_head as well, dropping the oldest sample.
 *
 * spa.c stores this structure to the pool directory ZAP as a raw byte array
 * of sizeof (rrd_t), so its layout is effectively an on-disk format: do not
 * reorder or resize fields without handling existing pools.
 * NOTE(review): `int` and `size_t` are not fixed-width -- confirm whether the
 * stored image is meant to be portable across ABIs and byte orders.
 */
typedef struct {
	int rrd_head; /* index of the oldest sample */
	int rrd_tail; /* index of the slot the next sample is written to */
	size_t rrd_length; /* number of valid samples */

	rrd_data_t rrd_entries[RRD_MAX_ENTRIES];
} rrd_t;

/*
 * The txg time database: three ring buffers that dbrrd_add() fills at
 * different rates, and that dbrrd_query() searches together.
 */
typedef struct {
	rrd_t dbr_minutes;
	rrd_t dbr_days;
	rrd_t dbr_months;
} dbrrd_t;

/* NOTE(review): no definition of rrd_create() is part of this change. */
rrd_t *rrd_create(void);
/* Number of valid samples; rrd_entry()/rrd_get() accept 0..rrd_len()-1. */
size_t rrd_len(rrd_t *rrd);

/* i-th sample counting from the oldest, or NULL if i is out of range. */
const rrd_data_t *rrd_entry(rrd_t *r, size_t i);
/* Most recently added sample, or NULL if the buffer is empty. */
const rrd_data_t *rrd_tail_entry(rrd_t *rrd);
/* Timestamp of the most recently added sample, or 0 if the buffer is empty. */
uint64_t rrd_tail(rrd_t *rrd);
/* txg of the i-th sample counting from the oldest, or 0 if out of range. */
uint64_t rrd_get(rrd_t *rrd, size_t i);

/* Append a sample, overwriting the oldest one when the buffer is full. */
void rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg);

/* Record a sample in the database. */
void dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg);
/* txg of the sample whose timestamp is closest to tv, or 0 if none exist. */
uint64_t dbrrd_query(dbrrd_t *r, hrtime_t tv);

#endif
*/ int -zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) +zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd, + time_t date_start, time_t date_end) { char errbuf[ERRBUFLEN]; int err; @@ -2737,6 +2738,8 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) nvlist_t *args = fnvlist_alloc(); fnvlist_add_uint64(args, "scan_type", (uint64_t)func); fnvlist_add_uint64(args, "scan_command", (uint64_t)cmd); + fnvlist_add_uint64(args, "scan_date_start", (uint64_t)date_start); + fnvlist_add_uint64(args, "scan_date_end", (uint64_t)date_end); err = lzc_scrub(ZFS_IOC_POOL_SCRUB, zhp->zpool_name, args, NULL); fnvlist_free(args); diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi index 6a9c20a2bb88..1f6711c7fb58 100644 --- a/lib/libzfs_core/libzfs_core.abi +++ b/lib/libzfs_core/libzfs_core.abi @@ -651,6 +651,7 @@ + @@ -762,6 +763,10 @@ + + + + diff --git a/lib/libzfsbootenv/libzfsbootenv.abi b/lib/libzfsbootenv/libzfsbootenv.abi index 5903d5dcbe21..bf866b0fa61b 100644 --- a/lib/libzfsbootenv/libzfsbootenv.abi +++ b/lib/libzfsbootenv/libzfsbootenv.abi @@ -1,6 +1,6 @@ - + diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 404b737c204d..47644cc2a4e8 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -178,6 +178,7 @@ nodist_libzpool_la_SOURCES = \ module/zfs/zfeature.c \ module/zfs/zfs_byteswap.c \ module/zfs/zfs_chksum.c \ + module/zfs/zfs_crrd.c \ module/zfs/zfs_fm.c \ module/zfs/zfs_fuid.c \ module/zfs/zfs_ratelimit.c \ diff --git a/man/man7/zpool-features.7 b/man/man7/zpool-features.7 index 7b392a896150..476071d6349d 100644 --- a/man/man7/zpool-features.7 +++ b/man/man7/zpool-features.7 @@ -18,7 +18,7 @@ .\" Copyright (c) 2019, Allan Jude .\" Copyright (c) 2021, Colm Buckley .\" -.Dd October 2, 2024 +.Dd November 20, 2024 .Dt ZPOOL-FEATURES 7 .Os . @@ -958,6 +958,26 @@ once it is and never returns back to being .Sy enabled . . 
+.feature com.klarasystems txg_log_time yes +This feature enables the use of the TXG time database, allowing tracking of +when TXGs have been committed to disk. +While this feature does not retain all TXGs, it provides a good estimate of +the time range in which TXGs were committed. +This functionality can be used with +.Xr zpool-scrub 8 +to scrub only the TXGs flushed within a specified date range. +.Pp +The database is created after this feature is enabled. +As a result, TXGs flushed before the feature was enabled will not be +timestamped. +.Pp +This feature becomes +.Sy active +once it is +.Sy enabled , +and never returns back to being +.Sy enabled . +. .feature org.zfsonlinux userobj_accounting yes extensible_dataset This feature allows administrators to account the object usage information by user and group. diff --git a/man/man8/zpool-scrub.8 b/man/man8/zpool-scrub.8 index 676286b038da..f5aa23d1b4f3 100644 --- a/man/man8/zpool-scrub.8 +++ b/man/man8/zpool-scrub.8 @@ -26,7 +26,7 @@ .\" Copyright 2017 Nexenta Systems, Inc. .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved. .\" -.Dd November 18, 2024 +.Dd December 11, 2024 .Dt ZPOOL-SCRUB 8 .Os . @@ -38,6 +38,8 @@ .Cm scrub .Op Ns Fl e | Ns Fl p | Fl s Ns | Fl C Ns .Op Fl w +.Op Fl S Ar date +.Op Fl E Ar date .Ar pool Ns … . .Sh DESCRIPTION @@ -117,6 +119,47 @@ resilvering, nor can it be run when a regular scrub is paused. Continue scrub from last saved txg (see zpool .Sy last_scrubbed_txg property). +.It Fl S Ar date , Fl E Ar date +Restricts the scrub to blocks created within the given date range. +.Bl -bullet -compact -offset indent +.It +.Fl S +Defines a start date. +If not specified, scrubbing begins from the start of the pool's +existence. +.It +.Fl E +Defines an end date. +If not specified, scrubbing continues up to the most recent data. +.El +The provided date should be in the format: +.Dq YYYY-MM-DD HH:MM . +Where: +.Bl -bullet -compact -offset indent +.It +.Dq YYYY +is the year.
+.It +.Dq MM +is the numeric representation of the month. +.It +.Dq DD +is the day of the month. +.It +.Dq HH +is the hour. +.It +.Dq MM +is the minutes. +.El +The hour and minutes parameters can be omitted. +The time should be provided in machine local time zone. +The pool must have the +.Sy txg_log_time +feature enabled to use this option. +This feature only gathers dates after it is enabled. +Specifying dates prior to enabling this feature will result in scrubbing +starting from the date the pool was created. .El .Sh EXAMPLES .Ss Example 1 diff --git a/module/Kbuild.in b/module/Kbuild.in index fc14d5cb535e..8e8b4589e5be 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -412,6 +412,7 @@ ZFS_OBJS := \ zfeature.o \ zfs_byteswap.o \ zfs_chksum.o \ + zfs_crrd.o \ zfs_fm.o \ zfs_fuid.o \ zfs_impl.o \ diff --git a/module/Makefile.bsd b/module/Makefile.bsd index c605069d07d3..9e60d402c854 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -204,6 +204,7 @@ SRCS+= abd_os.c \ vdev_label_os.c \ zfs_acl.c \ zfs_ctldir.c \ + zfs_crrd.c \ zfs_debug.c \ zfs_dir.c \ zfs_file_os.c \ diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c index 96f0086d7858..febcae3cf527 100644 --- a/module/zcommon/zfeature_common.c +++ b/module/zcommon/zfeature_common.c @@ -785,6 +785,18 @@ zpool_feature_init(void) ZFEATURE_TYPE_BOOLEAN, large_microzap_deps, sfeatures); } + { + static const spa_feature_t txg_log_time_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_TXG_TIMELOG, + "com.klaraystems:txg_log_time", "txg_log_time", + "Log history of txg.", + ZFEATURE_FLAG_PER_DATASET | ZFEATURE_FLAG_READONLY_COMPAT, + ZFEATURE_TYPE_BOOLEAN, txg_log_time_deps, sfeatures); + } + zfs_mod_list_supported_free(sfeatures); } diff --git a/module/zfs/spa.c b/module/zfs/spa.c index b83c982c13fd..187f0f2e3e21 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -99,6 +99,7 @@ #include #endif /* _KERNEL */ +#include 
"zfs_crrd.h" #include "zfs_prop.h" #include "zfs_comutil.h" #include @@ -309,6 +310,17 @@ static int zfs_livelist_condense_zthr_cancel = 0; */ static int zfs_livelist_condense_new_alloc = 0; +/* + * Time variable to decide how often the txg should be added into the + * database. + */ +static uint_t spa_note_txg_time = 60; + +/* + * How often flush txg database to a disk. + */ +static uint_t spa_flush_txg_time = 5 * 60; + /* * ========================================================================== * SPA properties routines @@ -3326,6 +3338,103 @@ spa_start_livelist_condensing_thread(spa_t *spa) spa_livelist_condense_cb, spa, minclsyspri); } +static void +spa_sync_time_logger(spa_t *spa, dmu_tx_t *tx) +{ + uint64_t curtime; + + if (!spa_feature_is_enabled(spa, SPA_FEATURE_TXG_TIMELOG)) { + return; + } + if (!spa_writeable(spa)) { + return; + } + if (tx->tx_txg == spa->spa_last_noted_txg) { + return; + } + curtime = gethrestime_sec(); + if (curtime < spa->spa_last_noted_txg_time + spa_note_txg_time) { + return; + } + + spa->spa_last_noted_txg_time = curtime; + spa->spa_last_noted_txg = tx->tx_txg; + + mutex_enter(&spa->spa_txg_log_time_lock); + dbrrd_add(&spa->spa_txg_log_time, curtime, tx->tx_txg); + + if (curtime < spa->spa_last_flush_txg_time + spa_flush_txg_time) { + goto out; + } + spa->spa_last_flush_txg_time = curtime; + + if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_TXG_LOG_TIME_MINUTES) == ENOENT) { + VERIFY0(zap_add(spa_meta_objset(spa), + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TXG_LOG_TIME_MINUTES, + 1, sizeof (spa->spa_txg_log_time.dbr_minutes), + &spa->spa_txg_log_time.dbr_minutes, tx)); + VERIFY0(zap_add(spa_meta_objset(spa), + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TXG_LOG_TIME_DAYS, + 1, sizeof (spa->spa_txg_log_time.dbr_days), + &spa->spa_txg_log_time.dbr_days, tx)); + VERIFY0(zap_add(spa_meta_objset(spa), + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TXG_LOG_TIME_MONTHS, + 1, sizeof (spa->spa_txg_log_time.dbr_months), + 
&spa->spa_txg_log_time.dbr_months, tx)); + spa_feature_incr(spa, SPA_FEATURE_TXG_TIMELOG, tx); + } else { + VERIFY0(zap_update(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TXG_LOG_TIME_MINUTES, + 1, sizeof (spa->spa_txg_log_time.dbr_minutes), + &spa->spa_txg_log_time.dbr_minutes, tx)); + VERIFY0(zap_update(spa->spa_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TXG_LOG_TIME_DAYS, + 1, sizeof (spa->spa_txg_log_time.dbr_days), + &spa->spa_txg_log_time.dbr_days, tx)); + VERIFY0(zap_update(spa_meta_objset(spa), + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TXG_LOG_TIME_MONTHS, + 1, sizeof (spa->spa_txg_log_time.dbr_months), + &spa->spa_txg_log_time.dbr_months, tx)); + } + +out: + mutex_exit(&spa->spa_txg_log_time_lock); +} + +static int +spa_load_txg_log_time(spa_t *spa) +{ + int error; + + error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_TXG_LOG_TIME_MINUTES, + 1, sizeof (spa->spa_txg_log_time.dbr_minutes), + &spa->spa_txg_log_time.dbr_minutes); + + if (error == ENOENT) + return (0); + if (error != 0) + return (error); + + error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_TXG_LOG_TIME_DAYS, + 1, sizeof (spa->spa_txg_log_time.dbr_days), + &spa->spa_txg_log_time.dbr_days); + if (error != 0) + return (error); + + error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_TXG_LOG_TIME_MONTHS, + 1, sizeof (spa->spa_txg_log_time.dbr_months), + &spa->spa_txg_log_time.dbr_months); + if (error != 0) + return (error); + + return (0); +} + + static void spa_spawn_aux_threads(spa_t *spa) { @@ -4714,6 +4823,11 @@ spa_ld_get_props(spa_t *spa) if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + /* Load time log */ + error = spa_load_txg_log_time(spa); + if (error != 0) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + /* * Load the persistent error log. If we have an older pool, this will * not be present. 
@@ -10229,6 +10343,7 @@ spa_sync(spa_t *spa, uint64_t txg) } spa_sync_rewrite_vdev_config(spa, tx); + spa_sync_time_logger(spa, tx); dmu_tx_commit(tx); taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid); @@ -11053,6 +11168,12 @@ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, new_alloc, INT, "Whether extra ALLOC blkptrs were added to a livelist entry while it " "was being condensed"); +ZFS_MODULE_PARAM(zfs_spa, spa_, note_txg_time, UINT, ZMOD_RW, + "How often txg should be registred in database"); + +ZFS_MODULE_PARAM(zfs_spa, spa_, flush_txg_time, UINT, ZMOD_RW, + "How often txg database should be flushed"); + #ifdef _KERNEL ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_read, spa_taskq_read_param_set, spa_taskq_read_param_get, ZMOD_RW, diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 7fae51cc2c52..e93430a852a7 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -714,6 +714,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_flushed_ms_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_activities_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_txg_log_time_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL); @@ -916,6 +917,7 @@ spa_remove(spa_t *spa) mutex_destroy(&spa->spa_vdev_top_lock); mutex_destroy(&spa->spa_feat_stats_lock); mutex_destroy(&spa->spa_activities_lock); + mutex_destroy(&spa->spa_txg_log_time_lock); kmem_free(spa, sizeof (spa_t)); } diff --git a/module/zfs/zfs_crrd.c b/module/zfs/zfs_crrd.c new file mode 100644 index 000000000000..492ab6be3073 --- /dev/null +++ b/module/zfs/zfs_crrd.c @@ -0,0 +1,181 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2024 Klara Inc. + * + * This software was developed by + * Fred Weigel + * Mariusz Zaborski + * under sponsorship from Wasabi Technology, Inc. and Klara Inc. + */ + +#include + +#include "zfs_crrd.h" + +#define rrd_abs(x) ((x) < 0 ? -(x) : (x)) + +const rrd_data_t * +rrd_tail_entry(rrd_t *rrd) +{ + size_t n; + + if (rrd_len(rrd) == 0) + return (NULL); + + if (rrd->rrd_tail == 0) + n = RRD_MAX_ENTRIES - 1; + else + n = rrd->rrd_tail - 1; + + return (&rrd->rrd_entries[n]); +} + +uint64_t +rrd_tail(rrd_t *rrd) +{ + const rrd_data_t *tail; + + tail = rrd_tail_entry(rrd); + + return (tail == NULL ? 0 : tail->rrdd_time); +} + +/* + * Return length of data in the rrd. + * rrd_get works from 0..rrd_len()-1. + */ +size_t +rrd_len(rrd_t *rrd) +{ + + return (rrd->rrd_length); +} + +const rrd_data_t * +rrd_entry(rrd_t *rrd, size_t i) +{ + size_t n; + + if (i >= rrd_len(rrd)) { + return (0); + } + + n = (rrd->rrd_head + i) % RRD_MAX_ENTRIES; + return (&rrd->rrd_entries[n]); +} + +uint64_t +rrd_get(rrd_t *rrd, size_t i) +{ + const rrd_data_t *data = rrd_entry(rrd, i); + + return (data == NULL ? 0 : data->rrdd_txg); +} + +/* Add value to database. 
*/ +void +rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg) +{ + + rrd->rrd_entries[rrd->rrd_tail].rrdd_time = time; + rrd->rrd_entries[rrd->rrd_tail].rrdd_txg = txg; + + rrd->rrd_tail = (rrd->rrd_tail + 1) % RRD_MAX_ENTRIES; + + if (rrd->rrd_length < RRD_MAX_ENTRIES) { + rrd->rrd_length++; + } else { + rrd->rrd_head = (rrd->rrd_head + 1) % RRD_MAX_ENTRIES; + } +} + +void +dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg) +{ + hrtime_t daydiff, monthdiff; + + daydiff = time - rrd_tail(&db->dbr_days); + monthdiff = time - rrd_tail(&db->dbr_months); + + rrd_add(&db->dbr_minutes, time, txg); + if (daydiff >= 0 && daydiff >= SEC2NSEC(60 * 3600)) + rrd_add(&db->dbr_days, time, txg); + if (monthdiff >= 0 && monthdiff >= SEC2NSEC(60 * 3600 * 30)) + rrd_add(&db->dbr_months, time, txg); +} + +/* + * XXXosho: We might want to do a binary search here, + * although the data is small, and the routine + * isn't used so often that we stick to simple methods. + */ +static const rrd_data_t * +rrd_query(rrd_t *rrd, hrtime_t tv) +{ + hrtime_t mindiff; + const rrd_data_t *data; + + data = NULL; + for (size_t i = 0; i < rrd_len(rrd); i++) { + const rrd_data_t *cur = rrd_entry(rrd, i); + + if (data == NULL || mindiff > rrd_abs(tv - cur->rrdd_time)) { + data = cur; + mindiff = rrd_abs(tv - cur->rrdd_time); + } + + if (cur->rrdd_time > tv) + break; + } + + return (data); +} + +uint64_t +dbrrd_query(dbrrd_t *r, hrtime_t tv) +{ + const rrd_data_t *data, *dm, *dd, *dy; + + data = NULL; + + dm = rrd_query(&r->dbr_minutes, tv); + if (dm != NULL) + data = dm; + dd = rrd_query(&r->dbr_days, tv); + if (dd != NULL) { + if (data == NULL || + rrd_abs(data->rrdd_time - tv) > + rrd_abs(dd->rrdd_time - tv)) { + data = dd; + } + } + dy = rrd_query(&r->dbr_months, tv); + if (dy != NULL) { + if (data == NULL || + rrd_abs(data->rrdd_time - tv) > + rrd_abs(dy->rrdd_time - tv)) { + data = dy; + } + } + + return (data == NULL ? 
0 : data->rrdd_txg); +} diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index b1b0ae54460b..f6de6b0d6d19 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1694,6 +1694,8 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc) static const zfs_ioc_key_t zfs_keys_pool_scrub[] = { {"scan_type", DATA_TYPE_UINT64, 0}, {"scan_command", DATA_TYPE_UINT64, 0}, + {"scan_date_start", DATA_TYPE_UINT64, 0}, + {"scan_date_end", DATA_TYPE_UINT64, 0}, }; static int @@ -1702,6 +1704,7 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) spa_t *spa; int error; uint64_t scan_type, scan_cmd; + uint64_t date_start, date_end; if (nvlist_lookup_uint64(innvl, "scan_type", &scan_type) != 0) return (SET_ERROR(EINVAL)); @@ -1711,6 +1714,11 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) if (scan_cmd >= POOL_SCRUB_FLAGS_END) return (SET_ERROR(EINVAL)); + if (nvlist_lookup_uint64(innvl, "scan_date_start", &date_start) != 0) + date_start = 0; + if (nvlist_lookup_uint64(innvl, "scan_date_end", &date_end) != 0) + date_end = 0; + if ((error = spa_open(poolname, &spa, FTAG)) != 0) return (error); @@ -1722,7 +1730,24 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) error = spa_scan_range(spa, scan_type, spa_get_last_scrubbed_txg(spa), 0); } else { - error = spa_scan(spa, scan_type); + uint64_t txg_start, txg_end; + + txg_start = txg_end = 0; + if (date_start || date_end) { + mutex_enter(&spa->spa_txg_log_time_lock); + if (date_start != 0) { + txg_start = dbrrd_query(&spa->spa_txg_log_time, + date_start); + } + + if (date_end != 0) { + txg_end = dbrrd_query(&spa->spa_txg_log_time, + date_end); + } + mutex_exit(&spa->spa_txg_log_time_lock); + } + + error = spa_scan_range(spa, scan_type, txg_start, txg_end); } spa_close(spa, FTAG);