From d08d796663219c188626fabbbfee25623f2de76b Mon Sep 17 00:00:00 2001 From: Oblet Alexis Date: Tue, 9 May 2023 11:48:20 +0200 Subject: [PATCH 1/3] max-map-size: add command line option On fast storage, reading with a specific block size can drastically improve fetch performance. This option acts as the mmap length argument. On scale-out or flash disk storage, this value can be set higher than 1MB. Tests on my side resulted in rsync whole-file transfers being 5x faster with the following specs: - max-map-size 4MB - network 10Gb link - hpnssh / 4800 MTU --- checksum.c | 3 ++- generator.c | 3 ++- options.c | 10 +++++++++- rsync.1.md | 14 ++++++++++++++ rsync.h | 1 - sender.c | 3 ++- 6 files changed, 29 insertions(+), 5 deletions(-) diff --git a/checksum.c b/checksum.c index cb21882c5..c1c7802fc 100644 --- a/checksum.c +++ b/checksum.c @@ -41,6 +41,7 @@ extern int checksum_seed; extern int protocol_version; extern int proper_seed_order; extern const char *checksum_choice; +extern int max_map_size; #define NNI_BUILTIN (1<<0) #define NNI_EVP (1<<1) @@ -412,7 +413,7 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) return; } - buf = map_file(fd, len, MAX_MAP_SIZE, CHUNK_SIZE); + buf = map_file(fd, len, max_map_size, CHUNK_SIZE); #ifdef USE_OPENSSL if (file_sum_evp_md) { diff --git a/generator.c b/generator.c index 110db28fc..a3da1955a 100644 --- a/generator.c +++ b/generator.c @@ -99,6 +99,7 @@ extern char *tmpdir; extern char *basis_dir[MAX_BASIS_DIRS+1]; extern struct file_list *cur_flist, *first_flist, *dir_flist; extern filter_rule_list filter_list, daemon_filter_list; +extern int max_map_size; int maybe_ATTRS_REPORT = 0; int maybe_ATTRS_ACCURATE_TIME = 0; @@ -776,7 +777,7 @@ static int generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy) return 0; if (len > 0) - mapbuf = map_file(fd, len, MAX_MAP_SIZE, sum.blength); + mapbuf = map_file(fd, len, max_map_size, sum.blength); else mapbuf = NULL; diff --git a/options.c b/options.c index
fd674754c..756243185 100644 --- a/options.c +++ b/options.c @@ -136,6 +136,7 @@ int checksum_seed = 0; int inplace = 0; int delay_updates = 0; int32 block_size = 0; +int max_map_size = 256*1024; time_t stop_at_utime = 0; char *skip_compress = NULL; char *copy_as = NULL; @@ -740,6 +741,7 @@ static struct poptOption long_options[] = { {"checksum-choice", 0, POPT_ARG_STRING, &checksum_choice, 0, 0, 0 }, {"cc", 0, POPT_ARG_STRING, &checksum_choice, 0, 0, 0 }, {"block-size", 'B', POPT_ARG_STRING, 0, OPT_BLOCK_SIZE, 0, 0 }, + {"max-map-size", 0, POPT_ARG_INT, &max_map_size, 0, 0, 0 }, {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 }, {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 }, @@ -2772,7 +2774,13 @@ void server_options(char **args, int *argc_p) args[ac++] = arg; } - if (io_timeout) { + if (max_map_size) { + if (asprintf(&arg, "--max-map-size=%d", max_map_size) < 0) + goto oom; + args[ac++] = arg; + } + + if (io_timeout) { if (asprintf(&arg, "--timeout=%d", io_timeout) < 0) goto oom; args[ac++] = arg; diff --git a/rsync.1.md b/rsync.1.md index afaf1de89..0063f2c6d 100644 --- a/rsync.1.md +++ b/rsync.1.md @@ -472,6 +472,7 @@ has its own detailed description later in this manpage. --checksum-choice=STR choose the checksum algorithm (aka --cc) --one-file-system, -x don't cross filesystem boundaries --block-size=SIZE, -B force a fixed checksum block-size +--max-map-size force mmap read block size (expressed in bytes, useful for fast storage, default 256K) --rsh=COMMAND, -e specify the remote shell to use --rsync-path=PROGRAM specify the rsync to run on remote machine --existing skip creating new files on receiver @@ -2125,6 +2126,19 @@ expand it. Beginning in 3.2.3 the SIZE can be specified with a suffix as detailed in the [`--max-size`](#opt) option. Older versions only accepted a byte count. +0. 
`--max-map-size=SIZE` + + On fast storage, reading with a specific block size can drastically improve + fetch performance. This option acts as the mmap length argument. + + On scale-out or flash disk storage, this value can be set higher than 1MB. + + The default is 256K; the value is expressed in bytes. + + To use a 4M read block size: + > --max-map-size 4194304 + + 0. `--rsh=COMMAND`, `-e` This option allows you to choose an alternative remote shell program to use diff --git a/rsync.h b/rsync.h index d3709fe0f..8608a832f 100644 --- a/rsync.h +++ b/rsync.h @@ -155,7 +155,6 @@ #define SPARSE_WRITE_SIZE (1024) #define WRITE_SIZE (32*1024) #define CHUNK_SIZE (32*1024) -#define MAX_MAP_SIZE (256*1024) #define IO_BUFFER_SIZE (32*1024) #define MAX_BLOCK_SIZE ((int32)1 << 17) diff --git a/sender.c b/sender.c index 3d4f052e9..73f14601b 100644 --- a/sender.c +++ b/sender.c @@ -53,6 +53,7 @@ extern BOOL want_progress_now; extern struct stats stats; extern struct file_list *cur_flist, *first_flist, *dir_flist; extern char num_dev_ino_buf[4 + 8 + 8]; +extern int max_map_size; BOOL extra_flist_sending_enabled; @@ -396,7 +397,7 @@ void send_files(int f_in, int f_out) } if (st.st_size) { - int32 read_size = MAX(s->blength * 3, MAX_MAP_SIZE); + int32 read_size = MAX(s->blength * 3, max_map_size); mbuf = map_file(fd, st.st_size, read_size, s->blength); } else mbuf = NULL; From d98022824e641b005fee6553fff560b1dcde012c Mon Sep 17 00:00:00 2001 From: Oblet Alexis Date: Tue, 9 May 2023 11:50:06 +0200 Subject: [PATCH 2/3] write-size: add command line option This option forces the buffer size when writing a file on the receiver side.
--- fileio.c | 3 ++- options.c | 8 ++++++++ rsync.1.md | 7 +++++++ rsync.h | 1 - 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fileio.c b/fileio.c index 69c9a7b49..93dfd139b 100644 --- a/fileio.c +++ b/fileio.c @@ -34,6 +34,7 @@ #define ALIGNED_LENGTH(len) ((((len) - 1) | (ALIGN_BOUNDARY-1)) + 1) extern int sparse_files; +extern int write_size; OFF_T preallocated_len = 0; @@ -158,7 +159,7 @@ int write_file(int f, int use_seek, OFF_T offset, const char *buf, int len) offset += r1; } else { if (!wf_writeBuf) { - wf_writeBufSize = WRITE_SIZE * 8; + wf_writeBufSize = write_size * 8; wf_writeBufCnt = 0; wf_writeBuf = new_array(char, wf_writeBufSize); } diff --git a/options.c b/options.c index 756243185..fd9df9962 100644 --- a/options.c +++ b/options.c @@ -137,6 +137,7 @@ int inplace = 0; int delay_updates = 0; int32 block_size = 0; int max_map_size = 256*1024; +int write_size = 32*1024; time_t stop_at_utime = 0; char *skip_compress = NULL; char *copy_as = NULL; @@ -742,6 +743,7 @@ static struct poptOption long_options[] = { {"cc", 0, POPT_ARG_STRING, &checksum_choice, 0, 0, 0 }, {"block-size", 'B', POPT_ARG_STRING, 0, OPT_BLOCK_SIZE, 0, 0 }, {"max-map-size", 0, POPT_ARG_INT, &max_map_size, 0, 0, 0 }, + {"write-size", 0, POPT_ARG_INT, &write_size, 0, 0, 0 }, {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 }, {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 }, @@ -2780,6 +2782,12 @@ void server_options(char **args, int *argc_p) args[ac++] = arg; } + if (write_size) { + if (asprintf(&arg, "--write-size=%d", write_size) < 0) + goto oom; + args[ac++] = arg; + } + if (io_timeout) { if (asprintf(&arg, "--timeout=%d", io_timeout) < 0) goto oom; diff --git a/rsync.1.md b/rsync.1.md index 0063f2c6d..c332fefa7 100644 --- a/rsync.1.md +++ b/rsync.1.md @@ -473,6 +473,7 @@ has its own detailed description later in this manpage. 
--one-file-system, -x don't cross filesystem boundaries --block-size=SIZE, -B force a fixed checksum block-size --max-map-size force mmap read block size (expressed in bytes, useful for fast storage, default 256K) +--write-size force write block size (expressed in bytes, default 32K) --rsh=COMMAND, -e specify the remote shell to use --rsync-path=PROGRAM specify the rsync to run on remote machine --existing skip creating new files on receiver @@ -2139,6 +2140,12 @@ expand it. > --max-map-size 4194304 +0. `--write-size=SIZE` + + This option forces the buffer size when writing a file on the receiver side. + + By default, 32K and expressed in Bytes. + 0. `--rsh=COMMAND`, `-e` This option allows you to choose an alternative remote shell program to use diff --git a/rsync.h b/rsync.h index 8608a832f..bca544175 100644 --- a/rsync.h +++ b/rsync.h @@ -153,7 +153,6 @@ #define RSYNC_PORT 873 #define SPARSE_WRITE_SIZE (1024) -#define WRITE_SIZE (32*1024) #define CHUNK_SIZE (32*1024) #define IO_BUFFER_SIZE (32*1024) #define MAX_BLOCK_SIZE ((int32)1 << 17) From 9f0fcd997f113ec9907458a7549fa30f175fcb75 Mon Sep 17 00:00:00 2001 From: Alexis Oblet Date: Mon, 16 Oct 2023 11:42:25 +0200 Subject: [PATCH 3/3] doc: format max map and write size with =SIZE --- rsync.1.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rsync.1.md b/rsync.1.md index c332fefa7..36e9e79c7 100644 --- a/rsync.1.md +++ b/rsync.1.md @@ -472,8 +472,8 @@ has its own detailed description later in this manpage. 
--checksum-choice=STR choose the checksum algorithm (aka --cc) --one-file-system, -x don't cross filesystem boundaries --block-size=SIZE, -B force a fixed checksum block-size ---max-map-size force mmap read block size (expressed in bytes, useful for fast storage, default 256K) ---write-size force write block size (expressed in bytes, default 32K) +--max-map-size=SIZE force mmap read block size (expressed in bytes, useful for fast storage, default 256K) +--write-size=SIZE force write block size (expressed in bytes, default 32K) --rsh=COMMAND, -e specify the remote shell to use --rsync-path=PROGRAM specify the rsync to run on remote machine --existing skip creating new files on receiver