From f6644b9c479eb69f198082ec7463c4fd9fa23d15 Mon Sep 17 00:00:00 2001 From: Christopher Laprise Date: Thu, 2 Feb 2023 15:49:37 -0500 Subject: [PATCH] Discard unused chunks in sparse mode, issue #117 --- wyng | 58 ++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/wyng b/wyng index c48b761..ea4e9dd 100755 --- a/wyng +++ b/wyng @@ -17,7 +17,7 @@ import sys, signal, os, stat, shutil, subprocess as SPr, time, datetime import re, mmap, bz2, zlib, gzip, tarfile, io, fcntl, tempfile import argparse, configparser, hashlib, hmac, functools, uuid -import getpass, base64, platform, resource, itertools, string +import getpass, base64, platform, resource, itertools, string, struct import xml.etree.ElementTree from array import array ; from urllib.parse import urlparse @@ -842,6 +842,32 @@ class CP: blkdiscard = "/sbin/blkdiscard" ; ionice = "/usr/bin/ionice" +class LowLevelIO: + import ctypes, ctypes.util + libc = ctypes.CDLL(ctypes.util.find_library("c")) + + FALLOC_FL_KEEP_SIZE = 0x01 ; FALLOC_FL_PUNCH_HOLE = 0x02 + FALLOC_FL_COLLAPSE_RANGE = 0x08 ; FALLOC_FL_ZERO_RANGE = 0x10 + FALLOC_FL_INSERT_RANGE = 0x20 ; FALLOC_FL_UNSHARE_RANGE = 0x40 + FALLOC_FL_PUNCH_FULL = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE + + BLKDISCARD = 0x1277 ; BLKDISCARDZEROES = 0x127c + + fallocate = libc.fallocate + fallocate.restype = ctypes.c_int + fallocate.argtypes = [ctypes.c_int, ctypes.c_int, ctypes.c_int64, ctypes.c_int64] + + # Note: file_punch_hole() and block_discard_chunk() have the same arg signature... + def file_punch_hole(self, fn, start, length): + return self.fallocate(fn, self.FALLOC_FL_PUNCH_FULL, start, length) + + def block_discard_chunk(self, fn, start, length): + try: + return fcntl.ioctl(fn, self.BLKDISCARD, struct.pack("LL", start, length)) + except Exception as e: + return None + + class Lvm_VolGroup: def __init__(self, name): self.name = name @@ -2844,8 +2870,8 @@ def receive_volume(datavol, select_ses="", save_path="", diff=False, verify_only if save_path: # Decode dev path semantics and match to vg/lv if possible. Otherwise, open # simple block dev or file. - save_type = "block device" - returned_home = False ; lv, pool, vg = get_lv_path_pool(save_path) + save_type = "block device" ; punch_hole = low_level_io.block_discard_chunk + returned_home = False ; lv, pool, vg = get_lv_path_pool(save_path) if not lv and vg_exists(os.path.dirname(save_path)): # Got vg path, lv does not exist lv = os.path.basename(save_path) @@ -2867,17 +2893,16 @@ def receive_volume(datavol, select_ses="", save_path="", diff=False, verify_only if exists(save_path) and stat.S_ISBLK(os.stat(save_path).st_mode): if not sparse_write: do_exec([[CP.blkdiscard, save_path]]) - volf = open(save_path, "w+b") + volf = open(save_path, fopenmode:="r+b") elif save_path.startswith("/dev/"): err_out("Cannot create new volume from ambiguous /dev path." " Please create the volume before using 'receive', or specify" " --save-to=/dev/volgroup/lv in case of a thin LV.") return None else: # file - save_type = "file" - volf = open(save_path, "w+b") - if not sparse_write: volf.truncate(0) ; volf.flush() - volf.truncate(volsize) ; volf.flush() + save_type = "file" ; punch_hole = low_level_io.file_punch_hole + fopenmode = "r+b" if sparse_write and exists(save_path) else "wb" + volf = open(save_path, fopenmode) ; volf.truncate(volsize) elif diff: if not lv_exists(vgname, datavol): @@ -2907,7 +2932,9 @@ def receive_volume(datavol, select_ses="", save_path="", diff=False, verify_only volf = open(pjoin("/dev",vgname,snap1vol), "rb") - if volf: volf_read = volf.read ; volf_write = volf.write ; volf_seek = volf.seek + if volf: + volf_read = volf.read ; volf_write = volf.write ; volf_seek = volf.seek + volfno = volf.fileno() ; fcntl.lockf(volf, fcntl.LOCK_EX|fcntl.LOCK_NB) if verbose: print("Receiving" if save_path else "Scanning", "volume:", datavol, select_ses[2:]) @@ -2924,7 +2951,7 @@ def receive_volume(datavol, select_ses="", save_path="", diff=False, verify_only gv_stdin = io.TextIOWrapper(getvol.stdin, encoding="utf-8") if sparse else None # Open manifest then receive, check and save data - addr = bcount = diff_count = 0 + addr = bcount = diff_count = 0 ; buf = b'' for mfline in open(manifest, "r"): if addr >= lchunk_addr: break cksum, faddr, ses = mfline.split() ; addr = int(faddr[1:], 16) @@ -2932,12 +2959,13 @@ def receive_volume(datavol, select_ses="", save_path="", diff=False, verify_only if verbose: print("%.2f%%" % (addr/volsize*100), end="\x0d") # Process zeros quickly - if cksum.strip() == "0": + if cksum == "0": if save_path: volf_seek(addr) if sparse_write and volf_read(chunksize) != zeros: - ## Enh: Use hole punch when available... + # Note: punch_hole() might not work, write zeros first anyway volf_seek(addr) ; volf_write(zeros) ; diff_count += chunksize + punch_hole(volfno, addr, chunksize) elif diff: volf_seek(addr) ; diff_count += diff_compare(zeros,True) @@ -3045,7 +3073,7 @@ def receive_volume(datavol, select_ses="", save_path="", diff=False, verify_only bmapf.close() if diff_count > 0 and options.action != "send": print("\nNext 'send' will bring this volume into sync.") - elif diff_count: + elif diff and diff_count: return None return bcount @@ -3294,7 +3322,7 @@ def cleanup(): # Constants / Globals prog_name = "wyng" -prog_version = "0.4.0alpha2" ; prog_date = "20230128" +prog_version = "0.4.0alpha2" ; prog_date = "20230201" format_version = 3 ; debug = False ; tmpdir = None admin_permission = os.getuid() == 0 @@ -3422,6 +3450,8 @@ agent_helper_write() signal_handlers = {} ; signals_caught = [] ; error_cache = [] +low_level_io = LowLevelIO() + # Dict of compressors in the form: (library, default_compress_level, compress_func) compressors = {"zlib": (zlib, 4, zlib.compress), "bz2" : (bz2, 9, bz2.compress)}