Skip to content

Commit

Permalink
F #6411 + B #6741: Incremental backups for Ceph datastore
Browse files Browse the repository at this point in the history
Implementation overview:

- Incremental points are saved as dedicated rbd snapshots under the
  "one_backup_<increment_id>" namespace. These snapshots are used to
  generate delta files in rbdiff format.

- The rbdiff files are stored in the backup server to restore the rbd volumes.

- The restore process is performed directly on the Ceph cluster
  importing the base image (first full backup in the chain, rbd import)
  and then applying the increments (rbd import-diff) up to the target
  increment.

- Two new pseudo-protocols have been implemented to adopt the restore
  pattern above (restic+rbd, rsync+rbd). These protocols bundle the
  rbdiff files in a tarball for transfer from the backup server. Note:
  the reconstruct process uses the Ceph BRIDGE_LIST and not the backup
  server (as opposed to qcow2 backups).

Other bug fixes

- This commit also fixes #6741, resetting the backup chain after a
  restore

- The original ceph drivers did not receive the full action information;
  this has now been fixed by including VM information in the STDIN string sent
  to the driver.

Compatibility note.

- Backup actions should now return the backup format used (raw, rbd, ...).
  If it is not provided, oned (6.10.x) will use raw as a default to accommodate any
  third-party driver implementation. It is recommended to include this
  third argument.

Signed-off-by: Guillermo Ramos <[email protected]>
Co-authored-by: Guillermo Ramos <[email protected]>
  • Loading branch information
rsmontero and 1gramos committed Oct 4, 2024
1 parent e13c329 commit 5f7b370
Show file tree
Hide file tree
Showing 23 changed files with 504 additions and 304 deletions.
16 changes: 16 additions & 0 deletions include/Backups.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ class Backups
config.replace("LAST_BACKUP_SIZE", size);
}

/**
 *  Stores the given backup format under the LAST_BACKUP_FORMAT key of
 *  config, replacing any value previously kept under that key.
 *    @param format value to store
 */
void last_backup_format(const std::string& format)
{
config.replace("LAST_BACKUP_FORMAT", format);
}

void last_increment_id(int id)
{
config.replace("LAST_INCREMENT_ID", id);
Expand Down Expand Up @@ -224,6 +229,15 @@ class Backups
return sz;
}

/**
 *  Reads the LAST_BACKUP_FORMAT key from config.
 *    @return the value read from config; the string starts empty and is
 *    returned as-is if config.get() does not fill it (presumably when the
 *    key is not set -- confirm against the config class)
 */
std::string last_backup_format() const
{
    std::string backup_format;

    config.get("LAST_BACKUP_FORMAT", backup_format);

    return backup_format;
}

int last_increment_id() const
{
int id;
Expand Down Expand Up @@ -283,6 +297,8 @@ class Backups

config.erase("LAST_BACKUP_ID");
config.erase("LAST_BACKUP_SIZE");

config.erase("LAST_BACKUP_FORMAT");
}

/**
Expand Down
46 changes: 33 additions & 13 deletions src/datastore_mad/remotes/ceph/cp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ done < <($XPATH /DS_DRIVER_ACTION_DATA/DATASTORE/BASE_PATH \
/DS_DRIVER_ACTION_DATA/IMAGE/SIZE \
/DS_DRIVER_ACTION_DATA/IMAGE/TEMPLATE/MD5 \
/DS_DRIVER_ACTION_DATA/IMAGE/TEMPLATE/SHA1 \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/NO_DECOMPRESS \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/LIMIT_TRANSFER_BW \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/CEPH_USER \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/CEPH_KEY \
Expand All @@ -81,7 +80,6 @@ SRC="${XPATH_ELEMENTS[i++]}"
SIZE="${XPATH_ELEMENTS[i++]}"
MD5="${XPATH_ELEMENTS[i++]}"
SHA1="${XPATH_ELEMENTS[i++]}"
NO_DECOMPRESS="${XPATH_ELEMENTS[i++]}"
LIMIT_TRANSFER_BW="${XPATH_ELEMENTS[i++]}"
CEPH_USER="${XPATH_ELEMENTS[i++]}"
CEPH_KEY="${XPATH_ELEMENTS[i++]}"
Expand Down Expand Up @@ -119,7 +117,7 @@ TMP_DST="$STAGING_DIR/$IMAGE_HASH"
IMAGE_NAME="one-${ID}"
RBD_SOURCE="${POOL_NAME}/${IMAGE_NAME}"

DOWNLOADER_ARGS=`set_downloader_args "$MD5" "$SHA1" "$NO_DECOMPRESS" "$LIMIT_TRANSFER_BW" "$SRC" -`
DOWNLOADER_ARGS=`set_downloader_args "$MD5" "$SHA1" "yes" "$LIMIT_TRANSFER_BW" "$SRC" -`

COPY_COMMAND="$UTILS_PATH/downloader.sh $DOWNLOADER_ARGS"

Expand Down Expand Up @@ -153,21 +151,43 @@ fi
REGISTER_CMD=$(cat <<EOF
set -e -o pipefail
FORMAT=\$($QEMU_IMG info $TMP_DST | grep "^file format:" | awk '{print \$3}' || :)
if file $TMP_DST | grep -q gzip; then
mkdir $TMP_DST.d
cd $TMP_DST.d
if [ "\$FORMAT" != "raw" ] && [ "\$FORMAT" != "luks" ]; then
$QEMU_IMG convert -O raw $TMP_DST $TMP_DST.raw
mv $TMP_DST.raw $TMP_DST
fi
tar zxf $TMP_DST
# Upload base image and snapshot
$RBD import --export-format 2 - $RBD_SOURCE < disk.*.rbd2
# Apply increments
for f in \$(ls disk.*.*.rbdiff | sort -k3 -t.); do
$RBD import-diff - $RBD_SOURCE < \$f
done
# Delete all snapshots
$RBD snap ls $RBD_SOURCE --format json | jq -r '.[] | select(.protected == "true").name' | xargs -rI{} $RBD snap unprotect $RBD_SOURCE@{}
$RBD snap ls $RBD_SOURCE --format json | jq -r '.[].name' | xargs -rI{} $RBD snap rm $RBD_SOURCE@{}
$RBD import $FORMAT_OPT $TMP_DST $RBD_SOURCE
cd -
$RM -rf $TMP_DST $TMP_DST.d
else
FORMAT=\$($QEMU_IMG info $TMP_DST | grep "^file format:" | awk '{print \$3}' || :)
# remove original
$RM -f $TMP_DST
if [ "\$FORMAT" != "raw" ] && [ "\$FORMAT" != "luks" ]; then
$QEMU_IMG convert -O raw $TMP_DST $TMP_DST.raw
mv $TMP_DST.raw $TMP_DST
fi
$RBD import $FORMAT_OPT $TMP_DST $RBD_SOURCE
# remove original
$RM -f $TMP_DST
fi
EOF
)

ssh_exec_and_log "$DST_HOST" "$REGISTER_CMD" \
"Error registering $RBD_SOURCE in $DST_HOST"
ssh_exec_and_log "$DST_HOST" "$REGISTER_CMD" \
"Error registering $RBD_SOURCE in $DST_HOST"

echo "$RBD_SOURCE raw"
14 changes: 10 additions & 4 deletions src/datastore_mad/remotes/downloader.sh
Original file line number Diff line number Diff line change
Expand Up @@ -431,11 +431,17 @@ lxd://*)
file_type="application/octet-stream"
command="$VAR_LOCATION/remotes/datastore/lxd_downloader.sh \"$FROM\""
;;
restic://*)
eval `$VAR_LOCATION/remotes/datastore/restic_downloader.rb "$FROM" | grep -e '^command=' -e '^clean_command='`
restic://*|restic+rbd://*)
defs=`$VAR_LOCATION/remotes/datastore/restic_downloader.rb "$FROM" | grep -e '^command=' -e '^clean_command='`
ret=$?
[ $ret -ne 0 ] && exit $ret
eval "$defs"
;;
rsync://*)
eval `$VAR_LOCATION/remotes/datastore/rsync_downloader.rb "$FROM" | grep -e '^command=' -e '^clean_command='`
rsync://*|rsync+rbd://*)
defs=`$VAR_LOCATION/remotes/datastore/rsync_downloader.rb "$FROM" | grep -e '^command=' -e '^clean_command='`
ret=$?
[ $ret -ne 0 ] && exit $ret
eval "$defs"
;;
*)
if [ ! -r $FROM ]; then
Expand Down
24 changes: 17 additions & 7 deletions src/datastore_mad/remotes/restic/backup
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ require_relative 'restic'

TransferManager::Datastore.load_env

ds_xml = STDIN.read
xml = STDIN.read

dir = ARGV[0].split(':')
_disks = ARGV[1]
Expand All @@ -89,12 +89,13 @@ repo_id = if bj_id != '-'
end

begin
ds = TransferManager::Datastore.from_xml(:ds_xml => ds_xml)
ds_xml = REXML::Document.new(xml).root.elements['DATASTORE']
ds = TransferManager::Datastore.from_xml(:ds_xml => ds_xml.to_s)

rds = Restic.new ds_xml, :create_repo => true,
:repo_type => :sftp,
:host_type => :hypervisor,
:repo_id => repo_id
rds = Restic.new ds_xml.to_s, :create_repo => true,
:repo_type => :sftp,
:host_type => :hypervisor,
:repo_id => repo_id
rds.resticenv_rb
rescue StandardError => e
STDERR.puts e.full_message
Expand Down Expand Up @@ -216,5 +217,14 @@ end
id = parts[0]
short_id = id[0..7] # first 8 chars only

STDOUT.puts "#{short_id} #{parts[1].to_i / (1024 * 1024)}"
# Work out the backup format reported back as the third output field.
#
# 'rbd' is reported only when the VM uses the ceph system TM driver AND its
# backup mode is INCREMENT; every other case reports 'raw'.
#
# Both lookups are nil-safe: a driver action without a VM element, or a VM
# without TEMPLATE/TM_MAD_SYSTEM, must not raise NoMethodError at this point,
# because the backup has already been taken. Such inputs fall back to 'raw'.
vm      = REXML::Document.new(xml).root.elements['VM']
tm_mad  = vm && vm.elements['TEMPLATE/TM_MAD_SYSTEM']&.text
bk_mode = vm && vm.elements['BACKUPS/BACKUP_CONFIG/MODE']&.text

backup_format =
    if tm_mad == 'ceph' && bk_mode == 'INCREMENT'
        'rbd'
    else
        'raw'
    end

STDOUT.puts "#{short_id} #{parts[1].to_i / (1024 * 1024)} #{backup_format}"
exit(0)
5 changes: 4 additions & 1 deletion src/datastore_mad/remotes/restic/ls
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ begin
image.chain_up_to(increment_id)
end

xml = REXML::Document.new(action).root
format = xml.elements['IMAGE/FORMAT'].text

rds = Restic.new action, :prefix => 'DATASTORE/',
:repo_type => :local,
:host_type => :frontend
Expand All @@ -103,7 +106,7 @@ begin
ds_id = rds['DATASTORE/ID']

snap = image.selected || image.last
burl = "restic://#{ds_id}/#{image.bj_id}/#{chain}"
burl = "restic#{format == 'rbd' ? '+rbd' : ''}://#{ds_id}/#{image.bj_id}/#{chain}"

# --------------------------------------------------------------------------
# Get a list of disk paths stored in the backup
Expand Down
14 changes: 8 additions & 6 deletions src/datastore_mad/remotes/restic/restore
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,14 @@ one_client = OpenNebula::Client.new token
# ------------------------------------------------------------------------------
# Create backup object templates for VM and associated disk images
# ------------------------------------------------------------------------------
restorer = TransferManager::BackupRestore.new :vm_xml64 => vm_xml,
:backup_id => snap,
:bimage => image,
:ds_id => ds_id,
:txml => rds,
:proto => 'restic'
restorer = TransferManager::BackupRestore.new(
:vm_xml64 => vm_xml,
:backup_id => snap,
:bimage => image,
:ds_id => ds_id,
:txml => rds,
:proto => image.proto('restic')
)

br_disks = restorer.disk_images disks

Expand Down
4 changes: 3 additions & 1 deletion src/datastore_mad/remotes/restic/stat
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ begin
rds = Restic.new action, :prefix =>'DATASTORE/'
rds.resticenv_rb

file = rds['IMAGE/PATH'].delete_prefix('restic://')
file = rds['IMAGE/PATH']
file.slice! %r{restic(\+[^:]+)?://}

parts = file.split('/')
diskid = parts[-1].match(/disk\.(\d+)/)
base_path = "/#{parts[3..-2].join('/')}/"
Expand Down
84 changes: 47 additions & 37 deletions src/datastore_mad/remotes/restic_downloader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,13 @@

# restic://<datastore_id>/<bj_id>/<id>:<snapshot_id>,.../<file_name>
restic_url = ARGV[0]
tokens = restic_url.delete_prefix('restic://').split('/')

proto, url = restic_url.split(%r{://}, 2)
tokens = url.split('/', 4)
ds_id = tokens[0].to_i
bj_id = tokens[1]
snaps = tokens[2].split(',').map {|s| s.split(':')[1] }
disk_path = tokens[3..-1].join('/')
disk_path = "/#{tokens[3]}"
disk_index = Pathname.new(disk_path).basename.to_s.split('.')[1]
vm_id = disk_path.match('/(\d+)/backup/[^/]+$')[1].to_i

Expand Down Expand Up @@ -109,52 +111,60 @@
# Prepare image.

begin
tmp_dir = "#{rds.tmp_dir}/#{SecureRandom.uuid}"

tmp_dir = "#{rds.tmp_dir}/#{SecureRandom.uuid}"
paths = rds.pull_chain(snaps, disk_index, rds.sftp, tmp_dir)
disk_paths = paths[:disks][:by_index][disk_index]

tmp_path = "#{tmp_dir}/#{Pathname.new(disk_paths.last).basename}"

# FULL BACKUP

if disk_paths.size == 1
# Return shell code snippets according to the downloader's interface.
STDOUT.puts <<~EOS
command="ssh #{SSH_OPTS} '#{rds.user}@#{rds.sftp}' cat '#{tmp_path}'"
clean_command="ssh #{SSH_OPTS} '#{rds.user}@#{rds.sftp}' rm -rf '#{tmp_dir}/'"
disk_paths = paths[:disks][:by_index][disk_index].map {|d| Pathname.new(d) }
tmp_path = "#{tmp_dir}/#{disk_paths.last.basename}"

if proto == 'restic+rbd'
# FULL/INCREMENTAL BACKUP (RBD)

tmp_path = "#{tmp_dir}/disk.#{disk_index}.#{snaps.last[0]}.tar.gz"
script = <<~EOS
set -e -o pipefail; shopt -qs failglob
mkdir -p '#{tmp_dir}/'
tar zcvf '#{tmp_path}' -C #{tmp_dir} #{disk_paths.map {|d| d.basename }.join(' ')}
rm #{disk_paths.map {|d| "#{tmp_dir}/#{d.basename}" }.join(' ')}
EOS
exit(0)
end

# INCREMENTAL BACKUP

script = [<<~EOS]
set -e -o pipefail; shopt -qs failglob
#{rds.resticenv_sh}
EOS

script << TransferManager::BackupImage.reconstruct_chain(disk_paths,
:workdir => tmp_dir)

script << TransferManager::BackupImage.merge_chain(disk_paths,
:workdir => tmp_dir)
rc = TransferManager::Action.ssh('prepare_image',
:host => "#{rds.user}@#{rds.sftp}",
:forward => true,
:cmds => script,
:nostdout => false,
:nostderr => false)

raise StandardError, "Unable to prepare image: #{rc.stderr}" if rc.code != 0
elsif disk_paths.size == 1
# FULL BACKUP (QCOW2)

# No additional preparation needed
true
else
# INCREMENTAL BACKUP (QCOW2)

script = [<<~EOS]
set -e -o pipefail; shopt -qs failglob
#{rds.resticenv_sh}
#{TransferManager::BackupImage.reconstruct_chain(disk_paths, :workdir => tmp_dir)}
#{TransferManager::BackupImage.merge_chain(disk_paths, :workdir => tmp_dir)}
EOS

rc = TransferManager::Action.ssh 'prepare_image',
:host => "#{rds.user}@#{rds.sftp}",
:forward => true,
:cmds => script.join("\n"),
:nostdout => true,
:nostderr => false
rc = TransferManager::Action.ssh('prepare_image',
:host => "#{rds.user}@#{rds.sftp}",
:forward => true,
:cmds => script.join("\n"),
:nostdout => true,
:nostderr => false)

raise StandardError, "Unable to prepare image: #{rc.stderr}" if rc.code != 0
raise StandardError, "Unable to prepare image: #{rc.stderr}" if rc.code != 0
end

# Return shell code snippets according to the downloader's interface.
STDOUT.puts <<~EOS
command="ssh #{SSH_OPTS} '#{rds.user}@#{rds.sftp}' cat '#{tmp_path}'"
clean_command="ssh #{SSH_OPTS} '#{rds.user}@#{rds.sftp}' rm -rf '#{tmp_dir}/'"
EOS
exit(0)
rescue StandardError => e
STDERR.puts e.full_message
exit(-1)
Expand Down
27 changes: 18 additions & 9 deletions src/datastore_mad/remotes/rsync/backup
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ require_relative '../../tm/lib/tm_action'

TransferManager::Datastore.load_env

ds_xml = STDIN.read
xml = STDIN.read

dir = ARGV[0].split(':')
_disks = ARGV[1].split(':')
Expand All @@ -81,20 +81,20 @@ vm_dir = if dsrdir
end

begin
ds = REXML::Document.new(ds_xml).root
ds_xml = REXML::Document.new(xml).root.elements['DATASTORE']

rsync_user = ds.elements['TEMPLATE/RSYNC_USER'].text
rsync_host = ds.elements['TEMPLATE/RSYNC_HOST'].text
rsync_user = ds_xml.elements['TEMPLATE/RSYNC_USER'].text
rsync_host = ds_xml.elements['TEMPLATE/RSYNC_HOST'].text

base = ds.elements['BASE_PATH'].text
base = ds_xml.elements['BASE_PATH'].text

if ds.elements['TEMPLATE/RSYNC_ARGS'].nil?
if ds_xml.elements['TEMPLATE/RSYNC_ARGS'].nil?
args = '-aS'
else
args = ds.elements['TEMPLATE/RSYNC_ARGS'].text
args = ds_xml.elements['TEMPLATE/RSYNC_ARGS'].text
end

ds = TransferManager::Datastore.from_xml(:ds_xml => ds_xml)
ds = TransferManager::Datastore.from_xml(:ds_xml => ds_xml.to_s)
rescue StandardError => e
STDERR.puts e.message
exit(-1)
Expand Down Expand Up @@ -204,5 +204,14 @@ if rc.code != 0 || rc.stdout.empty?
exit(-1)
end

STDOUT.puts "#{backup_id} #{rc.stdout.lines.last.split[0]}"
# Work out the backup format reported back as the third output field.
#
# 'rbd' is reported only when the VM uses the ceph system TM driver AND its
# backup mode is INCREMENT; every other case reports 'raw'.
#
# Both lookups are nil-safe: a driver action without a VM element, or a VM
# without TEMPLATE/TM_MAD_SYSTEM, must not raise NoMethodError at this point,
# because the backup has already been taken. Such inputs fall back to 'raw'.
vm      = REXML::Document.new(xml).root.elements['VM']
tm_mad  = vm && vm.elements['TEMPLATE/TM_MAD_SYSTEM']&.text
bk_mode = vm && vm.elements['BACKUPS/BACKUP_CONFIG/MODE']&.text

backup_format =
    if tm_mad == 'ceph' && bk_mode == 'INCREMENT'
        'rbd'
    else
        'raw'
    end

STDOUT.puts "#{backup_id} #{rc.stdout.lines.last.split[0]} #{backup_format}"
exit(0)
Loading

0 comments on commit 5f7b370

Please sign in to comment.