Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

switch to pathlib and improve type checking #65

Merged
merged 6 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,9 @@ repos:
- types-PyYAML==5.4.0
- types-setuptools
- types-tzlocal==4.2
exclude: tests
args:
- --check-untyped-defs
- --ignore-missing-imports
repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.0.1'
rev: 'v1.8.0'
3 changes: 3 additions & 0 deletions changelog.d/20240312_145226_jb_pathlib.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.. A new scriv changelog fragment.

- Support backups with mixed backends
3 changes: 3 additions & 0 deletions changelog.d/20240312_145254_jb_pathlib.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.. A new scriv changelog fragment.

- use the same parent revision in source and backend
5 changes: 0 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,3 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
backy = "backy.main:main"

[tool.poetry.plugins."backy.sources"]
ceph-rbd = "backy.sources.ceph.source:CephRBD"
file = "backy.sources.file:File"
flyingcircus = "backy.sources.flyingcircus.source:FlyingCircusRootDisk"
7 changes: 0 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,6 @@ def long_desc():
entry_points="""
[console_scripts]
backy = backy.main:main

[backy.sources]
ceph-rbd = backy.sources.ceph.source:CephRBD
file = backy.sources.file:File
flyingcircus = \
backy.sources.flyingcircus.source:FlyingCircusRootDisk

""",
author=(
"Christian Theune <[email protected]>, "
Expand Down
26 changes: 18 additions & 8 deletions src/backy/backends/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from abc import ABC, abstractmethod
from typing import IO, TYPE_CHECKING, Optional, Type

from structlog.stdlib import BoundLogger

import backy.backup
import backy.revision
if TYPE_CHECKING:
from backy.revision import Revision


class BackendException(IOError):
Expand All @@ -12,13 +13,11 @@ class BackendException(IOError):

class BackyBackend(ABC):
@abstractmethod
def __init__(
self, revision: "backy.revision.Revision", log: BoundLogger
) -> None:
def __init__(self, revision: "Revision", log: BoundLogger) -> None:
...

@abstractmethod
def open(self, mode="rb"):
def open(self, mode: str = "rb", parent: Optional["Revision"] = None) -> IO:
...

def purge(self) -> None:
Expand All @@ -27,5 +26,16 @@ def purge(self) -> None:
def verify(self) -> None:
pass

def scrub(self, backup: "backy.backup.Backup", type: str) -> None:
pass

def select_backend(type_: str) -> Type[BackyBackend]:
ctheune marked this conversation as resolved.
Show resolved Hide resolved
match type_:
case "chunked":
from backy.backends.chunked import ChunkedFileBackend

return ChunkedFileBackend
case "cowfile":
from backy.backends.cowfile import COWFileBackend

return COWFileBackend
case _:
raise ValueError(f"Invalid backend '{type_}'")
99 changes: 32 additions & 67 deletions src/backy/backends/chunked/__init__.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,37 @@
import os.path
from pathlib import Path
from typing import Optional, Set

from structlog.stdlib import BoundLogger

from backy.revision import Revision, Trust
from backy.utils import END, report_status

from .. import BackyBackend
from .chunk import Chunk
from .chunk import Chunk, Hash
from .file import File
from .store import Store


class ChunkedFileBackend(BackyBackend):
# Normally a new revision will be made by copying the last revision's file.
# We need to be able to not do this in case of converting from a different
# format.
clone_parent = True

# multiple Backends may share the same store
STORES: dict[str, Store] = dict()
STORES: dict[Path, Store] = dict()

def __init__(self, revision: Revision, log: BoundLogger):
assert revision.backend_type == "chunked"
self.backup = revision.backup
self.revision = revision
path = self.revision.backup.path + "/chunks"
path = self.backup.path / "chunks"
if path not in self.STORES:
self.STORES[path] = Store(
self.revision.backup.path + "/chunks", log
)
self.STORES[path] = Store(self.backup.path / "chunks", log)
self.store = self.STORES[path]
self.log = log.bind(subsystem="chunked")

def open(self, mode="rb"):
if "w" in mode or "+" in mode and self.clone_parent:
parent = self.revision.get_parent()
if parent and not os.path.exists(self.revision.filename):
with open(self.revision.filename, "wb") as new, open(
parent.filename, "rb"
) as old:
def open(self, mode: str = "rb", parent: Optional[Revision] = None) -> File: # type: ignore[override]
if "w" in mode or "+" in mode:
if parent and not self.revision.filename.exists():
with self.revision.filename.open(
"wb"
) as new, parent.filename.open("rb") as old:
# This is ok, this is just metadata, not the actual data.
new.write(old.read())
overlay = False
Expand All @@ -53,31 +47,33 @@ def open(self, mode="rb"):

return file

def purge(self):
def purge(self) -> None:
self.log.debug("purge")
self.store.users = []
used_chunks: Set[Hash] = set()
for revision in self.backup.history:
try:
self.store.users.append(
self.backup.backend_factory(revision, self.log).open()
)
except ValueError:
# Invalid format, like purging non-chunked with chunked backend
pass
self.store.purge()
if revision.backend_type != "chunked":
continue
used_chunks.update(
type(self)(revision, self.log).open()._mapping.values()
)
self.store.purge(used_chunks)

@report_status
def verify(self):
log = self.log.bind(revision_uuid=self.revision.uuid)
log.info("verify-start")
verified_chunks = set()
verified_chunks: Set[Hash] = set()

# Load verified chunks to avoid duplicate work
for revision in self.backup.clean_history:
if revision.trust != Trust.VERIFIED:
if (
revision.trust != Trust.VERIFIED
or revision.backend_type != "chunked"
):
continue
f = self.backup.backend_factory(revision, log).open()
verified_chunks.update(f._mapping.values())
verified_chunks.update(
type(self)(revision, self.log).open()._mapping.values()
)

log.debug("verify-loaded-chunks", verified_chunks=len(verified_chunks))

Expand All @@ -91,14 +87,14 @@ def verify(self):
if candidate in verified_chunks:
continue
try:
c = Chunk(f, 0, self.store, candidate)
c = Chunk(self.store, candidate)
c._read_existing()
except Exception:
log.exception("verify-error", chunk=candidate)
errors = True
if os.path.exists(self.store.chunk_path(candidate)):
if self.store.chunk_path(candidate).exists():
try:
os.unlink(self.store.chunk_path(candidate))
self.store.chunk_path(candidate).unlink()
except Exception:
log.exception("verify-remove-error", chunk=candidate)
# This is an optimisation: we can skip this revision, purge it
Expand All @@ -125,34 +121,3 @@ def verify(self):

yield END
yield None

def scrub(self, backup, type):
if type == "light":
return self.scrub_light(backup)
elif type == "deep":
return self.scrub_deep(backup)
else:
raise RuntimeError("Invalid scrubbing type {}".format(type))

def scrub_light(self, backup):
errors = 0
self.log.info("scrub-light")
for revision in backup.history:
self.log.info("scrub-light-rev", revision_uuid=revision.uuid)
backend = backup.backend_factory(revision, self.log).open()
for hash in backend._mapping.values():
if os.path.exists(backend.store.chunk_path(hash)):
continue
self.log.error(
"scrub-light-missing-chunk",
hash=hash,
revision_uuid=revision.uuid,
)
errors += 1
return errors

def scrub_deep(self, backup):
errors = self.scrub_light(backup)
self.log.info("scrub-deep")
errors += self.store.validate_chunks()
return errors
Loading
Loading