From 67fb57da20d2264193182e85323e949975bd897f Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 11 Jan 2023 17:37:39 -0500 Subject: [PATCH] Tests --- .circleci/config.yml | 2 + readthedocs/builds/storage.py | 11 +++- .../rtd_tests/tests/test_build_storage.py | 61 +++++++++++++++++++ readthedocs/storage/rclone.py | 55 +++++++++++------ readthedocs/storage/s3_storage.py | 3 +- 5 files changed, 111 insertions(+), 21 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9facf43fdfb..921cdd080fe 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,6 +18,8 @@ jobs: - checkout - run: git submodule sync - run: git submodule update --init + - run: sudo apt update + - run: sudo apt install -y rclone - run: pip install --user 'tox<5' - run: tox -e py310 - codecov/upload diff --git a/readthedocs/builds/storage.py b/readthedocs/builds/storage.py index 9d8c6780f0f..94f417b6269 100644 --- a/readthedocs/builds/storage.py +++ b/readthedocs/builds/storage.py @@ -8,7 +8,7 @@ from storages.utils import get_available_overwrite_name, safe_join from readthedocs.core.utils.filesystem import safe_open -from readthedocs.storage.rclone import RClone +from readthedocs.storage.rclone import RCloneLocal log = structlog.get_logger(__name__) @@ -157,10 +157,13 @@ def sync_directory(self, source, destination): @cached_property def _rclone(self): - return RClone() + raise NotImplementedError def rclone_sync(self, source, destination): """Sync a directory recursively to storage using rclone sync.""" + if destination in ("", "/"): + raise SuspiciousFileOperation("Syncing all storage cannot be right") + return self._rclone.sync(source, destination) def join(self, directory, filepath): @@ -197,6 +200,10 @@ def __init__(self, **kwargs): super().__init__(location) + @cached_property + def _rclone(self): + return RCloneLocal(location=self.location) + def get_available_name(self, name, max_length=None): """ A hack to overwrite by default with the FileSystemStorage. diff --git a/readthedocs/rtd_tests/tests/test_build_storage.py b/readthedocs/rtd_tests/tests/test_build_storage.py index 8724dbe0669..1344cb1eae9 100644 --- a/readthedocs/rtd_tests/tests/test_build_storage.py +++ b/readthedocs/rtd_tests/tests/test_build_storage.py @@ -1,7 +1,10 @@ import os import shutil import tempfile +from pathlib import Path +import pytest +from django.core.exceptions import SuspiciousFileOperation from django.test import TestCase, override_settings from readthedocs.builds.storage import BuildMediaFileSystemStorage @@ -118,3 +121,61 @@ def test_walk(self): self.assertEqual(top, 'files/api') self.assertCountEqual(dirs, []) self.assertCountEqual(files, ['index.html']) + + def test_rclone_sync(self): + tmp_files_dir = Path(tempfile.mkdtemp()) / "files" + shutil.copytree(files_dir, tmp_files_dir, symlinks=True) + storage_dir = "files" + + tree = [ + ("api", ["index.html"]), + "api.fjson", + "conf.py", + "test.html", + ] + with override_settings(DOCROOT=tmp_files_dir): + self.storage.rclone_sync(tmp_files_dir, storage_dir) + self.assertFileTree(storage_dir, tree) + + tree = [ + ("api", ["index.html"]), + "conf.py", + "test.html", + ] + (tmp_files_dir / "api.fjson").unlink() + with override_settings(DOCROOT=tmp_files_dir): + self.storage.rclone_sync(tmp_files_dir, storage_dir) + self.assertFileTree(storage_dir, tree) + + tree = [ + "conf.py", + "test.html", + ] + shutil.rmtree(tmp_files_dir / "api") + with override_settings(DOCROOT=tmp_files_dir): + self.storage.rclone_sync(tmp_files_dir, storage_dir) + self.assertFileTree(storage_dir, tree) + + @pytest.mark.skip( + "Waiting for https://github.com/readthedocs/readthedocs.org/pull/9890" + ) + def test_rclone_sync_source_symlink(self): + tmp_dir = Path(tempfile.mkdtemp()) + tmp_symlink_dir = Path(tempfile.mkdtemp()) / "files" + tmp_symlink_dir.symlink_to(tmp_dir) + + with override_settings(DOCROOT=tmp_dir): + with pytest.raises(SuspiciousFileOperation, match="symbolic link"): + self.storage.rclone_sync(tmp_symlink_dir, "files") + + @pytest.mark.skip( + "Waiting for https://github.com/readthedocs/readthedocs.org/pull/9890" + ) + def test_rclone_sync_source_outside_docroot(self): + tmp_dir = Path(tempfile.mkdtemp()) + tmp_docroot = Path(tempfile.mkdtemp()) / "docroot" + tmp_docroot.mkdir() + + with override_settings(DOCROOT=tmp_docroot): + with pytest.raises(SuspiciousFileOperation, match="outside the docroot"): + self.storage.rclone_sync(tmp_dir, "files") diff --git a/readthedocs/storage/rclone.py b/readthedocs/storage/rclone.py index db6ebe46ded..4730fc5d6cf 100644 --- a/readthedocs/storage/rclone.py +++ b/readthedocs/storage/rclone.py @@ -4,15 +4,16 @@ See https://rclone.org/docs. """ -import os import subprocess import structlog +from django.utils._os import safe_join as safe_join_fs +from storages.utils import safe_join log = structlog.get_logger(__name__) -class RClone: +class BaseRClone: """ RClone base class. @@ -24,8 +25,7 @@ class RClone: This base class allows you to use the local file system as remote. :param remote_type: You can see the full list of supported providers at - https://rclone.org/#providers. Defaults to use the local filesystem - (https://rclone.org/local/). + https://rclone.org/#providers. :param rclone_bin: Binary name or path to the rclone binary. Defaults to ``rclone``. :param default_options: Options passed to the rclone command. @@ -33,10 +33,10 @@ class RClone: Useful to pass secrets to the command, since all arguments and options will be logged. """ - remote_type = "local" + remote_type = None rclone_bin = "rclone" default_options = [ - # Number of file transfers to run in parallel. + # Number of file transfers to run in parallel. "--transfers=8", # Skip based on checksum (if available) & size, not mod-time & size. "--checksum", @@ -44,6 +44,11 @@ class RClone: ] env_vars = {} + # pylint: disable=no-self-use + def _build_target_path(self, path): + """Build the final target path for the remote.""" + return path + def build_target(self, path): """ Build the proper target using the current remote type. @@ -54,6 +59,7 @@ def build_target(self, path): :param path: Path to the remote target. """ + path = self._build_target_path(path) return f":{self.remote_type}:{path}" def execute(self, action, args, options=None): @@ -73,8 +79,8 @@ def execute(self, action, args, options=None): "--", *args, ] - env = os.environ.copy() - # env = {} + # env = os.environ.copy() + env = {} env.update(self.env_vars) log.info("Executing rclone command.", command=command) log.debug("env", env=env) @@ -106,7 +112,28 @@ def sync(self, source, destination): return self.execute("sync", args=[source, self.build_target(destination)]) -class RCloneS3Remote(RClone): +class RCloneLocal(BaseRClone): + + """ + RClone remote implementation for the local file system. + + Used for local testing only. + + See https://rclone.org/local/. + + :param location: Root directory where the files will be stored. + """ + + remote_type = "local" + + def __init__(self, location): + self.location = location + + def _build_target_path(self, path): + return safe_join_fs(self.location, path) + + +class RCloneS3Remote(BaseRClone): """ RClone remote implementation for S3. @@ -139,11 +166,6 @@ def __init__( acl=None, endpoint=None, ): - super().__init__() - - # When using minion, the region is set to None. - region = region or "" - # rclone S3 options passed as env vars. # https://rclone.org/s3/#standard-options. self.env_vars = { @@ -159,7 +181,6 @@ def __init__( self.env_vars["RCLONE_S3_ENDPOINT"] = endpoint self.bucket_name = bucket_name - def build_target(self, path): + def _build_target_path(self, path): """Overridden to prepend the bucket name to the path.""" - path = f"{self.bucket_name}/{path}" - return super().build_target(path) + return safe_join(self.bucket_name, path) diff --git a/readthedocs/storage/s3_storage.py b/readthedocs/storage/s3_storage.py index f6b06b4a27c..d0d8e42cccb 100644 --- a/readthedocs/storage/s3_storage.py +++ b/readthedocs/storage/s3_storage.py @@ -22,7 +22,6 @@ class S3BuildMediaStorageMixin(BuildMediaStorageMixin, S3Boto3Storage): - @cached_property def _rclone(self): provider = "AWS" @@ -35,7 +34,7 @@ def _rclone(self): bucket_name=self.bucket_name, access_key_id=self.access_key, secret_acces_key=self.secret_key, - region=self.region_name, + region=self.region_name or "", acl=self.default_acl, endpoint=self.endpoint_url, provider=provider,