-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Build: handle symlinks in a centralized way #9800
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,32 +21,16 @@ | |
log = structlog.get_logger(__name__) | ||
|
||
|
||
def _assert_path_is_inside_docroot(path): | ||
resolved_path = path.absolute().resolve() | ||
docroot = Path(settings.DOCROOT).absolute() | ||
if not path.is_relative_to(docroot): | ||
log.error( | ||
"Suspicious operation outside the docroot directory.", | ||
path_resolved=str(resolved_path), | ||
) | ||
raise SuspiciousFileOperation(path) | ||
|
||
|
||
def safe_open(path, *args, allow_symlinks=False, base_path=None, **kwargs): | ||
def safe_open(path, *args, base_path=None, **kwargs): | ||
""" | ||
Wrapper around path.open() to check for symlinks. | ||
|
||
- Checks for symlinks to avoid symlink following vulnerabilities | ||
like GHSA-368m-86q9-m99w. | ||
- Checks that files aren't out of the DOCROOT directory. | ||
|
||
:param allow_symlinks: If `False` and the path is a symlink, raise `FileIsSymlink` | ||
This prevents reading the contents of other files users shouldn't have access to. | ||
|
||
:param base_path: If given, check that the path isn't located outside the base path | ||
(usually the directory where the project was cloned). | ||
It must be given if `allow_symlinks` is set to `True`. | ||
This prevents path traversal attacks (even when using symlinks). | ||
|
||
The extra *args and **kwargs will be passed to the open() method. | ||
|
||
|
@@ -55,43 +39,44 @@ def safe_open(path, *args, allow_symlinks=False, base_path=None, **kwargs): | |
This operation isn't safe to TocTou (Time-of-check to Time-of-use) attacks. | ||
Users shouldn't be able to change files while this operation is done. | ||
""" | ||
if allow_symlinks and not base_path: | ||
raise ValueError("base_path mut be given if symlinks are allowed.") | ||
|
||
docroot = Path(settings.DOCROOT).absolute() | ||
path = Path(path).absolute() | ||
|
||
# TODO: this `relative_path` can be improved to remove the first part: | ||
# /symlink-security-exploit/artifacts/latest/generic/passwd.txt | ||
# This is shown to the user currently. | ||
relative_path = str(path).replace(str(docroot.resolve()), '') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is the Path.relative_to method that should be used instead of replace. |
||
log.bind( | ||
path_resolved=str(path.absolute().resolve()), | ||
relative_path=relative_path, | ||
) | ||
|
||
if path.is_symlink(): | ||
symlink_path = str(path.readlink().resolve()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. resolve already follows symlinks, readlink isn't needed here. |
||
log.bind( | ||
symlink_path=symlink_path, | ||
) | ||
|
||
if path.exists() and not path.is_file(): | ||
raise FileIsNotRegularFile(path) | ||
|
||
if not allow_symlinks and path.is_symlink(): | ||
log.info("Skipping file becuase it's a symlink.") | ||
raise UnsupportedSymlinkFileError(path) | ||
|
||
# Expand symlinks. | ||
# Expand symlinks | ||
resolved_path = path.resolve() | ||
|
||
if allow_symlinks and base_path: | ||
base_path = Path(base_path).absolute() | ||
if not resolved_path.is_relative_to(base_path): | ||
# Trying to path traversal via a symlink, sneaky! | ||
log.info("Path traversal via symlink.") | ||
raise SymlinkOutsideBasePath(path) | ||
|
||
_assert_path_is_inside_docroot(resolved_path) | ||
# Don't follow symlinks outside DOCROOT or base_path | ||
if path.is_symlink() and (not resolved_path.is_relative_to(docroot) or (base_path and not resolved_path.is_relative_to(base_path))): | ||
raise UnsupportedSymlinkFileError(filepath=relative_path) | ||
|
||
return resolved_path.open(*args, **kwargs) | ||
|
||
|
||
# NOTE: I think `safe_copytree` is useful with `symlinks=True`, | ||
# and we shouldn't perform all the other checks here. | ||
# We are always using `safe_open` at the storage level, | ||
# and we can keep all these checks contained there (in one place) | ||
def safe_copytree(from_dir, to_dir, *args, **kwargs): | ||
""" | ||
Wrapper around shutil.copytree to check for symlinks. | ||
|
||
If any of the directories point to symlinks, cancel the operation. | ||
We don't want to copy contents outside of those directories. | ||
Wrapper around shutil.copytree to copy symlinks as is, instead of its contents. | ||
|
||
The extra *args and **kwargs will be passed to the copytree() function. | ||
|
||
|
@@ -100,21 +85,6 @@ def safe_copytree(from_dir, to_dir, *args, **kwargs): | |
This operation isn't safe to TocTou (Time-of-check to Time-of-use) attacks. | ||
Users shouldn't be able to change files while this operation is done. | ||
""" | ||
from_dir = Path(from_dir) | ||
to_dir = Path(to_dir) | ||
if from_dir.is_symlink() or to_dir.is_symlink(): | ||
log.info( | ||
"Not copying directory, one of the paths is a symlink.", | ||
from_dir=from_dir, | ||
from_dir_resolved=from_dir.resolve(), | ||
to_dir=to_dir, | ||
to_dir_resolved=to_dir.resolve(), | ||
) | ||
return False | ||
|
||
_assert_path_is_inside_docroot(from_dir) | ||
_assert_path_is_inside_docroot(to_dir) | ||
|
||
Comment on lines
-103
to
-117
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these checks are needed, since copytree will follow the symlink when doing the copy of the initial directory (there is a test for this as well), what it won't do is follow any symlinks inside that directory. And the check for _assert_path_is_inside_docroot is also useful, it will help us to prevent any directory traversal vulnerabilities being exploited. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
What is that initial directory you refer to here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The directory itself that you are going to copy ( |
||
return shutil.copytree( | ||
from_dir, | ||
to_dir, | ||
|
@@ -127,21 +97,11 @@ def safe_copytree(from_dir, to_dir, *args, **kwargs): | |
|
||
def safe_rmtree(path, *args, **kwargs): | ||
""" | ||
Wrapper around shutil.rmtree to check for symlinks. | ||
Wrapper around shutil.rmtree for security reasons. | ||
|
||
shutil.rmtree doens't follow symlinks by default, | ||
this function just logs in case users are trying to use symlinks. | ||
https://docs.python.org/3/library/shutil.html#shutil.rmtree | ||
|
||
The extra *args and **kwargs will be passed to the rmtree() function. | ||
""" | ||
path = Path(path) | ||
if path.is_symlink(): | ||
log.info( | ||
"Not deleting directory because it's a symlink.", | ||
path=str(path), | ||
resolved_path=path.resolve(), | ||
) | ||
return None | ||
_assert_path_is_inside_docroot(path) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. rmtree fails hard (OSError) when trying to delete a dir that points to a symlink, but |
||
return shutil.rmtree(path, *args, **kwargs) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,6 +48,7 @@ | |
MkDocsYAMLParseError, | ||
ProjectBuildsSkippedError, | ||
YAMLParseError, | ||
UnsupportedSymlinkFileError, | ||
) | ||
from readthedocs.storage import build_media_storage | ||
from readthedocs.telemetry.collectors import BuildDataCollector | ||
|
@@ -514,29 +515,13 @@ def on_failure(self, exc, task_id, args, kwargs, einfo): | |
self.data.build['success'] = False | ||
|
||
def on_success(self, retval, task_id, args, kwargs): | ||
html = self.data.outcomes['html'] | ||
search = self.data.outcomes['search'] | ||
localmedia = self.data.outcomes['localmedia'] | ||
pdf = self.data.outcomes['pdf'] | ||
epub = self.data.outcomes['epub'] | ||
|
||
time_before_store_build_artifacts = timezone.now() | ||
# Store build artifacts to storage (local or cloud storage) | ||
self.store_build_artifacts( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should probably do this change in another PR, doesn't look related to symlinks There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to move this call inside |
||
html=html, | ||
search=search, | ||
localmedia=localmedia, | ||
pdf=pdf, | ||
epub=epub, | ||
) | ||
log.info( | ||
"Store build artifacts finished.", | ||
time=(timezone.now() - time_before_store_build_artifacts).seconds, | ||
) | ||
|
||
# NOTE: we are updating the db version instance *only* when | ||
# HTML are built successfully. | ||
html = self.data.outcomes['html'] | ||
if html: | ||
localmedia = self.data.outcomes['localmedia'] | ||
pdf = self.data.outcomes['pdf'] | ||
epub = self.data.outcomes['epub'] | ||
try: | ||
api_v2.version(self.data.version.pk).patch( | ||
{ | ||
|
@@ -699,6 +684,26 @@ def execute(self): | |
finally: | ||
self.data.build_data = self.collect_build_data() | ||
|
||
html = self.data.outcomes['html'] | ||
search = self.data.outcomes['search'] | ||
localmedia = self.data.outcomes['localmedia'] | ||
pdf = self.data.outcomes['pdf'] | ||
epub = self.data.outcomes['epub'] | ||
|
||
time_before_store_build_artifacts = timezone.now() | ||
# Store build artifacts to storage (local or cloud storage) | ||
self.store_build_artifacts( | ||
html=html, | ||
search=search, | ||
localmedia=localmedia, | ||
pdf=pdf, | ||
epub=epub, | ||
) | ||
log.info( | ||
"Store build artifacts finished.", | ||
time=(timezone.now() - time_before_store_build_artifacts).seconds, | ||
) | ||
|
||
def collect_build_data(self): | ||
""" | ||
Collect data from the current build. | ||
|
@@ -834,9 +839,14 @@ def store_build_artifacts( | |
) | ||
try: | ||
build_media_storage.sync_directory(from_path, to_path) | ||
except UnsupportedSymlinkFileError: | ||
raise | ||
except Exception: | ||
# Ideally this should just be an IOError | ||
# but some storage backends unfortunately throw other errors | ||
# | ||
# NOTE: this should hard fail the build to avoid unexpected results: | ||
# missing pages, for example | ||
log.exception( | ||
'Error copying to storage (not failing build)', | ||
media_type=media_type, | ||
|
@@ -856,6 +866,9 @@ def store_build_artifacts( | |
except Exception: | ||
# Ideally this should just be an IOError | ||
# but some storage backends unfortunately throw other errors | ||
# | ||
# NOTE: this should hard fail the build to avoid unexpected results: | ||
# pages that should not exist anymore, for example | ||
log.exception( | ||
'Error deleting from storage (not failing build)', | ||
media_type=media_type, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we should follow symlinks when uploading files (github pages doesn't for example), and with our plans to test rclone, rclone doesn't support something like "follow symlinks but only inside this path", and we also need to check that the symlinks aren't outside the version path not just the docroot path.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I remove this check from here to allow communicating this decision, "do not follow symlinks", to the user clearly. Otherwise, with this check we are just ignoring symlinks without telling this to the user, which lead to unexpected behavior from a user perspective.