Skip to content

Commit

Permalink
Set safe default extraction filter for tar archives
Browse files Browse the repository at this point in the history
[PEP 706](https://peps.python.org/pep-0706/), first implemented in Python
3.11.4, mitigates some of the security issues of `TarFile.extract()` and
`TarFile.extractall()` by allowing callers to specify a keyword-only `filter`
parameter.
Set a safe default (`data_filter`) for the filter if available,
reverting to Python 3.11 behavior ('fully_trusted') otherwise; see
https://docs.python.org/3/library/tarfile.html#supporting-older-python-versions
  • Loading branch information
nsoranzo committed Jan 13, 2025
1 parent 9fc0f05 commit 5e39e34
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 5 deletions.
3 changes: 3 additions & 0 deletions lib/galaxy/tool_util/verify/interactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,9 @@ def test_data_download(self, tool_id, filename, mode="file", is_output=True, too
else:
# Galaxy < 21.01
with tarfile.open(fileobj=fileobj) as tar_contents:
tar_contents.extraction_filter = getattr(
tarfile, "data_filter", (lambda member, path: member)
)
tar_contents.extractall(path=path)
result = path
else:
Expand Down
6 changes: 3 additions & 3 deletions lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ def unpack_archive(archive_file, dest_dir):
with zipfile.ZipFile(archive_file, "r") as zip_archive:
zip_archive.extractall(path=dest_dir)
else:
archive_fp = tarfile.open(archive_file, mode="r")
archive_fp.extractall(path=dest_dir)
archive_fp.close()
with tarfile.open(archive_file, mode="r") as archive_fp:
archive_fp.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
archive_fp.extractall(path=dest_dir)


def main(options, args):
Expand Down
4 changes: 3 additions & 1 deletion lib/galaxy/util/compression_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,9 @@ def isfile(self, member: ArchiveMemberType) -> bool:
return False

def open_tar(self, filepath: StrPath, mode: Literal["a", "r", "w", "x"]) -> tarfile.TarFile:
return tarfile.open(filepath, mode, errorlevel=0)
tf = tarfile.open(filepath, mode, errorlevel=0)
tf.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
return tf

def open_zip(self, filepath: StrPath, mode: Literal["a", "r", "w", "x"]) -> zipfile.ZipFile:
return zipfile.ZipFile(filepath, mode)
Expand Down
1 change: 1 addition & 0 deletions lib/tool_shed/test/base/twilltestcase.py
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,7 @@ def add_tar_to_repository(self, repository: Repository, source: str, strings_dis
with self.cloned_repo(repository) as temp_directory:
full_source = TEST_DATA_REPO_FILES.joinpath(source)
tar = tar_open(full_source)
tar.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
tar.extractall(path=temp_directory)
tar.close()
commit_message = "Uploaded revision with added files from tar."
Expand Down
1 change: 1 addition & 0 deletions lib/tool_shed/util/repository_content_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def upload_tar(
undesirable_dirs_removed = len(check_results.undesirable_dirs)
filenames_in_archive = [ti.name for ti in check_results.valid]
# Extract the uploaded tar to the load_point within the repository hierarchy.
tar.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
tar.extractall(path=full_path, members=check_results.valid)
tar.close()
try:
Expand Down
4 changes: 3 additions & 1 deletion test/unit/tool_shed/test_shed_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ def community_file_dir():
response = requests.get(URL)
response.raise_for_status()
b = BytesIO(response.content)
tarfile.open(fileobj=b, mode="r:gz").extractall(extracted_archive_dir)
with tarfile.open(fileobj=b, mode="r:gz") as tar:
tar.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
tar.extractall(extracted_archive_dir)
try:
yield extracted_archive_dir
finally:
Expand Down

0 comments on commit 5e39e34

Please sign in to comment.