From 5740ba048bc0d84afcb5a7c926dd31979faba9ef Mon Sep 17 00:00:00 2001 From: IAlibay Date: Fri, 3 Nov 2023 18:29:32 +0000 Subject: [PATCH 01/10] optional biopython --- package/MDAnalysis/analysis/align.py | 24 ++++++++++++++++++++---- package/MDAnalysis/core/topologyattrs.py | 21 +++++++++++++++++++-- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index 5b4bb145efc..cbe51762f35 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -196,10 +196,14 @@ import numpy as np -import Bio.SeqIO -import Bio.AlignIO -import Bio.Align -import Bio.Align.Applications +try: + import Bio.AlignIO + import Bio.Align + import Bio.Align.Applications +except ImportError: + HAS_BIOPYTHON = False +else: + HAS_BIOPYTHON = True import MDAnalysis as mda import MDAnalysis.lib.qcprot as qcp @@ -1058,6 +1062,12 @@ def sequence_alignment(mobile, reference, match_score=2, mismatch_penalty=-1, :class:`Bio.Align.PairwiseAligner`. """ + if not HAS_BIOPYTHON: + errmsg = ("The `sequence_alignment` method requires an installation " + "biopython. Please install biopython to use this method: " + "https://biopython.org/wiki/Download") + raise ImportError(errmsg) + aligner = Bio.Align.PairwiseAligner( mode="global", match_score=match_score, @@ -1168,6 +1178,12 @@ def fasta2select(fastafilename, is_aligned=False, the current working directory. """ + if not HAS_BIOPYTHON: + errmsg = ("The `sequence_alignment` method requires an installation " + "of biopython. 
Please install biopython to use this method: " + "https://biopython.org/wiki/Download") + raise ImportError(errmsg) + if is_aligned: logger.info("Using provided alignment {}".format(fastafilename)) with open(fastafilename) as fasta: diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index cfa99dd55a6..0c36ebac199 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -48,8 +48,14 @@ import textwrap from types import MethodType -import Bio.Seq -import Bio.SeqRecord +try: + import Bio.Seq + import Bio.SeqRecord +except ImportError: + HAS_BIOPYTHON = False +else: + HAS_BIOPYTHON = True + import numpy as np from ..lib.util import (cached, convert_aa_code, iterable, warn_if_not_unique, @@ -2810,9 +2816,20 @@ def sequence(self, **kwargs): :exc:`TypeError` if an unknown *format* is selected. + :exc:`ImportError` is the biopython package is not available. + .. versionadded:: 0.9.0 + .. versionchanged:: 2.7.0 + Biopython is now an optional dependency """ + if not HAS_BIOPYTHON: + errmsg = ("The `sequence_alignment` method requires an " + "installation of biopython. 
Please install " + "biopython to use this method: " + "https://biopython.org/wiki/Download") + raise ImportError(errmsg) + formats = ('string', 'Seq', 'SeqRecord') format = kwargs.pop("format", "SeqRecord") From bbcf6d753c61b10a8b188ad84a786cb09f6e1f0a Mon Sep 17 00:00:00 2001 From: IAlibay Date: Fri, 3 Nov 2023 19:13:10 +0000 Subject: [PATCH 02/10] handle tests --- .github/actions/setup-deps/action.yaml | 6 ++-- package/MDAnalysis/analysis/align.py | 20 +++++++++++-- package/pyproject.toml | 2 +- package/setup.py | 2 +- .../MDAnalysisTests/analysis/test_align.py | 30 +++++++++++++++---- .../MDAnalysisTests/core/test_residuegroup.py | 13 ++++++-- 6 files changed, 59 insertions(+), 14 deletions(-) diff --git a/.github/actions/setup-deps/action.yaml b/.github/actions/setup-deps/action.yaml index 3152caabe76..e0e0740db93 100644 --- a/.github/actions/setup-deps/action.yaml +++ b/.github/actions/setup-deps/action.yaml @@ -17,8 +17,6 @@ inputs: description: 'use micromamba instead of conda' default: false # conda-installed min dependencies - biopython: - default: 'biopython>=1.80' codecov: default: 'codecov' cython: @@ -54,6 +52,8 @@ inputs: tqdm: default: 'tqdm>=4.43.0' # conda-installed optional dependencies + biopython: + default: 'biopython>=1.80' chemfiles-python: default: 'chemfiles-python>=0.9' clustalw: @@ -106,7 +106,6 @@ runs: shell: bash -l {0} env: CONDA_MIN_DEPS: | - ${{ inputs.biopython }} ${{ inputs.codecov }} ${{ inputs.cython }} ${{ inputs.fasteners }} @@ -124,6 +123,7 @@ runs: ${{ inputs.threadpoolctl }} ${{ inputs.tqdm }} CONDA_OPT_DEPS: | + ${{ inputs.biopython }} ${{ inputs.chemfiles-python }} ${{ inputs.clustalw }} ${{ inputs.distopia }} diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index cbe51762f35..85b167ad3ae 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -1022,6 +1022,11 @@ def sequence_alignment(mobile, reference, match_score=2, mismatch_penalty=-1, 
Tuple of top sequence matching output `('Sequence A', 'Sequence B', score, begin, end)` + Raises + ------ + ImportError + If optional dependency Biopython is not available. + Notes ----- If you prefer to work directly with :mod:`Bio.Align` objects then you can @@ -1061,10 +1066,13 @@ def sequence_alignment(mobile, reference, match_score=2, mismatch_penalty=-1, Replace use of deprecated :func:`Bio.pairwise2.align.globalms` with :class:`Bio.Align.PairwiseAligner`. + .. versionchanged:: 2.7.0 + Biopython is now an optional dependency which this method requires. + """ if not HAS_BIOPYTHON: errmsg = ("The `sequence_alignment` method requires an installation " - "biopython. Please install biopython to use this method: " + "of biopython. Please install biopython to use this method: " "https://biopython.org/wiki/Download") raise ImportError(errmsg) @@ -1169,6 +1177,12 @@ def fasta2select(fastafilename, is_aligned=False, :func:`sequence_alignment`, which does not require external programs. + + Raises + ------ + ImportError + If optional dependency biopython is not available. + .. _ClustalW: http://www.clustal.org/ .. _STAMP: http://www.compbio.dundee.ac.uk/manuals/stamp.4.2/ @@ -1176,10 +1190,12 @@ def fasta2select(fastafilename, is_aligned=False, .. versionchanged:: 1.0.0 Passing `alnfilename` or `treefilename` as `None` will create a file in the current working directory. + .. versionchanged:: 2.7.0 + Biopython is now an optional dependency which this method requires. """ if not HAS_BIOPYTHON: - errmsg = ("The `sequence_alignment` method requires an installation " + errmsg = ("The `fasta2select` method requires an installation " "of biopython. 
Please install biopython to use this method: " "https://biopython.org/wiki/Download") raise ImportError(errmsg) diff --git a/package/pyproject.toml b/package/pyproject.toml index 1110c7e8372..387b98824ea 100644 --- a/package/pyproject.toml +++ b/package/pyproject.toml @@ -36,7 +36,6 @@ maintainers = [ requires-python = ">=3.9" dependencies = [ 'numpy>=1.22.3', - 'biopython>=1.80', 'networkx>=2.0', 'GridDataFormats>=0.4.0', 'mmtf-python>=1.0.0', @@ -86,6 +85,7 @@ extra_formats = [ "rdkit>=2020.03.1", ] analysis = [ + "biopython>=1.80", "seaborn", "scikit-learn", "tidynamics>=1.0.0", diff --git a/package/setup.py b/package/setup.py index c48bbc56e85..48059761052 100755 --- a/package/setup.py +++ b/package/setup.py @@ -594,7 +594,6 @@ def long_description(readme): install_requires = [ 'numpy>=1.22.3', - 'biopython>=1.80', 'networkx>=2.0', 'GridDataFormats>=0.4.0', 'mmtf-python>=1.0.0', @@ -660,6 +659,7 @@ def long_description(readme): 'parmed', # ParmEd converter ], 'analysis': [ + 'biopython>=1.80', # sequence generation & alignment 'seaborn', # for annotated heat map and nearest neighbor # plotting in PSA 'scikit-learn', # For clustering and dimensionality diff --git a/testsuite/MDAnalysisTests/analysis/test_align.py b/testsuite/MDAnalysisTests/analysis/test_align.py index 665abf18a1f..e739a4042a2 100644 --- a/testsuite/MDAnalysisTests/analysis/test_align.py +++ b/testsuite/MDAnalysisTests/analysis/test_align.py @@ -24,6 +24,7 @@ import MDAnalysis as mda import MDAnalysis.analysis.align as align +from MDAnalysis.analysis.align import HAS_BIOPYTHON import MDAnalysis.analysis.rms as rms import os import numpy as np @@ -496,10 +497,17 @@ def test_average_structure_in_memory(self, universe): assert avg.filename is None -class TestAlignmentProcessing(object): +class TestAlignmentProcessing: seq = FASTA error_msg = "selection string has unexpected length" + @pytest.mark.skipif(HAS_BIOPYTHON, reason='biopython is installed') + def test_importerror_biopython(self): + errmsg 
= "The `fasta2select` method requires an installation" + with pytest.raises(ImportError, match=errmsg): + _ = align.fasta2select(self.seq, is_aligned=True) + + @pytest.mark.skipif(not HAS_BIOPYTHON, reason='requires biopython') def test_fasta2select_aligned(self): """test align.fasta2select() on aligned FASTA (Issue 112)""" sel = align.fasta2select(self.seq, is_aligned=True) @@ -507,8 +515,9 @@ def test_fasta2select_aligned(self): assert len(sel['reference']) == 30623, self.error_msg assert len(sel['mobile']) == 30623, self.error_msg - @pytest.mark.skipif(executable_not_found("clustalw2"), - reason="Test skipped because clustalw2 executable not found") + @pytest.mark.skipif( + executable_not_found("clustalw2") and not HAS_BIOPYTHON, + reason="Test skipped because clustalw2 executable not found") def test_fasta2select_file(self, tmpdir): """test align.fasta2select() on a non-aligned FASTA with default filenames""" @@ -518,8 +527,9 @@ def test_fasta2select_file(self, tmpdir): assert len(sel['reference']) == 23080, self.error_msg assert len(sel['mobile']) == 23090, self.error_msg - @pytest.mark.skipif(executable_not_found("clustalw2"), - reason="Test skipped because clustalw2 executable not found") + @pytest.mark.skipif( + executable_not_found("clustalw2") and not HAS_BIOPYTHON, + reason="Test skipped because clustalw2 executable not found") def test_fasta2select_ClustalW(self, tmpdir): """MDAnalysis.analysis.align: test fasta2select() with ClustalW (Issue 113)""" @@ -533,6 +543,7 @@ def test_fasta2select_ClustalW(self, tmpdir): assert len(sel['reference']) == 23080, self.error_msg assert len(sel['mobile']) == 23090, self.error_msg + @pytest.mark.skipif(not HAS_BIOPYTHON, reason='requires biopython') def test_fasta2select_resids(self, tmpdir): """test align.fasta2select() when resids provided (Issue #3124)""" resids = [x for x in range(705)] @@ -554,6 +565,14 @@ def atomgroups(): mobile = universe.select_atoms("resid 122-159") return reference, mobile + 
@pytest.mark.skipif(HAS_BIOPYTHON, reason='biopython installed') + def test_biopython_import_error(self, atomgroups): + ref, mob = atomgroups + errmsg = "The `sequence_alignment` method requires an installation of" + with pytest.raises(ImportError, match=errmsg): + align.sequence_alignment(mob, ref) + + @pytest.mark.skipif(not HAS_BIOPYTHON, reason='requires biopython') @pytest.mark.filterwarnings("ignore:`sequence_alignment` is deprecated!") def test_sequence_alignment(self, atomgroups): reference, mobile = atomgroups @@ -569,6 +588,7 @@ def test_sequence_alignment(self, atomgroups): assert score == pytest.approx(54.6) assert_array_equal([begin, end], [0, reference.n_residues]) + @pytest.mark.skipif(not HAS_BIOPYTHON, reason='requires biopython') def test_sequence_alignment_deprecation(self, atomgroups): reference, mobile = atomgroups wmsg = ("`sequence_alignment` is deprecated!\n" diff --git a/testsuite/MDAnalysisTests/core/test_residuegroup.py b/testsuite/MDAnalysisTests/core/test_residuegroup.py index 1b661964b72..21091c817d8 100644 --- a/testsuite/MDAnalysisTests/core/test_residuegroup.py +++ b/testsuite/MDAnalysisTests/core/test_residuegroup.py @@ -26,11 +26,20 @@ import pytest import MDAnalysis as mda - +from MDAnalysis.core.topologyattrs import HAS_BIOPYTHON from MDAnalysisTests.datafiles import PSF, DCD -class TestSequence(object): +@pytest.mark.skipif(HAS_BIOPYTHON, reason="biopython is installed") +def test_sequence_import_error(): + p = mda.Universe(PSF, DCD).select_atoms('protein') + errmsg = "The `sequence_alignment` method requires an installation" + with pytest.raises(ImportError, match=errmsg): + _ = p.residues.sequence(format="string") + + +@pytest.mark.skipif(not HAS_BIOPYTHON, reason='requires biopython') +class TestSequence: # all tests are done with the AdK system (PSF and DCD) sequence: # http://www.uniprot.org/uniprot/P69441.fasta # >sp|P69441|KAD_ECOLI Adenylate kinase OS=Escherichia coli (strain K12) GN=adk PE=1 SV=1 From 
a936e613630d4744d390c2a2c19475a446833e97 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Sat, 4 Nov 2023 08:20:27 +0000 Subject: [PATCH 03/10] or not and --- testsuite/MDAnalysisTests/analysis/test_align.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_align.py b/testsuite/MDAnalysisTests/analysis/test_align.py index e739a4042a2..e6ff3acb4cc 100644 --- a/testsuite/MDAnalysisTests/analysis/test_align.py +++ b/testsuite/MDAnalysisTests/analysis/test_align.py @@ -516,7 +516,7 @@ def test_fasta2select_aligned(self): assert len(sel['mobile']) == 30623, self.error_msg @pytest.mark.skipif( - executable_not_found("clustalw2") and not HAS_BIOPYTHON, + executable_not_found("clustalw2") or not HAS_BIOPYTHON, reason="Test skipped because clustalw2 executable not found") def test_fasta2select_file(self, tmpdir): """test align.fasta2select() on a non-aligned FASTA with default @@ -528,7 +528,7 @@ def test_fasta2select_file(self, tmpdir): assert len(sel['mobile']) == 23090, self.error_msg @pytest.mark.skipif( - executable_not_found("clustalw2") and not HAS_BIOPYTHON, + executable_not_found("clustalw2") or not HAS_BIOPYTHON, reason="Test skipped because clustalw2 executable not found") def test_fasta2select_ClustalW(self, tmpdir): """MDAnalysis.analysis.align: test fasta2select() with ClustalW From bbb1f3cf32d5ca84964d3ffcb47582323a5f6ce8 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sun, 5 Nov 2023 08:22:07 +0000 Subject: [PATCH 04/10] update biopython -> Biopython --- package/MDAnalysis/analysis/align.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index 85b167ad3ae..78e811233b3 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -1072,8 +1072,8 @@ def sequence_alignment(mobile, reference, match_score=2, mismatch_penalty=-1, """ if not HAS_BIOPYTHON: errmsg = ("The 
`sequence_alignment` method requires an installation " - "of biopython. Please install biopython to use this method: " - "https://biopython.org/wiki/Download") + "of `Biopython`. Please install biopython to use this " + "method: https://biopython.org/wiki/Download") raise ImportError(errmsg) aligner = Bio.Align.PairwiseAligner( @@ -1181,7 +1181,7 @@ def fasta2select(fastafilename, is_aligned=False, Raises ------ ImportError - If optional dependency biopython is not available. + If optional dependency Biopython is not available. .. _ClustalW: http://www.clustal.org/ @@ -1196,8 +1196,8 @@ def fasta2select(fastafilename, is_aligned=False, """ if not HAS_BIOPYTHON: errmsg = ("The `fasta2select` method requires an installation " - "of biopython. Please install biopython to use this method: " - "https://biopython.org/wiki/Download") + "of `Biopython`. Please install `Biopython` to use this " + "method: https://biopython.org/wiki/Download") raise ImportError(errmsg) if is_aligned: From b7d94b69578d9c0d191120677dc7f9de57d00c2e Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sun, 5 Nov 2023 08:23:04 +0000 Subject: [PATCH 05/10] one more --- package/MDAnalysis/analysis/align.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index 78e811233b3..6b85b5d533f 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -1072,7 +1072,7 @@ def sequence_alignment(mobile, reference, match_score=2, mismatch_penalty=-1, """ if not HAS_BIOPYTHON: errmsg = ("The `sequence_alignment` method requires an installation " - "of `Biopython`. Please install biopython to use this " + "of `Biopython`. 
Please install `Biopython` to use this " "method: https://biopython.org/wiki/Download") raise ImportError(errmsg) From 9f94d1b10f8b878647555d081481d4c218cc9476 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sun, 5 Nov 2023 08:23:35 +0000 Subject: [PATCH 06/10] one more --- package/MDAnalysis/core/topologyattrs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 0c36ebac199..3ca8b279d25 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -2825,8 +2825,8 @@ def sequence(self, **kwargs): """ if not HAS_BIOPYTHON: errmsg = ("The `sequence_alignment` method requires an " - "installation of biopython. Please install " - "biopython to use this method: " + "installation of `Biopython`. Please install " + "`Biopython` to use this method: " "https://biopython.org/wiki/Download") raise ImportError(errmsg) From 5529a5d7ce7f627f9e58af80d3374d735a637638 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Sun, 5 Nov 2023 08:56:10 +0000 Subject: [PATCH 07/10] Add test shim to check that we are covering it in CI --- testsuite/MDAnalysisTests/core/test_residuegroup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/testsuite/MDAnalysisTests/core/test_residuegroup.py b/testsuite/MDAnalysisTests/core/test_residuegroup.py index 21091c817d8..844b01afa18 100644 --- a/testsuite/MDAnalysisTests/core/test_residuegroup.py +++ b/testsuite/MDAnalysisTests/core/test_residuegroup.py @@ -34,6 +34,7 @@ def test_sequence_import_error(): p = mda.Universe(PSF, DCD).select_atoms('protein') errmsg = "The `sequence_alignment` method requires an installation" + raise ValueError('test test test') with pytest.raises(ImportError, match=errmsg): _ = p.residues.sequence(format="string") From 70e3e2b8952b22fab9bade0c7cc2a4a7dcc0f1e6 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Sun, 5 Nov 2023 11:07:18 +0000 Subject: [PATCH 08/10] remove test shim 
--- testsuite/MDAnalysisTests/core/test_residuegroup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/core/test_residuegroup.py b/testsuite/MDAnalysisTests/core/test_residuegroup.py index 844b01afa18..21091c817d8 100644 --- a/testsuite/MDAnalysisTests/core/test_residuegroup.py +++ b/testsuite/MDAnalysisTests/core/test_residuegroup.py @@ -34,7 +34,6 @@ def test_sequence_import_error(): p = mda.Universe(PSF, DCD).select_atoms('protein') errmsg = "The `sequence_alignment` method requires an installation" - raise ValueError('test test test') with pytest.raises(ImportError, match=errmsg): _ = p.residues.sequence(format="string") From 09184ddf6e3fb0554cdffa1024b23a923d1b56bd Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sun, 5 Nov 2023 20:32:39 +0000 Subject: [PATCH 09/10] update changelog --- package/CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/package/CHANGELOG b/package/CHANGELOG index 27456adf64d..1a0864d577c 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -44,6 +44,7 @@ Enhancements PR #4284) Changes + * Biopython is now an optional dependency (Issue #3820, PR #4332) * High memory tests (enabled through the environment variable `ENABLE_HIGH_MEM_UNIT_TESTS` are now only enabled if the environment variable is set to "true") (PR #4295) From ffa3a57a059f317bc4ea3f5f6be2d530c213fb9b Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Tue, 7 Nov 2023 09:18:34 +0000 Subject: [PATCH 10/10] Apply suggestions from code review Co-authored-by: Rocco Meli --- package/MDAnalysis/core/topologyattrs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 3ca8b279d25..6af35b389d7 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -2816,12 +2816,12 @@ def sequence(self, **kwargs): :exc:`TypeError` if an unknown *format* is selected. 
- :exc:`ImportError` is the biopython package is not available. + :exc:`ImportError` if the Biopython package is not available. .. versionadded:: 0.9.0 .. versionchanged:: 2.7.0 - Biopython is now an optional dependency + Biopython is now an optional dependency """ if not HAS_BIOPYTHON: errmsg = ("The `sequence_alignment` method requires an "