Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make biopython an optional dependency #4332

Merged
merged 11 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/actions/setup-deps/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ inputs:
description: 'use micromamba instead of conda'
default: false
# conda-installed min dependencies
biopython:
default: 'biopython>=1.80'
codecov:
default: 'codecov'
cython:
Expand Down Expand Up @@ -54,6 +52,8 @@ inputs:
tqdm:
default: 'tqdm>=4.43.0'
# conda-installed optional dependencies
biopython:
default: 'biopython>=1.80'
chemfiles-python:
default: 'chemfiles-python>=0.9'
clustalw:
Expand Down Expand Up @@ -106,7 +106,6 @@ runs:
shell: bash -l {0}
env:
CONDA_MIN_DEPS: |
${{ inputs.biopython }}
${{ inputs.codecov }}
${{ inputs.cython }}
${{ inputs.fasteners }}
Expand All @@ -124,6 +123,7 @@ runs:
${{ inputs.threadpoolctl }}
${{ inputs.tqdm }}
CONDA_OPT_DEPS: |
${{ inputs.biopython }}
${{ inputs.chemfiles-python }}
${{ inputs.clustalw }}
${{ inputs.distopia }}
Expand Down
40 changes: 36 additions & 4 deletions package/MDAnalysis/analysis/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,14 @@

import numpy as np

import Bio.SeqIO
import Bio.AlignIO
import Bio.Align
import Bio.Align.Applications
try:
import Bio.AlignIO
import Bio.Align
import Bio.Align.Applications
RMeli marked this conversation as resolved.
Show resolved Hide resolved
except ImportError:
HAS_BIOPYTHON = False

Check warning on line 204 in package/MDAnalysis/analysis/align.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/analysis/align.py#L203-L204

Added lines #L203 - L204 were not covered by tests
else:
HAS_BIOPYTHON = True

import MDAnalysis as mda
import MDAnalysis.lib.qcprot as qcp
Expand Down Expand Up @@ -1018,6 +1022,11 @@
Tuple of top sequence matching output `('Sequence A', 'Sequence B', score,
begin, end)`

Raises
------
ImportError
If optional dependency Biopython is not available.

Notes
-----
If you prefer to work directly with :mod:`Bio.Align` objects then you can
Expand Down Expand Up @@ -1057,7 +1066,16 @@
Replace use of deprecated :func:`Bio.pairwise2.align.globalms` with
:class:`Bio.Align.PairwiseAligner`.

.. versionchanged:: 2.7.0
Biopython is now an optional dependency which this method requires.

"""
if not HAS_BIOPYTHON:
errmsg = ("The `sequence_alignment` method requires an installation "

Check warning on line 1074 in package/MDAnalysis/analysis/align.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/analysis/align.py#L1074

Added line #L1074 was not covered by tests
"of `Biopython`. Please install `Biopython` to use this "
"method: https://biopython.org/wiki/Download")
raise ImportError(errmsg)

Check warning on line 1077 in package/MDAnalysis/analysis/align.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/analysis/align.py#L1077

Added line #L1077 was not covered by tests

aligner = Bio.Align.PairwiseAligner(
mode="global",
match_score=match_score,
Expand Down Expand Up @@ -1159,15 +1177,29 @@
:func:`sequence_alignment`, which does not require external
programs.


Raises
------
ImportError
If optional dependency Biopython is not available.


.. _ClustalW: http://www.clustal.org/
.. _STAMP: http://www.compbio.dundee.ac.uk/manuals/stamp.4.2/

.. versionchanged:: 1.0.0
Passing `alnfilename` or `treefilename` as `None` will create a file in
the current working directory.
.. versionchanged:: 2.7.0
Biopython is now an optional dependency which this method requires.

"""
if not HAS_BIOPYTHON:
errmsg = ("The `fasta2select` method requires an installation "

Check warning on line 1198 in package/MDAnalysis/analysis/align.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/analysis/align.py#L1198

Added line #L1198 was not covered by tests
"of `Biopython`. Please install `Biopython` to use this "
"method: https://biopython.org/wiki/Download")
raise ImportError(errmsg)

Check warning on line 1201 in package/MDAnalysis/analysis/align.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/analysis/align.py#L1201

Added line #L1201 was not covered by tests

if is_aligned:
logger.info("Using provided alignment {}".format(fastafilename))
with open(fastafilename) as fasta:
Expand Down
21 changes: 19 additions & 2 deletions package/MDAnalysis/core/topologyattrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,14 @@
import textwrap
from types import MethodType

import Bio.Seq
import Bio.SeqRecord
try:
import Bio.Seq
import Bio.SeqRecord
except ImportError:
HAS_BIOPYTHON = False

Check warning on line 55 in package/MDAnalysis/core/topologyattrs.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/core/topologyattrs.py#L54-L55

Added lines #L54 - L55 were not covered by tests
else:
HAS_BIOPYTHON = True

import numpy as np

from ..lib.util import (cached, convert_aa_code, iterable, warn_if_not_unique,
Expand Down Expand Up @@ -2810,9 +2816,20 @@

:exc:`TypeError` if an unknown *format* is selected.

:exc:`ImportError` is the biopython package is not available.
IAlibay marked this conversation as resolved.
Show resolved Hide resolved


.. versionadded:: 0.9.0
.. versionchanged:: 2.7.0
Biopython is now an optional dependency
IAlibay marked this conversation as resolved.
Show resolved Hide resolved
"""
if not HAS_BIOPYTHON:
errmsg = ("The `sequence_alignment` method requires an "

Check warning on line 2827 in package/MDAnalysis/core/topologyattrs.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/core/topologyattrs.py#L2827

Added line #L2827 was not covered by tests
"installation of `Biopython`. Please install "
"`Biopython` to use this method: "
"https://biopython.org/wiki/Download")
raise ImportError(errmsg)

Check warning on line 2831 in package/MDAnalysis/core/topologyattrs.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/core/topologyattrs.py#L2831

Added line #L2831 was not covered by tests

formats = ('string', 'Seq', 'SeqRecord')

format = kwargs.pop("format", "SeqRecord")
Expand Down
2 changes: 1 addition & 1 deletion package/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ maintainers = [
requires-python = ">=3.9"
dependencies = [
'numpy>=1.22.3',
'biopython>=1.80',
'networkx>=2.0',
'GridDataFormats>=0.4.0',
'mmtf-python>=1.0.0',
Expand Down Expand Up @@ -86,6 +85,7 @@ extra_formats = [
"rdkit>=2020.03.1",
]
analysis = [
"biopython>=1.80",
"seaborn",
"scikit-learn",
"tidynamics>=1.0.0",
Expand Down
2 changes: 1 addition & 1 deletion package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,6 @@ def long_description(readme):

install_requires = [
'numpy>=1.22.3',
'biopython>=1.80',
'networkx>=2.0',
'GridDataFormats>=0.4.0',
'mmtf-python>=1.0.0',
Expand Down Expand Up @@ -660,6 +659,7 @@ def long_description(readme):
'parmed', # ParmEd converter
],
'analysis': [
'biopython>=1.80', # sequence generation & alignment
'seaborn', # for annotated heat map and nearest neighbor
# plotting in PSA
'scikit-learn', # For clustering and dimensionality
Expand Down
30 changes: 25 additions & 5 deletions testsuite/MDAnalysisTests/analysis/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

import MDAnalysis as mda
import MDAnalysis.analysis.align as align
from MDAnalysis.analysis.align import HAS_BIOPYTHON
import MDAnalysis.analysis.rms as rms
import os
import numpy as np
Expand Down Expand Up @@ -496,19 +497,27 @@ def test_average_structure_in_memory(self, universe):
assert avg.filename is None


class TestAlignmentProcessing(object):
class TestAlignmentProcessing:
seq = FASTA
error_msg = "selection string has unexpected length"

@pytest.mark.skipif(HAS_BIOPYTHON, reason='biopython is installed')
def test_importerror_biopython(self):
errmsg = "The `fasta2select` method requires an installation"
with pytest.raises(ImportError, match=errmsg):
_ = align.fasta2select(self.seq, is_aligned=True)

@pytest.mark.skipif(not HAS_BIOPYTHON, reason='requires biopython')
def test_fasta2select_aligned(self):
"""test align.fasta2select() on aligned FASTA (Issue 112)"""
sel = align.fasta2select(self.seq, is_aligned=True)
# length of the output strings, not residues or anything real...
assert len(sel['reference']) == 30623, self.error_msg
assert len(sel['mobile']) == 30623, self.error_msg

@pytest.mark.skipif(executable_not_found("clustalw2"),
reason="Test skipped because clustalw2 executable not found")
@pytest.mark.skipif(
executable_not_found("clustalw2") or not HAS_BIOPYTHON,
reason="Test skipped because clustalw2 executable not found")
def test_fasta2select_file(self, tmpdir):
"""test align.fasta2select() on a non-aligned FASTA with default
filenames"""
Expand All @@ -518,8 +527,9 @@ def test_fasta2select_file(self, tmpdir):
assert len(sel['reference']) == 23080, self.error_msg
assert len(sel['mobile']) == 23090, self.error_msg

@pytest.mark.skipif(executable_not_found("clustalw2"),
reason="Test skipped because clustalw2 executable not found")
@pytest.mark.skipif(
executable_not_found("clustalw2") or not HAS_BIOPYTHON,
reason="Test skipped because clustalw2 executable not found")
def test_fasta2select_ClustalW(self, tmpdir):
"""MDAnalysis.analysis.align: test fasta2select() with ClustalW
(Issue 113)"""
Expand All @@ -533,6 +543,7 @@ def test_fasta2select_ClustalW(self, tmpdir):
assert len(sel['reference']) == 23080, self.error_msg
assert len(sel['mobile']) == 23090, self.error_msg

@pytest.mark.skipif(not HAS_BIOPYTHON, reason='requires biopython')
def test_fasta2select_resids(self, tmpdir):
"""test align.fasta2select() when resids provided (Issue #3124)"""
resids = [x for x in range(705)]
Expand All @@ -554,6 +565,14 @@ def atomgroups():
mobile = universe.select_atoms("resid 122-159")
return reference, mobile

@pytest.mark.skipif(HAS_BIOPYTHON, reason='biopython installed')
def test_biopython_import_error(self, atomgroups):
ref, mob = atomgroups
errmsg = "The `sequence_alignment` method requires an installation of"
with pytest.raises(ImportError, match=errmsg):
align.sequence_alignment(mob, ref)

@pytest.mark.skipif(not HAS_BIOPYTHON, reason='requires biopython')
@pytest.mark.filterwarnings("ignore:`sequence_alignment` is deprecated!")
def test_sequence_alignment(self, atomgroups):
reference, mobile = atomgroups
Expand All @@ -569,6 +588,7 @@ def test_sequence_alignment(self, atomgroups):
assert score == pytest.approx(54.6)
assert_array_equal([begin, end], [0, reference.n_residues])

@pytest.mark.skipif(not HAS_BIOPYTHON, reason='requires biopython')
def test_sequence_alignment_deprecation(self, atomgroups):
reference, mobile = atomgroups
wmsg = ("`sequence_alignment` is deprecated!\n"
Expand Down
13 changes: 11 additions & 2 deletions testsuite/MDAnalysisTests/core/test_residuegroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,20 @@
import pytest

import MDAnalysis as mda

from MDAnalysis.core.topologyattrs import HAS_BIOPYTHON
from MDAnalysisTests.datafiles import PSF, DCD


class TestSequence(object):
@pytest.mark.skipif(HAS_BIOPYTHON, reason="biopython is installed")
def test_sequence_import_error():
p = mda.Universe(PSF, DCD).select_atoms('protein')
errmsg = "The `sequence_alignment` method requires an installation"
with pytest.raises(ImportError, match=errmsg):
_ = p.residues.sequence(format="string")


@pytest.mark.skipif(not HAS_BIOPYTHON, reason='requires biopython')
class TestSequence:
# all tests are done with the AdK system (PSF and DCD) sequence:
# http://www.uniprot.org/uniprot/P69441.fasta
# >sp|P69441|KAD_ECOLI Adenylate kinase OS=Escherichia coli (strain K12) GN=adk PE=1 SV=1
Expand Down
Loading