Skip to content

Commit

Permalink
Merge pull request #44 from vinisalazar/dev
Browse files Browse the repository at this point in the history
Merge v0.1.21
  • Loading branch information
vinisalazar authored Jan 4, 2021
2 parents 235007d + bc56825 commit f365335
Show file tree
Hide file tree
Showing 30 changed files with 291 additions and 286 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@
* .write_paths_to_file, .copy_files_to_dir(), .link_files_to_dir() [ ]
* Add logger calls when saving to JSON and uploading to ProvStore [ ]

### v0.1.21
* Add _config argument to `bp.load_project()` (this is a temporary fix) [x]
* Add db property to Config class (prevents bug when setting DB path) [x]
* Improve a few docstrings [x]
* Refactor sha1 as sha256 [x]
* Add `__add__` and `__radd__` dunder methods for Directory and File [x]
* Package workflows in single module [x]
* Add FastTree PresetProgram [x]

### v0.1.20
* Debug graphical DOT output [x]
* Add Muscle PresetProgram [x]
Expand Down
4 changes: 2 additions & 2 deletions bioprov/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.20"
__version__ = "0.1.21"


"""
Expand All @@ -11,7 +11,7 @@
Inherits objects from the src/ package.
"""

from .src.config import config, Environment, BioProvDB
from .src.config import config, Environment, BioProvDB, Config
from .src.files import File, SeqFile, Directory
from .src.main import (
Program,
Expand Down
2 changes: 1 addition & 1 deletion bioprov/bioprov
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ __author__ = "Vini Salazar"
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.20"
__version__ = "0.1.21"

"""
Executable that goes in $PATH. Code for the command-line is in the bioprov.py module.
Expand Down
2 changes: 1 addition & 1 deletion bioprov/bioprov.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.20"
__version__ = "0.1.21"

"""
BioProv command-line application. This module holds the main executable.
Expand Down
2 changes: 1 addition & 1 deletion bioprov/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.20"
__version__ = "0.1.21"


"""
Expand Down
3 changes: 2 additions & 1 deletion bioprov/programs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.20"
__version__ = "0.1.21"


from .programs import (
Expand All @@ -12,6 +12,7 @@
blastp,
muscle,
mafft,
fasttree,
prokka,
kallisto_quant,
kaiju,
Expand Down
22 changes: 21 additions & 1 deletion bioprov/programs/programs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.20"
__version__ = "0.1.21"


"""
Expand Down Expand Up @@ -179,6 +179,26 @@ def mafft(sample, input_tag="input", extra_flags=None):
return _mafft


def fasttree(sample, input_tag="input", extra_flags=None):
    """
    Build the FastTree preset program for a sample.

    :param Sample sample: Instance of BioProv.Sample.
    :param str input_tag: A tag for the input multifasta file.
    :param list extra_flags: A list of extra parameters to pass to FastTree.
    :return: Instance of PresetProgram containing FastTree.
    :rtype: BioProv.PresetProgram.
    """
    # NOTE(review): the "" and ">" keys look like the command-line token placed
    # before each file (positional input, stdout redirect) - confirm against
    # PresetProgram.
    inputs = {"": input_tag}
    outputs = {">": ("tree", ".tree")}

    return PresetProgram(
        name="fasttree",
        sample=sample,
        input_files=inputs,
        output_files=outputs,
        preffix_tag=input_tag,
        extra_flags=extra_flags,
    )


def kallisto_quant(sample, index, output_dir="./", extra_flags=None):
"""
Run kallisto's alignment and quantification
Expand Down
2 changes: 1 addition & 1 deletion bioprov/src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.20"
__version__ = "0.1.21"


"""
Expand Down
27 changes: 19 additions & 8 deletions bioprov/src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.20"
__version__ = "0.1.21"


"""
Expand All @@ -20,7 +20,7 @@

from bioprov import __file__ as bp_file
from bioprov.data import data_dir, genomes_dir
from bioprov.utils import serializer, dict_to_sha1, serializer_filter, create_logger
from bioprov.utils import serializer, dict_to_sha256, serializer_filter, create_logger


class Config:
Expand All @@ -36,19 +36,18 @@ def __init__(self, db_path=None, threads=0):
# This duplication is to order the keys in the __dict__ attribute.
self.user = None
self.env = Environment()
self.user = self.env.user
if self.user is None:
self.user = self.env.user
if not threads:
threads = int(os.cpu_count() / 2)
self.db = None
self.db_path = None
self.threads = threads
self.bioprov_dir = Path(bp_file).parent
self.data = data_dir
self.genomes = genomes_dir
if db_path is None:
db_path = self.bioprov_dir.joinpath("db.json")
self.db_path = db_path
self.db = BioProvDB(self.db_path)
self._db_path = db_path
self._db = BioProvDB(self.db_path)
self._provstore_file = None
self._provstore_user = None
self._provstore_token = None
Expand All @@ -59,6 +58,18 @@ def __init__(self, db_path=None, threads=0):
def __repr__(self):
return f"BioProv Config class set in {__file__}"

@property
def db(self):
    """
    :return: The database object held in the private `_db` attribute.
    """
    database = self._db
    return database

@property
def db_path(self):
    """
    :return: The value held in the private `_db_path` attribute.
    """
    current_path = self._db_path
    return current_path

@db_path.setter
def db_path(self, value):
    """
    :param value: New value to store as the database path.
    :return: Updates self.
    """
    # NOTE(review): only `_db_path` is updated here; `_db` is left untouched.
    # Confirm whether the database object should be rebuilt for the new path.
    setattr(self, "_db_path", value)

def db_all(self):
"""
:return: List all items in BioProv database.
Expand Down Expand Up @@ -273,7 +284,7 @@ def update(self):
:return: Sets attributes to self.
"""
env_dict = dict(os.environ.items())
env_hash = dict_to_sha1(env_dict)
env_hash = dict_to_sha256(env_dict)
if env_hash != self.env_hash:
self.env_dict = env_dict
self.env_hash = env_hash
Expand Down
43 changes: 27 additions & 16 deletions bioprov/src/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.20"
__version__ = "0.1.21"


"""
Expand All @@ -21,7 +21,7 @@
Warnings,
serializer_filter,
serializer,
file_to_sha1,
file_to_sha256,
pattern_replacer,
)

Expand Down Expand Up @@ -54,7 +54,7 @@ def __init__(self, path, tag=None, attributes=None, _get_hash=True):
self._exists = self.path.exists()
self.size = get_size(self.path)
self.raw_size = get_size(self.path, convert=False)
self._sha1 = file_to_sha1(self.path)
self._sha256 = file_to_sha256(self.path)

# Provenance attributes
self._entity = None
Expand All @@ -65,14 +65,20 @@ def __repr__(self):
def __str__(self):
return self.__repr__()

def __add__(self, other):
return str(self) + other

def __radd__(self, other):
return other + str(self)

@property
def sha1(self):
self._sha1 = file_to_sha1(self.path)
return self._sha1
def sha256(self):
self._sha256 = file_to_sha256(self.path)
return self._sha256

@sha1.setter
def sha1(self, value):
self._sha1 = value # no cover
@sha256.setter
def sha256(self, value):
self._sha256 = value # no cover

@property
def exists(self):
Expand Down Expand Up @@ -107,21 +113,20 @@ def replace_path(self, old_terms, new, warnings=False):
:param old_terms: Terms to be replaced in the path.
:param new: New term.
:param warnings: Whether to warn if sha1 checksum differs or file does not exist.
:param warnings: Whether to warn if sha256 checksum differs or file does not exist.
:return: Updates self.
"""
old_hash, old_exists = self._sha1, self._exists
old_hash, old_exists = self._sha256, self._exists
self.path = Path(pattern_replacer(str(self.path), old_terms, new))
# TODO: replace these print statements with logger calls at warning/debug level
if warnings:
if not self.exists and old_exists:
logging.warning(
f"File {self.path} was marked as existing but was not found."
)
if old_hash and self.sha1 != old_hash and self.exists: # no cover
if old_hash and self.sha256 != old_hash and self.exists: # no cover
logging.warning(
f"File {self.path} previous sha1 checksum differs from the current."
f"File {self.path} previous sha256 checksum differs from the current."
)

def serializer(self):
Expand Down Expand Up @@ -153,7 +158,7 @@ def replace_path(self, old_terms, new, warnings=False):
:param old_terms: Terms to be replaced in the path.
:param new: New term.
:param warnings: Whether to warn if sha1 checksum differs or file does not exist.
:param warnings: Whether to warn if sha256 checksum differs or file does not exist.
:return: Updates self.
"""
Expand All @@ -172,6 +177,12 @@ def __repr__(self):
def __str__(self):
return self.__repr__()

def __add__(self, other):
return str(self) + other

def __radd__(self, other):
return other + str(self)

@property
def exists(self):
self._exists = self.path.exists()
Expand Down Expand Up @@ -433,7 +444,7 @@ def seqrecordgenerator(path, format, parser="seq", warnings=False):
:param path: Path to file.
:param format: format to pass to SeqIO.parse().
:param parser: Whether to import records with SeqIO (default) or AlignIO
:param warnings: Whether to warn if sha1 checksum differs or file does not exist.
:param warnings: Whether to warn if sha256 checksum differs or file does not exist.
:return: A generator of SeqRecords.
"""
Expand Down
Loading

0 comments on commit f365335

Please sign in to comment.