Skip to content

Commit

Permalink
Package for pypi and modularize
Browse files Browse the repository at this point in the history
  • Loading branch information
a-slide committed Jun 21, 2019
1 parent d028d55 commit 5521ec7
Show file tree
Hide file tree
Showing 9 changed files with 205 additions and 1,922 deletions.
49 changes: 6 additions & 43 deletions NanoCount/NanoCount.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,18 @@

#~~~~~~~~~~~~~~IMPORTS~~~~~~~~~~~~~~#
# Standard library imports
from collections import Counter, defaultdict
import argparse
from collections import *

# Third party imports
import pysam
import pandas as pd

# Local imports
from NanoCount.Read import Read
from NanoCount.Helper_fun import stderr_print
from NanoCount import __version__
from NanoCount.common import *

#~~~~~~~~~~~~~~MAIN FUNCTION~~~~~~~~~~~~~~#
class NanoCount_main ():
class NanoCount ():

#~~~~~~~~~~~~~~MAGIC METHODS~~~~~~~~~~~~~~#
def __init__ (self,
Expand Down Expand Up @@ -100,6 +98,9 @@ def __init__ (self,
# Update compatibility assignments
self.compatibility_dict = self._update_compatibility ()

# Final line
stderr_print("\n")

#~~~~~~~~~~~~~~PROPERTY METHODS~~~~~~~~~~~~~~#
@property
def count_df (self):
Expand Down Expand Up @@ -229,41 +230,3 @@ def _update_compatibility (self):
compatibility_dict[read_name][ref_name] = self.abundance_dict [ref_name] / total

return compatibility_dict

#~~~~~~~~~~~~~~TOP LEVEL INSTRUCTIONS~~~~~~~~~~~~~~#

def main():
    """Console entry point: parse the command line, run the counter, save the counts.

    The options read from the command line are forwarded as keyword arguments to
    NanoCount_main, and the resulting object is asked to write its counts to the
    path given with -o/--count_file.
    """
    # Build the command line interface
    cli = argparse.ArgumentParser(
        description='Calculate transcript abundance for a dRNA-Seq dataset from a BAM/SAM alignment file generated by minimap2')
    add_option = cli.add_argument

    add_option('--version', '-v', action='version', version=__version__)
    add_option(
        '-i', '--alignment_file', type=str, required=True,
        help="BAM or SAM file containing aligned ONT dRNA-Seq reads including secondary and supplementary alignment")
    add_option(
        '-o', '--count_file', type=str, required=True,
        help="Output count file")
    add_option(
        '--min_read_length', type=int, default=50,
        help="Minimal length of the read to be considered valid")
    add_option(
        '--min_query_fraction_aligned', type=float, default=0.5,
        help="Minimal fraction of the primary hit query aligned to consider the read valid")
    add_option(
        '--equivalent_threshold', type=float, default=0.9,
        help="Fraction of the alignment score or the alignment length of secondary hits compared to the primary hit to be considered valid hits")
    add_option(
        '--scoring_value', type=str, default="alignment_score",
        help="Value to use for score thresholding of secondary hits. Either alignment_score or alignment_length")
    add_option(
        '--convergence_target', type=float, default=0.005,
        help="Convergence target value of the cummulative difference between abundance values of successive EM round to trigger the end of the EM loop")
    add_option(
        '--verbose', default=False, action='store_true',
        help="If True will be chatty")
    options = cli.parse_args()

    # Every option except the output path is an analysis parameter
    run_kwargs = dict(
        alignment_file=options.alignment_file,
        min_read_length=options.min_read_length,
        min_query_fraction_aligned=options.min_query_fraction_aligned,
        equivalent_threshold=options.equivalent_threshold,
        scoring_value=options.scoring_value,
        convergence_target=options.convergence_target,
        verbose=options.verbose)
    counter = NanoCount_main(**run_kwargs)

    # Save the estimated counts to the requested file
    counter.write_count_file(options.count_file)
25 changes: 2 additions & 23 deletions NanoCount/__init__.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,6 @@
# -*- coding: utf-8 -*-

# Define self package variable
__version__ = "0.1.a2"
__version__ = "0.1.a3"
__all__ = ["NanoCount", "Read"]

description = 'EM based transcript abundance from nanopore reads mapped to a transcriptome with minimap2'

# Collect info in a dictionary for setup.py
setup_dict = {
"name": __name__,
"version": __version__,
"description": description,
"url": "https://github.com/a-slide/NanoCount",
"author": 'Adrien Leger',
"author_email": 'aleg {at} ebi.ac.uk',
"license": "MIT",
"python_requires":'>=3.5',
"classifiers": [
'Development Status :: 3 - Alpha',
'Intended Audience :: Science/Research',
'Topic :: Scientific/Engineering :: Bio-Informatics',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 3'],
"install_requires": ['pysam>=0.14.1', 'pandas>=0.23.3'],
"packages": [__name__],
"entry_points":{'console_scripts': ['NanoCount = NanoCount.NanoCount:main']}}
__description__ = 'EM based transcript abundance from nanopore reads mapped to a transcriptome with minimap2'
54 changes: 54 additions & 0 deletions NanoCount/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

#~~~~~~~~~~~~~~IMPORTS~~~~~~~~~~~~~~#

# Standard library imports
import argparse
from collections import *

# Local imports
from NanoCount import __version__ as package_version
from NanoCount import __name__ as package_name
from NanoCount import __description__ as package_description
from NanoCount.NanoCount import NanoCount as nc

#~~~~~~~~~~~~~~MAIN PARSER ENTRY POINT~~~~~~~~~~~~~~#

def main(args=None):
    """Command line entry point: parse the options, run NanoCount and write the count file.

    Parameters
    ----------
    args : list of str, optional
        Command line tokens to parse instead of the process arguments. When left
        to None (as done by the console script entry point), argparse falls back
        to ``sys.argv[1:]``.
    """

    # Define parser
    parser = argparse.ArgumentParser(description=package_description)
    parser.add_argument('--version', '-v', action='version', version="{} v{}".format(package_name, package_version))
    parser.add_argument('-i', '--alignment_file', type=str, required=True,
        help="BAM or SAM file containing aligned ONT dRNA-Seq reads including secondary and supplementary alignment")
    parser.add_argument('-o', '--count_file', type=str, required=True,
        help="Output count file")
    parser.add_argument('--min_read_length', type=int, default=50,
        help="Minimal length of the read to be considered valid")
    parser.add_argument('--min_query_fraction_aligned', type=float, default=0.5,
        help="Minimal fraction of the primary hit query aligned to consider the read valid")
    parser.add_argument('--equivalent_threshold', type=float, default=0.9,
        help="Fraction of the alignment score or the alignment length of secondary hits compared to the primary hit to be considered valid hits")
    parser.add_argument('--scoring_value', type=str, default="alignment_score",
        help="Value to use for score thresholding of secondary hits. Either alignment_score or alignment_length")
    parser.add_argument('--convergence_target', type=float, default=0.005,
        help="Convergence target value of the cummulative difference between abundance values of successive EM round to trigger the end of the EM loop")
    parser.add_argument('--verbose', default=False, action='store_true',
        help="If True will be chatty")

    # Forward the caller-supplied tokens: calling parse_args() without them would
    # silently ignore the `args` parameter and always read sys.argv
    args = parser.parse_args(args)

    nanocount = nc(
        alignment_file=args.alignment_file,
        min_read_length=args.min_read_length,
        min_query_fraction_aligned=args.min_query_fraction_aligned,
        equivalent_threshold=args.equivalent_threshold,
        scoring_value=args.scoring_value,
        convergence_target=args.convergence_target,
        verbose=args.verbose)

    nanocount.write_count_file(args.count_file)


# execute only if run as a script
if __name__ == "__main__":
    main()
File renamed without changes.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# NanoCount

[![GitHub license](https://img.shields.io/github/license/a-slide/NanoCount.svg)](https://github.com/a-slide/NanoCount/blob/master/LICENSE)
[![PyPI version](https://badge.fury.io/py/NanoCount.svg)](https://badge.fury.io/py/NanoCount)
[![Downloads](https://pepy.tech/badge/NanoCount)](https://pepy.tech/project/NanoCount)

EM based transcript abundance from nanopore reads mapped to a transcriptome with minimap2
Python package adapted from https://github.com/jts/nanopore-rna-analysis by Jared Simpson

Expand Down Expand Up @@ -99,7 +103,7 @@ Convergence target reached after 8 rounds
Convergence value = 0.004801809595549253
```

The count results are stored in a Pandas Dataframe that can be conveniently rendered in Jupyter
The count results are stored in a Pandas Dataframe that can be conveniently rendered in Jupyter
```python3
display(n.count_df)
```
Expand All @@ -122,4 +126,4 @@ Please be aware this package is experimental . It was tested under Linux Ubuntu

You are welcome to contribute by requesting additional functionality, reporting bugs, or forking the repository and submitting pull requests with patches or updates.

Thank you
Thank you
25 changes: 23 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,26 @@
# -*- coding: utf-8 -*-

from setuptools import setup
from NanoCount import setup_dict
setup(**setup_dict)
import NanoCount as package

# Package metadata: name, version and description are read from the NanoCount
# package itself so that they are defined in a single place
setup(
    name=package.__name__,
    version=package.__version__,
    description=package.__description__,
    url="https://github.com/a-slide/NanoCount",
    author='Adrien Leger',
    # Must be a real address: it is published as-is in the package metadata
    author_email='aleg@ebi.ac.uk',
    license="MIT",
    python_requires='>=3.5',
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Science/Research',
        'Topic :: Scientific/Engineering :: Bio-Informatics',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3'],
    install_requires=[
        'pysam>=0.14.1',
        'pandas>=0.23.3'],
    packages=[package.__name__],
    # Console script `NanoCount` dispatches to main() in NanoCount/__main__.py
    entry_points={'console_scripts': ['NanoCount = NanoCount.__main__:main']})
3 changes: 2 additions & 1 deletion test/data/count_file.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ YDR224C 0.05 1.0 1000000.0
YIL117C 0.05 1.0 1000000.0
YDL145C 0.05 1.0 1000000.0
YLR293C 0.05 1.0 1000000.0
YGL076C 0.05 1.0 1000000.0
YDR382W 0.05 1.0 1000000.0
YLR110C 0.05 1.0 1000000.0
YMR116C 0.05 1.0 1000000.0
YEL052W 0.05 1.0 1000000.0
YKL060C 0.05 1.0 1000000.0
YOL139C 0.05 1.0 1000000.0
YGL076C 0.025 0.5 500000.0
YPL198W 0.025 0.5 500000.0
4 changes: 4 additions & 0 deletions test/data/genome_aligned_reads.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
transcript_name raw est_count tpm
VIII 0.6 3.0 3000000.0
IV 0.2 1.0 1000000.0
V 0.2 1.0 1000000.0
Loading

0 comments on commit 5521ec7

Please sign in to comment.