Package for pypi and modularize

a-slide · Jun 21, 2019 · 5521ec7 · 5521ec7
1 parent d028d55
commit 5521ec7
Show file tree

Hide file tree

Showing 9 changed files with 205 additions and 1,922 deletions.
diff --git a/NanoCount/NanoCount.py b/NanoCount/NanoCount.py
@@ -3,20 +3,18 @@
 
 #~~~~~~~~~~~~~~IMPORTS~~~~~~~~~~~~~~#
 # Standard library imports
-from collections import Counter, defaultdict
-import argparse
+from collections import *
 
 # Third party imports
 import pysam
 import pandas as pd
 
 # Local imports
 from NanoCount.Read import Read
-from NanoCount.Helper_fun import stderr_print
-from NanoCount import __version__
+from NanoCount.common import *
 
 #~~~~~~~~~~~~~~MAIN FUNCTION~~~~~~~~~~~~~~#
-class NanoCount_main ():
+class NanoCount ():
 
     #~~~~~~~~~~~~~~MAGIC METHODS~~~~~~~~~~~~~~#
     def __init__ (self,
@@ -100,6 +98,9 @@ def __init__ (self,
             # Update compatibility assignments
             self.compatibility_dict = self._update_compatibility ()
 
+        # Final line
+        stderr_print("\n")
+
     #~~~~~~~~~~~~~~PROPERTY METHODS~~~~~~~~~~~~~~#
     @property
     def count_df (self):
@@ -229,41 +230,3 @@ def _update_compatibility (self):
                 compatibility_dict[read_name][ref_name] = self.abundance_dict [ref_name] / total
 
         return compatibility_dict
-
-#~~~~~~~~~~~~~~TOP LEVEL INSTRUCTIONS~~~~~~~~~~~~~~#
-
-def main ():
-
-    # Define parser options
-    parser = argparse.ArgumentParser(
-        description='Calculate transcript abundance for a dRNA-Seq dataset from a BAM/SAM alignment file generated by minimap2')
-
-    parser.add_argument('--version', '-v', action='version', version=__version__)
-    parser.add_argument('-i', '--alignment_file', type=str, required=True,
-        help="BAM or SAM file containing aligned ONT dRNA-Seq reads including secondary and supplementary alignment")
-    parser.add_argument('-o', '--count_file', type=str, required=True,
-        help="Output count file")
-    parser.add_argument('--min_read_length', type=int, default=50,
-        help="Minimal length of the read to be considered valid")
-    parser.add_argument('--min_query_fraction_aligned', type=float, default=0.5,
-        help="Minimal fraction of the primary hit query aligned to consider the read valid")
-    parser.add_argument('--equivalent_threshold', type=float, default=0.9,
-        help="Fraction of the alignment score or the alignment length of secondary hits compared to the primary hit to be considered valid hits")
-    parser.add_argument('--scoring_value', type=str, default="alignment_score",
-        help="Value to use for score thresholding of secondary hits. Either alignment_score or alignment_length")
-    parser.add_argument('--convergence_target', type=float, default=0.005,
-        help="Convergence target value of the cummulative difference between abundance values of successive EM round to trigger the end of the EM loop")
-    parser.add_argument('--verbose', default=False, action='store_true',
-        help="If True will be chatty")
-    args = parser.parse_args()
-
-    m = NanoCount_main (
-        alignment_file =args.alignment_file,
-        min_read_length =args.min_read_length,
-        min_query_fraction_aligned =args.min_query_fraction_aligned,
-        equivalent_threshold =args.equivalent_threshold,
-        scoring_value =args.scoring_value,
-        convergence_target =args.convergence_target,
-        verbose =args.verbose)
-
-    m.write_count_file (args.count_file)
diff --git a/NanoCount/__init__.py b/NanoCount/__init__.py
@@ -1,27 +1,6 @@
 # -*- coding: utf-8 -*-
 
 # Define self package variable
-__version__ = "0.1.a2"
+__version__ = "0.1.a3"
 __all__ = ["NanoCount", "Read"]
-
-description = 'EM based transcript abundance from nanopore reads mapped to a transcriptome with minimap2'
-
-# Collect info in a dictionary for setup.py
-setup_dict = {
-    "name": __name__,
-    "version": __version__,
-    "description": description,
-    "url": "https://github.com/a-slide/NanoCount",
-    "author": 'Adrien Leger',
-    "author_email": 'aleg {at} ebi.ac.uk',
-    "license": "MIT",
-    "python_requires":'>=3.5',
-    "classifiers": [
-        'Development Status :: 3 - Alpha',
-        'Intended Audience :: Science/Research',
-        'Topic :: Scientific/Engineering :: Bio-Informatics',
-        'License :: OSI Approved :: MIT License',
-        'Programming Language :: Python :: 3'],
-    "install_requires": ['pysam>=0.14.1', 'pandas>=0.23.3'],
-    "packages": [__name__],
-    "entry_points":{'console_scripts': ['NanoCount = NanoCount.NanoCount:main']}}
+__description__ = 'EM based transcript abundance from nanopore reads mapped to a transcriptome with minimap2'
diff --git a/NanoCount/__main__.py b/NanoCount/__main__.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+#~~~~~~~~~~~~~~IMPORTS~~~~~~~~~~~~~~#
+
+# Standard library imports
+import argparse
+from collections import *
+
+# Local imports
+from NanoCount import __version__ as package_version
+from NanoCount import __name__ as package_name
+from NanoCount import __description__ as package_description
+from NanoCount.NanoCount import NanoCount as nc
+
+#~~~~~~~~~~~~~~MAIN PARSER ENTRY POINT~~~~~~~~~~~~~~#
+
+def main(args=None):
+    """Parse CLI options (`args`, or sys.argv[1:] when None), run NanoCount and write the count file."""
+    # Define parser
+    parser = argparse.ArgumentParser(description=package_description)
+    parser.add_argument('--version', '-v', action='version', version="{} v{}".format(package_name, package_version))
+    parser.add_argument('-i', '--alignment_file', type=str, required=True,
+        help="BAM or SAM file containing aligned ONT dRNA-Seq reads including secondary and supplementary alignment")
+    parser.add_argument('-o', '--count_file', type=str, required=True,
+        help="Output count file")
+    parser.add_argument('--min_read_length', type=int, default=50,
+        help="Minimal length of the read to be considered valid")
+    parser.add_argument('--min_query_fraction_aligned', type=float, default=0.5,
+        help="Minimal fraction of the primary hit query aligned to consider the read valid")
+    parser.add_argument('--equivalent_threshold', type=float, default=0.9,
+        help="Fraction of the alignment score or the alignment length of secondary hits compared to the primary hit to be considered valid hits")
+    parser.add_argument('--scoring_value', type=str, default="alignment_score",
+        help="Value to use for score thresholding of secondary hits. Either alignment_score or alignment_length")
+    parser.add_argument('--convergence_target', type=float, default=0.005,
+        help="Convergence target value of the cummulative difference between abundance values of successive EM round to trigger the end of the EM loop")
+    parser.add_argument('--verbose', default=False, action='store_true',
+        help="If True will be chatty")
+    args = parser.parse_args(args)  # forward the caller's argv; None makes argparse fall back to sys.argv[1:]
+
+    nanocount = nc (
+        alignment_file =args.alignment_file,
+        min_read_length =args.min_read_length,
+        min_query_fraction_aligned =args.min_query_fraction_aligned,
+        equivalent_threshold =args.equivalent_threshold,
+        scoring_value =args.scoring_value,
+        convergence_target =args.convergence_target,
+        verbose =args.verbose)
+
+    nanocount.write_count_file (args.count_file)
+
+# Run the command line interface only when this module is executed as a script, not when it is imported
+if __name__ == "__main__":
+    main()
diff --git a/NanoCount/Helper_fun.py → NanoCount/common.py b/NanoCount/Helper_fun.py → NanoCount/common.py
diff --git a/README.md b/README.md
@@ -1,5 +1,9 @@
 # NanoCount
 
+[![GitHub license](https://img.shields.io/github/license/a-slide/NanoCount.svg)](https://github.com/a-slide/NanoCount/blob/master/LICENSE)
+[![PyPI version](https://badge.fury.io/py/NanoCount.svg)](https://badge.fury.io/py/NanoCount)
+[![Downloads](https://pepy.tech/badge/NanoCount)](https://pepy.tech/project/NanoCount)
+
 EM based transcript abundance from nanopore reads mapped to a transcriptome with minimap2
 Python package adapted from https://github.com/jts/nanopore-rna-analysis by Jared Simpson
 
@@ -99,7 +103,7 @@ Convergence target reached after 8 rounds
 Convergence value = 0.004801809595549253
 ```
 
-The count results are stored in a Pandas Dataframe that can be conveniently rendered in Jupyter 
+The count results are stored in a Pandas Dataframe that can be conveniently rendered in Jupyter
 ```python3
 display(n.count_df)
 ```
@@ -122,4 +126,4 @@ Please be aware this package is experimental . It was tested under Linux Ubuntu
 
 You are welcome to contribute by requesting additional functionality, reporting bugs, or forking the repository and submitting pull requests with patches or updates.
 
-Thank you
+Thank you
diff --git a/setup.py b/setup.py
@@ -2,5 +2,26 @@
 # -*- coding: utf-8 -*-
 
 from setuptools import setup
-from NanoCount import setup_dict
-setup(**setup_dict)
+import NanoCount as package
+
+# Package name, version and description are read from the NanoCount package; the rest is declared inline
+setup(
+    name = package.__name__,
+    version = package.__version__,
+    description = package.__description__,
+    url = "https://github.com/a-slide/NanoCount",
+    author = 'Adrien Leger',
+    author_email = 'aleg@ebi.ac.uk',
+    license = "MIT",
+    python_requires ='>=3.5',
+    classifiers = [
+        'Development Status :: 3 - Alpha',
+        'Intended Audience :: Science/Research',
+        'Topic :: Scientific/Engineering :: Bio-Informatics',
+        'License :: OSI Approved :: MIT License',
+        'Programming Language :: Python :: 3'],
+    install_requires = [
+        'pysam>=0.14.1',
+        'pandas>=0.23.3'],
+    packages = [package.__name__],
+    entry_points = {'console_scripts': ['NanoCount = NanoCount.__main__:main']})
diff --git a/test/data/count_file.tsv b/test/data/count_file.tsv
@@ -9,10 +9,11 @@ YDR224C	0.05	1.0	1000000.0
 YIL117C	0.05	1.0	1000000.0
 YDL145C	0.05	1.0	1000000.0
 YLR293C	0.05	1.0	1000000.0
-YGL076C	0.05	1.0	1000000.0
 YDR382W	0.05	1.0	1000000.0
 YLR110C	0.05	1.0	1000000.0
 YMR116C	0.05	1.0	1000000.0
 YEL052W	0.05	1.0	1000000.0
 YKL060C	0.05	1.0	1000000.0
 YOL139C	0.05	1.0	1000000.0
+YGL076C	0.025	0.5	500000.0
+YPL198W	0.025	0.5	500000.0
diff --git a/test/data/genome_aligned_reads.tsv b/test/data/genome_aligned_reads.tsv
@@ -0,0 +1,4 @@
+transcript_name	raw	est_count	tpm
+VIII	0.6	3.0	3000000.0
+IV	0.2	1.0	1000000.0
+V	0.2	1.0	1000000.0