Skip to content

Commit

Permalink
Merge branch 'release-0.3.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
konrad committed Jul 5, 2014
2 parents 773f068 + 957223b commit bbac9ed
Show file tree
Hide file tree
Showing 16 changed files with 263 additions and 66 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
v0.3.1 (2014-07-05)
- Add live / installation image (thanks anonymous reviewer #1)
- Add support for Fastq read files (thanks anonymous reviewer #3)
- Extend CLI documentation (thanks reviewer Matt MacManes)
- Fix/extend online documentation (thanks to Lei Li for help re. OS X
and to Petya Zhelyazkova for the feedback)
v0.3.0 (2014-05-18)
- Improve Makefile
- Update docs
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ new_release:
@echo "* Test doc creation"
@echo "* make package_to_pypi"
@echo "* git add CHANGELOG.txt bin/reademption docs/source/conf.py setup.py"
@echo "* Commit changes e.g. 'git commit -m \"Set version to 0.2.X\"'"
@echo "* Tag the commit e.g. 'git tag -a v0.2.X -m \"version v0.2.X\"'"
@echo "* Commit changes e.g. 'git commit -m \"Set version to 0.3.X\"'"
@echo "* Tag the commit e.g. 'git tag -a v0.3.X -m \"version v0.3.X\"'"
@echo "* Merge release into dev and master"
@echo "* Push it to github: git push"
@echo "* Generate a new release based on this tag at"
Expand Down
14 changes: 9 additions & 5 deletions bin/reademption
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ __author__ = "Konrad Foerstner <[email protected]>"
__copyright__ = "2011-2014 by Konrad Foerstner <[email protected]>"
__license__ = "ISC license"
__email__ = "[email protected]"
__version__ = "0.3.0"
__version__ = "0.3.1"

def main():
parser = argparse.ArgumentParser()
Expand All @@ -34,10 +34,11 @@ def main():
"directory is used.")
read_aligning_parser.add_argument(
"--min_read_length", "-l", default=12, type=int,
help="Minimal read length after clipping.")
help="Minimal read length after clipping (default 12). Should be "
"higher for eukaryotic species.")
read_aligning_parser.add_argument(
"--processes", "-p", default=1, type=int,
help="Number of processes that should be used.")
help="Number of processes that should be used (default 1).")
read_aligning_parser.add_argument(
"--segemehl_accuracy", "-a", default=95.0, type=float,
help="Segemehl's minimal accuracy (in %%) (default 95).")
Expand Down Expand Up @@ -71,6 +72,9 @@ def main():
read_aligning_parser.add_argument(
"--lack_bin", "-L", default="lack.x",
help="Lack's binary path (default 'lack.x').")
read_aligning_parser.add_argument(
"--fastq", "-q", default=False, action="store_true",
help="Input reads are in FASTQ not FASTA format.")
read_aligning_parser.add_argument(
"--check_for_existing_files", "-f", default=False,
action="store_true", help="Check for existing files (e.g. from a "
Expand Down Expand Up @@ -108,7 +112,7 @@ def main():
"calculation.")
coverage_creation_parser.add_argument(
"--processes", "-p", default=1, type=int,
help="Number of processes that should be used.")
help="Number of processes that should be used (default 1).")
coverage_creation_parser.add_argument(
"--skip_read_count_splitting", "-s", default=False,
action="store_true", help="Do not split the read counting between "
Expand Down Expand Up @@ -151,7 +155,7 @@ def main():
"and anti-sense overlaps are counted and separately reported.")
gene_wise_quanti_parser.add_argument(
"--processes", "-p", default=1, type=int,
help="Number of processes that should be used.")
help="Number of processes that should be used (default 1).")
gene_wise_quanti_parser.add_argument(
"--features", "-t", dest="allowed_features", default=None,
help="Comma separated list of features that should be considered "
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
# The short X.Y version.
version = '0.3'
# The full version, including alpha/beta/rc tags.
release = '0.3.0'
release = '0.3.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
5 changes: 4 additions & 1 deletion docs/source/example_analysis.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,10 @@ Finally, we need the reads of the RNA-Seq libraries. To save some time
for running these examples we will work with subsampled libraries of 1M
reads each. This will limit the informative value of the results which
is acceptable as we just want to understand the workflow of
READemption.
READemption. Please be aware that READemption does not perform quality
trimming or adapter clipping so far. For this purpose use the `FASTX
toolkit <http://hannonlab.cshl.edu/fastx_toolkit/>`_, `cutadapt
<https://code.google.com/p/cutadapt/>`_ or other tools.

::

Expand Down
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Table of content

index
installation
live_and_installation_image
subcommands
example_analysis
troubleshooting
Expand Down
84 changes: 61 additions & 23 deletions docs/source/installation.rst
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
Installation
============
Installation and updating
=========================

Requirements
------------

READemption was developed using Python 3.3 and the user is advised to
run READemption with this or a higher version. In any case, the third
party packages `pysam <https://code.google.com/p/pysam>`_ as well as
`setuptools <https://pypi.python.org/pypi/setuptools>`_ and `pip
Development of READemption started with Python 3.2 and the user
is advised to run READemption with this or a higher version. In any
case, the third party packages `pysam
<https://code.google.com/p/pysam>`_ as well as `setuptools
<https://pypi.python.org/pypi/setuptools>`_ and `pip
<http://www.pip-installer.org>`_ should be available on the system in
order to make the installation easy. READemption uses the short read
mapper `segemehl
Expand All @@ -21,8 +22,8 @@ necessary for the subcommand ``deseq`` which performs differential
gene expression analysis. Don't worry - in the following the
installation of all these requirements will be covered.

Installing on a fresh Ubuntu installation
-----------------------------------------
Installing on a fresh Ubuntu system
-----------------------------------

The following installation procedure was tested on an `Amazon AWS
t1.micro
Expand Down Expand Up @@ -53,22 +54,22 @@ Some comments:

::

curl http://www.bioinf.uni-leipzig.de/Software/segemehl/segemehl_0_1_7.tar.gz > segemehl_0_1_7.tar.gz
tar xzf segemehl_0_1_7.tar.gz
curl http://www.bioinf.uni-leipzig.de/Software/segemehl/segemehl_0_1_9.tar.gz > segemehl_0_1_9.tar.gz
tar xzf segemehl_0_1_9.tar.gz
cd segemehl_*/segemehl/ && make && cd ../../

Copying the executable to a location that is part of the ``PATH`` e.g
``/usr/bin/`` ...

::

sudo cp segemehl_0_1_7/segemehl/segemehl.x /usr/bin/segemehl.x
sudo cp segemehl_0_1_7/segemehl/lack.x /usr/bin/lack.x
sudo cp segemehl_0_1_9/segemehl/segemehl.x /usr/bin/segemehl.x
sudo cp segemehl_0_1_9/segemehl/lack.x /usr/bin/lack.x

... or the bin folder of your home directory::

mkdir ~/bin
cp segemehl_0_1_7/segemehl/segemehl.x ~/bin
cp segemehl_0_1_9/segemehl/segemehl.x ~/bin

3. Install DESeq2
~~~~~~~~~~~~~~~~~
Expand All @@ -82,7 +83,7 @@ and install the DESeq2 package inside of the interactive command line
interface. You might be asked to confirm the installation path::

source("http://bioconductor.org/biocLite.R")
biocLite("DESeq2").
biocLite("DESeq2")

Leave ``R``::

Expand All @@ -101,12 +102,49 @@ Voilà! You should now be able to call READemption::

reademption -h

..
.. Global installation
.. -------------------
..
.. Installation in the home directory of the user
.. ----------------------------------------------
..
.. Installation in a pyvenv
.. ----------------------

Installing on Apple OS X
------------------------

(Many thanks to Lei Li for contributing this part!)

1. Installing all required software/packages
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

To download and install Python 3 follow the instructions at this
`download page <https://www.python.org/downloads/>`_.

Download and install `Xcode <https://developer.apple.com/xcode/>`_ (`download page <https://developer.apple.com/xcode/downloads/>`_) and R
(download links are on `the frontpage <http://www.r-project.org/>`_).

To install ``pip`` open a terminal and run

::

curl -O https://bitbucket.org/pypa/setuptools/raw/bootstrap/ez_setup.py
python3 ez_setup.py
# download and install pip
curl -O https://raw.github.com/pypa/pip/master/contrib/get-pip.py
python3 get-pip.py

Install ``matplotlib``:

::

pip3 install matplotlib


2. Installing segemehl, DESeq, pysam and READemption
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The remaining installation steps are the same as described above. Just
open a terminal and run the commands.


Updating READemption
--------------------

Once you have installed READemption as described above you can easily
update it to the newest version by running

::

pip3 install READemption -U
19 changes: 19 additions & 0 deletions docs/source/live_and_installation_image.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Live system and installation image
==================================

For users who do not feel comfortable with the installation of
READemption, who want to run it under Windows or who just want to test
it without installation we offer an Ubuntu 14.04 based live system /
installation image (~ 1.2 GB large) which can be `downloaded here
<http://osimages.imib-zinf.net>`_. Once you have retrieved the image
you can either use it to install or test the system on a physical
machine or use it in a virtual machine (e.g. using the open source
software `VirtualBox <https://www.virtualbox.org/>`_). The required
installation/setup steps are the `same as for the official Ubuntu
version
<http://www.ubuntu.com/download/desktop/install-ubuntu-desktop>`_. The
only difference is that once you have a running system READemption can
be used out of the box without any further setup. A very detailed
description of the setup of a virtual machine with Ubuntu under
Windows can be found `here
<http://www.wikihow.com/Install-Ubuntu-on-VirtualBox>`_.
47 changes: 27 additions & 20 deletions docs/source/subcommands.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,25 @@ annotation files in GFF3 format have to be put into
align
-----

``align`` performs the clipping and size filtering of the reads, as well
as the actual aligning to the reference sequences. It also generates
statistics about the steps (e.g. number of aligned reads, number of
mappings). As the result of this steps are needed by the other
subcommands it has to be run before the others. It requires reads in
FASTA format (or counterparts compressed with ``gzip`` or ``bzip2``)
and reference sequences in FASTA format. ``align`` generates the read
alignments in BAM format (``*.bam``) and also index files for those
(``*.bam.bai``). Is also stores unmapped reads so that they can be
inspected e.g. to search for contaminations. The file
``align`` performs the clipping and size filtering of the reads, as
well as the actual aligning to the reference sequences. It also
generates statistics about the steps (e.g. number of aligned reads,
number of mappings). As the results of these steps are needed by the
other subcommands it has to be run before the others. It requires
reads in FASTA or FASTQ format (or counterparts compressed with
``gzip`` or ``bzip2``) and reference sequences in FASTA
format. ``align`` generates the read alignments in BAM format
(``*.bam``) and also index files for those (``*.bam.bai``). It also
stores unmapped reads so that they can be inspected e.g. to search for
contaminations. The file
``output/align/reports_and_stats/read_alignment_stats.csv`` lists
several mapping statistics. The folder
``output/align/reports_and_stats/stats_data_json/`` contains files with
the original countings in JSON format.
``output/align/reports_and_stats/stats_data_json/`` contains files
with the original countings in JSON format. Please be aware that
READemption does not perform quality trimming or adapter clipping so
far. For this purpose use the `FASTX toolkit
<http://hannonlab.cshl.edu/fastx_toolkit/>`_, `cutadapt
<https://code.google.com/p/cutadapt/>`_ or other tools.

::

Expand All @@ -55,19 +60,21 @@ the original countings in JSON format.
[--segemehl_bin SEGEMEHL_BIN] [--paired_end]
[--split] [--poly_a_clipping] [--realign]
[--keep_original_alignments] [--lack_bin LACK_BIN]
[--check_for_existing_files] [--progress]
[--fastq] [--check_for_existing_files] [--progress]
[--crossalign_cleaning CROSSALIGN_CLEANING_STRING]
[project_path]

positional arguments:
project_path Path of the project folder. If none is given the
current directory is used.

optional arguments:
-h, --help show this help message and exit
--min_read_length MIN_READ_LENGTH, -l MIN_READ_LENGTH
Minimal read length after clipping.
Minimal read length after clipping (default 12).
Should be higher for eukaryotic species.
--processes PROCESSES, -p PROCESSES
Number of processes that should be used.
Number of processes that should be used (default 1).
--segemehl_accuracy SEGEMEHL_ACCURACY, -a SEGEMEHL_ACCURACY
Segemehl's minimal accuracy (in %) (default 95).
--segemehl_evalue SEGEMEHL_EVALUE, -e SEGEMEHL_EVALUE
Expand All @@ -91,6 +98,7 @@ the original countings in JSON format.
(lack) after merging.
--lack_bin LACK_BIN, -L LACK_BIN
Lack's binary path (default 'lack.x').
--fastq, -q Input reads are in FASTQ not FASTA format.
--check_for_existing_files, -f
Check for existing files (e.g. from a interrupted
previous run) and do not overwrite them if they exits.
Expand All @@ -104,7 +112,6 @@ the original countings in JSON format.
org_1_repl1>,<org_1_repl2>,..,<org_1_repl_n>;<ORG_NAME
_2>:<org_2_repl1>,<org_2_repl2>,..,<org_2_repl_n>'


coverage
--------

Expand Down Expand Up @@ -159,7 +166,7 @@ positions. To turn off this behavior use
number of aligned reads even if only uniquely aligned
reads are used for the coverage calculation.
--processes PROCESSES, -p PROCESSES
Number of processes that should be used.
Number of processes that should be used (default 1).
--skip_read_count_splitting, -s
Do not split the read counting between different
alignings. Default is to do the splitting.
Expand Down Expand Up @@ -216,7 +223,7 @@ overlaps are counted and separately listed.
sense and anti-sense overlaps are counted and
separately reported.
--processes PROCESSES, -p PROCESSES
Number of processes that should be used.
Number of processes that should be used (default 1).
--features ALLOWED_FEATURES, -t ALLOWED_FEATURES
Comma separated list of features that should be
considered (e.g. gene, cds, region, exon). Other
Expand Down
12 changes: 6 additions & 6 deletions reademptionlib/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ def create_project(self, version):
project_creator.create_root_folder(self._args.project_path)
project_creator.create_subfolders(self._paths.required_folders())
project_creator.create_version_file(self._paths.version_path, version)
sys.stdout.write("Created folder \"%s\" and required subfolders.\n" % (
self._args.project_path))
sys.stdout.write("Created folder \"%s\" and required subfolders.\n" % (
self._args.project_path))
sys.stdout.write("Please copy read files into folder \"%s\" and "
Expand Down Expand Up @@ -253,8 +251,9 @@ def _prepare_reads_single_end(self):
continue
read_processor = ReadProcessor(
poly_a_clipping=self._args.poly_a_clipping,
min_read_length=self._args.min_read_length)
read_files_and_jobs[lib_name] = executor.submit(
min_read_length=self._args.min_read_length,
fastq=self._args.fastq)
read_files_and_jobs[lib_name] = executor.submit(
read_processor.process_single_end, read_path,
processed_read_path)
self._evaluet_job_and_generate_stat_file(lib_name, read_files_and_jobs)
Expand All @@ -272,7 +271,8 @@ def _prepare_reads_paired_end(self):
continue
read_processor = ReadProcessor(
poly_a_clipping=False,
min_read_length=self._args.min_read_length)
min_read_length=self._args.min_read_length,
fastq=self._args.fastq)
read_files_and_jobs[lib_name] = executor.submit(
read_processor.process_paired_end, read_path_pair,
processed_read_path_pair)
Expand All @@ -295,7 +295,7 @@ def _evaluet_job_and_generate_stat_file(
read_files_and_stats, self._paths.read_processing_stats_path)

def _align_single_end_reads(self):
"""Manage the actual alignement of single end reads."""
"""Manage the actual alignment of single end reads."""
read_aligner = ReadAligner(self._args.segemehl_bin, self._args.progress)
if self._file_needs_to_be_created(self._paths.index_path) is True:
read_aligner.build_index(
Expand Down
Loading

0 comments on commit bbac9ed

Please sign in to comment.