Skip to content

Commit

Permalink
Refactor initialization process and add genome configuration
Browse files Browse the repository at this point in the history
This commit refactors the initialization process in the `cli.py` file to include a new `init` command. The `init` command initializes the seqnado pipeline and sets up the required environment variables. It also adds the functionality to initialize genome files using a template file. The `get_genomes` function in the `helpers.py` file is modified to read the genome configuration from a JSON file and return the genome values. The commit also includes the addition of a new `init.sh` script that is used to initialize the environment for the SeqNado project. Lastly, a template file `genomes_template.json` is added to the `workflow/config` directory.
  • Loading branch information
alsmith151 committed Sep 26, 2024
1 parent b09d7ef commit 6361f6e
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 0 deletions.
40 changes: 40 additions & 0 deletions seqnado/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,50 @@
import sys
import pathlib
import shlex
from seqnado.helpers import get_genomes


# Absolute path of this module and the directory that contains it.
# PACKAGE_DIR is used below to locate files shipped with the package
# (init.sh and workflow/config/genomes_template.json).
FILE = os.path.abspath(__file__)
PACKAGE_DIR = os.path.dirname(FILE)
# Genome configuration, loaded once at import time by
# seqnado.helpers.get_genomes().
GENOMES = get_genomes()



@click.command()
def init():
    """
    Initialises the seqnado pipeline for the current user.

    Steps, in order:

    1. Ask the user to confirm the active conda environment (taken from the
       ``CONDA_DEFAULT_ENV`` environment variable). Any answer other than
       "y" logs an error and exits with status 1.
    2. Run the ``init.sh`` script that ships next to this module with bash.
       A non-zero exit status is reported as a warning; initialisation then
       continues.
    3. Create ``~/.config/seqnado`` if needed and, when ``genomes.json`` is
       not present there, write a copy of the packaged
       ``workflow/config/genomes_template.json``. An existing ``genomes.json``
       is never overwritten.
    """
    import json
    import os
    import subprocess

    # `logger` is an attribute of the loguru package, not a submodule, so it
    # must be imported with `from ... import`.
    from loguru import logger

    # Get current conda environment
    conda_env = os.environ.get("CONDA_DEFAULT_ENV")
    conda_env_ok = input(
        f"Current conda environment is {conda_env}. Is this correct? (y/n): "
    )
    if conda_env_ok.strip().lower() != "y":
        logger.error("Please activate the correct conda environment and re-run the command")
        sys.exit(1)

    logger.info("Initialising the correct environmental variables for the pipeline")
    # Pass the script as an argument list (no shell string) so that install
    # locations containing spaces or shell metacharacters still work.
    init_script = pathlib.Path(PACKAGE_DIR) / "init.sh"
    result = subprocess.run(["bash", str(init_script)], check=False)
    if result.returncode != 0:
        logger.warning(
            f"{init_script} exited with status {result.returncode}; "
            "continuing with genome file initialisation"
        )

    logger.info("Initialising genome files")
    seqnado_config_dir = pathlib.Path("~/.config/seqnado").expanduser()
    seqnado_config_dir.mkdir(parents=True, exist_ok=True)

    genome_template = (
        pathlib.Path(PACKAGE_DIR) / "workflow" / "config" / "genomes_template.json"
    )
    genome_config = seqnado_config_dir / "genomes.json"
    if not genome_config.exists():
        # Round-trip through json (rather than copying bytes) so a corrupt
        # template is detected here instead of at the next start-up.
        with open(genome_template, "r") as f:
            template_values = json.load(f)

        # Indented output: the user is expected to edit this file by hand.
        with open(genome_config, "w") as f:
            json.dump(template_values, f, indent=4)

        logger.info(
            f"Genome template written to {genome_config}. "
            "Please update it with the correct paths."
        )

    logger.info("Initialisation complete")



@click.command(context_settings=dict(ignore_unknown_options=True))
Expand Down
33 changes: 33 additions & 0 deletions seqnado/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,36 @@ def remove_unwanted_run_files():

except Exception as e:
print(e)


def get_genomes():
    """
    Load the user's genome configuration.

    Reads ``~/.config/seqnado/genomes.json`` and returns its parsed content.

    Returns:
        dict: The parsed genome configuration, or an empty dict when the
        configuration file does not exist.

    Exits:
        Calls ``sys.exit(1)`` when the ``dm6`` entry still carries the
        template placeholder (``"PATH"`` inside its ``bt2_indices`` value),
        i.e. the file was created from the template but never edited.
    """
    import json
    import sys

    seqnado_config_dir = pathlib.Path("~/.config/seqnado").expanduser()
    genome_config = seqnado_config_dir / "genomes.json"

    if not genome_config.exists():
        # No configuration yet. Return an empty mapping instead of falling
        # through to an unbound `genome_values`, which raised
        # UnboundLocalError for every caller.
        return {}

    logger.debug(f"Genome config found at {genome_config}")
    with open(genome_config, "r") as f:
        genome_values = json.load(f)

    # An untouched template is recognised by the "PATH" placeholder in the
    # dm6 bowtie2 index entry; a missing key is treated as "not a template".
    dm6 = genome_values.get("dm6")
    if dm6 and "PATH" in (dm6.get("bt2_indices") or ""):
        logger.error(f'Template genome file found. Please update the genome file {genome_config} with the correct paths.')
        sys.exit(1)

    return genome_values










# with open(os.path.join(template_dir, "preset_genomes.json"), "r") as f:
# genome_values = json.load(f)
25 changes: 25 additions & 0 deletions seqnado/init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash

# This script is used to initialize the environment for the SeqNado project.
#
# It (1) registers the SylabsCloud remote with apptainer and selects it, and
# (2) on hosts whose name contains "imm-", sets APPTAINER_BINDPATH when it is
# not already set and appends the same export to ~/.bashrc.
#
# NOTE: when this file is executed (e.g. `bash init.sh`) rather than sourced,
# the `export` below only affects this process; the line appended to
# ~/.bashrc is what makes the setting available to later shells.


echo "Adding SylabsCloud remote to apptainer - This will allow you to download and run the pre-made containers"
apptainer remote add --no-login SylabsCloud cloud.sylabs.io
apptainer remote use SylabsCloud


# grep -c prints the number of matching lines: 1 when the hostname contains
# "imm-", 0 otherwise.
IS_CCB=$(hostname | grep -c "imm-")

if [ -z "$APPTAINER_BINDPATH" ]; then
    if [ "$IS_CCB" -eq 1 ]; then
        # Bind paths are a comma-separated list; no spaces between entries.
        export APPTAINER_BINDPATH="/ceph:/ceph,/project:/project,/databank:/databank"
        echo 'export APPTAINER_BINDPATH="/ceph:/ceph,/project:/project,/databank:/databank"' >> ~/.bashrc
    else
        echo "This is not a CCB node. Please set the APPTAINER_BINDPATH environment variable to bind the necessary directories."
    fi
fi
9 changes: 9 additions & 0 deletions seqnado/workflow/config/genomes_template.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"dm6": {
"bt2_indices": "PATH_TO_INDICES/dm6",
"star_indices": "PATH_TO_STAR/dm6/UCSC/STAR_2.7.10b",
"chromosome_sizes": "PATH/dm6/UCSC/sequence/dm6.chrom.sizes",
"gtf": "PATH/dm6/UCSC/genes/dm6.ncbiRefSeq.gtf",
"blacklist": "PATH/dm6/dm6-blacklist.v2.bed.gz"
}
}

0 comments on commit 6361f6e

Please sign in to comment.