diff --git a/README.md b/README.md index 30714ec..dd57f4b 100644 --- a/README.md +++ b/README.md @@ -191,13 +191,13 @@ If you would like to try Earl Grey, or prefer to use it in a browser, you can do NOTE: This pipeline is currently running with Dfam 3.7 curated elements only. We are working on updating to Dfam 3.8 for a future release. If required, you can modify the conda installation of RepeatMasker within the conda environment (do at your own risk!) -Earl Grey version 5.0.3 (latest stable release) with all required and configured dependencies is found in the `toby_baril_bio` and `biooconda` conda channels. To install, simply run the following depending on your installation: +Earl Grey version 5.1.0 (latest stable release) with all required and configured dependencies is found in the `toby_baril_bio` and `bioconda` conda channels. To install, simply run the following depending on your installation: ``` # With conda -conda create -n earlgrey -c conda-forge -c bioconda earlgrey=5.0.3 +conda create -n earlgrey -c conda-forge -c bioconda earlgrey=5.1.0 # With mamba -mamba create -n earlgrey -c conda-forge -c bioconda earlgrey=5.0.3 +mamba create -n earlgrey -c conda-forge -c bioconda earlgrey=5.1.0 ``` # Recommended Installation with Conda or Mamba on ARM-based Mac Systems (M chips) @@ -371,7 +371,7 @@ brewIntel install coreutils Change TEstrainer_for_earlGrey.sh for the macOS version: ``` -nano $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.0.3-0/scripts/TEstrainer/TEstrainer_for_earlGrey.sh|g') +nano $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.1.0-0/scripts/TEstrainer/TEstrainer_for_earlGrey.sh|g') # delete everything in this file. ``` @@ -582,12 +582,12 @@ Save the file with `CTRL+X` then press `Y` when asked to overwrite the file. 
Make sure the updated file is executable: ``` -chmod a+x $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.0.3-0/scripts/TEstrainer/TEstrainer_for_earlGrey.sh|g') +chmod a+x $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.1.0-0/scripts/TEstrainer/TEstrainer_for_earlGrey.sh|g') ``` Edit the script directory path in this file by running the following: ``` -gsed -i "s|INSERT_FILENAME_HERE|$(which earlGrey | gsed 's:bin.*:share/earlgrey-5.0.3-0/scripts/TEstrainer/scripts/:g')|g" $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.0.3-0/scripts/TEstrainer/TEstrainer_for_earlGrey.sh|g') +gsed -i "s|INSERT_FILENAME_HERE|$(which earlGrey | gsed 's:bin.*:share/earlgrey-5.1.0-0/scripts/TEstrainer/scripts/:g')|g" $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.1.0-0/scripts/TEstrainer/TEstrainer_for_earlGrey.sh|g') ``` Edit famdb.py for use with our environment: @@ -597,12 +597,12 @@ gsed -i 's/python3/python/g' $(which earlGrey | gsed 's|bin.*|share/RepeatMasker Edit LTR_FINDER_PARALLEL to be compatible with zsh ``` -gsed -i "s|\`timeout $timeout|\`gtimeout $timeout|g" $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.0.3-0/scripts/LTR_FINDER_parallel|g') +gsed -i "s|\`timeout $timeout|\`gtimeout $timeout|g" $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.1.0-0/scripts/LTR_FINDER_parallel|g') ``` Install LTR_Finder from source ``` -cd $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.0.3-0/scripts/bin|g') +cd $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.1.0-0/scripts/bin|g') git clone https://github.com/xzhub/LTR_Finder cd ./LTR_Finder/source make @@ -611,14 +611,14 @@ cp * ../../LTR_FINDER.x86_64-1.0.7/ Edit rcMergeRepeatsLoose: ``` -gsed -i 's|sed|gsed|g' $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.0.3-0/scripts/rcMergeRepeatsLoose|g') +gsed -i 's|sed|gsed|g' $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.1.0-0/scripts/rcMergeRepeatsLoose|g') var=$(which earlGrey | gsed "s/earlGrey/Rscript/g") -gsed -i "s|Rscript|${var}|g" $(which earlGrey | gsed 
's|bin.*|share/earlgrey-5.0.3-0/scripts/rcMergeRepeatsLoose|g') +gsed -i "s|Rscript|${var}|g" $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.1.0-0/scripts/rcMergeRepeatsLoose|g') ``` Edit main earlGrey script: ``` -gsed -i "s|Rscript|${var}|g" $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.0.3-0/earlGrey|g') +gsed -i "s|Rscript|${var}|g" $(which earlGrey | gsed 's|bin.*|share/earlgrey-5.1.0-0/earlGrey|g') ``` Add an important directory to PERL5LIB (for RepeatMasker) @@ -635,7 +635,7 @@ You are ready to go! Just remember to activate the _intel_ terminal, then the co In this case, we need to bind a system directory to the docker container. In the line below, we are binding a directory call `host_data` that is found on our current path to `/data/` in the docker container. Please replace the file path before `:` to the directory you wish to bind to `/data/` in the container. This container must be run in interactive mode the first time you use it. ``` -docker run -it -v `pwd`/host_data/:/data/ quay.io/biocontainers/earlgrey:5.0.3--h4ac6f70_0 +docker run -it -v `pwd`/host_data/:/data/ quay.io/biocontainers/earlgrey:5.1.0--h4ac6f70_0 ``` ## If you are running the container for the first time, you need to enable Earl Grey to configure the Dfam libraries correctly in interactive mode. @@ -647,7 +647,7 @@ earlGrey -g /data/genome.fasta -s test_genome -t 8 -o /data/ ``` ## If you need the container to run offline and/or without interactive mode -I try to keep an up-to-date container in docker hub, but this might not always be the case depending on if I have had time to build and upload a new image. Currently, there is an image with Dfam 3.7 curated elements only, and this is version 5.0.3. You can use this image by pulling the container: +I try to keep an up-to-date container in docker hub, but this might not always be the case depending on if I have had time to build and upload a new image. 
Currently, there is an image with Dfam 3.7 curated elements only, and this is version 5.1.0. You can use this image by pulling the container: ``` # Interactive mode diff --git a/earlGrey b/earlGrey index a8a1fc0..2169563 100755 --- a/earlGrey +++ b/earlGrey @@ -3,7 +3,7 @@ usage() { echo " ############################# - earlGrey version 5.0.3 + earlGrey version 5.1.0 Required Parameters: -g == genome.fasta -s == species name diff --git a/earlGreyAnnotationOnly b/earlGreyAnnotationOnly index 6d7e941..e251d01 100755 --- a/earlGreyAnnotationOnly +++ b/earlGreyAnnotationOnly @@ -3,7 +3,7 @@ usage() { echo " ############################# - earlGrey version 5.0.3 (AnnotationOnly) + earlGrey version 5.1.0 (AnnotationOnly) Required Parameters: -g == genome.fasta -s == species name diff --git a/earlGreyLibConstruct b/earlGreyLibConstruct index aee8179..596182e 100644 --- a/earlGreyLibConstruct +++ b/earlGreyLibConstruct @@ -3,7 +3,7 @@ usage() { echo " ############################# - earlGrey version 5.0.3 (Library Construction Only) + earlGrey version 5.1.0 (Library Construction Only) Required Parameters: -g == genome.fasta -s == species name diff --git a/scripts/divergenceCalc/divergence_calc.py b/scripts/divergenceCalc/divergence_calc.py index da5e067..63b9b92 100644 --- a/scripts/divergenceCalc/divergence_calc.py +++ b/scripts/divergenceCalc/divergence_calc.py @@ -28,7 +28,7 @@ parser.add_argument('-t', '--cores', type=int, default=4, help='Number of cores') parser.add_argument('-k', '--timeout', type=int, default=30, - help='Seconds after which water will be cancelled and repeat treated as unalignable') + help='Seconds after which matcher will be cancelled and repeat treated as unalignable') args = parser.parse_args() @@ -131,27 +131,27 @@ def outer_func(genome_path, temp_dir, timeoutSeconds, gff): if exists(query_path) is True and getsize(query_path) > 0: # Set path to subject sequence subject_path=temp_dir+"/split_library/"+repeat_family+".fasta" - # Run water, with 
timeout exception - test_command = shlex.split("water "+query_path+" "+subject_path+" -gapopen 10 -gapextend 0.5 -outfile "+query_path+".water -aformat fasta") + # Run matcher, with timeout exception + test_command = shlex.split("matcher "+query_path+" "+subject_path+" -outfile "+query_path+".matcher -aformat fasta") # Run test and kill if it takes more than 10 seconds alignment_p = subprocess.Popen(test_command, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT) try: alignment_p.wait(timeoutSeconds) except subprocess.TimeoutExpired: - # if water fails to complete before timeout, kill and move on + # if matcher fails to complete before timeout, kill and move on with open(failed_file_name, "a") as failed_file: failed_file.write(seqnames+":"+start+"-"+end+"_"+strand+"_"+repeat_family+"\n") alignment_p.kill() - if exists(query_path+".water") is False or getsize(query_path+".water") == 0: + if exists(query_path+".matcher") is False or getsize(query_path+".matcher") == 0: # If no alignment is possible, set distances to NA and alignment length to 0 Kdist = "NA" os.remove(query_path) - if exists(query_path+".water") is True: - os.remove(query_path+".water") + if exists(query_path+".matcher") is True: + os.remove(query_path+".matcher") else: # Read in alignments - aln = list(SeqIO.parse(query_path+".water", 'fasta')) + aln = list(SeqIO.parse(query_path+".matcher", 'fasta')) ref_seq, gen_seq = str(aln[0].seq).upper(), str(aln[1].seq).upper() # Check ref and genome sequence are same length, set Kdist to NA if not if len(ref_seq) == len(gen_seq): @@ -163,7 +163,7 @@ def outer_func(genome_path, temp_dir, timeoutSeconds, gff): else: Kdist = "NA" # Delete temporary files - os.remove(query_path+".water") + os.remove(query_path+".matcher") os.remove(query_path) # Make line for temporary file and write to file tmp_holder = row[1].to_list()[1:]