From f0a7a47df628428d328c890c61c680a63db87c3a Mon Sep 17 00:00:00 2001 From: Eric Normandeau Date: Mon, 29 May 2023 14:06:10 -0400 Subject: [PATCH] Improve options --- 01_scripts/08_replace_coordinates_in_vcf.py | 10 +++++----- 02_infos/snplift_config.sh | 15 +++++++++------ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/01_scripts/08_replace_coordinates_in_vcf.py b/01_scripts/08_replace_coordinates_in_vcf.py index 19507fc..5a88b8b 100755 --- a/01_scripts/08_replace_coordinates_in_vcf.py +++ b/01_scripts/08_replace_coordinates_in_vcf.py @@ -39,11 +39,11 @@ def reverse_complement(seq): input_vcf = sys.argv[1] input_correspondence = sys.argv[2] unique_pos = sys.argv[3] - correct_id = sys.argv[4] + correct_id = int(sys.argv[4]) id_column = sys.argv[5] - correct_alleles = sys.argv[6] + correct_alleles = int(sys.argv[6]) allele_columns = sys.argv[7] - sort_output = sys.argv[8] + sort_output = int(sys.argv[8]) output_vcf = sys.argv[9] except: print(__doc__) @@ -95,11 +95,11 @@ def reverse_complement(seq): # Skip already treated positions if unique_pos: - if l[2] in viewed_positions: + if (l[0], l[1]) in viewed_positions: continue else: - viewed_positions.add(l[2]) + viewed_positions.add((l[0], l[1])) # Sorting if sort_output: diff --git a/02_infos/snplift_config.sh b/02_infos/snplift_config.sh index c857269..9d78c33 100644 --- a/02_infos/snplift_config.sh +++ b/02_infos/snplift_config.sh @@ -21,7 +21,7 @@ export SKIP_VISUALIZATION=1 # Avoid creating a plot to explore features. These a # for debugging [0, 1]. # Number of CPUs -export NCPUS=10 # Number of cores to use (around 10 and maximum 20 is recommended) +export NCPUS=20 # Number of cores to use (around 10 and maximum 20 is recommended) # For less than 100K SNPs, 1 to 4 cores is a good choice. # For less than 1M SNPs, 10 cores is a good choice. # Above this, 20 cores is going to be slightly faster. @@ -33,13 +33,16 @@ export WINDOW_LENGTH=300 # Sise of sequences kept on both sides of each SNP. 
# Number of neighbours to use export NUM_NEIGHBOURS=20 # Number of neighbour SNPs to consider when trying to salvage - # more dubious SNPs using local correlations of positions + # more dubious SNPs using local correlations of positions. # Do final corrections to VCF file -export UNIQUE_POS=1 # Keep only the first locus encountered for each position [0, 1]. export CORRECT_ID=1 # Recompute the ID column from columns 1 and 2 [0, 1]. export ID_COLUMN=3 # ID column number [int]. Only used for `CORRECT_ID=1`. export CORRECT_ALLELES=1 # Reverse complement alleles when new genome region is inversed [0, 1]. -export ALLELE_COLUMNS=4,5 # Columns of alleles to reverse complement. Only used for `CORRECT_ALLELES=1` -export SORT_OUTPUT=1 # Sort VCF or output file according to the two first columns - # WARNING: For big VCFs, this will require a lot of RAM +export ALLELE_COLUMNS=4,5 # Columns of alleles to reverse complement. Only used for `CORRECT_ALLELES=1`. + +# Memory hungry options +export UNIQUE_POS=1 # Keep only the first locus encountered for each position [0, 1]. Memory hungry. +export SORT_OUTPUT=1 # Sort VCF or output file according to the first two columns. + # WARNING: For big VCFs, this will require a lot of RAM, around 3X the size + # of the output file.