Skip to content

Commit

Permalink
Improve options
Browse files Browse the repository at this point in the history
  • Loading branch information
enormandeau committed May 29, 2023
1 parent 2b1b875 commit f0a7a47
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 11 deletions.
10 changes: 5 additions & 5 deletions 01_scripts/08_replace_coordinates_in_vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ def reverse_complement(seq):
input_vcf = sys.argv[1]
input_correspondence = sys.argv[2]
unique_pos = sys.argv[3]
correct_id = sys.argv[4]
correct_id = int(sys.argv[4])
id_column = sys.argv[5]
correct_alleles = sys.argv[6]
correct_alleles = int(sys.argv[6])
allele_columns = sys.argv[7]
sort_output = sys.argv[8]
sort_output = int(sys.argv[8])
output_vcf = sys.argv[9]
except:
print(__doc__)
Expand Down Expand Up @@ -95,11 +95,11 @@ def reverse_complement(seq):

# Skip already treated positions
if unique_pos:
if l[2] in viewed_positions:
if (l[0], l[1]) in viewed_positions:
continue

else:
viewed_positions.add(l[2])
viewed_positions.add((l[0], l[1]))

# Sorting
if sort_output:
Expand Down
15 changes: 9 additions & 6 deletions 02_infos/snplift_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export SKIP_VISUALIZATION=1 # Avoid creating a plot to explore features. These a
# for debugging [0, 1].

# Number of CPUs
export NCPUS=10 # Number of cores to use (around 10 and maximum 20 is recommended)
export NCPUS=20 # Number of cores to use (around 10 and maximum 20 is recommended)
# For less than 100K SNPs, 1 to 4 cores is a good choice.
# For less than 1M SNPs, 10 cores is a good choice.
# Above this, 20 cores is going to be slightly faster.
Expand All @@ -33,13 +33,16 @@ export WINDOW_LENGTH=300 # Size of sequences kept on both sides of each SNP.

# Number of neighbours to use
export NUM_NEIGHBOURS=20 # Number of neighbour SNPs to consider when trying to salvage
# more dubious SNPs using local correlations of positions
# more dubious SNPs using local correlations of positions.

# Do final corrections to VCF file
export UNIQUE_POS=1 # Keep only the first locus encountered for each position [0, 1].
export CORRECT_ID=1 # Recompute the ID column from columns 1 and 2 [0, 1].
export ID_COLUMN=3 # ID column number [int]. Only used for `CORRECT_ID=1`.
export CORRECT_ALLELES=1 # Reverse complement alleles when new genome region is inverted [0, 1].
export ALLELE_COLUMNS=4,5 # Columns of alleles to reverse complement. Only used for `CORRECT_ALLELES=1`
export SORT_OUTPUT=1 # Sort VCF or output file according to the two first columns
# WARNING: For big VCFs, this will require a lot of RAM
export ALLELE_COLUMNS=4,5 # Columns of alleles to reverse complement. Only used for `CORRECT_ALLELES=1`.

# Memory hungry options
export UNIQUE_POS=1 # Keep only the first locus encountered for each position [0, 1]. Memory hungry.
export SORT_OUTPUT=1 # Sort VCF or output file according to the first two columns.
# WARNING: For big VCFs, this will require a lot of RAM, around 3X the size
# of the output file.

0 comments on commit f0a7a47

Please sign in to comment.