Skip to content

Commit

Permalink
Merge pull request #154 from XingerTang/feat_recomb_new
Browse files Browse the repository at this point in the history
Provide map file input option for non-hybrid mode #153
  • Loading branch information
gregorgorjanc authored Jan 12, 2024
2 parents f97e8c2 + ff8ac61 commit 383e44f
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 20 deletions.
4 changes: 2 additions & 2 deletions docs/source/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Input Arguments
-start_snp START_SNP
The first marker to consider. The first marker is "1". Default: 1.
-stop_snp STOP_SNP The last marker to consider. Default: all markers considered.
-map_file MAP_FILE A map file for all loci.

|Software| requires a pedigree file (``-ped_file``) and one or more genomic data files to run the analysis.

Expand Down Expand Up @@ -137,8 +138,7 @@ Hybrid peeling arguments
Single locus arguments:
-seg_file SEG_FILE A segregation probabilities file for hybrid peeling.
-seg_map_file SEG_MAP_FILE
A map file for loci in the segregation probabilities file.
-map_file MAP_FILE A map file for all loci in hybrid peeling.
A map file for loci in the segregation probabilities file in hybrid peeling.

To run hybrid peeling, the user needs to supply three files: a ``-map_file``, which gives the genetic positions for the SNPs in the supplied sequence allele read counts data; a ``-seg_map_file``, which gives the genetic positions for the SNPs in the segregation file; and a ``-seg_file``, which gives the segregation values generated via multi-locus iterative peeling. These arguments are not required for running in multi-locus mode.

Expand Down
16 changes: 10 additions & 6 deletions src/tinypeel/Peeling/PeelingInfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from ..tinyhouse import ProbMath
from ..tinyhouse import HaplotypeOperations
from ..tinyhouse import InputOutput


#####################################################################
Expand All @@ -22,8 +23,11 @@ def createPeelingInfo(pedigree, args, createSeg=True, phaseFounder=False):
nInd=pedigree.maxIdn, nFam=pedigree.maxFam, nLoci=nLoci, createSeg=createSeg
)
peelingInfo.isSexChrom = args.sex_chrom
# Information about the peeling positions are handled elsewhere.
peelingInfo.positions = None
if args.map_file:
peelingInfo.positions = np.array(
InputOutput.readMapFile(args.map_file, args.startsnp, args.stopsnp)[2],
dtype=np.int64,
)

# Generate the segregation tensors.
peelingInfo.segregationTensor = ProbMath.generateSegregation(e=1e-06)
Expand Down Expand Up @@ -102,9 +106,9 @@ def setupTransmission(length, peelingInfo):
if peelingInfo.positions is None:
localMap = np.linspace(0, 1, num=peelingInfo.nLoci, dtype=np.float32)
else:
localMap = (
peelingInfo.positions / peelingInfo.positions[-1]
) # This should be sorted. Need to add in code to check.
localMap = (peelingInfo.positions - peelingInfo.positions[0]) / (
peelingInfo.positions[-1] - peelingInfo.positions[0]
)
for i in range(peelingInfo.nLoci - 1):
distance = localMap[i + 1] - localMap[i]
distance = distance * length
Expand Down Expand Up @@ -213,7 +217,7 @@ def getHetMidpoint(geno):
spec["transmissionRate"] = optional(float32[:])
spec["maf"] = optional(float32[:])

spec["positions"] = optional(float32[:]) # Not sure we use this.
spec["positions"] = optional(int64[:])
spec["iteration"] = int64


Expand Down
20 changes: 9 additions & 11 deletions src/tinypeel/tinypeel.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,7 @@ def getLociAndDistance(snpMap, segMap):
def generateSingleLocusSegregation(peelingInfo, pedigree, args):
if args.segfile is not None:
# This just gets the locations in the map files.
snpMap = np.array(
InputOutput.readMapFile(args.map_file, args.startsnp, args.stopsnp)[2]
)
snpMap = peelingInfo.positions
segMap = np.array(InputOutput.readMapFile(args.seg_map_file)[2])

loci, distance = getLociAndDistance(snpMap, segMap)
Expand Down Expand Up @@ -387,6 +385,13 @@ def getArgs():
"stopsnp",
],
)
input_parser.add_argument(
"-map_file",
default=None,
required=False,
type=str,
help="A map file for all loci.",
)

# Output options
output_parser = parser.add_argument_group("Output Options")
Expand Down Expand Up @@ -534,19 +539,12 @@ def getArgs():
)

singleLocus_parser = parser.add_argument_group("Hybrid peeling arguments")
singleLocus_parser.add_argument(
"-map_file",
default=None,
required=False,
type=str,
help="a map file for all loci in hybrid peeling.",
)
singleLocus_parser.add_argument(
"-seg_map_file",
default=None,
required=False,
type=str,
help="a map file for loci in the segregation probabilities file.",
help="A map file for loci in the segregation probabilities file in hybrid peeling.",
)
singleLocus_parser.add_argument(
"-seg_file",
Expand Down
44 changes: 43 additions & 1 deletion tests/functional_tests/run_func_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ def delete_columns(two_d_list, col_del):

class TestClass:
path = os.path.join("tests", "functional_tests")
command = "AlphaPeel "
test_cases = None
input_file_depend_on_test_cases = None

Expand Down Expand Up @@ -96,6 +95,8 @@ def generate_command(self):
"""
generate the command for the test
"""
self.command = "AlphaPeel "

for file in self.input_files:
if (
(self.test_cases is not None)
Expand Down Expand Up @@ -539,6 +540,47 @@ def test_out_id_only(self):
for ind in self.output:
assert "MotherOf" not in ind[0] and "FatherOf" not in ind[0]

def test_map_input(self):
    """
    Run the test for the input map file

    The command is run twice in multi-locus mode on the same genotype and
    pedigree inputs: once without a map file and once with ``map_file``
    added to the inputs. The sorted dosage outputs of the two runs are
    expected to be identical.
    """
    self.test_name = "test_map_input"
    self.prepare_path()

    self.output_file_to_check = "dosage"
    self.arguments = {"method": "multi"}
    self.input_files = ["geno_file", "ped_file"]

    # (output prefix, inputs added for this run, attribute receiving the result)
    runs = (
        ("map_input.no_map_file", [], "first_output"),
        ("map_input.with_map_file", ["map_file"], "second_output"),
    )

    for prefix, extra_inputs, result_attr in runs:
        # Inputs accumulate, so the second run is the first run plus the map.
        self.input_files.extend(extra_inputs)
        self.output_file_prefix = prefix

        self.generate_command()
        os.system(self.command)

        file_name = f"{self.output_file_prefix}.{self.output_file_to_check}.txt"
        self.output_file_path = os.path.join(self.output_path, file_name)

        setattr(self, result_attr, read_and_sort_file(self.output_file_path))

    # the two outputs should match
    assert self.first_output == self.second_output

# TODO test_plink for PLINK
# a. binary PLINK output
# b. binary output + input
Expand Down
6 changes: 6 additions & 0 deletions tests/functional_tests/test_map_input/geno_file.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
M0 1 2 1 0 9
F0 1 2 0 1 2
M1 0 2 0 1 2
F1 2 2 1 0 2
M2 1 2 1 1 2
F2 1 2 0 0 2
5 changes: 5 additions & 0 deletions tests/functional_tests/test_map_input/map_file.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1 1-1 1
1 1-2 2
1 1-3 3
1 1-4 4
1 1-5 5
6 changes: 6 additions & 0 deletions tests/functional_tests/test_map_input/ped_file.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
M0 0 0
F0 0 0
M1 M0 F0
F1 M0 F0
M2 M1 F1
F2 M1 F1

0 comments on commit 383e44f

Please sign in to comment.