-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PIP-1538-use-assembly-name-for-pre (#97)
- Loading branch information
Showing
13 changed files
with
278 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import argparse | ||
|
||
# Assembly names that are reported as supported. Lookups against this tuple are
# case-insensitive, and the spelling used here is the canonical form handed back
# to the caller.
VALID_ASSEMBLIES = (
    "hg18",
    "hg19",
    "hg38",
    "dMel",
    "mm9",
    "mm10",
    "anasPlat1",
    "bTaurus3",
    "canFam3",
    "equCab2",
    "galGal4",
    "Pf3D7",
    "sacCer3",
    "sCerS288c",
    "susScr3",
    "TAIR10",
)
|
||
|
||
def main():
    """
    Command-line entry point.

    Parses the command line, normalizes the given assembly name, then writes the
    normalized name and the WDL-style `true`/`false` supported flag to the two
    output files named on the command line.
    """
    args = get_parser().parse_args()
    name, is_supported = normalize_assembly_name(args.assembly_name)
    supported_flag = get_wdl_boolean_string(is_supported)
    write_string_to_file(name, args.normalized_name_outfile)
    write_string_to_file(supported_flag, args.assembly_is_supported_outfile)
|
||
|
||
def normalize_assembly_name(assembly_name):
    """
    Returns a tuple of the possibly normalized assembly name and a boolean indicating
    whether or not the assembly is supported.

    Matching against `VALID_ASSEMBLIES` is case-insensitive. When a match is found
    the canonical spelling from `VALID_ASSEMBLIES` is returned; otherwise the
    lowercased input is returned together with `False`. Names containing `grch` are
    first converted to their `hg` equivalent.
    """
    lowered_name = assembly_name.lower()
    if "grch" in lowered_name:
        try:
            lowered_name = normalize_grch_name(lowered_name)
        except ValueError:
            # The name mentions GRCh but carries no parseable version (e.g. "grch"
            # on its own or with an unexpected suffix). Report it as unsupported
            # rather than crashing with a traceback.
            return lowered_name, False
    for canonical_name in VALID_ASSEMBLIES:
        if canonical_name.lower() == lowered_name:
            return canonical_name, True
    return lowered_name, False
|
||
|
||
def normalize_grch_name(assembly_name):
    """
    Convert `GRCh` names to `hg` ones, GRCh38 = hg38, GRCh37 = hg19, and GRCh36 = hg18

    The input is matched case-insensitively. A numeric patch suffix such as the
    `.p13` in `GRCh38.p13` is ignored, so patch releases map to the same `hg` name
    as their base release.

    Raises `ValueError` if no integer version can be parsed from the name.
    """
    version_text = assembly_name.lower().replace("grch", "")
    # Drop a trailing patch designation (".p<digits>") before parsing the version
    version_text, patch_separator, patch = version_text.partition(".p")
    try:
        if patch_separator and not patch.isdigit():
            raise ValueError(patch)
        assembly_version = int(version_text)
    except ValueError:
        raise ValueError(
            "Cannot parse a GRCh version from assembly name {!r}".format(assembly_name)
        ) from None
    # hg numbering was aligned with GRCh numbering starting at 38; before that the
    # hg number is 18 lower (GRCh37 = hg19, GRCh36 = hg18).
    if assembly_version < 38:
        hg_assembly_version = assembly_version - 18
    else:
        hg_assembly_version = assembly_version
    return "hg" + str(hg_assembly_version)
|
||
|
||
def get_wdl_boolean_string(boolean):
    """
    Render a value in the lowercase form (`true` / `false`) that WDL's
    `read_boolean` expects; Python's own `str` gives the capitalized spelling.
    """
    python_text = str(boolean)
    return python_text.lower()
|
||
|
||
def write_string_to_file(data, filename):
    """
    Write the string `data` to `filename`, replacing any existing contents.

    The file is always written as UTF-8 so the output does not depend on the
    locale of the machine running the script. No trailing newline is added.
    """
    with open(filename, "w", encoding="utf-8") as f:
        f.write(data)
|
||
|
||
def get_parser():
    """
    Build the command-line parser, which takes three positional arguments: the
    assembly name and the two output file names.
    """
    positional_arguments = (
        ("assembly_name", "Assembly name"),
        ("normalized_name_outfile", "Name of file to write normalized name"),
        (
            "assembly_is_supported_outfile",
            "Name for file to write boolean indicating if the assembly is supported by "
            "Juicer",
        ),
    )
    parser = argparse.ArgumentParser()
    for argument_name, help_text in positional_arguments:
        parser.add_argument(argument_name, help=help_text)
    return parser
|
||
|
||
# Only run the command-line entry point when executed as a script, not on import
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"test_normalize_assembly_name.assembly_is_supported_output_path": "is_supported.txt", | ||
"test_normalize_assembly_name.assembly_name": "GRCh38", | ||
"test_normalize_assembly_name.normalized_assembly_name_output_path": "normalized.txt" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
--- | ||
- name: test_normalize_assembly_name | ||
tags: | ||
- integration | ||
command: >- | ||
tests/caper_run.sh | ||
tests/integration/wdl/test_normalize_assembly_name.wdl | ||
tests/integration/json/test_normalize_assembly_name.json | ||
# The output files are not copied to test-output, so their contents cannot easily be checked; check stdout instead | ||
stdout: | ||
contains: | ||
- '"test_normalize_assembly_name.normalize_assembly_name.normalized_assembly_name": "hg38"' | ||
- '"test_normalize_assembly_name.normalize_assembly_name.assembly_is_supported": true' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
version 1.0 | ||
|
||
import "../../../hic.wdl" as hic | ||
|
||
# Test wrapper workflow: forwards its three String inputs unchanged to
# normalize_assembly_name, which is imported from hic.wdl under the `hic` alias.
workflow test_normalize_assembly_name { | ||
input { | ||
String assembly_name | ||
String normalized_assembly_name_output_path | ||
String assembly_is_supported_output_path | ||
} | ||
|
||
call hic.normalize_assembly_name { input: | ||
assembly_name = assembly_name, | ||
normalized_assembly_name_output_path = normalized_assembly_name_output_path, | ||
assembly_is_supported_output_path = assembly_is_supported_output_path, | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import pytest | ||
|
||
from hic_pipeline.normalize_assembly_name import ( | ||
get_wdl_boolean_string, | ||
normalize_assembly_name, | ||
normalize_grch_name, | ||
) | ||
|
||
|
||
@pytest.mark.parametrize(
    "assembly_name,expected",
    [
        ("GRCh38", ("hg38", True)),
        ("hg19", ("hg19", True)),
        ("GRCh37", ("hg19", True)),
        ("grch36", ("hg18", True)),
        # Matching is case-insensitive and hands back the canonical spelling
        ("HG38", ("hg38", True)),
        ("tair10", ("TAIR10", True)),
        ("DMEL", ("dMel", True)),
        ("unknown", ("unknown", False)),
        # Unsupported names come back lowercased
        ("Unknown", ("unknown", False)),
    ],
)
def test_normalize_assembly_name(assembly_name, expected):
    """
    Checks the (normalized name, is supported) tuple for GRCh names, names that are
    already canonical, names differing from the canonical form only by case, and
    unsupported names.
    """
    result = normalize_assembly_name(assembly_name)
    assert result == expected
|
||
|
||
@pytest.mark.parametrize(
    "assembly_name,expected",
    [("GRCh38", "hg38"), ("GRCh37", "hg19"), ("GRCh36", "hg18")],
)
def test_normalize_grch_name_name(assembly_name, expected):
    """Each GRCh release name is converted to the expected hg name."""
    assert normalize_grch_name(assembly_name) == expected
|
||
|
||
@pytest.mark.parametrize("boolean,expected", [(True, "true"), (False, "false")])
def test_get_wdl_boolean_string(boolean, expected):
    """Python booleans are rendered as lowercase WDL boolean strings."""
    assert get_wdl_boolean_string(boolean) == expected
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"test_normalize_assembly_name.assembly_is_supported_output_path": "bar.txt", | ||
"test_normalize_assembly_name.assembly_name": "hg38", | ||
"test_normalize_assembly_name.normalized_assembly_name_output_path": "foo.txt" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
--- | ||
- name: test_normalize_assembly_name_unit | ||
tags: | ||
- unit | ||
command: >- | ||
tests/caper_run.sh | ||
tests/unit/wdl/test_normalize_assembly_name.wdl | ||
tests/unit/json/test_normalize_assembly_name.json | ||
stdout: | ||
contains: | ||
- "hg38" | ||
- "foo.txt" | ||
- "bar.txt" |
Oops, something went wrong.