diff --git a/pvaccompare/compare_tools/comparison_router.py b/pvaccompare/compare_tools/comparison_router.py index 5240e14..6ca97b9 100644 --- a/pvaccompare/compare_tools/comparison_router.py +++ b/pvaccompare/compare_tools/comparison_router.py @@ -16,7 +16,9 @@ def find_file(results_folder, subfolder, pattern): return files[0] if files else None -def write_header(output_file, aggregated_columns, unaggregated_columns, reference_match_columns): +def write_header( + output_file, aggregated_columns, unaggregated_columns, reference_match_columns +): """ Purpose: Writes the report generation date and time to the top of the output file Modifies: Nothing @@ -44,7 +46,9 @@ def run_comparison( Returns: None """ output_file = output_file + "_" + prefix.replace("/", "_") + ".tsv" - write_header(output_file, aggregated_columns, unaggregated_columns, reference_match_columns) + write_header( + output_file, aggregated_columns, unaggregated_columns, reference_match_columns + ) if "pVACseq" not in prefix: yml1_path = find_file(results_folder1, prefix + "/log", "inputs.yml") diff --git a/pvaccompare/comparisons/compare_aggregated_tsv.py b/pvaccompare/comparisons/compare_aggregated_tsv.py index ab17820..7716277 100644 --- a/pvaccompare/comparisons/compare_aggregated_tsv.py +++ b/pvaccompare/comparisons/compare_aggregated_tsv.py @@ -1,4 +1,5 @@ from run_utils import * +import logging class CompareAggregatedTSV: @@ -12,23 +13,21 @@ def __init__(self, input_file1, input_file2, output_file, columns_to_compare): self.ID_replacement_cols = ["Gene", "AA Change"] self.columns_to_compare = columns_to_compare - def check_id(self, cols1_to_drop, cols2_to_drop): + def check_id(self): """ Purpose: Replace ID with Gene-AA_change if needed Modifies: self.contains_id, self.replaced_id Returns: None """ - if "ID" in cols1_to_drop or "ID" in cols2_to_drop: + if "ID" not in self.df1.columns or "ID" not in self.df2.columns: self.contains_id = False - - if not self.contains_id: can_replace = True for col in self.ID_replacement_cols: if col not in self.df1.columns or col not in self.df2.columns: can_replace = False if can_replace: self.combine_gene_and_AA_change() - print("\u2022", "Replaced ID with Gene and AA Change") + logging.info("\u2022 Replaced ID with Gene and AA Change") self.replaced_id = True def combine_gene_and_AA_change(self): diff --git a/pvaccompare/comparisons/compare_reference_matches_tsv.py b/pvaccompare/comparisons/compare_reference_matches_tsv.py index 9c1dd65..d2bebb7 100644 --- a/pvaccompare/comparisons/compare_reference_matches_tsv.py +++ b/pvaccompare/comparisons/compare_reference_matches_tsv.py @@ -1,4 +1,5 @@ from run_utils import * +import logging class CompareReferenceMatchesTSV: @@ -53,15 +54,15 @@ def check_duplicate_ids(self): if max_hits_file1 > 1 or max_hits_file2 > 1: if max_hits_file1 > 1 and max_hits_file2 > 1: - print( + logging.error( "ERROR: Duplicate unique records were found in both files. Writing number of hits only." ) elif max_hits_file1 > 1: - print( + logging.error( "ERROR: Duplicate unique records were found in file 1. Writing number of hits only." ) else: - print( + logging.error( "ERROR: Duplicate unique records were found in file 2. Writing number of hits only." ) return True diff --git a/pvaccompare/run.py b/pvaccompare/run.py index dc3df20..8c0b55e 100644 --- a/pvaccompare/run.py +++ b/pvaccompare/run.py @@ -5,7 +5,7 @@ logging.basicConfig(level=logging.DEBUG, format="%(message)s") # TODO: Speed up identical file comparison -# TODO: Implement tests +# TODO: Add line numbers to output differences def define_parser(): diff --git a/pvaccompare/run_utils.py b/pvaccompare/run_utils.py index d5463e1..1f2485a 100644 --- a/pvaccompare/run_utils.py +++ b/pvaccompare/run_utils.py @@ -37,35 +37,30 @@ def check_column_formatting(df1, df2): break -def output_dropped_cols(cols1_to_drop, cols2_to_drop): +def output_dropped_cols(df1, df2, original_columns): """ Purpose: Outputs the dropped comparison columns to the terminal and creates a columns dropped message for the generated report Modifies: Nothing Returns: String columns_dropped_message """ columns_dropped_message = "" - for col in cols1_to_drop: - if col in cols2_to_drop: + for col in original_columns: + if col not in df1.columns and col not in df2.columns: logging.info( - "\u2022 Comparison dropped: '%s' is not present in either file", col + "\u2022 Column dropped: '%s' is not present in either file", col ) columns_dropped_message += ( - f"Comparison dropped: '{col}' is not present in either file\n" - ) - else: - logging.info( - "\u2022 Comparison dropped: '%s' is only present in file 1", col + f"Column dropped: '{col}' is not present in either file\n" ) + elif col not in df1.columns: + logging.info("\u2022 Column dropped: '%s' is only present in file 2", col) columns_dropped_message += ( - f"Comparison dropped: '{col}' is only present in file 1\n" - ) - for col in cols2_to_drop: - if col not in cols1_to_drop: - logging.info( - "\u2022 Comparison dropped: '%s' is only present in file 2", col + f"Column dropped: '{col}' is only present in file 2\n" ) + elif col not in df2.columns: + logging.info("\u2022 Column dropped: '%s' is only present in file 1", col) columns_dropped_message += ( - f"Comparison dropped: '{col}' is only present in file 2\n" + f"Column dropped: '{col}' is only present in file 1\n" ) return columns_dropped_message @@ -93,63 +88,6 @@ def load_tsv_files(input_file1, input_file2): return df1, df2 -def make_rows_equal(df1, df2): - """ - Purpose: Add 'dummy data' to make the two dataframes have an equal number of rows - Modifies: One of the two dataframes depending on which is smaller - Returns: Two dataframes - """ - num_rows_to_add = abs(df1.shape[0] - df2.shape[0]) - if df1.shape[0] > df2.shape[0]: - dummy_data = pd.DataFrame( - np.nan, index=range(num_rows_to_add), columns=df2.columns - ) - df2 = pd.concat([df2, dummy_data], ignore_index=True) - else: - dummy_data = pd.DataFrame( - np.nan, index=range(num_rows_to_add), columns=df1.columns - ) - df1 = pd.concat([df1, dummy_data], ignore_index=True) - return df1, df2 - - -def drop_useless_columns(df1, df2, columns_to_compare): - """ - Purpose: First removes columns that are not included in the comparison, excluding 'ID', then removes columns not present - in both files - Modifies: df1 and df2 - Returns: Two lists containing the columns dropped in the corresponding dataframes - """ - columns_to_keep = set(["ID"]) - if "ID" not in df1.columns or "ID" not in df2.columns: - columns_to_keep.update(["Gene", "AA Change"]) - - # Drop columns that are not in columns_to_compare and not 'ID' - cols1_to_drop = [ - col - for col in df1.columns - if (col not in columns_to_compare) and (col not in columns_to_keep) - ] - cols2_to_drop = [ - col - for col in df2.columns - if (col not in columns_to_compare) and (col not in columns_to_keep) - ] - - df1.drop(columns=cols1_to_drop, inplace=True) - df2.drop(columns=cols2_to_drop, inplace=True) - - # Drop columns that are not present in both dataframes - common_cols = set(df1.columns).intersection(set(df2.columns)) - cols1_to_drop = [col for col in df1.columns if col not in common_cols] - cols2_to_drop = [col for col in df2.columns if col not in common_cols] - - df1.drop(columns=cols1_to_drop, inplace=True) - df2.drop(columns=cols2_to_drop, inplace=True) - - return cols1_to_drop, cols2_to_drop - - def check_columns_to_compare(df1, df2, columns_to_compare): """ Purpose: Add columns present in both dataframes to columns_to_keep @@ -220,7 +158,12 @@ def get_file_differences( Modifies: Nothing Returns: Dictionary of differences and a dictionary of unique variants """ - merged_df = pd.merge(df1, df2, on="ID", suffixes=("_file1", "_file2")) + df1_selected = df1[["ID"] + columns_to_compare] + df2_selected = df2[["ID"] + columns_to_compare] + + merged_df = pd.merge( + df1_selected, df2_selected, on="ID", suffixes=("_file1", "_file2") + ) differences = {} for col in columns_to_compare: @@ -242,9 +185,7 @@ def get_file_differences( ) # Mask for rows where one value is NaN and the other is not - nan_mask = ( - merged_df[col_file1].isna() & ~merged_df[col_file2].isna() - ) | ( + nan_mask = (merged_df[col_file1].isna() & ~merged_df[col_file2].isna()) | ( ~merged_df[col_file1].isna() & merged_df[col_file2].isna() ) diff --git a/pvaccompare/runners/run_compare_aggregated_tsv.py b/pvaccompare/runners/run_compare_aggregated_tsv.py index cca98e6..2939159 100755 --- a/pvaccompare/runners/run_compare_aggregated_tsv.py +++ b/pvaccompare/runners/run_compare_aggregated_tsv.py @@ -14,23 +14,20 @@ def main(input_file1, input_file2, output_file, columns_to_compare): input_file1, input_file2, output_file, columns_to_compare ) check_column_formatting(comparer.df1, comparer.df2) + comparer.check_id() - cols1_to_drop, cols2_to_drop = drop_useless_columns( + columns_dropped_message = output_dropped_cols( comparer.df1, comparer.df2, comparer.columns_to_compare ) - columns_dropped_message = output_dropped_cols(cols1_to_drop, cols2_to_drop) comparer.columns_to_compare = check_columns_to_compare( comparer.df1, comparer.df2, comparer.columns_to_compare ) - comparer.check_id(cols1_to_drop, cols2_to_drop) common_variants = get_common_variants(comparer.df1, comparer.df2) unique_variants_file1, unique_variants_file2 = get_unique_variants( comparer.df1, comparer.df2, common_variants ) - if comparer.df1.shape != comparer.df2.shape: - comparer.df1, comparer.df2 = make_rows_equal(comparer.df1, comparer.df2) differences, unique_variants = get_file_differences( comparer.df1, comparer.df2, diff --git a/pvaccompare/runners/run_compare_reference_matches_tsv.py b/pvaccompare/runners/run_compare_reference_matches_tsv.py index ab0d31d..6fae098 100644 --- a/pvaccompare/runners/run_compare_reference_matches_tsv.py +++ b/pvaccompare/runners/run_compare_reference_matches_tsv.py @@ -14,23 +14,19 @@ def main(input_file1, input_file2, output_file, columns_to_compare): input_file1, input_file2, output_file, columns_to_compare ) check_column_formatting(comparer.df1, comparer.df2) - comparer.create_id_column() - common_variants = get_common_variants(comparer.df1, comparer.df2) - unique_variants_file1, unique_variants_file2 = get_unique_variants( - comparer.df1, comparer.df2, common_variants - ) - cols1_to_drop, cols2_to_drop = drop_useless_columns( + columns_dropped_message = output_dropped_cols( comparer.df1, comparer.df2, comparer.columns_to_compare ) - columns_dropped_message = output_dropped_cols(cols1_to_drop, cols2_to_drop) comparer.columns_to_compare = check_columns_to_compare( comparer.df1, comparer.df2, comparer.columns_to_compare ) - if comparer.df1.shape != comparer.df2.shape: - comparer.df1, comparer.df2 = make_rows_equal(comparer.df1, comparer.df2) + common_variants = get_common_variants(comparer.df1, comparer.df2) + unique_variants_file1, unique_variants_file2 = get_unique_variants( + comparer.df1, comparer.df2, common_variants + ) if comparer.check_duplicate_ids(): differences_summary = generate_differences_summary( diff --git a/pvaccompare/runners/run_compare_unaggregated_tsv.py b/pvaccompare/runners/run_compare_unaggregated_tsv.py index 3ae3f17..ee9ea00 100644 --- a/pvaccompare/runners/run_compare_unaggregated_tsv.py +++ b/pvaccompare/runners/run_compare_unaggregated_tsv.py @@ -8,31 +8,26 @@ def main(input_file1, input_file2, output_file, columns_to_compare): Modifies: Nothing Returns: None """ - id_format = ( - "Chromosome-Start-Stop-Reference-Variant-HLA_Allele-Sub_peptide_Position-Mt_Epitope_Seq-Index" - ) + id_format = "Chromosome-Start-Stop-Reference-Variant-HLA_Allele-Sub_peptide_Position-Mt_Epitope_Seq-Index" comparer = CompareUnaggregatedTSV( input_file1, input_file2, output_file, columns_to_compare ) check_column_formatting(comparer.df1, comparer.df2) - comparer.create_id_column() - common_variants = get_common_variants(comparer.df1, comparer.df2) - unique_variants_file1, unique_variants_file2 = get_unique_variants( - comparer.df1, comparer.df2, common_variants - ) - cols1_to_drop, cols2_to_drop = drop_useless_columns( + columns_dropped_message = output_dropped_cols( comparer.df1, comparer.df2, comparer.columns_to_compare ) - columns_dropped_message = output_dropped_cols(cols1_to_drop, cols2_to_drop) comparer.columns_to_compare = check_columns_to_compare( comparer.df1, comparer.df2, comparer.columns_to_compare ) - if comparer.df1.shape != comparer.df2.shape: - comparer.df1, comparer.df2 = make_rows_equal(comparer.df1, comparer.df2) + common_variants = get_common_variants(comparer.df1, comparer.df2) + unique_variants_file1, unique_variants_file2 = get_unique_variants( + comparer.df1, comparer.df2, common_variants + ) + differences, unique_variants = get_file_differences( comparer.df1, comparer.df2, diff --git a/pvaccompare/tests/test_compare_aggregated_tsv.py b/pvaccompare/tests/test_compare_aggregated_tsv.py index 2c51935..1605a84 100644 --- a/pvaccompare/tests/test_compare_aggregated_tsv.py +++ b/pvaccompare/tests/test_compare_aggregated_tsv.py @@ -6,6 +6,7 @@ # To run the tests navigate to pvaccompare/ and run the following: # python -m unittest tests/test_compare_aggregated_tsv.py +# python -m unittest discover -s tests class TestRunCompareAggregatedTSV(unittest.TestCase): def setUp(self): self.input_file1 = tempfile.NamedTemporaryFile(delete=False, suffix=".tsv") @@ -74,3 +75,38 @@ def test_different_files(self): ) as expected_file: expected_output = expected_file.read().strip() self.assertEqual(sanitized_output.strip(), expected_output) + + def test_missing_id(self): + with open("tests/test_data/aggregated_input1.tsv", "r") as f: + content1 = f.read() + with open("tests/test_data/aggregated_input3.tsv", "r") as f: + content2 = f.read() + + self.input_file1.write(content1.encode()) + self.input_file2.write(content2.encode()) + self.input_file1.close() + self.input_file2.close() + + with self.assertLogs(level="INFO") as log: + main( + self.input_file1.name, + self.input_file2.name, + self.output_file.name, + self.columns_to_compare, + ) + self.assertIn("INFO:root:• Replaced ID with Gene and AA Change", log.output) + + self.output_file.seek(0) + output_content = self.output_file.read().decode() + sanitized_output = "\n".join( + [ + line + for line in output_content.splitlines() + if not line.startswith("File 1:") and not line.startswith("File 2:") + ] + ) + with open( + "tests/test_data/aggregated_id_change_output.tsv", "r" + ) as expected_file: + expected_output = expected_file.read().strip() + self.assertEqual(sanitized_output.strip(), expected_output) diff --git a/pvaccompare/tests/test_compare_json.py b/pvaccompare/tests/test_compare_json.py index 87f4834..b684a1b 100644 --- a/pvaccompare/tests/test_compare_json.py +++ b/pvaccompare/tests/test_compare_json.py @@ -6,6 +6,7 @@ # To run the tests navigate to pvaccompare/ and run the following: # python -m unittest tests/test_compare_json.py +# python -m unittest discover -s tests class TestRunCompareJSON(unittest.TestCase): def setUp(self): self.input_file1 = tempfile.NamedTemporaryFile(delete=False, suffix=".json") diff --git a/pvaccompare/tests/test_compare_reference_matches_tsv.py b/pvaccompare/tests/test_compare_reference_matches_tsv.py index 19f56d0..d8df5a6 100644 --- a/pvaccompare/tests/test_compare_reference_matches_tsv.py +++ b/pvaccompare/tests/test_compare_reference_matches_tsv.py @@ -6,6 +6,7 @@ # To run the tests navigate to pvaccompare/ and run the following: # python -m unittest tests/test_compare_reference_matches_tsv.py +# python -m unittest discover -s tests class TestRunCompareReferenceMatchesTSV(unittest.TestCase): def setUp(self): self.input_file1 = tempfile.NamedTemporaryFile(delete=False, suffix=".tsv") @@ -82,11 +83,16 @@ def test_duplicate_records(self): self.input_file1.close() self.input_file2.close() - main( - self.input_file1.name, - self.input_file2.name, - self.output_file.name, - self.columns_to_compare, + with self.assertLogs(level="INFO") as log: + main( + self.input_file1.name, + self.input_file2.name, + self.output_file.name, + self.columns_to_compare, + ) + self.assertIn( + "ERROR:root:ERROR: Duplicate unique records were found in file 2. Writing number of hits only.", + log.output, ) self.output_file.seek(0) diff --git a/pvaccompare/tests/test_compare_unaggregated_tsv.py b/pvaccompare/tests/test_compare_unaggregated_tsv.py index caf8609..2f67834 100644 --- a/pvaccompare/tests/test_compare_unaggregated_tsv.py +++ b/pvaccompare/tests/test_compare_unaggregated_tsv.py @@ -6,6 +6,7 @@ # To run the tests navigate to pvaccompare/ and run the following: # python -m unittest tests/test_compare_unaggregated_tsv.py +# python -m unittest discover -s tests class TestRunCompareUnaggregatedTSV(unittest.TestCase): def setUp(self): self.input_file1 = tempfile.NamedTemporaryFile(delete=False, suffix=".tsv") @@ -79,3 +80,44 @@ def test_different_files(self): ) as expected_file: expected_output = expected_file.read().strip() self.assertEqual(sanitized_output.strip(), expected_output) + + def test_columns_missing(self): + with open("tests/test_data/unaggregated_input1.tsv", "r") as f: + content1 = f.read() + with open("tests/test_data/unaggregated_input3.tsv", "r") as f: + content2 = f.read() + + self.input_file1.write(content1.encode()) + self.input_file2.write(content2.encode()) + self.input_file1.close() + self.input_file2.close() + + with self.assertLogs(level="INFO") as log: + main( + self.input_file1.name, + self.input_file2.name, + self.output_file.name, + self.columns_to_compare, + ) + expected_logs = [ + "INFO:root:• Column dropped: 'Median MT IC50 Score' is only present in file 1", + "INFO:root:• Column dropped: 'Median WT IC50 Score' is only present in file 1", + ] + + for expected_log in expected_logs: + self.assertIn(expected_log, log.output) + + self.output_file.seek(0) + output_content = self.output_file.read().decode() + sanitized_output = "\n".join( + [ + line + for line in output_content.splitlines() + if not line.startswith("File 1:") and not line.startswith("File 2:") + ] + ) + with open( + "tests/test_data/unaggregated_col_dropped_output.tsv", "r" + ) as expected_file: + expected_output = expected_file.read().strip() + self.assertEqual(sanitized_output.strip(), expected_output) diff --git a/pvaccompare/tests/test_compare_yml.py b/pvaccompare/tests/test_compare_yml.py index bfa7378..6c8a6cb 100644 --- a/pvaccompare/tests/test_compare_yml.py +++ b/pvaccompare/tests/test_compare_yml.py @@ -6,6 +6,7 @@ # To run the tests navigate to pvaccompare/ and run the following: # python -m unittest tests/test_compare_yml.py +# python -m unittest discover -s tests class TestRunCompareYML(unittest.TestCase): def setUp(self): self.input_file1 = tempfile.NamedTemporaryFile(delete=False, suffix=".yml") diff --git a/pvaccompare/tests/test_data/aggregated_id_change_output.tsv b/pvaccompare/tests/test_data/aggregated_id_change_output.tsv new file mode 100644 index 0000000..9786bf6 --- /dev/null +++ b/pvaccompare/tests/test_data/aggregated_id_change_output.tsv @@ -0,0 +1,22 @@ + +============================== AGGREGATED TSV COMPARISON ============================== + + + +/* Differences Summary */ +----------------------------- +Total number of variants: 18 +Number of common variants: 18 +Number of variants unique to file 1: 0 +Number of variants unique to file 2: 0 +----- +Number of differences in Best Peptide: 1 + + +============[ DIFFERENCES IN BEST PEPTIDE ]============ + + +ID Format: 'Gene (AA_Change)' + +ID File 1 File 2 +SIX4 (E23Q): QENGMQSA -> QENCMQSA \ No newline at end of file diff --git a/pvaccompare/tests/test_data/aggregated_input3.tsv b/pvaccompare/tests/test_data/aggregated_input3.tsv new file mode 100644 index 0000000..e58907c --- /dev/null +++ b/pvaccompare/tests/test_data/aggregated_input3.tsv @@ -0,0 +1,19 @@ +Gene AA Change Num Passing Transcripts Best Peptide Best Transcript TSL Allele Pos Prob Pos Num Passing Peptides IC50 MT IC50 WT %ile MT %ile WT RNA Expr RNA VAF Allele Expr RNA Depth DNA VAF Tier Ref Match Evaluation +ADAR E806V 15 AERMGFTVV ENST00000368474.9 1 HLA-B*45:01 8 None 6 76.110 61.796 0.100 0.125 131.831 0.348 45.877 1233.0 0.302 Pass False Pending +KIF1C S433F 1 TEFQIGPEEA ENST00000320785.10 1 HLA-B*45:01 3 None 2 152.101 166.309 0.350 0.473 121.452 0.298 36.193 1679.0 0.316 Pass False Pending +OSTC F9L 3 YRVPLLVL ENST00000361564.9 1 HLA-C*06:02 5 None 3 282.169 272.915 0.232 0.202 173.862 0.484 84.149 1028.0 0.462 Pass False Pending +ARID1B G910A 9 SPGGQMHAA ENST00000346085.10 1 HLA-B*82:02 9 None 3 345.587 6116.264 0.820 2.6 39.753 1.0 39.753 163.0 1.000 Pass False Pending +MSH6 D1255N 6 VENYSQNVA ENST00000234420.11 1 HLA-B*45:01 3 None 5 66.513 332.806 0.254 0.665 48.826 0.338 16.503 352.0 0.318 Pass False Pending +RPRD1A Q21H 4 SELSNSQHSV ENST00000399022.9 1 HLA-B*45:01 8 None 2 368.846 354.639 0.430 0.415 57.244 0.5 28.622 602.0 0.417 Pass False Pending +SURF1 N89K 1 RRKWKLKLI ENST00000371974.8 1 HLA-C*06:02 7 None 3 212.834 161.921 0.451 0.332 23.353 0.762 17.795 563.0 0.700 Pass False Pending +MAU2 S111R 1 VKFEAARLL ENST00000262815.13 1 HLA-C*06:02 7 None 3 212.075 484.278 0.424 1.251 34.845 0.445 15.506 238.0 0.345 Pass False Pending +SIX4 E23Q 2 QENCMQSA ENST00000216513.5 1 HLA-B*45:01 6 None 2 222.019 324.687 0.230 0.405 15.015 0.987 14.82 76.0 1.000 Pass False Pending +ZNF548 D12Y 9 VVFEYVAIY ENST00000366197.9 1 HLA-A*29:02 5 None 12 41.063 75.492 0.115 0.26 13.433 0.378 5.078 127.0 0.468 Pass False Pending +MYBBP1A E653G 3 VEVLVGILLA ENST00000254718.9 1 HLA-B*45:01 6 None 1 331.249 403.175 0.730 1.03 47.489 0.319 15.149 420.0 0.352 Pass False Pending +UQCC1 W44S 7 SRTSQSPQM ENST00000374385.10 1 HLA-C*06:02 6 None 1 365.305 752.965 0.594 1.064 47.720 0.327 15.604 581.0 0.434 Pass False Pending +SORBS3 R336P 1 APSLSPHKM ENST00000240123.12 1 HLA-B*82:02 2 None 2 250.848 24191.463 0.370 22.0 23.107 0.374 8.642 195.0 0.464 Pass True Pending +ADPRHL1 H220N 4 QENWFYFEA ENST00000375418.8 1 HLA-B*45:01 3 None 7 11.080 11.608 0.027 0.033 2.743 0.917 2.515 24.0 0.517 Pass False Pending +SETD6 R185H 4 DLANIHSEY ENST00000219315.9 1 HLA-A*29:02 6 None 2 103.731 277.9 0.483 0.94 11.621 0.34 3.951 100.0 0.335 Pass False Pending +ATP7A D870H 13 HESLITGEA ENST00000341514.11 1 HLA-B*45:01 1 None 1 164.310 360.223 0.400 0.75 5.152 0.951 4.9 41.0 0.982 Pass False Pending +ZBTB3 S405F 2 EPLYLSFEY ENST00000394807.5 1 HLA-A*29:02 7 None 3 382.420 2333.53 1.167 2.8 13.703 0.601 8.236 148.0 0.670 Pass False Pending +ZNF25 E21K 1 KEKWKLLTPA ENST00000302609.8 1 HLA-B*45:01 3 None 2 377.257 137.86 0.640 0.47 13.089 0.471 6.165 187.0 0.485 Pass False Pending \ No newline at end of file diff --git a/pvaccompare/tests/test_data/unaggregated_col_dropped_output.tsv b/pvaccompare/tests/test_data/unaggregated_col_dropped_output.tsv new file mode 100644 index 0000000..5105408 --- /dev/null +++ b/pvaccompare/tests/test_data/unaggregated_col_dropped_output.tsv @@ -0,0 +1,26 @@ + + +============================== UNAGGREGATED TSV COMPARISON ============================== + + + +Column dropped: 'Median MT IC50 Score' is only present in file 1 +Column dropped: 'Median WT IC50 Score' is only present in file 1 + +/* Differences Summary */ +----------------------------- +Total number of variants: 18 +Number of common variants: 18 +Number of variants unique to file 1: 0 +Number of variants unique to file 2: 0 +----- +Number of differences in Biotype: 1 + + +============[ DIFFERENCES IN BIOTYPE ]============ + + +ID Format: Chromosome-Start-Stop-Reference-Variant-HLA_Allele-Sub_peptide_Position-Mt_Epitope_Seq-Index + +ID File 1 File 2 +chr1-16006133-16006134-G-T-HLA-A*29:02-3-VTPMGWGC-1.SRARP.ENST00000329454.2.missense.100G/W: other -> protein_coding \ No newline at end of file diff --git a/pvaccompare/tests/test_data/unaggregated_input3.tsv b/pvaccompare/tests/test_data/unaggregated_input3.tsv new file mode 100644 index 0000000..39e462d --- /dev/null +++ b/pvaccompare/tests/test_data/unaggregated_input3.tsv @@ -0,0 +1,19 @@ +Chromosome Start Stop Reference Variant Transcript Transcript Support Level Transcript Length Biotype Ensembl Gene ID Variant Type Mutation Protein Position Gene Name HGVSc HGVSp HLA Allele Peptide Length Sub-peptide Position Mutation Position MT Epitope Seq WT Epitope Seq Best MT IC50 Score Method Best MT IC50 Score Corresponding WT IC50 Score Corresponding Fold Change Best MT Percentile Method Best MT Percentile Corresponding WT Percentile Tumor DNA Depth Tumor DNA VAF Tumor RNA Depth Tumor RNA VAF Normal Depth Normal VAF Gene Expression Transcript Expression Median Fold Change Median MT Percentile Median WT Percentile MHCflurryEL Processing WT Score MHCflurryEL Processing MT Score MHCflurryEL Presentation WT Score MHCflurryEL Presentation MT Score MHCflurryEL Presentation WT Percentile MHCflurryEL Presentation MT Percentile MHCflurry WT IC50 Score MHCflurry MT IC50 Score MHCflurry WT Percentile MHCflurry MT Percentile MHCnuggetsI WT IC50 Score MHCnuggetsI MT IC50 Score MHCnuggetsI WT Percentile MHCnuggetsI MT Percentile NetMHC WT IC50 Score NetMHC MT IC50 Score NetMHC WT Percentile NetMHC MT Percentile NetMHCcons WT IC50 Score NetMHCcons MT IC50 Score NetMHCcons WT Percentile NetMHCcons MT Percentile NetMHCpan WT IC50 Score NetMHCpan MT IC50 Score NetMHCpan WT Percentile NetMHCpan MT Percentile NetMHCpanEL WT Score NetMHCpanEL MT Score NetMHCpanEL WT Percentile NetMHCpanEL MT Percentile PickPocket WT IC50 Score PickPocket MT IC50 Score PickPocket WT Percentile PickPocket MT Percentile SMM WT IC50 Score SMM MT IC50 Score SMM WT Percentile SMM MT Percentile SMMPMBEC WT IC50 Score SMMPMBEC MT IC50 Score SMMPMBEC WT Percentile SMMPMBEC MT Percentile Index Problematic Positions cterm_7mer_gravy_score max_7mer_gravy_score difficult_n_terminal_residue c_terminal_cysteine c_terminal_proline cysteine_count n_terminal_asparagine asparagine_proline_bond_count +chr1 16006133 16006134 G T ENST00000329454.2 1 169 protein_coding ENSG00000183888 missense G/W 100 SRARP ENST00000329454.2:c.298G>T ENSP00000332162.2:p.Gly100Trp HLA-A*29:02 8 1 8 TRVTPMGW TRVTPMGG MHCnuggetsI 5975.7 5979.4 1.001 NetMHCpanEL 19.0 94.0 177 1.0 0 0.0 140 0.007 0.082 0.082 1.131 47.5 89.755 0.0162521302700042 0.2146173901855945 0.0032409171573275 0.0073226566762991 99.28660326086955 31.64317934782608 33131.214717405455 29360.079756494662 79.69287499999999 34.4285 5979.4 5975.7 88.51 86.9 42736.34 39003.21 94.0 73.0 37739.56 27131.77 91.0 47.0 45107.12 37303.14 94.0 54.0 1e-06 0.000244 94.0 19.0 50000.0 50000.0 48.0 48.0 72218.75546109042 95202.86103673978 75.0 87.0 38366.307210597464 34431.028829451876 40.0 35.0 1.SRARP.ENST00000329454.2.missense.100G/W None -0.28571428571428564 -0.2571428571428571 False False False 0 False 0 +chr1 16006133 16006134 G T ENST00000329454.2 1 169 protein_coding ENSG00000183888 missense G/W 100 SRARP ENST00000329454.2:c.298G>T ENSP00000332162.2:p.Gly100Trp HLA-A*29:02 8 2 7 RVTPMGWG RVTPMGGG MHCnuggetsI 5774.79 5915.31 1.024 MHCnuggetsI 16.79 51.61 177 1.0 0 0.0 140 0.007 0.082 0.082 1.0 45.5 68.725 0.0117710828781127 0.0056250356137752 0.003208284136529 0.0031380962516865 99.28660326086955 99.28660326086955 32937.3096681338 32952.3967604395 74.450375 74.450375 5915.31 5774.79 51.61 16.79 40195.52 35402.98 80.0 55.0 31569.21 28795.36 63.0 53.0 43232.57 36585.65 85.0 51.0 4e-06 3.5e-05 75.0 40.0 15881.048931852625 15881.048931852625 18.0 18.0 33392.581914282324 33392.581914282324 34.0 34.0 41015.68792124474 38190.03004120177 44.0 40.0 1.SRARP.ENST00000329454.2.missense.100G/W None 0.3 0.3 False False False 0 False 0 +chr1 16006133 16006134 G T ENST00000329454.2 1 169 protein_coding ENSG00000183888 missense G/W 100 SRARP ENST00000329454.2:c.298G>T ENSP00000332162.2:p.Gly100Trp HLA-A*29:02 8 3 6 VTPMGWGC VTPMGGGC MHCnuggetsI 5820.91 5880.83 1.01 SMM 8.1 8.1 177 1.0 0 0.0 140 0.007 0.082 0.082 1.06 47.0 58.0 0.0085067339241504 0.0201449580490589 0.0034462359996615 0.0036875480816715 99.28660326086955 99.28660326086955 30229.35802588496 29408.42162655613 41.302125 34.4285 5880.83 5820.91 37.0 22.61 41694.97 38124.12 88.0 68.0 34798.02 30726.73 78.0 60.0 43871.54 39574.11 88.0 65.0 6e-06 1.5e-05 68.0 53.0 50000.0 38984.65491118616 48.0 41.0 8504.523783121827 8504.523783121827 8.1 8.1 28903.470997020624 30616.10932414785 29.0 31.0 1.SRARP.ENST00000329454.2.missense.100G/W 8 0.05714285714285713 0.3 False True False 1 False 0 +chr1 16006133 16006134 G T ENST00000329454.2 1 169 protein_coding ENSG00000183888 missense G/W 100 SRARP ENST00000329454.2:c.298G>T ENSP00000332162.2:p.Gly100Trp HLA-A*29:02 8 4 5 TPMGWGCL TPMGGGCL MHCnuggetsI 5948.47 5958.24 1.002 NetMHCpan 34.0 52.0 177 1.0 0 0.0 140 0.007 0.082 0.082 0.902 48.531 46.5 0.2750012651085853 0.1149642989039421 0.0085204094797548 0.0047828223289128 27.61739130434782 62.74467391304348 31261.296587367808 31746.638199036588 50.764875 59.0625 5958.24 5948.47 77.49000000000001 71.54 38875.14 36511.31 72.0 60.0 27875.69 23067.13 50.0 35.0 36835.88 31405.68 52.0 34.0 4.4e-05 4.4e-05 37.0 37.0 44872.50393235162 37333.42327399945 45.0 40.0 30314.47141744229 87026.19589953587 31.0 84.0 26850.353017500933 37320.7188256828 27.0 39.0 1.SRARP.ENST00000329454.2.missense.100G/W 7 0.7 0.7 False False False 1 False 0 +chr1 16006133 16006134 G T ENST00000329454.2 1 169 protein_coding ENSG00000183888 missense G/W 100 SRARP ENST00000329454.2:c.298G>T ENSP00000332162.2:p.Gly100Trp HLA-A*29:02 8 5 4 PMGWGCLA PMGGGCLA MHCnuggetsI 5913.41 5926.68 1.002 SMM 16.0 19.0 177 1.0 0 0.0 140 0.007 0.082 0.082 1.104 43.0 NA 0.0053385943174362 0.0105953887104988 0.0033092989357108 0.0033669849259659 99.28660326086955 99.28660326086955 31160.68448475006 31201.132424958218 50.764875 50.764875 5926.68 5913.41 57.99 50.63 34767.17 31424.38 53.0 41.0 17505.31 14643.23 23.0 17.0 40266.92 35078.12 69.0 45.0 8e-06 1.1e-05 63.0 58.0 24747.820312174 23192.263360933746 28.0 27.0 20167.404078469328 16582.499772033992 19.0 16.0 27161.26545212926 23820.452104817577 27.0 23.0 1.SRARP.ENST00000329454.2.missense.100G/W 6 1.1857142857142857 1.1857142857142857 False False False 1 False 0 +chr1 16006133 16006134 G T ENST00000329454.2 1 169 protein_coding ENSG00000183888 missense G/W 100 SRARP ENST00000329454.2:c.298G>T ENSP00000332162.2:p.Gly100Trp HLA-A*29:02 8 6 3 MGWGCLAQ MGGGCLAQ MHCnuggetsI 5868.16 5856.0 0.998 NetMHCcons 8.4 28.0 177 1.0 0 0.0 140 0.007 0.082 0.082 1.176 36.243 45.0 0.0028298906981945 0.0081132613122463 0.0034694289208414 0.0035029098623762 99.28660326086955 99.28660326086955 29406.2330596394 29681.66066528507 34.4285 36.485875 5856.0 5868.16 29.81 33.08 31778.6 27590.03 42.0 31.0 20040.43 8433.14 28.0 8.4 39303.57 30272.0 64.0 31.0 4e-06 1.9e-05 75.0 49.0 50000.0 33144.32235498932 48.0 36.0 94983.90053244469 94765.44362333506 87.0 86.0 38721.30628040024 36724.001799460595 41.0 38.0 1.SRARP.ENST00000329454.2.missense.100G/W 5 0.4142857142857142 1.1857142857142857 False False False 1 False 0 +chr1 16006133 16006134 G T ENST00000329454.2 1 169 protein_coding ENSG00000183888 missense G/W 100 SRARP ENST00000329454.2:c.298G>T ENSP00000332162.2:p.Gly100Trp HLA-A*29:02 8 7 2 GWGCLAQA GGGCLAQA MHCnuggetsI 5902.91 5962.45 1.01 NetMHCcons 28.0 73.0 177 1.0 0 0.0 140 0.007 0.082 0.082 1.145 47.82 77.22 0.0125645361840724 0.0110534355044364 0.0032073908927789 0.0032773035754855 99.28660326086955 99.28660326086955 33042.14184992329 32137.008594616924 74.450375 63.8735 5962.45 5902.91 79.99000000000001 45.64 41579.64 34084.43 88.0 50.0 33686.63 20040.43 73.0 28.0 45866.52 39011.66 97.0 62.0 1e-06 7e-06 94.0 66.0 50000.0 42971.88692543735 48.0 44.0 58836.92946080371 33624.048234218884 63.0 34.0 33647.2829279429 28836.994858420832 34.0 29.0 1.SRARP.ENST00000329454.2.missense.100G/W 4 0.7285714285714285 0.7285714285714285 False False False 1 False 0 +chr1 16006133 16006134 G T ENST00000329454.2 1 169 protein_coding ENSG00000183888 missense G/W 100 SRARP ENST00000329454.2:c.298G>T ENSP00000332162.2:p.Gly100Trp HLA-A*29:02 8 8 1 WGCLAQAR GGCLAQAR MHCnuggetsI 5829.43 5835.93 1.001 MHCnuggetsI 24.08 25.32 177 1.0 0 0.0 140 0.007 0.082 0.082 1.108 43.5 52.0 0.0127962455153465 0.0064251162111759 0.0035595753070329 0.0035029444827385 99.28660326086955 99.28660326086955 29695.87027952681 29499.406161513143 36.485875 36.485875 5835.93 5829.43 25.32 24.08 41985.61 36751.88 90.0 61.0 28640.0 23699.6 52.0 37.0 42022.88 36378.02 78.0 50.0 1.4e-05 1.7e-05 54.0 51.0 34986.781616071436 27278.95215511154 38.0 31.0 47495.23053304064 47495.23053304064 52.0 52.0 25819.629140564757 28903.470997020624 25.0 29.0 1.SRARP.ENST00000329454.2.missense.100G/W 3 0.21428571428571427 0.7285714285714285 False False False 1 False 0 +chr1 16972417 16972418 C G ENST00000375541.10 5 2017 protein_coding ENSG00000058453 missense S/C 2009 CROCC ENST00000375541.10:c.6026C>G ENSP00000364691.4:p.Ser2009Cys HLA-A*29:02 8 1 8 RRSSAPFC RRSSAPFS MHCnuggetsI 5988.38 5974.96 0.998 SMM 26.0 35.0 85 0.988 56 1.0 80 0.0 11.753 2.147 1.049 79.5 77.5 0.0034668780863285 0.0055077373981475 0.0029955445029173 0.0030033801030101 100.0 99.28660326086955 34302.270674557265 34465.12217183127 92.988625 92.988625 5974.96 5988.38 86.56 91.97 42409.75 42891.54 92.0 94.0 33144.32 34798.02 70.0 78.0 43266.26 42430.88 85.0 81.0 9e-06 7e-06 62.0 66.0 50000.0 50000.0 48.0 48.0 34725.61932176095 26647.102961758024 35.0 26.0 40921.354456751775 37320.71882568288 44.0 39.0 2.CROCC.ENST00000375541.10.missense.2009S/C 8 -0.08571428571428578 -0.08571428571428578 False True False 1 False 0 +chr1 16972417 16972418 C G ENST00000375541.10 5 2017 protein_coding ENSG00000058453 missense S/C 2009 CROCC ENST00000375541.10:c.6026C>G ENSP00000364691.4:p.Ser2009Cys HLA-A*29:02 8 2 7 RSSAPFCP RSSAPFSP SMM 5219.754 5219.754 1.0 SMM 3.8 3.8 85 0.988 56 1.0 80 0.0 11.753 2.147 0.989 64.937 62.044 0.0170750990509986 0.0583108253777027 0.0033191040526975 0.0038410274517063 99.28660326086955 99.28660326086955 32422.454083719957 32400.048899462992 69.087125 63.8735 5952.44 5984.02 73.97 90.36 40729.2 40369.42 83.0 81.0 34798.02 35559.24 78.0 81.0 37478.3 37014.88 55.0 53.0 1.7e-05 7e-06 51.0 66.0 37739.55580632669 38150.10646105872 40.0 41.0 5219.753570100537 5219.753570100537 3.8 3.8 32281.224489863776 27098.796275090655 33.0 27.0 2.CROCC.ENST00000375541.10.missense.2009S/C 7 0.32857142857142846 0.32857142857142846 False False True 1 False 0 +chr1 16972417 16972418 C G ENST00000375541.10 5 2017 protein_coding ENSG00000058453 missense S/C 2009 CROCC ENST00000375541.10:c.6026C>G ENSP00000364691.4:p.Ser2009Cys HLA-A*29:02 8 3 6 SSAPFCPP SSAPFSPP SMM 3031.447 3031.447 1.0 SMM 1.4 1.4 85 0.988 56 1.0 80 0.0 11.753 2.147 0.996 49.0 48.128 0.098813459277153 0.0869735516607761 0.004635326890338 0.0044954204267487 62.74467391304348 62.74467391304348 30918.610070190254 30567.20717676396 47.25637500000001 44.13500000000001 5932.86 5946.47 61.69 70.28 35544.62 36266.01 56.0 59.0 28026.9 28640.0 50.0 52.0 36156.32 35315.37 49.0 46.0 4e-05 1.6e-05 38.0 52.0 42509.44689711614 37333.42327399945 44.0 40.0 3031.447141744229 3031.447141744229 1.4 1.4 11666.752908720136 13897.926162143007 12.0 13.0 2.CROCC.ENST00000375541.10.missense.2009S/C 6 0.21428571428571416 0.32857142857142846 False False True 1 False 0 +chr1 16972417 16972418 C G ENST00000375541.10 5 2017 protein_coding ENSG00000058453 missense S/C 2009 CROCC ENST00000375541.10:c.6026C>G ENSP00000364691.4:p.Ser2009Cys HLA-A*29:02 8 4 5 SAPFCPPS SAPFSPPS MHCnuggetsI 5830.52 5778.11 0.991 MHCnuggetsI 24.28 17.11 85 1.988 56 1.0 80 0.0 11.753 2.147 1.061 58.5 59.0 0.0144078508019447 0.004819992929697 0.0034351568937058 0.0033663533299563 99.28660326086955 99.28660326086955 30989.07336299006 30557.53944594346 47.25637500000001 44.13500000000001 5778.11 5830.52 17.11 24.28 39211.81 39389.99 74.0 75.0 32434.8 30396.07 67.0 59.0 40793.12 38535.52 71.0 60.0 1.7e-05 1.1e-05 51.0 58.0 50000.0 50000.0 48.0 48.0 129613.428914014 75622.32054801435 95.0 78.0 44355.75745387479 36893.512089950294 49.0 38.0 2.CROCC.ENST00000375541.10.missense.2009S/C 5 0.21428571428571425 0.21428571428571425 False False False 1 False 0 +chr1 16972417 16972418 C G ENST00000375541.10 5 2017 protein_coding ENSG00000058453 missense S/C 2009 CROCC ENST00000375541.10:c.6026C>G ENSP00000364691.4:p.Ser2009Cys HLA-A*29:02 8 5 4 APFCPPSG APFSPPSG MHCnuggetsI 5967.48 5960.33 0.999 SMMPMBEC 28.0 38.0 85 0.988 56 1.0 80 0.0 11.753 2.147 1.043 71.5 66.718 0.0246490016579628 0.0181423835456371 0.0035155383202336 0.0034432383999441 99.28660326086955 99.28660326086955 31407.36925018084 31337.333104871108 54.695 50.764875 5960.33 5967.48 78.74 82.78 43189.57 42831.7 96.0 94.0 35559.24 35367.39 81.0 80.0 43967.07 42784.93 89.0 82.0 3.1e-05 8e-06 42.0 63.0 34610.27293937596 33869.36732649195 38.0 37.0 40146.71361559038 42721.844998681605 43.0 46.0 36639.53893914137 27793.932598641266 38.0 28.0 2.CROCC.ENST00000375541.10.missense.2009S/C 4 -0.10000000000000007 0.21428571428571425 False False False 1 False 0 +chr1 16972417 16972418 C G ENST00000375541.10 5 2017 protein_coding ENSG00000058453 missense S/C 2009 CROCC ENST00000375541.10:c.6026C>G ENSP00000364691.4:p.Ser2009Cys HLA-A*29:02 8 6 3 PFCPPSGP PFSPPSGP MHCnuggetsI 5929.88 5945.11 1.003 SMMPMBEC 34.0 36.0 85 0.988 53 1.0 80 0.0 11.753 2.147 0.966 60.44 49.5 0.0018312484025955 0.0018649138510227 0.0033118231499674 0.0033387002836613 99.28660326086955 99.28660326086955 30740.90574033748 30489.060159375287 47.25637500000001 44.13500000000001 5945.11 5929.88 69.42 59.88 34307.16 38026.88 51.0 68.0 31229.48 30893.41 62.0 61.0 42124.85 42832.63 79.0 83.0 2.1e-05 6e-06 48.0 68.0 50000.0 50000.0 48.0 48.0 37553.46371440002 40054.37873654026 39.0 43.0 34749.6152167138 33492.68770563393 36.0 34.0 2.CROCC.ENST00000375541.10.missense.2009S/C 3 -0.10000000000000007 -0.10000000000000007 False False True 1 False 0 +chr1 16972417 16972418 C G ENST00000375541.10 5 2017 NA ENSG00000058453 missense S/C 2009 CROCC ENST00000375541.10:c.6026C>G ENSP00000364691.4:p.Ser2009Cys HLA-A*29:02 8 7 2 FCPPSGPP FSPPSGPP MHCnuggetsI 5917.49 5920.34 1.0 MHCflurry 30.892 26.709 85 0.988 56 1.0 80 0.0 11.753 2.147 0.793 66.5 51.175 0.0529707707464695 0.0080852657556533 0.0043629698384238 0.0036376750593724 62.74467391304348 99.28660326086955 27854.61522894876 28543.35284723345 26.708874999999995 30.891874999999995 5920.34 5917.49 54.35 52.77 38187.68 41120.62 69.0 85.0 34053.09 36931.66 75.0 87.0 40197.7 43352.96 68.0 85.0 2.3e-05 4e-06 46.0 75.0 50000.0 50000.0 48.0 48.0 9608.376226408696 53536.50303731279 9.0 58.0 24040.86003443855 33109.30007978277 23.0 34.0 2.CROCC.ENST00000375541.10.missense.2009S/C 2 -0.7285714285714286 -0.10000000000000007 False False True 1 False 0 +chr1 16972417 16972418 C G ENST00000375541.10 5 2017 protein_coding ENSG00000058453 missense S/C 2009 CROCC ENST00000375541.10:c.6026C>G ENSP00000364691.4:p.Ser2009Cys HLA-A*29:02 8 8 1 CPPSGPPE SPPSGPPE MHCnuggetsI 5956.84 5940.46 0.997 SMM 16.0 16.0 85 0.988 56 2.0 80 0.0 11.753 2.147 1.024 64.0 NA 0.0085133202373981 0.0027647912502288 0.0038300747966599 0.0036987774902234 99.28660326086955 99.28660326086955 27105.18088685827 27517.78152066251 24.47925 25.544875 5940.46 5956.84 66.44 76.64999999999999 43314.5 43255.05 96.0 96.0 34423.54 34237.82 76.0 75.0 44965.32 43553.72 93.0 87.0 2.3e-05 1.5e-05 46.0 53.0 50000.0 50000.0 48.0 48.0 17007.872833250163 17007.872833250163 16.0 16.0 35233.030518194704 33802.59172937616 36.0 34.0 2.CROCC.ENST00000375541.10.missense.2009S/C 1 -1.5857142857142856 -0.7285714285714286 True False False 1 False 0 +chr1 22576425 22576426 C A ENST00000166244.8 2 1005 protein_coding ENSG00000070886 missense N/K 123 EPHA8 ENST00000166244.8:c.369C>A ENSP00000166244.3:p.Asn123Lys HLA-A*29:02 8 1 8 GTCKETFK GTCKETFN MHCnuggetsI 5946.43 5984.91 1.006 PickPocket 6.8 32.0 114 0.272 0 0.0 82 0.0 0.001 0.001 1.186 32.0 70.0 0.0097893625497818 0.0387631803750991 0.0032783537149027 0.0041438699747915 99.28660326086955 62.74467391304348 31978.79473422675 27896.932977370703 59.0625 27.987124999999995 5984.91 5946.43 90.7 70.25 41635.48 41129.06 88.0 85.0 30893.41 20701.6 61.0 29.0 44260.61 36649.84 90.0 51.0 3e-06 6.1e-05 79.0 33.0 28795.3614360886 7945.936747596634 32.0 6.8 24640.52744948151 23913.877487907896 24.0 23.0 30545.694226700492 30970.6271616828 31.0 31.0 3.EPHA8.ENST00000166244.8.missense.123N/K 3 -1.0571428571428572 -0.5571428571428572 False False False 1 False 0 +chr1 22576425 22576426 C A ENST00000374644.8 1 495 protein_coding ENSG00000070886 missense N/K 123 EPHA8 ENST00000374644.8:c.369C>A ENSP00000363775.4:p.Asn123Lys HLA-A*29:02 8 1 8 GTCKETFK GTCKETFN MHCnuggetsI 5946.43 5984.91 1.006 PickPocket 6.8 32.0 114 0.272 0 0.0 82 0.0 0.001 0.0 1.186 32.0 70.0 0.0097893625497818 0.0387631803750991 0.0032783537149027 0.0041438699747915 99.28660326086955 62.74467391304348 31978.79473422675 27896.932977370703 59.0625 27.987124999999995 5984.91 5946.43 90.7 70.25 41635.48 41129.06 88.0 85.0 30893.41 20701.6 61.0 29.0 44260.61 36649.84 90.0 51.0 3e-06 6.1e-05 79.0 33.0 28795.3614360886 7945.936747596634 32.0 6.8 24640.52744948151 23913.877487907896 24.0 23.0 30545.694226700492 30970.6271616828 31.0 31.0 4.EPHA8.ENST00000374644.8.missense.123N/K 3 -1.0571428571428572 -0.5571428571428572 False False False 1 False 0 \ No newline at end of file