From eef3fad90cbad2d6f909e45d76f53d7bc656c899 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Wed, 8 Jun 2022 13:03:43 +0900 Subject: [PATCH 01/16] Bump up version number --- pypgx/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypgx/version.py b/pypgx/version.py index 8911e95..435d64b 100644 --- a/pypgx/version.py +++ b/pypgx/version.py @@ -1 +1 @@ -__version__ = '0.16.0' +__version__ = '0.17.0' From c71e58f73eac7636e6ca68a918d55bc0be9ef557 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Wed, 8 Jun 2022 13:04:17 +0900 Subject: [PATCH 02/16] Update docs --- CHANGELOG.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d2b8a0b..9218256 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,9 @@ Changelog ********* +0.17.0 (in development) +----------------------- + 0.16.0 (2022-06-08) ------------------- From d4509f7526e79f79f73bf9f235d84353a07e2aa2 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Thu, 9 Jun 2022 10:14:48 +0900 Subject: [PATCH 03/16] Fix bug in `api.utils.estimate_phase_beagle` (#63): * :issue:`63`: Fix bug in :meth:`api.utils.estimate_phase_beagle` when there is only one variant in input VCF and Beagle throws an error. --- CHANGELOG.rst | 2 ++ pypgx/api/utils.py | 48 ++++++++++++++++++++++++++++++---------------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9218256..51dce4a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,8 @@ Changelog 0.17.0 (in development) ----------------------- +* :issue:`63`: Fix bug in :meth:`api.utils.estimate_phase_beagle` when there is only one variant in input VCF and Beagle throws an error. + 0.16.0 (2022-06-08) ------------------- diff --git a/pypgx/api/utils.py b/pypgx/api/utils.py index 830b671..764a1f4 100644 --- a/pypgx/api/utils.py +++ b/pypgx/api/utils.py @@ -830,23 +830,37 @@ def estimate_phase_beagle( if vf1.empty: return sdk.Archive(metadata, vf1) - with tempfile.TemporaryDirectory() as t: - vf1.to_file(f'{t}/input.vcf') - command = [ - 'java', '-Xmx2g', '-jar', beagle, - f'gt={t}/input.vcf', - f'chrom={region}', - f'ref={panel}', - f'out={t}/output', - f'impute={str(impute).lower()}' - ] - subprocess.run(command, check=True, stdout=subprocess.DEVNULL) - vf2 = pyvcf.VcfFrame.from_file(f'{t}/output.vcf.gz') - - if has_chr_prefix: - vf2 = vf2.update_chr_prefix('remove') - - return sdk.Archive(metadata, vf2) + # Beagle will throw an error if there is only one marker overlapping with + # the reference panel in a given window. This typically occurs when the + # input VCF has very few markers or only one marker. Therefore, these + # cases need to be handled manually. + vf2 = pyvcf.VcfFrame.from_file(panel) + variants1 = vf1.to_variants() + variants2 = vf2.to_variants() + common_variants = list(set(variants1).intersection(variants2)) + + if len(common_variants) == 1: + (chrom, pos, ref, alt) = common.parse_variant(common_variants[0]) + df = vf1.df[vf1.df.POS == pos] + vf3 = pyvcf.VcfFrame([], df) + vf3 = vf3.pseudophase().strip() + else: + with tempfile.TemporaryDirectory() as t: + vf1.to_file(f'{t}/input.vcf') + command = [ + 'java', '-Xmx2g', '-jar', beagle, + f'gt={t}/input.vcf', + f'chrom={region}', + f'ref={panel}', + f'out={t}/output', + f'impute={str(impute).lower()}' + ] + subprocess.run(command, check=True, stdout=subprocess.DEVNULL) + vf3 = pyvcf.VcfFrame.from_file(f'{t}/output.vcf.gz') + if has_chr_prefix: + vf3 = vf3.update_chr_prefix('remove') + + return sdk.Archive(metadata, vf3) def filter_samples(archive, samples, exclude=False): """ From 92bb6d6b6229d39728c9dd2cb7ffa393f61dedf8 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Thu, 9 Jun 2022 14:04:00 +0900 Subject: [PATCH 04/16] Update `compare-genotypes`: * Update :command:`compare-genotypes` command to print the entire discordant calls when ``--verbose`` is used. --- CHANGELOG.rst | 1 + pypgx/api/utils.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 51dce4a..0cc3488 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,7 @@ Changelog ----------------------- * :issue:`63`: Fix bug in :meth:`api.utils.estimate_phase_beagle` when there is only one variant in input VCF and Beagle throws an error. +* Update :command:`compare-genotypes` command to print the entire discordant calls when ``--verbose`` is used. 0.16.0 (2022-06-08) ------------------- diff --git a/pypgx/api/utils.py b/pypgx/api/utils.py index 764a1f4..840dfc3 100644 --- a/pypgx/api/utils.py +++ b/pypgx/api/utils.py @@ -334,7 +334,7 @@ def show_comparison(col): if df.Concordant.all(): print('None') else: - print(df[~df.Concordant]) + print(df[~df.Concordant].to_string()) for col in ['Genotype', 'CNV']: show_comparison(col) From 7279f622ebabed28c2508beae1bb01c43ba798c1 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Thu, 9 Jun 2022 16:03:52 +0900 Subject: [PATCH 05/16] Update `compute-copy-number`: * Update :command:`compute-copy-number` command to ensure that the samples in CovFrame[ReadDepth] and SampleTable[Statistics] are in the same order. --- CHANGELOG.rst | 1 + pypgx/api/utils.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0cc3488..4eaddd5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,7 @@ Changelog * :issue:`63`: Fix bug in :meth:`api.utils.estimate_phase_beagle` when there is only one variant in input VCF and Beagle throws an error. * Update :command:`compare-genotypes` command to print the entire discordant calls when ``--verbose`` is used. +* Update :command:`compute-copy-number` command to ensure that the samples in CovFrame[ReadDepth] and SampleTable[Statistics] are in the same order. 0.16.0 (2022-06-08) ------------------- diff --git a/pypgx/api/utils.py b/pypgx/api/utils.py index 840dfc3..0f33acf 100644 --- a/pypgx/api/utils.py +++ b/pypgx/api/utils.py @@ -447,6 +447,9 @@ def compute_copy_number( if set(read_depth.data.samples) != set(control_statistics.data.index): raise ValueError('Different sample sets found') + # Make sure samples are in the same order. + control_statistics.data = control_statistics.data.loc[read_depth.data.samples] + # Apply intra-sample normalization. df = read_depth.data.copy_df() medians = control_statistics.data['50%'] From 6c01f567b4ecb89b1d4d952a9e6c38ecdf76c065 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Fri, 10 Jun 2022 09:05:08 +0900 Subject: [PATCH 06/16] Update docs --- README.rst | 6 +++--- docs/create.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index b1b1a3a..3453aeb 100644 --- a/README.rst +++ b/README.rst @@ -238,13 +238,13 @@ visually inspect SV calls. Below are CYP2D6 examples: * - Normal - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/gene-model-CYP2D6-1.png - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/GRCh37-CYP2D6-8.png - * - DeletionHet + * - WholeDel1 - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/gene-model-CYP2D6-2.png - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/GRCh37-CYP2D6-1.png - * - DeletionHom + * - WholeDel1Hom - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/gene-model-CYP2D6-3.png - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/GRCh37-CYP2D6-6.png - * - Duplication + * - WholeDup1 - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/gene-model-CYP2D6-4.png - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/GRCh37-CYP2D6-2.png * - Tandem3 diff --git a/docs/create.py b/docs/create.py index 4d58dc7..2bba46e 100644 --- a/docs/create.py +++ b/docs/create.py @@ -265,13 +265,13 @@ * - Normal - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/gene-model-CYP2D6-1.png - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/GRCh37-CYP2D6-8.png - * - DeletionHet + * - WholeDel1 - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/gene-model-CYP2D6-2.png - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/GRCh37-CYP2D6-1.png - * - DeletionHom + * - WholeDel1Hom - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/gene-model-CYP2D6-3.png - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/GRCh37-CYP2D6-6.png - * - Duplication + * - WholeDup1 - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/gene-model-CYP2D6-4.png - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/GRCh37-CYP2D6-2.png * - Tandem3 From 138f7262b8e1c7f346f1f451fa9123d2a5548743 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Fri, 10 Jun 2022 09:43:14 +0900 Subject: [PATCH 07/16] Update `api.utils.import_variants` (#64): * :issue:`64`: Update :meth:`api.utils.import_variants` method to 'diploidize' the input VCF when the target gene is G6PD. This is because some variant callers output haploid genotypes for males for the X chromosome, interfering with downstream analyses. --- CHANGELOG.rst | 1 + pypgx/api/utils.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4eaddd5..1a6b0bd 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,7 @@ Changelog * :issue:`63`: Fix bug in :meth:`api.utils.estimate_phase_beagle` when there is only one variant in input VCF and Beagle throws an error. * Update :command:`compare-genotypes` command to print the entire discordant calls when ``--verbose`` is used. * Update :command:`compute-copy-number` command to ensure that the samples in CovFrame[ReadDepth] and SampleTable[Statistics] are in the same order. +* :issue:`64`: Update :meth:`api.utils.import_variants` method to 'diploidize' the input VCF when the target gene is G6PD. This is because some variant callers output haploid genotypes for males for the X chromosome, interfering with downstream analyses. 0.16.0 (2022-06-08) ------------------- diff --git a/pypgx/api/utils.py b/pypgx/api/utils.py index 0f33acf..357377f 100644 --- a/pypgx/api/utils.py +++ b/pypgx/api/utils.py @@ -1015,6 +1015,12 @@ def import_variants( vf = vf.unphase() semantic_type = 'VcfFrame[Imported]' + # Some variant callers output haploid genotypes for males for the X + # chromosome. Because this can interfere with downstream analyses, we + # should 'diploidize' the input VCF when the gene is G6PD. + if gene == 'G6PD': + vf = vf.diploidize() + metadata = { 'Platform': platform, 'Gene': gene, From f6e06a125c2c47bd7f5e8d1a782b95650e5577ee Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Mon, 13 Jun 2022 08:50:51 +0900 Subject: [PATCH 08/16] Update docs --- docs/faq.rst | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/faq.rst b/docs/faq.rst index 08d2f7f..e426357 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -77,3 +77,36 @@ consistent with the other variant-level analyses you may also just use the same VCF for PyPGx. The bottom line is, if you are going to create your own input VCF, then you need to know what you are doing. Otherwise, it's probably safer to use :command:`create-input-vcf`. + +``chr22_KI270879v1_alt`` in GRCh38 +================================== + +Users may encounter an error like below when working with GRCh38 data: + +.. code-block:: text + + $ pypgx prepare-depth-of-coverage \ + depth-of-coverage.zip \ + HG00276_PyPGx.sorted.markdup.recal.bam \ + --assembly GRCh38 + Traceback (most recent call last): + File "/Users/sbslee/opt/anaconda3/envs/fuc/bin/pypgx", line 33, in + sys.exit(load_entry_point('pypgx', 'console_scripts', 'pypgx')()) + File "/Users/sbslee/Desktop/pypgx/pypgx/__main__.py", line 33, in main + commands[args.command].main(args) + File "/Users/sbslee/Desktop/pypgx/pypgx/cli/prepare_depth_of_coverage.py", line 90, in main + archive = utils.prepare_depth_of_coverage( + File "/Users/sbslee/Desktop/pypgx/pypgx/api/utils.py", line 1247, in prepare_depth_of_coverage + cf = pycov.CovFrame.from_bam(bams, regions=regions, zero=True) + File "/Users/sbslee/Desktop/fuc/fuc/api/pycov.py", line 345, in from_bam + results += pysam.depth(*(bams + args + ['-r', region])) + File "/Users/sbslee/opt/anaconda3/envs/fuc/lib/python3.9/site-packages/pysam/utils.py", line 69, in __call__ + raise SamtoolsError( + pysam.utils.SamtoolsError: 'samtools returned with error 1: stdout=, stderr=samtools depth: cannot parse region "chr22_KI270879v1_alt:267307-281486"\n' + +This is a GRCh38-specific issue. One of the genes with SV is GSTT1 and it is +located in the contig ``chr22_KI270879v1_alt``, which is missing in input BAM +file. That's why the :command:`prepare-depth-of-coverage` command is +complaining. For more details, please see the following articles: +:ref:`readme:GRCh37 vs. GRCh38` and :ref:`genes:GRCh38 data for GSTT1`. +Related GitHub issues: :issue:`65`. From c792ca9b5473ca0891ec26057b52bca96160dc46 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Mon, 13 Jun 2022 09:02:41 +0900 Subject: [PATCH 09/16] Update docs --- docs/faq.rst | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index e426357..a67ae0e 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -87,7 +87,7 @@ Users may encounter an error like below when working with GRCh38 data: $ pypgx prepare-depth-of-coverage \ depth-of-coverage.zip \ - HG00276_PyPGx.sorted.markdup.recal.bam \ + in.bam \ --assembly GRCh38 Traceback (most recent call last): File "/Users/sbslee/opt/anaconda3/envs/fuc/bin/pypgx", line 33, in @@ -107,6 +107,19 @@ Users may encounter an error like below when working with GRCh38 data: This is a GRCh38-specific issue. One of the genes with SV is GSTT1 and it is located in the contig ``chr22_KI270879v1_alt``, which is missing in input BAM file. That's why the :command:`prepare-depth-of-coverage` command is -complaining. For more details, please see the following articles: +complaining. To solve this issue, you can either re-align sequence reads in +the presence of the contig in your FASTA reference genome or work around it +by excluding GSTT1 from your analysis: + +.. code-block:: text + + $ pypgx prepare-depth-of-coverage \ + depth-of-coverage.zip \ + in.bam \ + --assembly GRCh38 \ + --genes GSTT1 \ + --exclude + +For more details, please see the following articles: :ref:`readme:GRCh37 vs. GRCh38` and :ref:`genes:GRCh38 data for GSTT1`. Related GitHub issues: :issue:`65`. From d4ab05ea79ca44e5a88359524b13e14743b64d52 Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Mon, 13 Jun 2022 20:11:48 +0900 Subject: [PATCH 10/16] Update docs --- docs/tutorials.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/tutorials.rst b/docs/tutorials.rst index fe296e2..8e45a8d 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -239,9 +239,9 @@ Let's take a look at the results: NA10851_PyPGx *1/*4 Intermediate Metabolizer *1; *4;*10;*74;*2; ; *4:22-42524947-C-T:0.467;*10:22-42523943-A-G,22-42526694-G-A:0.95,0.421;*74:22-42525821-G-T:0.447;*1:22-42522613-G-C,22-42523943-A-G:0.486,0.95;*2:default; Normal NA18484_PyPGx *1/*17 Normal Metabolizer *1; *17;*2; ; *17:22-42525772-G-A:0.6;*1:22-42522613-G-C,22-42523943-A-G:0.625,0.391;*2:default; Normal NA12006_PyPGx *4/*41 Intermediate Metabolizer *41;*2; *4;*10;*2; *69; *69:22-42526694-G-A,22-42523805-C-T:0.473,0.528;*4:22-42524947-C-T:0.448;*10:22-42523943-A-G,22-42526694-G-A:0.545,0.473;*41:22-42523805-C-T:0.528;*2:default; Normal - HG00436_PyPGx *2x2/*71 Indeterminate *71;*1; *2; ; *71:22-42526669-C-T:0.433;*1:22-42522613-G-C,22-42523943-A-G:0.462,0.353;*2:default; Duplication + HG00436_PyPGx *2x2/*71 Indeterminate *71;*1; *2; ; *71:22-42526669-C-T:0.433;*1:22-42522613-G-C,22-42523943-A-G:0.462,0.353;*2:default; WholeDup1 NA19213_PyPGx *1/*1 Normal Metabolizer *1; *1; ; *1:22-42522613-G-C,22-42523943-A-G:1.0,1.0; Normal - NA19207_PyPGx *2x2/*10 Normal Metabolizer *10;*2; *2; ; *10:22-42523943-A-G,22-42526694-G-A:0.366,0.25;*2:default; Duplication + NA19207_PyPGx *2x2/*10 Normal Metabolizer *10;*2; *2; ; *10:22-42523943-A-G,22-42526694-G-A:0.366,0.25;*2:default; WholeDup1 NA07029_PyPGx *1/*35 Normal Metabolizer *35;*2; *1; ; *1:22-42522613-G-C,22-42523943-A-G:0.596,0.476;*35:22-42526763-C-T:0.405;*2:default; Normal You can read :ref:`readme:Results interpretation` for details on how to @@ -250,7 +250,7 @@ interpret the PyPGx results. Next, we can manually inspect SV calls by visualizing copy number and allele fraction for the CYP2D6 locus (read :ref:`readme:Structural variation detection` for details). For example, above results indicate that the samples -``HG00589_PyPGx`` and ``HG00436_PyPGx`` have ``Normal`` and ``Duplication`` +``HG00589_PyPGx`` and ``HG00436_PyPGx`` have ``Normal`` and ``WholeDup1`` as CNV calls, respectively: .. list-table:: From 4078d3d75013a935a46294fe4cc89a4983512718 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Thu, 16 Jun 2022 14:52:01 +0900 Subject: [PATCH 11/16] Update docs --- README.rst | 14 ++++++++++++ docs/create.py | 14 ++++++++++++ docs/genes.rst | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/README.rst b/README.rst index 3453aeb..5702ab7 100644 --- a/README.rst +++ b/README.rst @@ -177,6 +177,15 @@ you can access a development branch with the ``git checkout`` command. When you do this, please make sure your environment already has all the dependencies installed. +.. note:: + `Beagle `__ + is one of the default software tools used by PyPGx for haplotype phasing + SNVs and indels. The program is freely available and published under the + `GNU General Public License `__. Users do not need to download Beagle separately + because a copy of the software (``beagle.28Jun21.220.jar``) is already + included in PyPGx. + .. warning:: You're not done yet! Keep scrolling down to obtain the resource bundle for PyPGx, which is essential for running the package. @@ -254,6 +263,11 @@ visually inspect SV calls. Below are CYP2D6 examples: - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/gene-model-CYP2D6-10.png - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/GRCh37-CYP2D6-7.png +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls, and corresponding copy number +profiles and allele fraction profiles. + GRCh37 vs. GRCh38 ================= diff --git a/docs/create.py b/docs/create.py index 2bba46e..033b126 100644 --- a/docs/create.py +++ b/docs/create.py @@ -204,6 +204,15 @@ you do this, please make sure your environment already has all the dependencies installed. +.. note:: + `Beagle `__ + is one of the default software tools used by PyPGx for haplotype phasing + SNVs and indels. The program is freely available and published under the + `GNU General Public License `__. Users do not need to download Beagle separately + because a copy of the software (``beagle.28Jun21.220.jar``) is already + included in PyPGx. + .. warning:: You're not done yet! Keep scrolling down to obtain the resource bundle for PyPGx, which is essential for running the package. @@ -281,6 +290,11 @@ - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/gene-model-CYP2D6-10.png - .. image:: https://raw.githubusercontent.com/sbslee/pypgx-data/main/dpsv/GRCh37-CYP2D6-7.png +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls, and corresponding copy number +profiles and allele fraction profiles. + GRCh37 vs. GRCh38 ================= diff --git a/docs/genes.rst b/docs/genes.rst index 0318e90..4bc0fe4 100644 --- a/docs/genes.rst +++ b/docs/genes.rst @@ -947,6 +947,11 @@ Below is comprehensive summary of SV described from real NGS studies: - 0.16.0 - +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls for CYP2A6, and +corresponding copy number profiles and allele fraction profiles. + Filtered alleles for CYP2A6 --------------------------- @@ -1086,6 +1091,11 @@ Below is comprehensive summary of SV described from real NGS studies: - 0.16.0 - +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls for CYP2B6, and +corresponding copy number profiles and allele fraction profiles. + Phenotype summary for CYP2B6 ---------------------------- @@ -1441,6 +1451,11 @@ Below is comprehensive summary of SV described from real NGS studies: - 0.14.0 - +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls for CYP2D6, and +corresponding copy number profiles and allele fraction profiles. + Phenotype summary for CYP2D6 ---------------------------- @@ -1649,6 +1664,11 @@ Below is comprehensive summary of SV described from real NGS studies: - 0.16.0 - +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls for CYP2E1, and +corresponding copy number profiles and allele fraction profiles. + Resources for CYP2E1 -------------------- @@ -1820,6 +1840,11 @@ Below is comprehensive summary of SV described from real NGS studies: - 0.11.0 - +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls for CYP4F2, and +corresponding copy number profiles and allele fraction profiles. + Resources for CYP4F2 -------------------- @@ -1955,6 +1980,11 @@ Below is comprehensive summary of SV described from real NGS studies: - 0.12.0 - +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls for G6PD, and +corresponding copy number profiles and allele fraction profiles. + GSTM1 ===== @@ -2102,6 +2132,11 @@ Below is comprehensive summary of SV described from real NGS studies: - 0.14.0 - +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls for GSTM1, and +corresponding copy number profiles and allele fraction profiles. + GSTT1 ===== @@ -2364,6 +2399,11 @@ Below is comprehensive summary of SV described from real NGS studies: - 0.14.0 - +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls for SLC22A2, and +corresponding copy number profiles and allele fraction profiles. + SLCO1B1 ======= @@ -2554,6 +2594,11 @@ Below is comprehensive summary of SV described from real NGS studies: - 0.16.0 - +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls for SULT1A1, and +corresponding copy number profiles and allele fraction profiles. + TBXAS1 ====== @@ -2702,6 +2747,11 @@ Below is comprehensive summary of SV described from real NGS studies: - 0.13.0 - +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls for UGT1A4, and +corresponding copy number profiles and allele fraction profiles. + UGT2B15 ======= @@ -2834,6 +2884,11 @@ Below is comprehensive summary of SV described from real NGS studies: - 0.16.0 - +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls for UGT2B15, and +corresponding copy number profiles and allele fraction profiles. + UGT2B17 ======= @@ -2956,3 +3011,8 @@ Below is comprehensive summary of SV described from real NGS studies: - NA21090 - 0.15.0 - + +PyPGx was recently applied to the entire high-coverage WGS dataset from 1KGP +(N=2,504). Click `here `__ to see individual SV calls for UGT2B17, and +corresponding copy number profiles and allele fraction profiles. From fa5856d3c0ffd2a3f1b5c4462e5d33aa7b26c44f Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Mon, 20 Jun 2022 09:30:09 +0900 Subject: [PATCH 12/16] Fix typo --- pypgx/api/data/allele-table.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pypgx/api/data/allele-table.csv b/pypgx/api/data/allele-table.csv index 5258a5f..c247f92 100644 --- a/pypgx/api/data/allele-table.csv +++ b/pypgx/api/data/allele-table.csv @@ -633,7 +633,7 @@ DPYD,c.2983G>T (*10),0,No Function,1-97544627-C-A,N/A,1-97079071-C-A,N/A,FALSE DPYD,c.1003G>T (*11),1,Normal Function,1-98058899-C-A,N/A,1-97593343-C-A,N/A,FALSE DPYD,c.1156G>T (*12),0,No Function,1-98039499-C-A,N/A,1-97573943-C-A,N/A,FALSE DPYD,c.1679T>G (*13),0,No Function,1-97981343-A-C,N/A,1-97515787-A-C,N/A,FALSE -DPYD,"c.1129-5923C>G, c.1236G>A (HapB3)",0.5,Decreased Function,"1-98039419-C-T,1-98045449-G-C",N/A,"1-97573863-C-T,1-97579893-G-C",N/A,FALSE +DPYD,"c.1129-5923C>G, c.1236G>A (HapB3)",0.5,Decreased Function,"1-98039419-C-T,1-98045449-G-C",N/A,"1-97573863-C-T,1-97579893-G-C",N/A,FALSE DPYD,c.2846A>T,0.5,Decreased Function,1-97547947-T-A,N/A,1-97082391-T-A,N/A,FALSE DPYD,c.557A>G,0.5,Decreased Function,1-98165030-T-C,N/A,1-97699474-T-C,N/A,FALSE DPYD,c.62G>A,1,Normal Function,1-98348908-C-T,N/A,1-97883352-C-T,N/A,FALSE @@ -1210,4 +1210,4 @@ VKORC1,Reference,N/A,Normal Function,N/A,N/A,N/A,N/A,FALSE VKORC1,rs9923231,N/A,Unknown Function,16-31107689-C-T,N/A,16-31096368-C-T,N/A,FALSE XPC,Reference,N/A,Normal Function,N/A,N/A,N/A,N/A,FALSE XPC,rs2228001,N/A,Unknown Function,3-14187449-G-T,N/A,3-14145949-G-T,N/A,FALSE -XPC,rs2228000,N/A,Unknown Function,3-14199887-G-A,N/A,3-14158387-G-A,N/A,FALSE \ No newline at end of file +XPC,rs2228000,N/A,Unknown Function,3-14199887-G-A,N/A,3-14158387-G-A,N/A,FALSE From 190d837f72827454a38fd0de982b47b68f12751a Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Mon, 20 Jun 2022 09:57:35 +0900 Subject: [PATCH 13/16] Update `api.core.get_ref_allele`: * Remove unnecessary optional argument ``assembly`` from :meth:`api.core.get_ref_allele`. --- CHANGELOG.rst | 2 +- pypgx/api/core.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1a6b0bd..6dc5ab4 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,6 +8,7 @@ Changelog * Update :command:`compare-genotypes` command to print the entire discordant calls when ``--verbose`` is used. * Update :command:`compute-copy-number` command to ensure that the samples in CovFrame[ReadDepth] and SampleTable[Statistics] are in the same order. * :issue:`64`: Update :meth:`api.utils.import_variants` method to 'diploidize' the input VCF when the target gene is G6PD. This is because some variant callers output haploid genotypes for males for the X chromosome, interfering with downstream analyses. +* Remove unnecessary optional argument ``assembly`` from :meth:`api.core.get_ref_allele`. 0.16.0 (2022-06-08) ------------------- @@ -79,7 +80,6 @@ Changelog * Deprecate :meth:`sdk.utils.parse_input_bams` method. * Update :meth:`api.utils.predict_alleles` method to match ``0.31.0`` version of ``fuc`` package. * Fix bug in :command:`filter-samples` command when ``--exclude`` argument is used for archive files with SampleTable type. -* Remove unnecessary optional argument ``assembly`` from :meth:`api.core.get_ref_allele`. * Improve CNV caller for CYP2A6, CYP2B6, CYP2D6, CYP2E1, CYP4F2, GSTM1, SLC22A2, SULT1A1, UGT1A4, UGT2B15, and UGT2B17. * Add a new CNV call for CYP2D6: ``PseudogeneDeletion``. * In CYP2E1 CNV nomenclature, ``PartialDuplication`` has been renamed to ``PartialDuplicationHet`` and a new CNV call ``PartialDuplicationHom`` has been added. Furthermore, calling algorithm for CYP2E1\*S1 allele has been updated. When partial duplication is present, from now on the algorithm requires only \*7 to call \*S1 instead of both \*7 and \*4. diff --git a/pypgx/api/core.py b/pypgx/api/core.py index 90cdc20..4d262bf 100644 --- a/pypgx/api/core.py +++ b/pypgx/api/core.py @@ -481,7 +481,7 @@ def get_priority(gene, phenotype): return df[i].Priority.values[0] -def get_ref_allele(gene, assembly='GRCh37'): +def get_ref_allele(gene): """ Get the reference allele for target gene. From c9db14c89c707404925dd917c8ca7bf1d7c67727 Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Mon, 20 Jun 2022 20:10:18 +0900 Subject: [PATCH 14/16] Update docs --- docs/genes.rst | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/docs/genes.rst b/docs/genes.rst index 4bc0fe4..bd4b42d 100644 --- a/docs/genes.rst +++ b/docs/genes.rst @@ -25,6 +25,7 @@ Below is a summary table: - Phenotype - PharmVar - CPIC + - Function - GRCh37 - GRCh38 - Notes @@ -34,6 +35,7 @@ Below is a summary table: - - - + - Disposition - `chr7:87130178-87345639 `__ - `chr7:87500862-87716323 `__ - @@ -43,6 +45,7 @@ Below is a summary table: - ✅ - - ✅ + - Disposition - `chr4:89008420-89082791 `__ - `chr4:88087268-88161639 `__ - @@ -52,6 +55,7 @@ Below is a summary table: - ✅ - - ✅ + - Target - `chr1:201005639-201084694 `__ - `chr1:201036511-201115426 `__ - @@ -61,6 +65,7 @@ Below is a summary table: - ✅ - - ✅ + - Target - `chr7:117117016-117311719 `__ - `chr7:117477024-117671665 `__ - @@ -70,6 +75,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr15:75008882-75020951 `__ - `chr15:74716541-74728528 `__ - @@ -79,6 +85,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr15:75038183-75051941 `__ - `chr15:74745844-74759607 `__ - @@ -88,6 +95,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr2:38291745-38306323 `__ - `chr2:38064602-38079181 `__ - @@ -97,6 +105,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr19:41339442-41396352 `__ - `chr19:40833540-40890447 `__ - CYP2A6 has pseudogene (CYP2A7). @@ -106,6 +115,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr19:41574355-41622100 `__ - `chr19:41068450-41116195 `__ - @@ -115,6 +125,7 @@ Below is a summary table: - ✅ - ✅ - ✅ + - Metabolism - `chr19:41427203-41534301 `__ - `chr19:40921281-41028398 `__ - CYP2B6 has pseudogene (CYP2B7). @@ -124,6 +135,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr10:96793528-96832254 `__ - `chr10:95033771-95072497 `__ - @@ -133,6 +145,7 @@ Below is a summary table: - ✅ - ✅ - ✅ + - Metabolism - `chr10:96695414-96752148 `__ - `chr10:94935657-94993091 `__ - @@ -142,6 +155,7 @@ Below is a summary table: - ✅ - ✅ - ✅ + - Metabolism - `chr10:96519437-96615962 `__ - `chr10:94759680-94858547 `__ - @@ -151,6 +165,7 @@ Below is a summary table: - ✅ - ✅ - ✅ + - Metabolism - `chr22:42512500-42551883 `__ - `chr22:42116498-42155810 `__ - CYP2D6 has pseudogene (CYP2D7). @@ -160,6 +175,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr10:135330866-135362620 `__ - `chr10:133517362-133549123 `__ - @@ -169,6 +185,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr19:41617336-41637286 `__ - `chr19:41111431-41131381 `__ - @@ -178,6 +195,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr1:60355979-60395470 `__ - `chr1:59890307-59929773 `__ - @@ -187,6 +205,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr11:14896554-14916751 `__ - `chr11:14875008-14895205 `__ - @@ -196,6 +215,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr19:41696111-41716444 `__ - `chr19:41190218-41210539 `__ - @@ -205,6 +225,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr7:1019834-1032276 `__ - `chr7:980180-992640 `__ - @@ -214,6 +235,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr7:99351582-99384811 `__ - `chr7:99753966-99787184 `__ - @@ -223,6 +245,7 @@ Below is a summary table: - ✅ - ✅ - ✅ + - Metabolism - `chr7:99242811-99280649 `__ - `chr7:99645193-99682996 `__ - @@ -232,6 +255,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr7:99299659-99335823 `__ - `chr7:99702035-99738196 `__ - @@ -241,6 +265,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr7:99422635-99466727 `__ - `chr7:99825012-99869093 `__ - @@ -250,6 +275,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr1:47391859-47410148 `__ - `chr1:46926187-46944476 `__ - @@ -259,6 +285,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr1:47600112-47618399 `__ - `chr1:47134440-47152727 `__ - @@ -268,6 +295,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr1:47261669-47288021 `__ - `chr1:46796045-46822413 `__ - @@ -277,6 +305,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr19:15973833-16023930 `__ - `chr19:15863022-15913074 `__ - @@ -286,6 +315,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr10:104587287-104600170 `__ - `chr10:102827530-102840413 `__ - @@ -295,6 +325,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr15:51497253-51633795 `__ - `chr15:51205056-51341596 `__ - @@ -304,6 +335,7 @@ Below is a summary table: - - ✅ - + - Metabolism - `chr10:94830646-94840641 `__ - `chr10:93070892-93080885 `__ - @@ -313,6 +345,7 @@ Below is a summary table: - ✅ - ✅ - ✅ + - Excretion - `chr1:97540298-98389615 `__ - `chr1:97074742-97924034 `__ - @@ -322,6 +355,7 @@ Below is a summary table: - ✅ - - + - Other - `chr1:169478188-169558719 `__ - `chr1:169508950-169589481 `__ - @@ -331,6 +365,7 @@ Below is a summary table: - - - + - Disease - `chrX:153756604-153778233 `__ - `chrX:154528389-154550018 `__ - G6PD is located on X chromosome. @@ -340,6 +375,7 @@ Below is a summary table: - - - + - Metabolism - `chr1:110227417-110239367 `__ - `chr1:109684816-109696745 `__ - @@ -349,6 +385,7 @@ Below is a summary table: - - - + - Metabolism - `chr11:67348065-67357124 `__ - `chr11:67580811-67589653 `__ - @@ -358,6 +395,7 @@ Below is a summary table: - - - + - Metabolism - `chr22:24373132-24387311 `__ - `chr22_KI270879v1_alt:267307-281486 `__ - GSTT1 is located on different contigs between GRCh37 and GRCh38. @@ -367,6 +405,7 @@ Below is a summary table: - ✅ - - + - Other - `chr19:39731245-39738646 `__ - `chr19:39240552-39248006 `__ - @@ -376,6 +415,7 @@ Below is a summary table: - - - + - Metabolism - `chr8:18064617-18084198 `__ - `chr8:18207108-18226689 `__ - @@ -385,6 +425,7 @@ Below is a summary table: - - - + - Metabolism - `chr8:18245791-18261728 `__ - `chr8:18388281-18404218 `__ - @@ -394,6 +435,7 @@ Below is a summary table: - ✅ - ✅ - ✅ + - Metabolism - `chr13:48608702-48624364 `__ - `chr13:48034725-48050221 `__ - @@ -403,6 +445,7 @@ Below is a summary table: - - ✅ - + - Disease - `chr7:75541419-75619173 `__ - `chr7:75912154-75989855 `__ - @@ -412,6 +455,7 @@ Below is a summary table: - - ✅ - + - Other - `chr20:48117410-48187674 `__ - `chr20:49500873-49571137 `__ - @@ -421,6 +465,7 @@ Below is a summary table: - ✅ - ✅ - + - Disease - `chr19:38921339-39081204 `__ - `chr19:38430690-38590564 `__ - @@ -430,6 +475,7 @@ Below is a summary table: - - - + - Excretion - `chr3:121610170-121666034 `__ - `chr3:121891400-121947188 `__ - @@ -439,6 +485,7 @@ Below is a summary table: - - - + - Excretion - `chr6:160627786-160689853 `__ - `chr6:160206754-160268821 `__ - @@ -448,6 +495,7 @@ Below is a summary table: - ✅ - ✅ - ✅ + - Absorption - `chr12:21281127-21395730 `__ - `chr12:21128193-21242796 `__ - @@ -457,6 +505,7 @@ Below is a summary table: - - - + - Absorption - `chr12:20960637-21072845 `__ - `chr12:20807704-20919911 `__ - @@ -466,6 +515,7 @@ Below is a summary table: - - - + - Absorption - `chr11:74859151-74920594 `__ - `chr11:75148106-75209549 `__ - @@ -475,6 +525,7 @@ Below is a summary table: - - - + - Metabolism - `chr16:28601907-28636365 `__ - `chr16:28590586-28625044 `__ - @@ -484,6 +535,7 @@ Below is a summary table: - - ✅ - + - Other - `chr7:139525951-139723125 `__ - `chr7:139826263-140023321 `__ - @@ -493,6 +545,7 @@ Below is a summary table: - ✅ - - ✅ + - Metabolism - `chr6:18125541-18158400 `__ - `chr6:18125310-18158169 `__ - @@ -502,6 +555,7 @@ Below is a summary table: - ✅ - - ✅ + - Excretion - `chr2:234662918-234687945 `__ - `chr2:233754269-233779300 `__ - @@ -511,6 +565,7 @@ Below is a summary table: - - - + - Excretion - `chr2:234624437-234684945 `__ - `chr2:233715735-233776300 `__ - @@ -520,6 +575,7 @@ Below is a summary table: - - - + - Excretion - `chr4:69959191-69981705 `__ - `chr4:69093473-69115987 `__ - @@ -529,6 +585,7 @@ Below is a summary table: - - - + - Excretion - `chr4:69506314-69542494 `__ - `chr4:68640596-68676652 `__ - @@ -538,6 +595,7 @@ Below is a summary table: - - - + - Excretion - `chr4:69399901-69437245 `__ - `chr4:68534183-68571527 `__ - @@ -547,6 +605,7 @@ Below is a summary table: - - - ✅ + - Target - `chr16:31099162-31109320 `__ - `chr16:31087853-31097797 `__ - @@ -556,6 +615,7 @@ Below is a summary table: - - - + - Other - `chr3:14183646-14223172 `__ - `chr3:14142146-14181672 `__ - From a564b35193abee9bec6da26e0e5ba1002cd7bd1f Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Fri, 24 Jun 2022 20:40:42 +0900 Subject: [PATCH 15/16] Fix `api.core.get_ref_allele`-related issue --- pypgx/api/core.py | 2 +- pypgx/api/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pypgx/api/core.py b/pypgx/api/core.py index 4d262bf..eb2c5e1 100644 --- a/pypgx/api/core.py +++ b/pypgx/api/core.py @@ -1372,7 +1372,7 @@ def func1(allele): impacts = [get_variant_impact(x) for x in core_variants] impacts = [x for x in impacts if x] c = len(impacts) * -1 - d = allele == get_ref_allele(gene, assembly=assembly) + d = allele == get_ref_allele(gene) return (a, b, c, d) def func2(allele): diff --git a/pypgx/api/utils.py b/pypgx/api/utils.py index 357377f..86d8056 100644 --- a/pypgx/api/utils.py +++ b/pypgx/api/utils.py @@ -1053,7 +1053,7 @@ def predict_alleles(consolidated_variants): assembly = consolidated_variants.metadata['Assembly'] definition_table = core.build_definition_table(gene, assembly) - ref_allele = core.get_ref_allele(gene, assembly) + ref_allele = core.get_ref_allele(gene) default_allele = core.get_default_allele(gene, assembly) defining_variants = core.list_variants(gene, assembly=assembly) variant_synonyms = core.get_variant_synonyms(gene, assembly=assembly) From 05a8998e5834361f8fbcf297731b22256897df17 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Tue, 12 Jul 2022 11:58:28 +0900 Subject: [PATCH 16/16] Update docs --- CHANGELOG.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 6dc5ab4..1dee978 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,8 +1,8 @@ Changelog ********* -0.17.0 (in development) ------------------------ +0.17.0 (2022-07-12) +------------------- * :issue:`63`: Fix bug in :meth:`api.utils.estimate_phase_beagle` when there is only one variant in input VCF and Beagle throws an error. * Update :command:`compare-genotypes` command to print the entire discordant calls when ``--verbose`` is used.